<?xml version="1.0" encoding="UTF-8"?>
<!-- wiki_to_hdoc_fetcher.ant -->
<project name="wikiToHdoc" default="main" basedir=".">
    
    <!-- wikipediaUrl and filename are the only required parameters.
         They default to the empty string here and are expected to be
         supplied by the caller (e.g. -DwikipediaUrl=... -Dfilename=...). -->
    <property name="wikipediaUrl"            value=""/>
    <property name="filename"                value=""/>

    <!-- Folder paths -->
    <!-- tmp paths: working files of the conversion (see the "clean" target) -->
    <property name="tmpFolderPath"           value="${basedir}/tmp"/>
    <property name="tmpHdocResultFolderPath" value="${tmpFolderPath}/hdoc"/>
    <!-- Folder the fetched Wikipedia page is written to -->
    <property name="in"                      value="${basedir}/input"/>
    <!-- Result, stylesheet and per-document output folders -->
    <property name="resultFolderPath"        value="${basedir}/result"/>
    <property name="xsltFolderPath"          value="${basedir}/xslt"/>
    <property name="hdocResultPath"          value="${resultFolderPath}/${filename}"/>
    <!-- File holding the fetched page; input of the first XSLT pass.
         Former location, kept for reference: -->
    <!--<property name="inputFile"               value="${hdocResultPath}/${filename}.xml"/>-->
    <property name="inputFile"               value="${in}/${filename}.xml"/>

    <!-- Main target: runs the whole conversion pipeline.
         The steps are declared with "depends" instead of a chain of antcall
         tasks: Ant runs the listed targets from left to right, inside the
         current project, so no separate sub-project is created per step.
         None of the listed targets declares dependencies of its own, so the
         execution order is exactly the order written here. -->
    <target name="main"
            description="Fetch a Wikipedia page and convert it to an hdoc archive"
            depends="prepare, prepareHdocStructure, transformWikiToHdoc, zip, clean"/>
    
    <!-- Fetch the Wikipedia page and run the first XSLT pass over it.
         Reads:  ${wikipediaUrl}, ${filename}
         Writes: ${inputFile} (the fetched page) and
                 ${tmpFolderPath}/${filename}_prepared.xml (the prepared page) -->
    <target name="prepare">
        <!-- Stop early with a clear message when a required parameter still has
             its empty default; otherwise the build would fail later on the URL
             or produce files named ".xml" / ".hdoc". -->
        <fail message="Both 'wikipediaUrl' and 'filename' must be set, e.g. -DwikipediaUrl=... -Dfilename=...">
            <condition>
                <or>
                    <equals arg1="${wikipediaUrl}" arg2="" trim="true"/>
                    <equals arg1="${filename}" arg2="" trim="true"/>
                </or>
            </condition>
        </fail>

        <!-- Create the tmp directory for tmp files and the result directory if they do not exist -->
        <mkdir dir="${tmpHdocResultFolderPath}"/>
        <mkdir dir="${hdocResultPath}"/>
        <!-- Created explicitly so that writing ${inputFile} below does not rely
             on the echo task creating missing parent directories. -->
        <mkdir dir="${in}"/>

        <!-- Fetching html from wikipedia.
             Former implementation, kept for reference:
        <java fork="true" failonerror="true" jar="${basedir}/javaSources/FetchWikipediaXml.jar">
            <arg value="${wikipediaUrl}"/>
            <arg value="${filename}"/>
        </java>-->
        <!-- The page is loaded into the myURL property, then written to ${inputFile} as UTF-8 -->
        <loadresource  encoding="UTF-8" property="myURL">
            <url url="${wikipediaUrl}"/>
        </loadresource>
        <echo file="${inputFile}" encoding="UTF-8">${myURL}</echo>

        <!-- Making tmp xml wikipedia file without some elements which are not useful -->
        <xslt classpath="lib/saxon9he.jar" in="${inputFile}" out="${tmpFolderPath}/${filename}_prepared.xml" style="${xsltFolderPath}/prepare_wiki_to_hdoc.xsl">
            <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        </xslt>
    </target>
    
    <!-- Create the hdoc structure of the new file inside ${tmpHdocResultFolderPath}:
           mimetype                 the literal text "application/x-hdoc+zip"
           META-INF/container.xml   declares content.xml as the root file -->
    <target name="prepareHdocStructure">
        <!-- mkdir also creates missing parent directories, so this covers
             ${tmpHdocResultFolderPath} itself. -->
        <mkdir dir="${tmpHdocResultFolderPath}/META-INF" />

        <!-- No prior touch is needed: echo and xslt create their output files.
             Not pre-creating container.xml also keeps the xslt up-to-date check
             from ever comparing against a freshly touched, empty output file. -->
        <echo message="application/x-hdoc+zip" file="${tmpHdocResultFolderPath}/mimetype" />

        <!-- The container description is first written to a tmp file ... -->
        <echoxml file="${tmpFolderPath}/containerTmp.xml">
            <container version="1.0" xmlns="urn:utc.fr:ics:hdoc:container">
                <rootfiles>
                    <rootfile full-path="content.xml" media-type="text/xml" />
                </rootfiles>
            </container>
        </echoxml>
        <!-- ... and then passed through addNamespaceToContainer.xsl to produce the final container.xml.
             NOTE(review): unlike the other xslt calls in this file, no Saxon
             <factory> is declared here, so the default transformer factory is
             used. Confirm whether that is intended. -->
        <xslt classpath="lib/saxon9he.jar" in="${tmpFolderPath}/containerTmp.xml" out="${tmpHdocResultFolderPath}/META-INF/container.xml" style="${xsltFolderPath}/addNamespaceToContainer.xsl"/>
    </target>
    
    <!-- Run the main wikipedia -> hdoc transformation and copy its output
         into the tmp hdoc folder that the "zip" target packages.
         Reads:  ${tmpFolderPath}/${filename}_prepared.xml
         Writes: ${hdocResultPath}/${filename}.html,
                 ${tmpHdocResultFolderPath}/content.xml and
                 ${tmpHdocResultFolderPath}/listing -->
    <target name="transformWikiToHdoc">
        <!-- Applying wikipedia -> hdoc -->
        <xslt classpath="lib/saxon9he.jar" in="${tmpFolderPath}/${filename}_prepared.xml" out="${hdocResultPath}/${filename}.html" style="${xsltFolderPath}/wiki_to_hdoc.xsl">
            <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        </xslt>

        <!-- Copy listing files.
             NOTE(review): nothing in this build file creates
             ${hdocResultPath}/listing; presumably the stylesheet above does.
             Confirm it exists for every page, since a fileset on a missing
             directory fails the build. -->
        <copy todir="${tmpHdocResultFolderPath}/listing" >
            <fileset dir="${hdocResultPath}/listing" includes="**"/>
        </copy>

        <!-- Copy of the html file: the hdoc container declares "content.xml" as its root file, not an html file.
             The html file is kept in the result folder because it can be useful to the user. -->
        <copy file="${hdocResultPath}/${filename}.html" tofile="${tmpHdocResultFolderPath}/content.xml"/>
    </target>
    
    <!-- Package everything under the tmp hdoc folder into
         ${hdocResultPath}/${filename}.hdoc -->
    <target name="zip">
        <zip basedir="${tmpHdocResultFolderPath}"
             destfile="${hdocResultPath}/${filename}.hdoc"/>
    </target>
    
    <!-- Delete the tmp directory.
         The delete is active again: when the tmp folder survives a run, files
         left in ${tmpHdocResultFolderPath} (for example old listing files) are
         packed into the archive built by the next run. -->
    <target name="clean">
        <delete dir="${tmpFolderPath}"/>
    </target>
</project>