<?xml version="1.0" encoding="UTF-8"?>

<project name="wikiToHdoc" default="main" basedir="..">
    
    <!--
        Inputs. wikipediaUrl and filename are the only required parameters;
        both default to the empty string here and are meant to be supplied
        by the caller (for example with -DwikipediaUrl=... -Dfilename=...).
    -->
    <property value="" name="wikipediaUrl"/>
    <property value="" name="filename"/>

    <!-- Scratch area: the clean target deletes tmpFolderPath, and main calls clean first -->
    <property value="${basedir}/tmp"        name="tmpFolderPath"/>
    <property value="${tmpFolderPath}/hdoc" name="tmpHdocResultFolderPath"/>

    <!-- Fixed project folders -->
    <property value="${basedir}/input"  name="in"/>
    <property value="${basedir}/result" name="resultFolderPath"/>
    <property value="${basedir}/xslt"   name="xsltFolderPath"/>

    <!-- Per-document output folder, named after the filename parameter -->
    <property value="${resultFolderPath}/${filename}" name="hdocResultPath"/>

    <!-- Name of the downloaded page, relative to the "in" folder -->
    <!--<property name="inputFile"               value="${hdocResultPath}/${filename}.xml"/>-->
    <property value="${filename}.xml" name="inputFile"/>

    <!--
        Main target: runs the whole conversion, one antcall per step, in
        this order: clean, prepare, prepareHdocStructure, getImgMetadata,
        transformWikiToHdoc, zip.
    -->
    <target name="main">
        <!--
            wikipediaUrl and filename default to the empty string. Stop
            here with a clear message instead of letting a later step fail
            on an empty URL or write files whose names have an empty stem.
        -->
        <fail message="Both wikipediaUrl and filename must be set, e.g. -DwikipediaUrl=... -Dfilename=...">
            <condition>
                <or>
                    <equals arg1="${wikipediaUrl}" arg2="" trim="true"/>
                    <equals arg1="${filename}" arg2="" trim="true"/>
                </or>
            </condition>
        </fail>

        <!-- Start from an empty tmp folder -->
        <antcall target="clean"/>
        <!-- Download the page and build the tmp/result folders -->
        <antcall target="prepare"/>
        <antcall target="prepareHdocStructure"/>
        <antcall target="getImgMetadata"/>
        <!-- Produce the hdoc content, then package it -->
        <antcall target="transformWikiToHdoc"/>
        <antcall target="zip"/>
    </target>
    
    <!--
        Download the Wikipedia page into the input folder, reduce it with
        the "prepare" stylesheet, then generate an Ant script from the
        prepared page and run it. According to the stylesheet name and the
        original comment, that script retrieves the page resources (images).
    -->
    <target name="prepare">
        <!-- Create the tmp and result directories if they do not exist -->
        <mkdir dir="${tmpHdocResultFolderPath}"/>
        <mkdir dir="${hdocResultPath}"/>
        <!-- The downloaded page is written into ${in}; nothing else in this build creates that folder -->
        <mkdir dir="${in}"/>

        <!-- Fetching html from wikipedia (previous implementation, kept for reference)
        <java fork="true" failonerror="true" jar="${basedir}/javaSources/FetchWikipediaXml.jar">
            <arg value="${wikipediaUrl}"/>
            <arg value="${filename}"/>
        </java>-->
        <!-- Read the page at wikipediaUrl into a property, then write it out as the input file -->
        <loadresource encoding="UTF-8" property="myURL">
            <url url="${wikipediaUrl}"/>
        </loadresource>
        <echo file="${in}/${inputFile}" encoding="UTF-8">${myURL}</echo>

        <!-- Make a tmp copy of the wikipedia file without the elements that are not useful -->
        <xslt classpath="lib/saxon9he.jar"
              in="${in}/${inputFile}"
              out="${tmpFolderPath}/${filename}_prepared.xml"
              style="${xsltFolderPath}/prepare_wiki_to_hdoc.xsl">
            <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        </xslt>

        <!-- Generate the Ant script that will get our resources (images) -->
        <xslt classpath="lib/saxon9he.jar"
              in="${tmpFolderPath}/${filename}_prepared.xml"
              out="${tmpFolderPath}/${filename}_ressources.xml"
              style="${xsltFolderPath}/get_ressources_urls.xsl">
            <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        </xslt>

        <!-- Run the generated script -->
        <ant antfile="${tmpFolderPath}/${filename}_ressources.xml"/>
    </target>
    
    <!--
        Create the hdoc structure of the new file inside the tmp folder:
        a "mimetype" file and META-INF/container.xml, which declares
        content.xml as the root file.
    -->
    <target name="prepareHdocStructure">
        <!-- mkdir also creates the missing parent folder ${tmpHdocResultFolderPath} -->
        <mkdir dir="${tmpHdocResultFolderPath}/META-INF"/>

        <!-- echo creates the file itself, so no prior touch is needed -->
        <echo message="application/x-hdoc+zip" file="${tmpHdocResultFolderPath}/mimetype"/>

        <!-- Write the container description to a tmp file first -->
        <echoxml file="${tmpFolderPath}/containerTmp.xml">
            <container version="1.0" xmlns="urn:utc.fr:ics:hdoc:container">
                <rootfiles>
                    <rootfile full-path="content.xml" media-type="text/xml" />
                </rootfiles>
            </container>
        </echoxml>

        <!--
            Produce the final container.xml from the tmp file.
            The output is no longer pre-created with touch: an existing
            output carrying a fresh timestamp can make the xslt task treat
            it as up to date and skip the transformation. force="true"
            makes the regeneration unconditional.
            The Saxon factory is selected explicitly, as in the other xslt
            calls of this build file.
        -->
        <xslt classpath="lib/saxon9he.jar"
              force="true"
              in="${tmpFolderPath}/containerTmp.xml"
              out="${tmpHdocResultFolderPath}/META-INF/container.xml"
              style="${xsltFolderPath}/addNamespaceToContainer.xsl">
            <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        </xslt>
    </target>

    <!--
        Build the list of image metadata links from the prepared page and
        iterate over it. For now each link is only echoed; the download
        step inside the loop is commented out.
    -->
    <target name="getImgMetadata">
        <!-- Preparing the links for the metadata extraction -->
        <xslt classpath="lib/saxon9he.jar"
              in="${tmpFolderPath}/${filename}_prepared.xml"
              out="${hdocResultPath}/img_metadata_link.xml"
              style="${xsltFolderPath}/prepare_img_link_metadata.xsl">
            <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        </xslt>

        <!-- Expose the content of the generated file as properties prefixed with TEST -->
        <xmlproperty file="${hdocResultPath}/img_metadata_link.xml" prefix="TEST"/>

        <!--
            Location of the ant-contrib jar that provides the "for" and "if"
            tasks. The default is the path that used to be hard-coded here;
            override it with -DantContribJar=/path/to/ant-contrib.jar.
        -->
        <property name="antContribJar" value="/usr/share/java/lib/ant-contrib-version.jar"/>
        <taskdef resource="net/sf/antcontrib/antlib.xml">
            <classpath>
                <pathelement location="${antContribJar}"/>
            </classpath>
        </taskdef>

        <!--
            Only loop when the property exists: an unset property is left
            unexpanded, so the loop would otherwise run once over the literal
            text of the property reference.
            NOTE(review): "for" splits its list on commas by default, so a
            link that itself contains a comma would be split; confirm
            against the generated img_metadata_link.xml.
        -->
        <if>
            <isset property="TEST.images.image.link"/>
            <then>
                <for list="${TEST.images.image.link}" param="file">
                    <sequential>
                        <!--       <loadresource  encoding="UTF-8" property="myURL">
                                     <url url="@{file}"/>
                        </loadresource>
                        <echo file="${hdocResultPath}/@{file}/test.xml" encoding="UTF-8">${myURL}</echo>   -->
                        <echo>@{file}</echo>
                    </sequential>
                </for>
            </then>
        </if>
    </target>

    <!--
        Apply the wikipedia -> hdoc stylesheet to the prepared page, then
        assemble the hdoc tree in the tmp folder from the result folder.
    -->
    <target name="transformWikiToHdoc">
        <!-- Prepared page in, ${filename}.html out (written to the result folder) -->
        <xslt style="${xsltFolderPath}/wiki_to_hdoc.xsl"
              in="${tmpFolderPath}/${filename}_prepared.xml"
              out="${hdocResultPath}/${filename}.html"
              classpath="lib/saxon9he.jar">
            <factory name="net.sf.saxon.TransformerFactoryImpl"/>
        </xslt>

        <!--
            Bring the side folders into the hdoc tree. failonerror is off
            for each copy, so a folder that was not produced does not stop
            the build.
        -->
        <!-- Listing files -->
        <copy failonerror="false" todir="${tmpHdocResultFolderPath}/listing">
            <fileset includes="**" dir="${hdocResultPath}/listing"/>
        </copy>

        <!-- Tables files -->
        <copy failonerror="false" todir="${tmpHdocResultFolderPath}/tables">
            <fileset includes="**" dir="${hdocResultPath}/tables"/>
        </copy>

        <!-- Images files -->
        <copy failonerror="false" todir="${tmpHdocResultFolderPath}/ressources">
            <fileset includes="**" dir="${hdocResultPath}/ressources"/>
        </copy>

        <!--
            The hdoc needs the content as "content.xml", not as an html
            file, so the transformed page is copied under that name. The
            html file stays in the result folder because it can be useful
            to the user.
        -->
        <copy tofile="${tmpHdocResultFolderPath}/content.xml"
              file="${hdocResultPath}/${filename}.html"/>
    </target>
    
    <!-- Package the content of the tmp hdoc folder as ${filename}.hdoc in the result folder -->
    <target name="zip">
        <zip basedir="${tmpHdocResultFolderPath}"
             destfile="${hdocResultPath}/${filename}.hdoc"/>
    </target>
    
    <!-- Remove the tmp folder together with everything it contains -->
    <target name="clean">
        <delete dir="${tmpFolderPath}"/>
    </target>
</project>