Commit 2ea5f2f6 authored by Aghiles's avatar Aghiles
Browse files

wikipedia_to_hdoc : gestion des métadonnées

parent f9682788
...@@ -82,22 +82,51 @@ ...@@ -82,22 +82,51 @@
<xslt classpath="lib/saxon9he.jar" in="${tmpFolderPath}/${filename}_prepared.xml" out="${hdocResultPath}/img_metadata_link.xml" style="${xsltFolderPath}/prepare_img_link_metadata.xsl"> <xslt classpath="lib/saxon9he.jar" in="${tmpFolderPath}/${filename}_prepared.xml" out="${hdocResultPath}/img_metadata_link.xml" style="${xsltFolderPath}/prepare_img_link_metadata.xsl">
<factory name="net.sf.saxon.TransformerFactoryImpl"/> <factory name="net.sf.saxon.TransformerFactoryImpl"/>
</xslt> </xslt>
<!--On instancie une xml property qui correspond au xml des url des img -->
<xmlproperty file="${hdocResultPath}/img_metadata_link.xml" prefix="TEST"/> <xmlproperty file="${hdocResultPath}/img_metadata_link.xml" prefix="URLFILE" collapseAttributes="true" delimiter="|"/>
<!--On ajoute ce package pour pouvoir utiliser la boucle for -->
<taskdef resource="net/sf/antcontrib/antlib.xml"> <taskdef resource="net/sf/antcontrib/antlib.xml">
<classpath> <classpath>
<pathelement location="/usr/share/java/lib/ant-contrib-version.jar"/> <pathelement location="/usr/share/java/lib/ant-contrib-version.jar"/>
</classpath> </classpath>
</taskdef> </taskdef>
<for list="${TEST.images.image.link}" param="file"> <var name="iter" value="0"/>
<!--On parcourt les urls des images sous forme de liste-->
<for list="${URLFILE.images.image.link}" param="file" delimiter="|">
<sequential> <sequential>
<!-- <loadresource encoding="UTF-8" property="myURL"> <!--On récupère le contenu de la page associée à l'url-->
<delete file="${tmpFolderPath}/metadata_of_url_img.xml"/>
<var name="iter" value="${iter} + 1"/>
<var name="myURL" unset="true"/>
<loadresource encoding="UTF-8" property="myURL">
<url url="@{file}"/> <url url="@{file}"/>
</loadresource> </loadresource>
<echo file="${hdocResultPath}/@{file}/test.xml" encoding="UTF-8">${myURL}</echo> --> <delete file="${tmpFolderPath}/metadata_of_url_img.xml"/>
<echo file="${tmpFolderPath}/metadata_of_url_img.xml" encoding="UTF-8">${myURL}</echo>
<echo>@{file}</echo> <echo>@{file}</echo>
<xslt classpath="lib/saxon9he.jar" in="${tmpFolderPath}/metadata_of_url_img.xml" out="${hdocResultPath}/img_metadata_temp.xml" style="${xsltFolderPath}/include_metadata_values_to_metadata_file.xsl">
<factory name="net.sf.saxon.TransformerFactoryImpl"/>
<param name="Link" expression="@{file}"/>
</xslt>
<!--Copier prepared en tmp + modifier prepared en ajoutant metadonnée + supprimer prepared + renommer copie en prepared -->
<xslt classpath="lib/saxon9he.jar" in="${tmpFolderPath}/${filename}_prepared.xml" out="${tmpFolderPath}/${filename}_prepared_temp.xml" style="${xsltFolderPath}/include_metadata_to_prepared_file.xsl">
<factory name="net.sf.saxon.TransformerFactoryImpl"/>
<param name="Link" expression="@{file}"/>
<param name="Path" expression="${hdocResultPath}/img_metadata_temp.xml"/>
</xslt>
<delete file="${tmpFolderPath}/${filename}_prepared.xml"/>
<copy file="${tmpFolderPath}/${filename}_prepared_temp.xml" tofile="${tmpFolderPath}/${filename}_prepared.xml"/>
<delete file="${tmpFolderPath}/${filename}_prepared_temp.xml"/>
<delete file="${hdocResultPath}/img_metadata_temp.xml"/>
</sequential> </sequential>
</for> </for>
<delete file="${tmpFolderPath}/metadata_of_url_img.xml"/>
<delete file="${hdocResultPath}/img_metadata_link.xml"/>
</target> </target>
<target name="transformWikiToHdoc"> <target name="transformWikiToHdoc">
......
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Identity transform that enriches one image link of the input document.

  Every node is copied unchanged, except the <a> element that contains an
  <img> and whose @href equals $Link_to_test: that element is copied with
  all of its attributes and children, followed by a hidden
  <span class="img_metadata"> built from the date, creator and licence
  elements found in the document located at $Path.

  Parameters:
    Link                     (required) absolute URL of the image page.
    Path                     (required) location of the metadata document,
                             handed to document().
    Length_string_to_delete  1-based position in $Link where the part to
                             compare starts; defaults to just past
                             'https://fr.wikipedia.org'.
    Link_to_test             href value to look for; defaults to $Link
                             from that position onwards.

  NOTE(review): document() resolves a relative $Path against the base URI
  of this stylesheet; confirm that callers pass a value that resolves to
  the intended file.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                exclude-result-prefixes="xs"
                version="2.0">
  <xsl:output method="xml"/>

  <xsl:param name="Link" required="yes" as="xs:string"/>
  <xsl:param name="Length_string_to_delete" as="xs:integer"
             select="string-length('https://fr.wikipedia.org')+1"/>
  <xsl:param name="Path" required="yes" as="xs:string"/>
  <xsl:param name="Link_to_test" select="substring($Link,$Length_string_to_delete)"/>

  <!-- Identity rule: copy everything that no other template handles. -->
  <xsl:template match="node()|@*">
    <xsl:copy>
      <xsl:apply-templates select="node()|@*"/>
    </xsl:copy>
  </xsl:template>

  <!-- The image link that corresponds to $Link: copy it and append its metadata. -->
  <xsl:template match="a[img][./@href=$Link_to_test]">
    <!-- Load the metadata document once instead of once per field. -->
    <xsl:variable name="metadata" select="document($Path)"/>
    <xsl:copy>
      <xsl:copy-of select="@*"/>
      <!-- node() rather than *: text and comment children of the link are kept too. -->
      <xsl:copy-of select="node()"/>
      <span hidden="hidden" class="img_metadata">
        <xsl:text>Date : </xsl:text><xsl:value-of select="$metadata//date"/>
        <xsl:text> ; Auteur : </xsl:text><xsl:value-of select="$metadata//creator"/>
        <xsl:text>; licence : </xsl:text><xsl:value-of select="$metadata//licence"/>
      </span>
    </xsl:copy>
  </xsl:template>
</xsl:stylesheet>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Builds a small <link> record from an image description page.

  For an input whose <html> element is matched, the result is:
    <link>
      <url>      value of the $Link parameter
      <creator>  anchors found below span[@id='creator']
      <date>     last td of the row holding td[@id='fileinfotpl_date']
      <licence>  span[@class='licensetpl_long']
    </link>

  Parameters:
    Link  (required) URL of the page being processed; written to <url>.
-->
<xsl:stylesheet version="2.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                exclude-result-prefixes="xs">

  <xsl:param name="Link" required="yes" as="xs:string"/>

  <xsl:template match="html">
    <!-- Pick the three metadata sources up front, then emit the record. -->
    <xsl:variable name="creatorAnchors" select="//span[@id='creator']//a"/>
    <xsl:variable name="dateCell" select="//td[@id='fileinfotpl_date']/../td[last()]"/>
    <xsl:variable name="licenceText" select="//span[@class='licensetpl_long']"/>
    <link>
      <url><xsl:value-of select="$Link"/></url>
      <creator><xsl:value-of select="$creatorAnchors"/></creator>
      <date><xsl:value-of select="$dateCell"/></date>
      <licence><xsl:value-of select="$licenceText"/></licence>
    </link>
  </xsl:template>

</xsl:stylesheet>
\ No newline at end of file
...@@ -11,7 +11,11 @@ ...@@ -11,7 +11,11 @@
</images> </images>
</xsl:template> </xsl:template>
<xsl:template match="a[img]"> <xsl:template match="a[img]">
<image> <xsl:param name="test">
<xsl:value-of select="@href"/>
</xsl:param>
<xsl:if test="not(contains($test,'External') or contains($test,'Portail')) and contains($test,'Fichier')">
<image>
<link> <link>
<xsl:text>https://fr.wikipedia.org</xsl:text><xsl:value-of select="@href"/> <xsl:text>https://fr.wikipedia.org</xsl:text><xsl:value-of select="@href"/>
</link> </link>
...@@ -19,6 +23,7 @@ ...@@ -19,6 +23,7 @@
<xsl:value-of select="./img/@src"/> <xsl:value-of select="./img/@src"/>
</ref> </ref>
</image> </image>
</xsl:if>
</xsl:template> </xsl:template>
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment