Commit 2ea5f2f6 authored by Aghiles

wikipedia_to_hdoc : gestion des métadonnées

parent f9682788
......@@ -82,22 +82,51 @@
<xslt classpath="lib/saxon9he.jar" in="${tmpFolderPath}/${filename}_prepared.xml" out="${hdocResultPath}/img_metadata_link.xml" style="${xsltFolderPath}/prepare_img_link_metadata.xsl">
<factory name="net.sf.saxon.TransformerFactoryImpl"/>
</xslt>
<xmlproperty file="${hdocResultPath}/img_metadata_link.xml" prefix="TEST"/>
<!--On instancie une xml property qui correspond au xml des url des img -->
<xmlproperty file="${hdocResultPath}/img_metadata_link.xml" prefix="URLFILE" collapseAttributes="true" delimiter="|"/>
<!--On ajoute ce package pour pouvoir utiliser la boucle for -->
<taskdef resource="net/sf/antcontrib/antlib.xml">
<classpath>
<pathelement location="/usr/share/java/lib/ant-contrib-version.jar"/>
</classpath>
</taskdef>
<for list="${TEST.images.image.link}" param="file">
<var name="iter" value="0"/>
<!--On parcourt les urls des images sous forme de liste-->
<for list="${URLFILE.images.image.link}" param="file" delimiter="|">
<sequential>
<!-- <loadresource encoding="UTF-8" property="myURL">
<!--On récupère le contenu de la page associée à l'url-->
<delete file="${tmpFolderPath}/metadata_of_url_img.xml"/>
<var name="iter" value="${iter} + 1"/>
<var name="myURL" unset="true"/>
<loadresource encoding="UTF-8" property="myURL">
<url url="@{file}"/>
</loadresource>
<echo file="${hdocResultPath}/@{file}/test.xml" encoding="UTF-8">${myURL}</echo> -->
</loadresource>
<delete file="${tmpFolderPath}/metadata_of_url_img.xml"/>
<echo file="${tmpFolderPath}/metadata_of_url_img.xml" encoding="UTF-8">${myURL}</echo>
<echo>@{file}</echo>
<xslt classpath="lib/saxon9he.jar" in="${tmpFolderPath}/metadata_of_url_img.xml" out="${hdocResultPath}/img_metadata_temp.xml" style="${xsltFolderPath}/include_metadata_values_to_metadata_file.xsl">
<factory name="net.sf.saxon.TransformerFactoryImpl"/>
<param name="Link" expression="@{file}"/>
</xslt>
<!--Copier prepared en tmp + modifier prepared en ajoutant metadonnée + supprimer prepared + renommer copie en prepared -->
<xslt classpath="lib/saxon9he.jar" in="${tmpFolderPath}/${filename}_prepared.xml" out="${tmpFolderPath}/${filename}_prepared_temp.xml" style="${xsltFolderPath}/include_metadata_to_prepared_file.xsl">
<factory name="net.sf.saxon.TransformerFactoryImpl"/>
<param name="Link" expression="@{file}"/>
<param name="Path" expression="${hdocResultPath}/img_metadata_temp.xml"/>
</xslt>
<delete file="${tmpFolderPath}/${filename}_prepared.xml"/>
<copy file="${tmpFolderPath}/${filename}_prepared_temp.xml" tofile="${tmpFolderPath}/${filename}_prepared.xml"/>
<delete file="${tmpFolderPath}/${filename}_prepared_temp.xml"/>
<delete file="${hdocResultPath}/img_metadata_temp.xml"/>
</sequential>
</for>
<delete file="${tmpFolderPath}/metadata_of_url_img.xml"/>
<delete file="${hdocResultPath}/img_metadata_link.xml"/>
</target>
<target name="transformWikiToHdoc">
......
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Copies the input document unchanged, except for every <a> element that
    has an <img> child and whose href equals $Link_to_test: that anchor is
    copied with an extra hidden <span class="img_metadata"> appended, filled
    with the date, creator and licence values read from the document at $Path.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    exclude-result-prefixes="xs"
    version="2.0">
    <xsl:output method="xml"/>

    <!-- Link (required): the link identifying the anchor to annotate. -->
    <xsl:param name="Link" required="yes" as="xs:string"/>

    <!--
        1-based position in $Link at which the part compared against @href
        starts. The default skips the prefix 'https://fr.wikipedia.org'.
    -->
    <xsl:param name="Length_string_to_delete" as="xs:integer"
        select="string-length('https://fr.wikipedia.org') + 1"/>

    <!-- Path (required): location handed to document() to load the metadata. -->
    <xsl:param name="Path" required="yes" as="xs:string"/>

    <!-- The href value an anchor must carry to receive the metadata span. -->
    <xsl:param name="Link_to_test"
        select="substring($Link, $Length_string_to_delete)"/>

    <!-- Identity transform: everything not matched below is copied as is. -->
    <xsl:template match="node()|@*">
        <xsl:copy>
            <xsl:apply-templates select="node()|@*"/>
        </xsl:copy>
    </xsl:template>

    <!-- Image anchor targeted by $Link_to_test: copy it and append the metadata. -->
    <xsl:template match="a[img][./@href=$Link_to_test]">
        <!-- Load the metadata document once instead of once per field. -->
        <xsl:variable name="metadata" select="document($Path)"/>
        <xsl:copy>
            <xsl:copy-of select="@*"/>
            <!--
                node() rather than *: text, comment and processing-instruction
                children of the anchor are kept, as the identity template does
                for every other element.
            -->
            <xsl:copy-of select="node()"/>
            <span hidden="hidden" class="img_metadata">
                <xsl:text>Date : </xsl:text>
                <xsl:value-of select="$metadata//date"/>
                <xsl:text> ; Auteur : </xsl:text>
                <xsl:value-of select="$metadata//creator"/>
                <xsl:text>; licence : </xsl:text>
                <xsl:value-of select="$metadata//licence"/>
            </span>
        </xsl:copy>
    </xsl:template>
</xsl:stylesheet>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Builds a small <link> record (url, creator, date, licence) from an input
    document whose root element is <html>. The url is the $Link parameter;
    the other three values are string values of nodes selected in the input.
-->
<xsl:stylesheet version="2.0"
    exclude-result-prefixes="xs"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

    <!-- Link (required): written verbatim into the <url> element. -->
    <xsl:param name="Link" required="yes" as="xs:string"/>

    <xsl:template match="html">
        <!-- <a> descendants of any <span id="creator">. -->
        <xsl:variable name="creatorAnchors" select="//span[@id='creator']//a"/>
        <!-- Last <td> child of the parent of any <td id="fileinfotpl_date">. -->
        <xsl:variable name="dateCells" select="//td[@id='fileinfotpl_date']/../td[last()]"/>
        <!-- Any <span class="licensetpl_long">. -->
        <xsl:variable name="licenceSpans" select="//span[@class='licensetpl_long']"/>

        <link>
            <url><xsl:value-of select="$Link"/></url>
            <creator><xsl:value-of select="$creatorAnchors"/></creator>
            <date><xsl:value-of select="$dateCells"/></date>
            <licence><xsl:value-of select="$licenceSpans"/></licence>
        </link>
    </xsl:template>
</xsl:stylesheet>
\ No newline at end of file
......@@ -11,7 +11,11 @@
</images>
</xsl:template>
<xsl:template match="a[img]">
<image>
<xsl:param name="test">
<xsl:value-of select="@href"/>
</xsl:param>
<xsl:if test="not(contains($test,'External') or contains($test,'Portail')) and contains($test,'Fichier')">
<image>
<link>
<xsl:text>https://fr.wikipedia.org</xsl:text><xsl:value-of select="@href"/>
</link>
......@@ -19,6 +23,7 @@
<xsl:value-of select="./img/@src"/>
</ref>
</image>
</xsl:if>
</xsl:template>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment