Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Stephane Crozat
hdoc
Commits
2ea5f2f6
Commit
2ea5f2f6
authored
Jan 10, 2017
by
Aghiles
Browse files
wikipedia_to_hdoc : gestion des métadonnées
parent
f9682788
Changes
4
Show whitespace changes
Inline
Side-by-side
wikipedia_to_hdoc/ant/wiki_to_hdoc_fetcher.ant
View file @
2ea5f2f6
...
...
@@ -82,22 +82,51 @@
<xslt
classpath=
"lib/saxon9he.jar"
in=
"${tmpFolderPath}/${filename}_prepared.xml"
out=
"${hdocResultPath}/img_metadata_link.xml"
style=
"${xsltFolderPath}/prepare_img_link_metadata.xsl"
>
<factory
name=
"net.sf.saxon.TransformerFactoryImpl"
/>
</xslt>
<xmlproperty
file=
"${hdocResultPath}/img_metadata_link.xml"
prefix=
"TEST"
/>
<!--On instancie une xml property qui correspond au xml des url des img -->
<xmlproperty
file=
"${hdocResultPath}/img_metadata_link.xml"
prefix=
"URLFILE"
collapseAttributes=
"true"
delimiter=
"|"
/>
<!--On ajoute ce package pour pouvoir utiliser la boucle for -->
<taskdef
resource=
"net/sf/antcontrib/antlib.xml"
>
<classpath>
<pathelement
location=
"/usr/share/java/lib/ant-contrib-version.jar"
/>
</classpath>
</taskdef>
<for
list=
"${TEST.images.image.link}"
param=
"file"
>
<var
name=
"iter"
value=
"0"
/>
<!--On parcourt les urls des images sous forme de liste-->
<for
list=
"${URLFILE.images.image.link}"
param=
"file"
delimiter=
"|"
>
<sequential>
<!-- <loadresource encoding="UTF-8" property="myURL">
<!--On récupère le contenu de la page associée à l'url-->
<delete
file=
"${tmpFolderPath}/metadata_of_url_img.xml"
/>
<var
name=
"iter"
value=
"${iter} + 1"
/>
<var
name=
"myURL"
unset=
"true"
/>
<loadresource
encoding=
"UTF-8"
property=
"myURL"
>
<url
url=
"@{file}"
/>
</loadresource>
<echo file="${hdocResultPath}/@{file}/test.xml" encoding="UTF-8">${myURL}</echo> -->
<delete
file=
"${tmpFolderPath}/metadata_of_url_img.xml"
/>
<echo
file=
"${tmpFolderPath}/metadata_of_url_img.xml"
encoding=
"UTF-8"
>
${myURL}
</echo>
<echo>
@{file}
</echo>
<xslt
classpath=
"lib/saxon9he.jar"
in=
"${tmpFolderPath}/metadata_of_url_img.xml"
out=
"${hdocResultPath}/img_metadata_temp.xml"
style=
"${xsltFolderPath}/include_metadata_values_to_metadata_file.xsl"
>
<factory
name=
"net.sf.saxon.TransformerFactoryImpl"
/>
<param
name=
"Link"
expression=
"@{file}"
/>
</xslt>
<!--Copier prepared en tmp + modifier prepared en ajoutant les métadonnées + supprimer prepared + renommer la copie en prepared -->
<xslt
classpath=
"lib/saxon9he.jar"
in=
"${tmpFolderPath}/${filename}_prepared.xml"
out=
"${tmpFolderPath}/${filename}_prepared_temp.xml"
style=
"${xsltFolderPath}/include_metadata_to_prepared_file.xsl"
>
<factory
name=
"net.sf.saxon.TransformerFactoryImpl"
/>
<param
name=
"Link"
expression=
"@{file}"
/>
<param
name=
"Path"
expression=
"${hdocResultPath}/img_metadata_temp.xml"
/>
</xslt>
<delete
file=
"${tmpFolderPath}/${filename}_prepared.xml"
/>
<copy
file=
"${tmpFolderPath}/${filename}_prepared_temp.xml"
tofile=
"${tmpFolderPath}/${filename}_prepared.xml"
/>
<delete
file=
"${tmpFolderPath}/${filename}_prepared_temp.xml"
/>
<delete
file=
"${hdocResultPath}/img_metadata_temp.xml"
/>
</sequential>
</for>
<delete
file=
"${tmpFolderPath}/metadata_of_url_img.xml"
/>
<delete
file=
"${hdocResultPath}/img_metadata_link.xml"
/>
</target>
<target
name=
"transformWikiToHdoc"
>
...
...
wikipedia_to_hdoc/xslt/include_metadata_to_prepared_file.xsl
0 → 100644
View file @
2ea5f2f6
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Copies the input document unchanged, except that every anchor containing an
  image (a[img]) whose @href equals $Link_to_test gets one extra child: a hidden
  <span class="img_metadata"> holding the date, creator and licence read from
  the metadata document located at $Path.

  Parameters:
    Link                     (required, xs:string)  URL identifying the image anchor to enrich.
    Path                     (required, xs:string)  location of the metadata document
                                                    (expected to contain date, creator and
                                                    licence elements).
    Length_string_to_delete  (optional, xs:integer) position of the first character of $Link
                                                    that is kept; defaults to just past the
                                                    'https://fr.wikipedia.org' prefix.
    Link_to_test             (optional)             value compared with a/@href; defaults to
                                                    $Link from position $Length_string_to_delete.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                exclude-result-prefixes="xs"
                version="2.0">

  <xsl:output method="xml"/>

  <xsl:param name="Link" required="yes" as="xs:string"/>

  <!-- Computed with select= so the default is a real xs:integer rather than a
       text node that has to be atomised and cast back to an integer. -->
  <xsl:param name="Length_string_to_delete" as="xs:integer"
             select="string-length('https://fr.wikipedia.org') + 1"/>

  <!-- NOTE(review): document() treats this value as a URI reference resolved
       against the stylesheet's base URI. Confirm that callers pass something
       usable as a URI (no backslashes or unescaped spaces). -->
  <xsl:param name="Path" required="yes" as="xs:string"/>

  <!-- Computed with select= so the default is a plain string instead of a
       temporary tree wrapping a text node. The comparison result is the same. -->
  <xsl:param name="Link_to_test"
             select="substring($Link, $Length_string_to_delete)"/>

  <!-- Identity transform: everything not matched below is copied as is. -->
  <xsl:template match="node()|@*">
    <xsl:copy>
      <xsl:apply-templates select="node()|@*"/>
    </xsl:copy>
  </xsl:template>

  <!-- Image anchor whose href is the link under test: copy its attributes and
       child elements, then append the hidden metadata span. -->
  <xsl:template match="a[img][./@href=$Link_to_test]">
    <!-- Bind the metadata document once instead of repeating document($Path). -->
    <xsl:variable name="metadata" select="document($Path)"/>
    <xsl:copy>
      <xsl:copy-of select="@*"/>
      <xsl:copy-of select="./*"/>
      <span hidden="hidden" class="img_metadata">
        <!-- Labels are kept on a single line: whitespace inside xsl:text is
             emitted verbatim, so a line break here would end up in the output. -->
        <xsl:text>Date : </xsl:text>
        <xsl:value-of select="$metadata//date"/>
        <xsl:text> ; Auteur : </xsl:text>
        <xsl:value-of select="$metadata//creator"/>
        <xsl:text> ; licence : </xsl:text>
        <xsl:value-of select="$metadata//licence"/>
      </span>
    </xsl:copy>
  </xsl:template>

</xsl:stylesheet>
\ No newline at end of file
wikipedia_to_hdoc/xslt/include_metadata_values_to_metadata_file.xsl
0 → 100644
View file @
2ea5f2f6
<?xml version="1.0" encoding="UTF-8"?>
<!--
  For an input whose html element is matched, emits a single <link> record with
  four children:
    url      the value of the required $Link parameter
    creator  string value of the a elements found under span[@id='creator']
    date     last td sharing a parent with td[@id='fileinfotpl_date']
    licence  string value of span[@class='licensetpl_long']
-->
<xsl:stylesheet version="2.0"
                exclude-result-prefixes="xs"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform">

  <!-- Written verbatim into the <url> child of the result. -->
  <xsl:param name="Link" as="xs:string" required="yes"/>

  <xsl:template match="html">
    <!-- All three selections start at the document root, not at the matched element. -->
    <xsl:variable name="creatorLinks" select="//span[@id='creator']//a"/>
    <xsl:variable name="dateCell" select="//td[@id='fileinfotpl_date']/../td[last()]"/>
    <xsl:variable name="licenceText" select="//span[@class='licensetpl_long']"/>

    <link>
      <url><xsl:value-of select="$Link"/></url>
      <creator><xsl:value-of select="$creatorLinks"/></creator>
      <date><xsl:value-of select="$dateCell"/></date>
      <licence><xsl:value-of select="$licenceText"/></licence>
    </link>
  </xsl:template>

</xsl:stylesheet>
\ No newline at end of file
wikipedia_to_hdoc/xslt/prepare_img_link_metadata.xsl
View file @
2ea5f2f6
...
...
@@ -11,6 +11,10 @@
</images>
</xsl:template>
<xsl:template
match=
"a[img]"
>
<xsl:param
name=
"test"
>
<xsl:value-of
select=
"@href"
/>
</xsl:param>
<xsl:if
test=
"not(contains($test,'External') or contains($test,'Portail')) and contains($test,'Fichier')"
>
<image>
<link>
<xsl:text>
https://fr.wikipedia.org
</xsl:text><xsl:value-of
select=
"@href"
/>
...
...
@@ -19,6 +23,7 @@
<xsl:value-of
select=
"./img/@src"
/>
</ref>
</image>
</xsl:if>
</xsl:template>
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment