Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Stephane Crozat
hdoc
Commits
e9984626
Commit
e9984626
authored
Dec 18, 2016
by
Neveux Anais
Browse files
Merge branch 'master' of gitlab.utc.fr:crozatst/hdoc
parents
b718f5dc
31638183
Changes
21
Hide whitespace changes
Inline
Side-by-side
framapad_to_hdoc/README.md
View file @
e9984626
...
...
@@ -54,7 +54,14 @@ Example :
Currently available on: https://framemo.org/framapad_to_opale
See also : https://bimestriel.framapad.org/p/nf29_framapad_to_opale for the full documentation of our working process.
## TODO
-
Nested Lists
-
Indentation
-
Titles (and therefore Structure of the doc)
-
Coloured text
-
Code
-
Markdown
## Technical notes
...
...
hdoc_to_canoprof/samples/Sample_V1.hdoc
0 → 100644
View file @
e9984626
File added
hdoc_to_canoprof/xsl/transformation2.xsl
View file @
e9984626
...
...
@@ -6,11 +6,11 @@
xmlns:fn=
"http://www.w3.org/2005/xpath-functions"
xmlns:xs=
"http://www.w3.org/2001/XMLSchema"
exclude-result-prefixes=
"xs"
>
<xsl:output
method=
"xml"
indent=
"yes"
encoding=
"UTF-8"
/>
<xsl:param
name=
"filename"
/>
<xsl:strip-space
elements=
"*"
/>
<!-- This template matches the root. One hdoc file = one Canoprof "Programme". -->
<xsl:template
match=
"h:html"
>
<sc:item
xmlns:sc=
"http://www.utc.fr/ics/scenari/v3/core"
>
...
...
@@ -20,7 +20,7 @@
</cp:program>
</sc:item>
</xsl:template>
<!-- Head related templates. -->
<xsl:template
match=
"h:head"
>
<cp:programM>
...
...
@@ -28,22 +28,22 @@
<sp:title>
<xsl:value-of
select=
"./h:title"
/>
</sp:title>
<!-- Hdoc's "date" = Programme's period. -->
<xsl:apply-templates
select=
"./h:meta[@name='date']"
/>
<!-- Hdoc's "description" = Programme's resume. -->
<xsl:apply-templates
select=
"./h:meta[@name='description']"
/>
</cp:programM>
</xsl:template>
<xsl:template
match=
"h:head/h:meta[@name='date']"
>
<sp:period>
<xsl:value-of
select=
"./@content"
/>
</sp:period>
</xsl:template>
<xsl:template
match=
"h:head/h:meta[@name='description']"
>
<sp:abstract>
<cp:txtDesc>
...
...
@@ -54,6 +54,7 @@
</sp:abstract>
</xsl:template>
<!-- Body related templates. -->
<xsl:template
match=
"h:body"
>
<xsl:if
test=
"./*"
>
...
...
@@ -66,19 +67,216 @@
<!-- CanoProf's Activite TetM = hdoc's body/Section/Section = Opale's Grain -->
<!-- CanoProf's Activite TetM (section) = hdoc's body/Section/Section/(Section...) = Opale's Grain (partie) -->
<!--
TODO :
CanoProf's Seance = hdoc's body/Section = Opale's Division -->
<!-- CanoProf's Seance = hdoc's body/Section = Opale's Division
or Activite
-->
<!-- A top-level hdoc section (body/section) becomes one Canoprof session.
     A section without any child element produces no output. -->
<xsl:template match="h:body/h:section">
  <xsl:if test="*">
    <sp:session>
      <cp:session>
        <cp:sessionM>
          <sp:title>
            <!-- Placeholder emitted when the h1 carries no direct text node. -->
            <xsl:if test="not(h:header/h:h1/text())">Untitled</xsl:if>
            <xsl:value-of select="h:header/h:h1"/>
          </sp:title>
        </cp:sessionM>
        <!-- Every child element is dispatched to its own template. -->
        <xsl:apply-templates select="*"/>
      </cp:session>
    </sp:session>
  </xsl:if>
</xsl:template>
<!--
TODO :
if Section have only a dev (no sub section), we create a short activty to print text into a seance -->
<!-- If a section has only a div (no sub-section), we create a short activity so that
     its text is displayed inside the session. The activity is titled with the h1 of
     the enclosing section. -->
<xsl:template match="h:body/h:section/h:div">
  <sp:shortActivity>
    <cp:shortActivity>
      <cp:activityM>
        <sp:title><xsl:value-of select="parent::*/h:header/h:h1"/></sp:title>
      </cp:activityM>
      <sp:body>
        <cp:flow>
          <xsl:apply-templates select="*"/>
        </cp:flow>
      </sp:body>
    </cp:shortActivity>
  </sp:shortActivity>
</xsl:template>
<!-- Every section introduction (a header div typed 'introduction') is converted into
     an abstract: its direct text is written into a whitespace-preserving paragraph. -->
<!-- NOTE(review): the h:header template at the end of this stylesheet is empty, so this
     rule presumably only fires where header children are applied explicitly; confirm. -->
<xsl:template
match=
"h:header/h:div[@data-hdoc-type = 'introduction']"
>
<sp:abstract>
<cp:txtDesc>
<sc:para
xml:space=
"preserve"
>
<xsl:value-of
select=
"./text()"
/>
</sc:para>
</cp:txtDesc>
</sp:abstract>
</xsl:template>
<!-- Text related templates -->
<!-- Text blocks (p, ul, ol). Only a block with no preceding p/ul/ol sibling does any
     work: it calls "blockloop", which walks the block and its following siblings.
     Blocks that do have such a preceding sibling produce nothing here. -->
<xsl:template match="h:p | h:ul | h:ol">
  <xsl:if test="not(preceding-sibling::h:p) and not(preceding-sibling::h:ul) and not(preceding-sibling::h:ol)">
    <xsl:choose>
      <!-- Directly inside a div: wrap the run in Canoprof's text markup. -->
      <xsl:when test="parent::*[name() = 'div']">
        <sp:txt>
          <cp:txt>
            <xsl:call-template name="blockloop"/>
          </cp:txt>
        </sp:txt>
      </xsl:when>
      <!-- Anywhere else (e.g. within a list or a table): emit the run unwrapped. -->
      <xsl:otherwise>
        <xsl:call-template name="blockloop"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:if>
</xsl:template>
<!-- Named template "blockloop": iterates over the context node and all of its following
     siblings and converts each one according to its name:
       p  with content              becomes sc:para (whitespace preserved)
       ul with text in some li/p    becomes sc:itemizedList
       ol with text in some li/p    becomes sc:orderedList
     Any other sibling, and any empty p/ul/ol, produces nothing. -->
<!-- NOTE(review): the tests compare name() with an unprefixed name, so this presumably
     expects the input hdoc elements to be written without a namespace prefix; confirm. -->
<xsl:template
name=
"blockloop"
>
<xsl:for-each
select=
". | ./following-sibling::*"
>
<xsl:choose>
<!-- Paragraph -->
<xsl:when
test=
"name() = 'p' and (./* | ./text())"
>
<sc:para
xml:space=
"preserve"
>
<xsl:apply-templates
select=
"./* | ./text()"
/>
</sc:para>
</xsl:when>
<!-- Unordered list -->
<xsl:when
test=
"name() = 'ul' and (./h:li/h:p/text())"
>
<sc:itemizedList>
<xsl:apply-templates
select=
"./* | ./text()"
/>
</sc:itemizedList>
</xsl:when>
<!-- Ordered list -->
<xsl:when
test=
"name() = 'ol' and (./h:li/h:p/text())"
>
<sc:orderedList>
<xsl:apply-templates
select=
"./* | ./text()"
/>
</sc:orderedList>
</xsl:when>
</xsl:choose>
</xsl:for-each>
</xsl:template>
<!-- TODO : CanoProf's Activite TetM = hdoc's body/Section/Section = Opale's Grain -->
<xsl:template
match=
"h:body/h:section/h:section"
>
<!-- List item: its child elements and text nodes are converted inside sc:listItem. -->
<xsl:template match="h:li">
  <sc:listItem>
    <xsl:apply-templates select="* | text()"/>
  </sc:listItem>
</xsl:template>
<!-- Italic: a non-empty <i> becomes an inline style with role "specific". -->
<xsl:template match="h:i">
  <xsl:if test="* | text()">
    <sc:inlineStyle role="specific">
      <xsl:apply-templates select="* | text()"/>
    </sc:inlineStyle>
  </xsl:if>
</xsl:template>
<!-- Emphasis: a non-empty <em> becomes an inline style with role "emphasis". -->
<xsl:template match="h:em">
  <xsl:if test="* | text()">
    <sc:inlineStyle role="emphasis">
      <xsl:apply-templates select="* | text()"/>
    </sc:inlineStyle>
  </xsl:if>
</xsl:template>
<!-- Inline quotation: a non-empty <q> becomes a phrase with role "quote". -->
<xsl:template match="h:q">
  <xsl:if test="* | text()">
    <sc:phrase role="quote">
      <xsl:apply-templates select="* | text()"/>
    </sc:phrase>
  </xsl:if>
</xsl:template>
<!-- Subscript: a non-empty <sub> becomes a text leaf with role "ind". -->
<xsl:template match="h:sub">
  <xsl:if test="* | text()">
    <sc:textLeaf role="ind">
      <xsl:apply-templates select="* | text()"/>
    </sc:textLeaf>
  </xsl:if>
</xsl:template>
<!-- Superscript: a non-empty <sup> becomes a text leaf with role "exp". -->
<xsl:template match="h:sup">
  <xsl:if test="* | text()">
    <sc:textLeaf role="exp">
      <xsl:apply-templates select="* | text()"/>
    </sc:textLeaf>
  </xsl:if>
</xsl:template>
<!-- Hyperlink: a non-empty <a> becomes a phrase with role "url". The href is written
     to cp:link/sp:url, the optional title attribute to sp:title, and the string value
     of the link follows as the visible text. -->
<!-- NOTE(review): sp:title is written after cp:link is closed, i.e. as its sibling;
     confirm that this is where the Canoprof model expects the link title. -->
<xsl:template match="h:a">
  <xsl:if test="* | text()">
    <sc:phrase role="url">
      <cp:link xmlns:sc="http://www.utc.fr/ics/scenari/v3/core"
               xmlns:cp="canope.fr:canoprof"
               xmlns:sp="http://www.utc.fr/ics/scenari/v3/primitive">
        <sp:url><xsl:value-of select="@href"/></sp:url>
      </cp:link>
      <xsl:if test="@title">
        <sp:title><xsl:value-of select="@title"/></sp:title>
      </xsl:if>
      <xsl:value-of select="."/>
    </sc:phrase>
  </xsl:if>
</xsl:template>
<!-- Table related templates -->
<!-- Table: converted to sc:table (optional caption, then the h:tr children).
     A table that is a direct child of a div is additionally wrapped in the text
     markup sp:txt/cp:txt; anywhere else it is emitted bare. -->
<xsl:template match="h:table">
  <!-- Build the table once; both branches below output this same tree. -->
  <xsl:variable name="converted">
    <sc:table>
      <xsl:if test="h:caption">
        <sc:caption><xsl:value-of select="h:caption"/></sc:caption>
      </xsl:if>
      <xsl:apply-templates select="h:tr"/>
    </sc:table>
  </xsl:variable>
  <xsl:choose>
    <xsl:when test="parent::*[name() = 'div']">
      <sp:txt>
        <cp:txt>
          <xsl:copy-of select="$converted"/>
        </cp:txt>
      </sp:txt>
    </xsl:when>
    <xsl:otherwise>
      <xsl:copy-of select="$converted"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- Table row: becomes sc:row; only its h:td children are converted. -->
<xsl:template match="h:tr">
  <sc:row>
    <xsl:apply-templates select="h:td"/>
  </sc:row>
</xsl:template>
<!-- Table cell: becomes sc:cell; only its child elements are converted
     (text placed directly in the cell is not selected). -->
<xsl:template match="h:td">
  <sc:cell>
    <xsl:apply-templates select="*"/>
  </sc:cell>
</xsl:template>
<!-- Suppressed elements: the empty templates below stop these elements from being
     rendered on their own. -->
<!-- header: its content is already used in <xsl:template match="h:section">. -->
<xsl:template match="h:header"/>
<!-- h6: its content is already used in <xsl:template match="h:div">. -->
<xsl:template match="h:h6"/>
<!-- h1: its content is already used in <xsl:template match="h:section">. -->
<xsl:template match="h:h1"/>
</xsl:stylesheet>
hdoc_to_elasticSearch/build.properties
View file @
e9984626
libdir
=
${basedir}/lib
xsldir
=
${basedir}/xsl
inputPath
=
${basedir}/input
outputPath
=
${basedir}/output
\ No newline at end of file
tmpHdoc
=
${basedir}/tmp_hdoc
outputPath
=
${basedir}/output
hdoc_to_elasticSearch/hdoc_to_elasticsearch.ant
View file @
e9984626
<project name="hdoc_to_elasticsearch" default="unzipSource">
<property file="build.properties"/>
<target name="unzipSource">
<project name="hdoc_to_elasticsearch" default="clean">
<property file="build.properties"/>
<target name="unzipSource">
<echo message="Conversion begin" />
<path id="hdocDirPath">
<fileset dir="${inputPath}" id="hdocFile">
<include name="*.hdoc"/>
</fileset>
<include name="*.hdoc"/>
</fileset>
</path>
<property name="hdocFile" refId="hdocDirPath"/>
<property name="hdocFile" refId="hdocDirPath"/>
<basename property="hdocFileName" file="${hdocFile}" suffix=".hdoc"/>
<echo>Entry file : ${hdocFileName}</echo>
<echo>Entry file : ${hdocFileName}</echo>
<!-- create subdirectory for each file -->
<mkdir dir="${
outputPath
}/${hdocFileName}"/>
<!--unzips the hdoc to be converted into the previously created directory-->
<unzip dest="${
outputPath
}/${hdocFileName}">
<fileset dir="${inputPath}" id="hdocFile">
<include name="${hdocFileName}.hdoc"/>
</fileset>
</unzip>
<echo message="Conversion end" />
</target>
<mkdir dir="${
tmpHdoc
}/${hdocFileName}"/>
<!--unzips the hdoc to be converted into the previously created directory-->
<unzip dest="${
tmpHdoc
}/${hdocFileName}">
<fileset dir="${inputPath}" id="hdocFile">
<include name="${hdocFileName}.hdoc"/>
</fileset>
</unzip>
</target>
<target name="jsonFiles" depends="unzipSource">
<echo message="JSON File creation" />
<!-- TODO : create JSON & extract data -->
</target>
<echo message="JSON File creation" />
<xslt in="${tmpHdoc}/${hdocFileName}/content.xml" out="${outputPath}/${hdocFileName}.json" style="xsl/content_extractor.xsl"/>
</target>
<!-- Clear tmp files -->
<target name="clean" depends="jsonFiles">
</target>
<!-- Clear tmp files -->
<target name="clean" depends="jsonFiles">
<echo message="Clean files" />
<!-- -->
<delete includeemptydirs="true" verbose="true">
<fileset dir="${tmpHdoc}"> </fileset>
<dirset dir="${tmpHdoc}" includes="**/*" />
</delete>
<echo message="Conversion end" />
</target>
</project>
<echo message="Conversion end" />
</project>
\ No newline at end of file
hdoc_to_elasticSearch/xsl/content_extractor.xsl
0 → 100644
View file @
e9984626
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Extracts the metadata found in the <head> of an hdoc content.xml and writes it as a
  single JSON object:
    "title"    : text of <title>
    "keywords" : array holding the content of every <meta name="keywords">
    "author", "date", "rights" : content of the matching <meta>, present only when
                                 that <meta> exists in the input
  The <body> is not extracted.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:json="urn:x-hdoc-to-elasticsearch:json"
    xpath-default-namespace="http://www.utc.fr/ics/hdoc/xhtml"
    exclude-result-prefixes="xs json"
    version="2.0">

    <!-- JSON is plain text: with the text method, characters such as & and < are written
         as they are instead of being serialised as XML entities. UTF-8 keeps special
         characters (accents...) intact. -->
    <xsl:output method="text" omit-xml-declaration="yes" encoding="UTF-8"/>

    <!-- Returns $value as a quoted JSON string. Backslash, double quote, newline,
         carriage return and tab are escaped; an absent value yields "". -->
    <xsl:function name="json:string" as="xs:string">
        <xsl:param name="value" as="xs:string?"/>
        <!-- Backslashes first, so the escapes added afterwards are not escaped again. -->
        <xsl:variable name="quoted"
            select="replace(replace($value, '\\', '\\\\'), '&quot;', '\\&quot;')"/>
        <xsl:sequence
            select="concat('&quot;',
                           replace(replace(replace($quoted, '\n', '\\n'), '\r', '\\r'), '\t', '\\t'),
                           '&quot;')"/>
    </xsl:function>

    <!-- Document root: opens the JSON object, delegates to the head, closes the object. -->
    <xsl:template match="/">
        <xsl:text>{</xsl:text>
        <xsl:apply-templates select="html/head"/>
        <xsl:text>}</xsl:text>
    </xsl:template>

    <!-- Head: collects every JSON member as a string, then joins them with commas, so
         the result is valid whatever the order or presence of the <meta> elements. -->
    <xsl:template match="head">
        <xsl:variable name="members" as="xs:string*">
            <xsl:sequence
                select="concat('&quot;title&quot; : ', json:string(string-join(title/text(), ' ')))"/>
            <xsl:sequence
                select="concat('&quot;keywords&quot; : [',
                               string-join(for $k in meta[@name = 'keywords']
                                           return json:string($k/@content), ', '),
                               ']')"/>
            <xsl:apply-templates select="meta[@name = ('author', 'date', 'rights')]"/>
        </xsl:variable>
        <xsl:value-of select="$members" separator=", "/>
    </xsl:template>

    <!-- Body: deliberately produces nothing. -->
    <xsl:template match="body"/>

    <!-- author / date / rights: one "name" : "content" member, without any separator
         (the head template inserts the commas). -->
    <xsl:template match="meta[@name = ('author', 'date', 'rights')]" as="xs:string">
        <xsl:sequence select="concat('&quot;', @name, '&quot; : ', json:string(@content))"/>
    </xsl:template>
</xsl:stylesheet>
\ No newline at end of file
hdoc_to_epub/README.md
View file @
e9984626
...
...
@@ -35,15 +35,23 @@ La personne possède un hdoc et voudrait le convertir en epub. Il se dirige vers
Unsupported
-----------
Known bugs
----------
Problème dans le hdoc : il contient des images, mais elles ne sont pas référencées.
Problème avec les keywords : le hdoc contient des keywords mais nous ne savons pas où les utiliser.
Todo list
---------
Validation de l'epub actuel
Mise en place de la première page
Validation
Étude compatibilité entre EPUB2 et EPUB3
Technical Notes
---------------
hdoc_to_epub/file/container.xml
View file @
e9984626
<?xml version="1.0"?>
<container
xmlns=
"urn:oasis:names:tc:opendocument:xmlns:container"
version=
"1.0"
>
<rootfiles>
<rootfile
full-path=
"
OEBPS
/content.opf"
media-type=
"application/oebps-package+xml"
/>
<rootfile
full-path=
"
oebps
/content.opf"
media-type=
"application/oebps-package+xml"
/>
</rootfiles>
</container>
hdoc_to_epub/hdoc_to_epub.ant
View file @
e9984626
...
...
@@ -7,9 +7,12 @@
<property
name=
"in"
location=
"${basedir}/input"
/>
<property
name=
"out"
location=
"${basedir}/output"
/>
<property
name=
"tmp"
location=
"${basedir}/tmp"
/>
<property
name=
"xsl"
location=
"${basedir}/xsl"
/>
<property
name=
"file"
location=
"${basedir}/file"
/>
<property
name=
"schema"
location=
"${basedir}/schema"
/>
<property
name=
"lib"
location=
"${basedir}/lib"
/>
<property
name=
"tmpRetour"
location=
"${basedir}/tmp/retour"
/>
<target
name=
"convert"
>
<delete
dir=
"${tmp}"
failonerror=
"false"
/>
<sleep
seconds=
"1"
/>
...
...
@@ -18,6 +21,8 @@
<delete
dir=
"${out}"
failonerror=
"false"
/>
<sleep
seconds=
"1"
/>
<mkdir
dir=
"${out}"
/>
<mkdir
dir=
"${tmpRetour}"
/>
<for
param=
"inputFile"
>
<path>
...
...
@@ -27,7 +32,7 @@
<sequential>
<local
name=
"filename"
/>
<basename
property=
"filename"
file=
"@{inputFile}"
/>
<antcall
target=
"UnzipHdocFile"
>
<param
name=
"filename"
value=
"${filename}"
/>
</antcall>
...
...
@@ -36,12 +41,27 @@
<param
name=
"filename"
value=
"${filename}"
/>
</antcall>
<antcall
target=
"MakeOCF"
/>
<antcall
target=
"MakeOCF"
>
<param
name=
"filename"
value=
"${filename}"
/>
</antcall>
<!--<antcall target="MakeOPF"/> -->
<antcall
target=
"MakeOPF"
>
<param
name=
"filename"
value=
"${filename}"
/>
</antcall>
<antcall
target=
"MakeOPS"
>
<param
name=
"filename"
value=
"${filename}"
/>
</antcall>
<antcall
target=
"MakeOPS"
/>
<antcall
target=
"ZipEpubFile"
>
<param
name=
"filename"
value=
"${filename}"
/>
</antcall>
<antcall
target=
"ValidateOutput"
>
<param
name=
"filename"
value=
"${filename}"
/>
</antcall>
<antcall
target=
"CleanTmp"
/>
</sequential>
</for>
</target>
...
...
@@ -66,20 +86,49 @@
</target>
<target
name=
"MakeOCF"
>
<mkdir
dir=
"${ou
t
}/META-INF"
/>
<mkdir
dir=
"${ou
t
}/oebps"
/>
<copy
file=
"${file}/mimetype"
todir=
"${ou
t
}"
/>
<copy
file=
"${file}/container.xml"
todir=
"${ou
t
}/META-INF"
/>
<mkdir
dir=
"${
tmpRet
ou
r
}/META-INF"
/>
<mkdir
dir=
"${
tmpRet
ou
r
}/oebps"
/>
<copy
file=
"${file}/mimetype"
todir=
"${
tmpRet
ou
r
}"
/>
<copy
file=
"${file}/container.xml"
todir=
"${
tmpRet
ou
r
}/META-INF"
/>
</target>
<!-- MakeOPF: transforms the unzipped hdoc's content.xml with content.xsl into
     oebps/content.opf under ${tmpRetour}, using the Saxon jar found in ${lib}. -->
<target name="MakeOPF">
  <xslt style="${xsl}/content.xsl"
        in="${tmp}/${filename}/decompressedHdoc/content.xml"
        out="${tmpRetour}/oebps/content.opf"
        classpath="${lib}/saxon9he.jar"/>
</target>
<target
name=
"MakeOPS"
>
<mkdir
dir=
"${out}/oebps/images"
/>
<mkdir
dir=
"${out}/oebps/styles"
/>
<mkdir
dir=
"${out}/oebps/text"
/>
<mkdir
dir=
"${tmpRetour}/oebps/images"
/>
<copy
todir=
"${tmpRetour}/oebps/images"
>
<fileset
dir=
"${tmp}/${filename}/decompressedHdoc/re"
includes=
"**"
/>
</copy>
<mkdir
dir=
"${tmpRetour}/oebps/styles"
/>
<mkdir
dir=
"${tmpRetour}/oebps/chapitres"
/>
<xslt
in=
"${tmp}/${filename}/decompressedHdoc/content.xml"
out=
"${tmpRetour}/oebps/tableDesMatieres.ncx"
classpath=
"${lib}/saxon9he.jar"
style=
"${xsl}/tdmncx.xsl"
/>
<xslt
destdir=
"${tmpRetour}/oebps/chapitres"
basedir=
"${tmp}/${filename}/decompressedHdoc"
includes=
"content.xml"
classpath=
"${lib}/saxon9he.jar"
style=
"${xsl}/chapitre.xsl"
/>
<delete
file=
"${tmpRetour}/oebps/chapitres/content.html"
/>
</target>
<!-- ZipEpubFile: packages the content of ${tmpRetour} into ${out}/NAME.epub, where
     NAME is ${filename} without its .hdoc extension. -->
<target name="ZipEpubFile">
  <!-- Remove only a trailing ".hdoc" (any letter case). The dot is escaped and the
       pattern is anchored at the end ("$$" is Ant's escape for a literal "$"), so a
       name such as "my_hdoc_notes.hdoc" keeps its "_hdoc" part. -->
  <propertyregex property="properFilename"
                 input="${filename}"
                 regexp="\.hdoc$$"
                 replace=""
                 casesensitive="false"
                 override="true"/>
  <!-- First pass: META-INF and oebps, compressed. -->
  <zip destfile="${out}/${properFilename}.epub" update="true" encoding="UTF-8">
    <zipfileset dir="${tmpRetour}/META-INF" includes="*" prefix="META-INF"/>
    <zipfileset dir="${tmpRetour}/oebps" includes="**" prefix="oebps"/>
  </zip>
  <!-- zip64Mode="never" -->
  <!-- Second pass: adds the mimetype file uncompressed, keeping the compression of
       the entries already in the archive. -->
  <!-- NOTE(review): EPUB readers expect mimetype to be the first entry of the archive;
       confirm that this update pass really places it first. -->
  <zip destfile="${out}/${properFilename}.epub"
       update="true"
       compress="false"
       encoding="UTF-8"
       keepcompression="true"
       createunicodeextrafields="never">
    <zipfileset dir="${tmpRetour}" includes="mimetype"/>
  </zip>
</target>
<!-- ValidateOutput: placeholder target; it currently contains no task. -->
<target name="ValidateOutput"/>
<!-- CleanTmp: removes the ${tmp} working directory; a failed deletion does not
     stop the build. -->
<target name="CleanTmp">
  <delete failonerror="false" dir="${tmp}"/>
</target>
</project>
hdoc_to_epub/xsl/chapitre.xsl
0 → 100644
View file @
e9984626
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet
xmlns:xsl=
"http://www.w3.org/1999/XSL/Transform"
xmlns:xs=
"http://www.w3.org/2001/XMLSchema"
exclude-result-prefixes=
"xs"
xpath-default-namespace=
"http://www.utc.fr/ics/hdoc/xhtml"
xmlns=
"http://www.w3.org/1999/xhtml"
version=
"2.0"
>
<xsl:output
method=
"xml"
indent=
"yes"
encoding=
"UTF-8"
/>
<!-- Root rule: writes one XHTML chapter document per top-level section of the body.
     Each chapter holds the section title (as page title and as h1) followed by the
     converted div and section children. -->
<xsl:template match="/html" priority="10">
  <xsl:for-each select="body/section">
    <!-- Target file name: "chapitre" + the number produced by xsl:number for this
         section + ".xhtml", under ../../../retour/oebps/chapitres/. -->
    <xsl:variable name="path">../../../retour/oebps/chapitres/chapitre<xsl:number level="multiple" count="section" format="1"/>.xhtml</xsl:variable>
    <xsl:result-document href="{$path}" method="xml">
      <html xmlns:epub="http://www.idpf.org/2007/ops" xml:lang="en" lang="en">
        <head>
          <title><xsl:value-of select="header/h1"/></title>
        </head>
        <body>
          <h1><xsl:value-of select="header/h1"/></h1>
          <xsl:apply-templates select="div|section"/>
        </body>
      </html>
    </xsl:result-document>
  </xsl:for-each>
</xsl:template>
<xsl:template
match=
"section"
priority=
"3"
>
<xsl:element
name=
"h{count(ancestor::section)+1}"
>
<xsl:value-of
select=
"header/h1"
/>