Commit 9a148cb4 authored by Antoine's avatar Antoine
Browse files

Merge branch 'master' of https://gitlab.utc.fr/crozatst/hdoc

parents a9ca354d 89f9f767
*/input/ */input/*
*/tmp/ */tmp/*
*/output/ */output/*
wikipedia_to_hdoc/result/* !*/input/sample.*
wikipedia_to_hdoc/tmp/* \ No newline at end of file
wikipedia_to_hdoc/hdoc_to_opale/tmp/*
wikipedia_to_hdoc/output/*
mindmapping_to_opale/result/*
mindmapping_to_opale/tmp/*
mindmapping_to_lexique/result/*
mindmapping_to_lexique/tmp/*
mindmapping_to_optim/result/*
mindmapping_to_optim/tmp/*
mindmapping_to_hdoc/result/*
mindmapping_to_hdoc/tmp/*
\ No newline at end of file
Usage
-----
Put the `.hdoc` files in the `input` folder, run `run.bat` or `run.sh` (depending on your operating system), and collect the generated `.scar` files from the `output` folder.
The script can now process several `.hdoc` files in a single run.
Todo list
---------
- Refactor the ant script
- Resolve bugs
- (Optional) Port to XSLT 2.0
Bugs
----
- Table cell size
<?xml version="1.0" encoding="UTF-8" standalone="no"?> <?xml version="1.0" encoding="UTF-8" standalone="no"?>
<project name="hdoc_to_opale" basedir="." default="convert"> <project name="hdoc_to_opale" basedir="." default="convert">
<property file="global.properties"/> <property name="in" location="${basedir}/input"/>
<property name="out" location="${basedir}/output"/>
<taskdef name="jing" classname="com.thaiopensource.relaxng.util.JingTask"> <property name="tmp" location="${basedir}/tmp"/>
<classpath> <property name="xsl" location="${basedir}/xsl"/>
<pathelement location="lib/jing.jar"/> <property name="lib" location="${basedir}/lib"/>
</classpath> <property name="log" location="${basedir}/log"/>
</taskdef> <property name="schema" location="${basedir}/schema"/>
<property name="bibtex" location="${basedir}/bibtex_to_opale"/>
<target name="convert">
<!-- This is the main target : it is called whether by the project (as the default target) or by the user/converter <property file="global.properties"/>
website, if specified in the command line. Basically, this target consists in several targets/ant buildfiles calls. -->
<antcall target="CleanDirectory"/> <taskdef resource="net/sf/antcontrib/antlib.xml"/>
<mkdir dir="${tmpdir}"/> <taskdef name="jing" classname="com.thaiopensource.relaxng.util.JingTask">
<classpath>
<antcall target="UnzipHdocFile"/> <pathelement location="lib/jing.jar"/>
<antcall target="ValidateInput" /> </classpath>
</taskdef>
<antcall target="FindContentFiles"/>
<ant antfile="${tmpdir}/generateContentPath.xml"/> <target name="convert">
<!-- This is the main target : it is called whether by the project (as the default target) or by the user/converter
<antcall target="ValidateOutput"/> website, if specified in the command line. Basically, this target consists in several targets/ant buildfiles calls. -->
<antcall target="DivideOutput"/> <antcall target="CleanDirectory"/>
<antcall target="ZipOutput"/> <antcall target="UnzipHdocFile"/>
<antcall target="ZipDividedOutput"/> <antcall target="ValidateInput" />
<antcall target="CleanDirectory"/> <antcall target="FindContentFiles"/>
</target>
<for param="inputFile">
<target name="CleanDirectory"> <path>
<delete dir="${tmpdir}"/> <fileset dir="${in}" includes="**/*.hdoc"/>
</target> </path>
<sequential>
<target name="UnzipHdocFile"> <local name="filename"/>
<!-- Unzip the input hdoc file. Decompressed folder is named "decompressedHdoc" : this name is the only one which <basename property="filename" file="@{inputFile}"/>
refers to the hdoc file furthermore in this project. --> <ant antfile="${tmp}/${filename}/generateContentPath.xml">
<unzip src="${InputPath}" dest="${tmpdir}/decompressedHdoc"/> <property name="filename" value="${filename}"/>
<chmod dir="${tmpdir}/decompressedHdoc" perm="777"/> </ant>
</target> </sequential>
</for>
<target name="FindContentFiles">
<!-- Finds the absolute path of container.xml and applies transformation0.xsl on it.--> <antcall target="ValidateOutput"/>
<first id="first"> <antcall target="DivideOutput"/>
<fileset
dir="${tmpdir}/decompressedHdoc/META-INF" <antcall target="ZipOutput"/>
includes="**/container.xml" <antcall target="ZipDividedOutput"/>
/>
</first> </target>
<xslt
in="${toString:first}" <target name="CleanDirectory">
out="${tmpdir}/generateContentPath.xml" <delete>
style="transformation0.xsl" <fileset dir="${tmp}">
processor="org.apache.tools.ant.taskdefs.optional.TraXLiaison" <include name="*"/>
/> </fileset>
<chmod file="${tmpdir}/generateContentPath.xml" perm="777"/> </delete>
</target> </target>
<target name="ZipOutput"> <target name="UnzipHdocFile">
<copy file=".wspmeta" todir="${tmpdir}/decompressedOpale"/> <!-- Unzip the input hdoc file. Decompressed folder is named "decompressedHdoc" : this name is the only one which
<mkdir dir="${tmpdir}/decompressedOpale/res"/> refers to the hdoc file furthermore in this project. -->
<ant antfile="${tmpdir}/moveRessourceFiles.xml"/> <for param="inputFile">
<zip basedir="${tmpdir}/decompressedOpale" destfile="${OutputPath}" encoding="UTF-8"/> <path>
</target> <fileset dir="${in}" includes="**/*.hdoc"/>
</path>
<target name="ZipDividedOutput"> <sequential>
<copy file=".wspmeta" todir="${tmpdir}/decompressedOpaleDivided"/> <local name="filename"/>
<copy todir="${tmpdir}/decompressedOpaleDivided/res" > <basename property="filename" file="@{inputFile}"/>
<fileset dir="${tmpdir}/decompressedOpale/res" includes="**"/> <unzip src="${in}/${filename}" dest="${tmp}/${filename}/decompressedHdoc"/>
</copy> <chmod dir="${tmp}/${filename}/decompressedHdoc" perm="777"/>
<copy todir="${tmpdir}/decompressedOpaleDivided/references" > </sequential>
<fileset dir="${tmpdir}/decompressedOpale/references" includes="**"/> </for>
</copy> </target>
<zip basedir="${tmpdir}/decompressedOpaleDivided" destfile="${OutputPathDivided}" encoding="UTF-8"/>
</target> <target name="FindContentFiles">
<!-- Finds the absolute path of container.xml and applies transformation0.xsl on it.-->
<!-- Validating the XML container file --> <for param="inputFile">
<target name="ValidateInput"> <path>
<jing file="${tmpdir}/decompressedHdoc/META-INF/container.xml" rngfile="schema/hdoc1-container.rng"></jing> <fileset dir="${in}" includes="**/*.hdoc"/>
</target> </path>
<sequential>
<!-- Validating the XML output --> <local name="filename"/>
<target name="ValidateOutput"> <basename property="filename" file="@{inputFile}"/>
<jing file="${tmpdir}/decompressedOpale/main.xml" rngfile="schema/op_ue.rng"></jing> <first id="first">
</target> <fileset dir="${tmp}/${filename}/decompressedHdoc/META-INF" includes="**/container.xml"/>
</first>
<target name="DivideOutput"> <xslt in="${toString:first}" out="${tmp}/${filename}/generateContentPath.xml" style="${xsl}/transformation0.xsl" processor="org.apache.tools.ant.taskdefs.optional.TraXLiaison">
<mkdir dir="${tmpdir}/decompressedOpaleDivided"/> <param name="filename" expression="${filename}"/>
</xslt>
<!-- Adding IDS to the general output file --> <chmod file="${tmp}/${filename}/generateContentPath.xml" perm="777"/>
<xslt </sequential>
in="${tmpdir}/decompressedOpale/main.xml" </for>
out="${tmpdir}/outputWithCourseUcIds.xml" </target>
style="addCourseUcIds.xsl"
processor="org.apache.tools.ant.taskdefs.optional.TraXLiaison" <target name="ZipOutput">
/> <for param="inputFile">
<path>
<!-- Generating the root file (with refs to other files) --> <fileset dir="${in}" includes="**/*.hdoc"/>
<xslt </path>
in="${tmpdir}/outputWithCourseUcIds.xml" <sequential>
out="${tmpdir}/decompressedOpaleDivided/main.xml" <local name="filename"/>
style="addCourseUcReferences.xsl" <basename property="filename" file="@{inputFile}"/>
processor="org.apache.tools.ant.taskdefs.optional.TraXLiaison" <propertyregex property="properFilename" input="${filename}" regexp=".hdoc" replace="" casesensitive="false" override="true" />
/>
<copy file="${bibtex}/.wspmeta" todir="${tmp}/${filename}/decompressedOpale"/>
<!-- Generating the ANT file that will copy the files --> <mkdir dir="${tmp}/${filename}/decompressedOpale/res"/>
<xslt <ant antfile="${tmp}/${filename}/moveRessourceFiles.xml"/>
in="${tmpdir}/outputWithCourseUcIds.xml" <zip basedir="${tmp}/${filename}/decompressedOpale" destfile="${out}/${properFilename}/output.scar" encoding="UTF-8"/>
out="${tmpdir}/exportUnits.ant" </sequential>
style="prepareCourseUcCopies.xsl" </for>
processor="org.apache.tools.ant.taskdefs.optional.TraXLiaison" </target>
/>
<target name="ZipDividedOutput">
<!-- Executing that ANT file --> <for param="inputFile">
<ant antfile="${tmpdir}/exportUnits.ant"/> <path>
</target> <fileset dir="${in}" includes="**/*.hdoc"/>
</project> </path>
<sequential>
<local name="filename"/>
<basename property="filename" file="@{inputFile}"/>
<propertyregex property="properFilename" input="${filename}" regexp=".hdoc" replace="" casesensitive="false" override="true" />
<copy file="${bibtex}/.wspmeta" todir="${tmp}/${filename}/decompressedOpaleDivided"/>
<copy todir="${tmp}/${filename}/decompressedOpaleDivided/res" >
<fileset dir="${tmp}/${filename}/decompressedOpale/res" includes="**"/>
</copy>
<copy todir="${tmp}/${filename}/decompressedOpaleDivided/references" >
<fileset dir="${tmp}/${filename}/decompressedOpale/references" includes="**"/>
</copy>
<zip basedir="${tmp}/${filename}/decompressedOpaleDivided" destfile="${out}/${properFilename}/dividedOutput.scar" encoding="UTF-8"/>
</sequential>
</for>
</target>
<!-- Validates, for every .hdoc archive found under ${in}, the container.xml of its unpacked copy
     against the hdoc1-container.rng RELAX NG schema. -->
<target name="ValidateInput">
<for param="inputFile">
<path>
<fileset dir="${in}" includes="**/*.hdoc"/>
</path>
<sequential>
<!-- "filename" is declared local so that it can take a new value on each iteration. -->
<local name="filename"/>
<basename property="filename" file="@{inputFile}"/>
<jing file="${tmp}/${filename}/decompressedHdoc/META-INF/container.xml" rngfile="${schema}/hdoc1-container.rng"></jing>
</sequential>
</for>
</target>
<!-- Validates, for every .hdoc archive found under ${in}, the generated decompressedOpale/main.xml
     against the op_ue.rng RELAX NG schema. -->
<target name="ValidateOutput">
<for param="inputFile">
<path>
<fileset dir="${in}" includes="**/*.hdoc"/>
</path>
<sequential>
<!-- "filename" is declared local so that it can take a new value on each iteration. -->
<local name="filename"/>
<basename property="filename" file="@{inputFile}"/>
<jing file="${tmp}/${filename}/decompressedOpale/main.xml" rngfile="${schema}/op_ue.rng"></jing>
</sequential>
</for>
</target>
<!-- Builds, for every .hdoc archive found under ${in}, the "divided" variant of the Opale output
     inside ${tmp}/<archive name>/decompressedOpaleDivided. -->
<target name="DivideOutput">
<for param="inputFile">
<path>
<fileset dir="${in}" includes="**/*.hdoc"/>
</path>
<sequential>
<!-- "filename" is declared local so that it can take a new value on each iteration. -->
<local name="filename"/>
<basename property="filename" file="@{inputFile}"/>
<mkdir dir="${tmp}/${filename}/decompressedOpaleDivided"/>
<!-- Step 1: add IDs to the general output file (addCourseUcIds.xsl). -->
<xslt
in="${tmp}/${filename}/decompressedOpale/main.xml"
out="${tmp}/${filename}/outputWithCourseUcIds.xml"
style="${xsl}/addCourseUcIds.xsl"
processor="org.apache.tools.ant.taskdefs.optional.TraXLiaison"
/>
<!-- Step 2: generate the root file, which refers to the other files (addCourseUcReferences.xsl). -->
<xslt
in="${tmp}/${filename}/outputWithCourseUcIds.xml"
out="${tmp}/${filename}/decompressedOpaleDivided/main.xml"
style="${xsl}/addCourseUcReferences.xsl"
processor="org.apache.tools.ant.taskdefs.optional.TraXLiaison"
/>
<!-- Step 3: generate the Ant file that will copy the files; the archive name is passed to the
     stylesheet as the "filename" parameter. -->
<xslt
in="${tmp}/${filename}/outputWithCourseUcIds.xml"
out="${tmp}/${filename}/exportUnits.ant"
style="${xsl}/prepareCourseUcCopies.xsl"
processor="org.apache.tools.ant.taskdefs.optional.TraXLiaison"
>
<param name="filename" expression="${filename}"/>
</xslt>
<!-- Step 4: execute the Ant file generated in step 3. -->
<ant antfile="${tmp}/${filename}/exportUnits.ant"/>
</sequential>
</for>
</target>
</project>
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<project name="hdoc_to_opale" basedir="." default="convert">
<!-- Working locations, all resolved against the project base directory. -->
<!-- "in": folder scanned for the .hdoc archives to convert. -->
<property name="in" location="${basedir}/input"/>
<!-- "out": folder receiving the generated .scar archives. -->
<property name="out" location="${basedir}/output"/>
<!-- "tmp": scratch folder; each archive is processed in its own sub-folder of it. -->
<property name="tmp" location="${basedir}/tmp"/>
<!-- "xsl": folder holding the stylesheets used by the xslt tasks. -->
<property name="xsl" location="${basedir}/xsl"/>
<!-- NOTE(review): "lib" and "log" are not referenced by any target visible in this build file;
     confirm whether generated or imported build files rely on them. -->
<property name="lib" location="${basedir}/lib"/>
<property name="log" location="${basedir}/log"/>
<!-- "schema": folder holding the RELAX NG schemas used by the jing validations. -->
<property name="schema" location="${basedir}/schema"/>
<!-- "bibtex": folder from which the .wspmeta file is copied into the archives. -->
<property name="bibtex" location="${basedir}/bibtex_to_opale"/>
<!-- Loaded after the definitions above: Ant properties keep their first value, so an entry in
     global.properties with one of the names above does not replace it. -->
<property file="global.properties"/>
<!-- Ant-Contrib tasks (for, propertyregex). No classpath is given, so the Ant-Contrib jar has to be
     on the classpath Ant itself is started with. -->
<taskdef resource="net/sf/antcontrib/antlib.xml"/>
<!-- Jing task, used for the RELAX NG validations; loaded from lib/jing.jar. -->
<taskdef name="jing" classname="com.thaiopensource.relaxng.util.JingTask">
<classpath>
<pathelement location="lib/jing.jar"/>
</classpath>
</taskdef>
<target name="convert">
    <!-- Main entry point. It runs either as the project's default target or because the user / the
         converter website named it on the command line. It only chains the other targets and the
         Ant build files generated along the way. -->
    <antcall target="CleanDirectory"/>
    <antcall target="UnzipHdocFile"/>
    <antcall target="ValidateInput"/>
    <antcall target="FindContentFiles"/>

    <!-- For every input archive, run the generateContentPath.xml build file that FindContentFiles
         wrote into the archive's own tmp sub-folder. -->
    <for param="inputFile">
        <path>
            <fileset dir="${in}">
                <include name="**/*.hdoc"/>
            </fileset>
        </path>
        <sequential>
            <!-- Local, so that the property can be set again on the next iteration. -->
            <local name="filename"/>
            <basename file="@{inputFile}" property="filename"/>
            <ant antfile="${tmp}/${filename}/generateContentPath.xml">
                <property name="filename" value="${filename}"/>
            </ant>
        </sequential>
    </for>

    <antcall target="ValidateOutput"/>
    <antcall target="DivideOutput"/>
    <antcall target="ZipOutput"/>
    <antcall target="ZipDividedOutput"/>
</target>
<target name="CleanDirectory">
    <!-- Empties the scratch folder ${tmp} before a conversion.

         Each input archive is unpacked and processed in its own sub-folder ${tmp}/<archive name>/,
         so the whole tree has to be removed. The previous pattern "*" only matched files located
         directly in ${tmp} and therefore left the results of earlier runs in place.

         The folder itself is created first, so that the target also works on a fresh checkout
         where ${tmp} does not exist yet. Ant's default excludes still apply to the fileset. -->
    <mkdir dir="${tmp}"/>
    <delete includeemptydirs="true">
        <fileset dir="${tmp}" includes="**/*"/>
    </delete>
</target>
<target name="UnzipHdocFile">
    <!-- Unzips every .hdoc archive found under ${in}. Each archive is unpacked into
         ${tmp}/<archive name>/decompressedHdoc; "decompressedHdoc" is the only name by which the
         rest of this build file refers to the unpacked archive. -->
    <for param="inputFile">
        <path>
            <fileset dir="${in}" includes="**/*.hdoc"/>
        </path>
        <sequential>
            <!-- Local, so that the property can be set again on the next iteration. -->
            <local name="filename"/>
            <basename property="filename" file="@{inputFile}"/>
            <!-- The fileset above is recursive, so the archive may sit in a sub-folder of ${in}.
                 Read it through its full path (@{inputFile}); ${in}/${filename} would only be
                 correct for archives placed directly in ${in}. -->
            <unzip src="@{inputFile}" dest="${tmp}/${filename}/decompressedHdoc"/>
            <chmod dir="${tmp}/${filename}/decompressedHdoc" perm="777"/>
        </sequential>
    </for>
</target>
<target name="FindContentFiles">
    <!-- For every input archive: picks the first container.xml found under the unpacked META-INF
         folder and applies transformation0.xsl to it. The result is written to
         generateContentPath.xml in the archive's tmp sub-folder. -->
    <for param="inputFile">
        <path>
            <fileset dir="${in}">
                <include name="**/*.hdoc"/>
            </fileset>
        </path>
        <sequential>
            <!-- Local, so that the property can be set again on the next iteration. -->
            <local name="filename"/>
            <basename file="@{inputFile}" property="filename"/>

            <first id="first">
                <fileset dir="${tmp}/${filename}/decompressedHdoc/META-INF">
                    <include name="**/container.xml"/>
                </fileset>
            </first>

            <!-- The archive name is handed to the stylesheet as the "filename" parameter. -->
            <xslt
                in="${toString:first}"
                out="${tmp}/${filename}/generateContentPath.xml"
                style="${xsl}/transformation0.xsl"
                processor="org.apache.tools.ant.taskdefs.optional.TraXLiaison">
                <param name="filename" expression="${filename}"/>
            </xslt>

            <chmod file="${tmp}/${filename}/generateContentPath.xml" perm="777"/>
        </sequential>
    </for>
</target>
<target name="ZipOutput">
    <!-- For every input archive: completes the decompressedOpale folder (.wspmeta, res/, resource
         files moved by the generated moveRessourceFiles.xml) and zips it into
         ${out}/<archive name without .hdoc>/output.scar. -->
    <for param="inputFile">
        <path>
            <fileset dir="${in}" includes="**/*.hdoc"/>
        </path>
        <sequential>
            <!-- Local, so that the property can be set again on the next iteration. -->
            <local name="filename"/>
            <basename property="filename" file="@{inputFile}"/>
            <!-- Strip the extension only. The dot is escaped and the pattern is anchored at the end
                 of the name ("$$" is Ant's escape for a literal "$"). The former pattern ".hdoc"
                 matched any character followed by "hdoc" anywhere in the name, so "myhdoc.hdoc"
                 became "m.hdoc". -->
            <propertyregex property="properFilename" input="${filename}" regexp="\.hdoc$$" replace="" casesensitive="false" override="true" />

            <copy file="${bibtex}/.wspmeta" todir="${tmp}/${filename}/decompressedOpale"/>
            <mkdir dir="${tmp}/${filename}/decompressedOpale/res"/>
            <ant antfile="${tmp}/${filename}/moveRessourceFiles.xml"/>
            <!-- Make sure the per-archive output folder exists before zipping into it, rather than
                 depending on the zip task to create it. -->
            <mkdir dir="${out}/${properFilename}"/>
            <zip basedir="${tmp}/${filename}/decompressedOpale" destfile="${out}/${properFilename}/output.scar" encoding="UTF-8"/>
        </sequential>
    </for>
</target>
<target name="ZipDividedOutput">
    <!-- For every input archive: completes the decompressedOpaleDivided folder with .wspmeta and
         with the res/ and references/ folders of decompressedOpale, then zips it into
         ${out}/<archive name without .hdoc>/dividedOutput.scar. -->
    <for param="inputFile">
        <path>
            <fileset dir="${in}" includes="**/*.hdoc"/>
        </path>
        <sequential>
            <!-- Local, so that the property can be set again on the next iteration. -->
            <local name="filename"/>
            <basename property="filename" file="@{inputFile}"/>
            <!-- Strip the extension only. The dot is escaped and the pattern is anchored at the end
                 of the name ("$$" is Ant's escape for a literal "$"). The former pattern ".hdoc"
                 matched any character followed by "hdoc" anywhere in the name, so "myhdoc.hdoc"
                 became "m.hdoc". -->
            <propertyregex property="properFilename" input="${filename}" regexp="\.hdoc$$" replace="" casesensitive="false" override="true" />
            <copy file="${bibtex}/.wspmeta" todir="${tmp}/${filename}/decompressedOpaleDivided"/>
            <copy todir="${tmp}/${filename}/decompressedOpaleDivided/res" >
                <fileset dir="${tmp}/${filename}/decompressedOpale/res" includes="**"/>
            </copy>
            <copy todir="${tmp}/${filename}/decompressedOpaleDivided/references" >
                <fileset dir="${tmp}/${filename}/decompressedOpale/references" includes="**"/>
            </copy>
            <!-- Make sure the per-archive output folder exists before zipping into it, rather than
                 depending on the zip task to create it. -->
            <mkdir dir="${out}/${properFilename}"/>
            <zip basedir="${tmp}/${filename}/decompressedOpaleDivided" destfile="${out}/${properFilename}/dividedOutput.scar" encoding="UTF-8"/>
        </sequential>
    </for>
</target>
<!-- Input validation: checks the container.xml of every unpacked .hdoc archive against the
     hdoc1-container.rng RELAX NG schema. -->
<target name="ValidateInput">
    <for param="inputFile">
        <path>
            <fileset dir="${in}">
                <include name="**/*.hdoc"/>
            </fileset>
        </path>
        <sequential>
            <!-- Local, so that the property can be set again on the next iteration. -->
            <local name="filename"/>
            <basename file="@{inputFile}" property="filename"/>
            <jing
                file="${tmp}/${filename}/decompressedHdoc/META-INF/container.xml"
                rngfile="${schema}/hdoc1-container.rng"/>
        </sequential>
    </for>
</target>
<!-- Output validation: checks the generated decompressedOpale/main.xml of every archive against
     the op_ue.rng RELAX NG schema. -->
<target name="ValidateOutput">
    <for param="inputFile">
        <path>
            <fileset dir="${in}">
                <include name="**/*.hdoc"/>
            </fileset>
        </path>
        <sequential>
            <!-- Local, so that the property can be set again on the next iteration. -->
            <local name="filename"/>
            <basename file="@{inputFile}" property="filename"/>
            <jing
                file="${tmp}/${filename}/decompressedOpale/main.xml"
                rngfile="${schema}/op_ue.rng"/>
        </sequential>
    </for>
</target>
<target name="DivideOutput">
    <!-- Builds, for every input archive, the "divided" variant of the Opale output in
         ${tmp}/<archive name>/decompressedOpaleDivided. -->
    <for param="inputFile">
        <path>
            <fileset dir="${in}">
                <include name="**/*.hdoc"/>
            </fileset>
        </path>
        <sequential>
            <!-- Local, so that the property can be set again on the next iteration. -->
            <local name="filename"/>
            <basename file="@{inputFile}" property="filename"/>
            <mkdir dir="${tmp}/${filename}/decompressedOpaleDivided"/>

            <!-- 1. Add IDs to the general output file. -->
            <xslt in="${tmp}/${filename}/decompressedOpale/main.xml"
                  out="${tmp}/${filename}/outputWithCourseUcIds.xml"
                  style="${xsl}/addCourseUcIds.xsl"
                  processor="org.apache.tools.ant.taskdefs.optional.TraXLiaison"/>

            <!-- 2. Generate the root file, which holds the references to the other files. -->
            <xslt in="${tmp}/${filename}/outputWithCourseUcIds.xml"
                  out="${tmp}/${filename}/decompressedOpaleDivided/main.xml"
                  style="${xsl}/addCourseUcReferences.xsl"
                  processor="org.apache.tools.ant.taskdefs.optional.TraXLiaison"/>

            <!-- 3. Generate the Ant file that will copy the files; the archive name is passed to
                    the stylesheet as the "filename" parameter. -->
            <xslt in="${tmp}/${filename}/outputWithCourseUcIds.xml"
                  out="${tmp}/${filename}/exportUnits.ant"
                  style="${xsl}/prepareCourseUcCopies.xsl"
                  processor="org.apache.tools.ant.taskdefs.optional.TraXLiaison">
                <param name="filename" expression="${filename}"/>
            </xslt>

            <!-- 4. Execute the Ant file generated in step 3. -->
            <ant antfile="${tmp}/${filename}/exportUnits.ant"/>
        </sequential>
    </for>
</target>
</project>
@echo off @echo off
set lib=lib set lib=lib
set ant=myantce.ant set ant=hdoc_to_opale.ant
set antparam=-Dprogram.param=%1 set antparam=-Dprogram.param=%1
set scJarList=%lib%\* set scJarList=%lib%\*
......
#!/bin/sh #!/bin/sh
lib="lib" lib="lib"
ant="myantce.ant" ant="hdoc_to_opale.ant"
antparam="-Dprogram.param=$1" antparam="-Dprogram.param=$1"
#Recherche de java et controle que se soit une version SUN #Recherche de java et controle que se soit une version SUN
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
<!-- Header in hdoc --> <!-- Header in hdoc -->
<xsl:template match="node[1]" mode="header"> <xsl:template match="node[1]" mode="header">
<title> <title>
<xsl:value-of select="utc:string-remove-order(@TEXT)"/> <xsl:value-of select="utc:remove-language(@TEXT)"/>
<xsl:value-of select="richcontent"/> <xsl:value-of select="richcontent"/>
</title> </title>
<meta charset="utf-8"/> <meta charset="utf-8"/>
...@@ -39,7 +39,7 @@ ...@@ -39,7 +39,7 @@
<xsl:element name="section"> <xsl:element name="section">
<header> <header>
<h1> <h1>
<xsl:value-of select="utc:string-remove-order(@TEXT)"/> <xsl:value-of select="utc:remove-language(@TEXT)"/>
<xsl:value-of select="richcontent"/> <xsl:value-of select="richcontent"/>
</h1> </h1>
</header> </header>
...@@ -49,7 +49,7 @@ ...@@ -49,7 +49,7 @@
<header><h1>Introduction</h1> <header><h1>Introduction</h1>
<xsl:element name="div"> <xsl:element name="div">
<xsl:attribute name="data-hdoc-type">introduction</xsl:attribute> <xsl:attribute name="data-hdoc-type">introduction</xsl:attribute>
<xsl:value-of select="utc:string-remove-order(utc:string-remove-all(child::*[contains(@TEXT, '#intro')][1]/@TEXT, '#intro '))"/> <xsl:value-of select="utc:remove-language(child::*[contains(@TEXT, '#intro')][1]/@TEXT)"/>
</xsl:element> </xsl:element>
</header> </header>
</xsl:element> </xsl:element>
...@@ -63,7 +63,7 @@ ...@@ -63,7 +63,7 @@
<footer> <footer>
<xsl:element name="div"> <xsl:element name="div">
<xsl:attribute name="data-hdoc-type">conclusion</xsl:attribute> <xsl:attribute name="data-hdoc-type">conclusion</xsl:attribute>
<xsl:value-of select="utc:string-remove-order(utc:string-remove-all(child::*[contains(@TEXT, '#conclu')][1]/@TEXT, '#conclu '))"/> <xsl:value-of select="utc:remove-language(child::*[contains(@TEXT, '#conclu')][1]/@TEXT)"/>
</xsl:element> </xsl:element>
</footer> </footer>
</xsl:element>