Commit caa2e203 authored by Gregory's avatar Gregory

Merge branch 'master' of https://gitlab.utc.fr/crozatst/hdoc

parents f194c1a3 349c6133
...@@ -6,8 +6,8 @@ http://www.gnu.org/licenses/gpl-3.0.txt ...@@ -6,8 +6,8 @@ http://www.gnu.org/licenses/gpl-3.0.txt
## Credits ## Credits
- 2016 - 2016
- - Etienne Chognard - Etienne Chognard
- - Fabien Boucaud - Fabien Boucaud
- 2015 - 2015
- Jean-Côme Douteau - Jean-Côme Douteau
- Gabrielle Rit - Gabrielle Rit
...@@ -54,7 +54,14 @@ Example : ...@@ -54,7 +54,14 @@ Example :
Currently available on: https://framemo.org/framapad_to_opale Currently available on: https://framemo.org/framapad_to_opale
See also : https://bimestriel.framapad.org/p/nf29_framapad_to_opale for the full documentation of our working process.
## TODO ## TODO
- Nested Lists
- Indentation
- Titles (and therefore Structure of the doc)
- Coloured text
- Code
- Markdown - Markdown
## Technical notes ## Technical notes
......
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project basedir="." name="myantce" default="main"> <project basedir="." name="myantce" default="main">
<property file="etherpad_to_hdoc.properties"/> <property file="framapad_to_hdoc.properties"/>
<!-- import classes --> <!-- import classes -->
<taskdef resource="net/sf/antcontrib/antlib.xml"/> <taskdef resource="net/sf/antcontrib/antlib.xml"/>
<taskdef name="htmlcleaner" classname="org.htmlcleaner.HtmlCleanerForAnt"/> <taskdef name="htmlcleaner" classname="org.htmlcleaner.HtmlCleanerForAnt"/>
......
@echo off @echo off
set lib=lib set lib=lib
set ant=etherpad_to_hdoc.ant set ant=framapad_to_hdoc.ant
set antparam=-Dprogram.param=%1 set antparam=-Dprogram.param=%1
set scJarList=%lib%\* set scJarList=%lib%\*
......
#!/bin/sh #!/bin/sh
lib="lib" lib="lib"
ant="etherpad_to_hdoc.ant" ant="framapad_to_hdoc.ant"
antparam="-Dprogram.param=$1" antparam="-Dprogram.param=$1"
#Recherche de java et controle que se soit une version SUN #Recherche de java et controle que se soit une version SUN
......
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project basedir="." name="myantce" default="main"> <project basedir="." name="myantce" default="main">
<property file="etherpad_to_opale.properties"/> <property file="framapad_to_opale.properties"/>
<!-- CHECK FOR OS FAMILY --> <!-- CHECK FOR OS FAMILY -->
<condition property="is_windows"> <condition property="is_windows">
<os family="windows"/> <os family="windows"/>
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
<include name="*.html"/> <include name="*.html"/>
</fileset> </fileset>
</copy> </copy>
<exec dir="../etherpad_to_hdoc" executable="run.bat"/> <exec dir="../framapad_to_hdoc" executable="run.bat"/>
</target> </target>
<target name="to_hdoc_unix" if="${is_unix}"> <target name="to_hdoc_unix" if="${is_unix}">
...@@ -38,8 +38,8 @@ ...@@ -38,8 +38,8 @@
<include name="*.html"/> <include name="*.html"/>
</fileset> </fileset>
</copy> </copy>
<exec executable="/bin/bash" dir="../etherpad_to_hdoc"> <exec executable="/bin/bash" dir="../framapad_to_hdoc">
<arg value="../etherpad_to_hdoc/run.sh"/> <arg value="../framapad_to_hdoc/run.sh"/>
</exec> </exec>
</target> </target>
......
...@@ -5,8 +5,8 @@ xsl = ${basedir}/xsl ...@@ -5,8 +5,8 @@ xsl = ${basedir}/xsl
lib = ${basedir}/lib lib = ${basedir}/lib
log = ${basedir}/log log = ${basedir}/log
eth_in = ../etherpad_to_hdoc/input eth_in = ../framapad_to_hdoc/input
eth_out = ../etherpad_to_hdoc/output eth_out = ../framapad_to_hdoc/output
opa_in = ../hdoc_to_opale/input opa_in = ../hdoc_to_opale/input
opa_out = ../hdoc_to_opale/output opa_out = ../hdoc_to_opale/output
rootfilename = content.xml rootfilename = content.xml
@echo off @echo off
set lib=lib set lib=lib
set ant=etherpad_to_opale.ant set ant=framapad_to_opale.ant
set antparam=-Dprogram.param=%1 set antparam=-Dprogram.param=%1
set scJarList=%lib%\* set scJarList=%lib%\*
......
#!/bin/sh #!/bin/sh
lib="lib" lib="lib"
ant="etherpad_to_opale.ant" ant="framapad_to_opale.ant"
antparam="-Dprogram.param=$1" antparam="-Dprogram.param=$1"
#Recherche de java et controle que se soit une version SUN #Recherche de java et controle que se soit une version SUN
......
Converter hdoc_to_basex
-----------------------
The purpose of this converter is to produce, from an HDOC file, an XML data file suitable for import into BaseX for further XQuery requests.
License GPL3.0
--------------
http://www.gnu.org/licenses/gpl-3.0.txt
Credits
-------
* Simei YIN
* Baptiste MONTANGE
Dependencies
----------
This project can be used alone if you want to import an HDOC file into basex.
## User stories
------------------
- Among a group of courses, user can search by title, author or keywords of the course.
- By searching a certain keyword, user can obtain the sections that contain it with their hierarchy levels in the course.
- Within a given section, by searching for a keyword, the user can obtain the paragraphs that contain it.
- User can get definitions related to a keyword
- User can get examples whose titles contain a keyword
Step by step :
[Step 1 : File transformation]
- Put the files .hdoc you want to deal with in the folder [input](https://gitlab.utc.fr/crozatst/hdoc/tree/master/hdoc_to_basex/input)
- Run the transformation program (Windows: double-click run.bat; Linux: execute run.sh)
[Step 2 : Create data base in basex]
- Download and install [BaseX](http://basex.org/products/download/all-downloads/)
- Run BasexGui
- In the Text Editor of BaseX, open the command script "createbd.bxs" in the folder [/basex/command](https://gitlab.utc.fr/crozatst/hdoc/tree/master/hdoc_to_basex/basex/command).
Follow the instructions in the script, and then execute it.
[Step 3 : Make XQuery request]
-
\ No newline at end of file
# This script creates a database in BaseX from the XML files in the output folder.
# Before executing this script, please make sure that you have successfully completed the transformation described in Step 1 of the README.md
# Syntax : CREATE DB [name] ([input])
# Example :
CREATE DB myDB D:\School\UTC\GI04\NF29\Projet\hdoc\hdoc_to_basex\output
#CREATE DB myDB [..path to the project on your local disk..]/hdoc/hdoc_to_basex/output
# P.S. If you get the error message: Resource "..." not found, please check that the directory path is correct
\ No newline at end of file
(: Returns every document that has at least one author matching $name. :)
(: $name is a regular expression, matched case-sensitively: :)
(:   '^Baptiste Montangé$' selects documents with exactly that author name; :)
(:   'Montangé' selects documents with an author whose name contains 'Montangé'. :)
(: Accented vowels are folded to their unaccented form in both the pattern :)
(: and the author names, so 'Montange' and 'Montangé' give the same result. :)
<documents>{
  let $name := 'Montangé'
  let $accented := 'áàâäéèêëíìîïóòôöúùûü'
  let $plain := 'aaaaeeeeiiiioooouuuu'
  let $name_noAcc := translate($name, $accented, $plain)
  for $doc in //document
  (: An existential test keeps each document at most once, even when several :)
  (: of its authors match. The previous "group by" rebound $doc to one copy :)
  (: per matching author, so such documents were returned several times. :)
  where some $author in $doc/authors/author
        satisfies matches(translate($author, $accented, $plain), $name_noAcc)
  return $doc
}</documents>
\ No newline at end of file
(: Title search: returns every document whose <titre> matches $name. :)
(: $name is a regular expression, applied case-insensitively ("i" flag): :)
(:   '^NF29_HdocEtherpad$' selects the document with exactly that title; :)
(:   'NF29' selects every document whose title contains 'NF29'. :)
<documents>{
  let $name := '^NF29_HdocEtherpad$'
  return //document[matches(titre, $name, "i")]
}</documents>
\ No newline at end of file
lib=${basedir}/lib
log=${basedir}/log
xsl=${basedir}/xsl
in=${basedir}/input
out=${basedir}/output
tmp=${basedir}/tmp
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project basedir="." name="myantce" default="convert">
  <!--
    Converts every .hdoc archive found under ${in} into an XML data file in ${out}.
    The folder properties (lib, log, xsl, in, out, tmp) are read from build.properties.
    The <for> task comes from ant-contrib.
  -->
  <taskdef resource="net/sf/antcontrib/antlib.xml"/>
  <property file="build.properties"/>

  <target name="convert">
    <!-- Preparation: start from empty tmp, output and log folders.
         tmp is also cleared here so that files left behind by an aborted run
         cannot leak into this conversion. -->
    <delete dir="${tmp}" failonerror="false"/>
    <mkdir dir="${tmp}"/>
    <delete dir="${out}" failonerror="false"/>
    <mkdir dir="${out}"/>
    <delete dir="${log}" failonerror="false"/>
    <mkdir dir="${log}"/>
    <echo message="DEBUT"/>

    <!-- For each hdoc file under ${in}: unzip it (target "UnzipHdocFile"),
         then transform its content.xml (target "content"). -->
    <for param="inputFile">
      <path>
        <fileset dir="${in}" includes="**/*.hdoc"/>
      </path>
      <sequential>
        <!-- "filename" is declared local so it can be set again on each iteration. -->
        <local name="filename"/>
        <basename property="filename" file="@{inputFile}"/>
        <antcall target="UnzipHdocFile">
          <param name="filename" value="${filename}"/>
        </antcall>
        <antcall target="content">
          <param name="filename" value="${filename}"/>
        </antcall>
      </sequential>
    </for>

    <!-- End marker, printed once after all files have been processed. -->
    <echo message="FIN"/>

    <!-- Clean up the scratch folder. -->
    <delete dir="${tmp}" failonerror="false"/>
  </target>

  <target name="UnzipHdocFile">
    <!-- Unzips ${in}/${filename} into ${tmp}/${filename}/decompressedHdoc.
         The rest of the build refers to the hdoc content only through that folder. -->
    <unzip src="${in}/${filename}" dest="${tmp}/${filename}/decompressedHdoc"/>
    <chmod dir="${tmp}/${filename}/decompressedHdoc" perm="777"/>
    <echo message="${tmp}/${filename}/decompressedHdoc"/>
  </target>

  <target name="content">
    <!-- Applies ${xsl}/transformation.xsl to the unzipped content.xml and writes
         ${out}/${filename}_data.xml, the data file to be imported into BaseX.
         "filename" and "lib" are passed to the stylesheet as parameters. -->
    <xslt in="${tmp}/${filename}/decompressedHdoc/content.xml"
          out="${out}/${filename}_data.xml"
          style="${xsl}/transformation.xsl"
          processor="org.apache.tools.ant.taskdefs.optional.TraXLiaison">
      <param name="filename" expression="${filename}"/>
      <param name="lib" expression="${lib}"/>
    </xslt>
    <echo message="${filename}"/>
  </target>
</project>
\ No newline at end of file
@echo off
REM Windows launcher for the hdoc_to_basex converter: runs the Ant build file
REM hdoc_to_basex.ant with the contents of the lib folder on the Java classpath.
set lib=lib
set ant=hdoc_to_basex.ant
REM The first command-line argument is handed to Ant as the program.param property.
set antparam=-Dprogram.param=%1
REM Wildcard classpath entry covering the lib folder.
set scJarList=%lib%\*
java.exe -classpath "%scJarList%" -Xmx150m org.apache.tools.ant.Main -buildfile %ant% %antparam%
REM Wait for a key press so the Ant output stays visible.
pause
REM Disabled alternative: same command started in a minimized window.
REM start /MIN java.exe -classpath "%scJarList%" -Xmx150m org.apache.tools.ant.Main -buildfile %ant% %antparam%
#!/bin/sh
# Unix launcher for the hdoc_to_basex converter: finds a suitable java command,
# then runs the Ant build file hdoc_to_basex.ant with lib/* on the classpath.
lib="lib"
ant="hdoc_to_basex.ant"
# The first command-line argument is handed to Ant as the program.param property.
antparam="-Dprogram.param=$1"
# Locate java and check that it is a HotSpot or OpenJDK build
vJavaCmd="java"
# xCheckJava VARNAME
#   VARNAME is the NAME of a variable holding a java command.
#   The variable is overwritten with "false" when it is empty or when the
#   output of "<command> -version" mentions neither HotSpot nor OpenJDK;
#   otherwise it is left unchanged.
xCheckJava () {
# Build the text "$VARNAME", then evaluate it to read the variable's value.
vInputVarName=\$"$1"
vInputVarVal=`eval "expr \"$vInputVarName\" "`
if [ -z "$vInputVarVal" ];then
eval "$1=false"
return
fi
# -version output is read from stderr as well (2>&1); only the first match is kept.
vSunJavaFound=`$vInputVarVal -version 2>&1 | grep -Eo -m 1 "(HotSpot)|(OpenJDK)"`
if [ "$vSunJavaFound" != "HotSpot" ] && [ "$vSunJavaFound" != "OpenJDK" ] ; then
eval "$1=false"
return
fi
}
# First try "java" as resolved through the PATH, then fall back to $JAVA_HOME/bin/java.
xCheckJava vJavaCmd
if [ "$vJavaCmd" = "false" ]; then
vJavaCmd="$JAVA_HOME/bin/java"
xCheckJava vJavaCmd
if [ "$vJavaCmd" = "false" ]; then
echo "ERREUR: JRE de SUN introuvable. Veuillez déclarer la variable d'environnement JAVA_HOME."
exit 1
fi
fi
# Run the command
scJarList="$lib/*"
$vJavaCmd -classpath "$scJarList:" -Xmx150m org.apache.tools.ant.Main -buildfile $ant $antparam
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
                exclude-result-prefixes="xs"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:h="http://www.utc.fr/ics/hdoc/xhtml">

  <!-- Root of the hdoc content: everything is emitted inside one <document> element. -->
  <xsl:template match="h:html">
    <document>
      <xsl:apply-templates mode="title"/>
    </document>
  </xsl:template>

  <!-- Head: the document title, followed by whatever the meta elements produce. -->
  <xsl:template match="h:head" mode="title">
    <titre>
      <xsl:value-of select="h:title"/>
    </titre>
    <xsl:apply-templates select="h:meta"/>
  </xsl:template>

  <!-- Author metadata: only the part of @content located before the first
       ", <digit>" separator is kept; that part is split on ", " and each piece
       becomes one <author>. -->
  <xsl:template match="h:meta[@name='author']">
    <authors>
      <xsl:for-each select="tokenize(tokenize(@content, ', \d')[1], ', ')">
        <author>
          <xsl:value-of select="."/>
        </author>
      </xsl:for-each>
    </authors>
  </xsl:template>

  <!-- Body: the sections that are direct children of the body, wrapped in <sections>. -->
  <xsl:template match="h:body" mode="title">
    <sections>
      <xsl:apply-templates select="h:section"/>
    </sections>
  </xsl:template>

  <!-- Section: its h1 title, the paragraphs built from its div children, and one
       entry per nested section (handled in mode "soussect"). -->
  <xsl:template match="h:section">
    <section>
      <titresection>
        <xsl:value-of select="h:header/h:h1"/>
      </titresection>
      <contenu>
        <paragraphes>
          <xsl:apply-templates select="h:div"/>
        </paragraphes>
        <xsl:apply-templates select="h:section" mode="soussect"/>
      </contenu>
    </section>
  </xsl:template>

  <!-- Nested section: currently emitted as an empty <soussection> element. -->
  <xsl:template match="h:section" mode="soussect">
    <soussection/>
  </xsl:template>

  <!-- Div: when it has at least one p child, its whole string value becomes one
       <paragraphe>; each ul child is then processed by the h:ul template. -->
  <xsl:template match="h:div">
    <xsl:if test="h:p">
      <paragraphe>
        <xsl:value-of select="."/>
      </paragraphe>
    </xsl:if>
    <xsl:apply-templates select="h:ul"/>
  </xsl:template>

  <!-- Unordered list: one <paragraphe> per list item, filled from the item's p child. -->
  <xsl:template match="h:ul">
    <xsl:for-each select="h:li">
      <paragraphe>
        <xsl:value-of select="h:p"/>
      </paragraphe>
    </xsl:for-each>
  </xsl:template>

</xsl:stylesheet>
...@@ -54,6 +54,7 @@ ...@@ -54,6 +54,7 @@
</sp:abstract> </sp:abstract>
</xsl:template> </xsl:template>
<!-- Body related templates. --> <!-- Body related templates. -->
<xsl:template match="h:body"> <xsl:template match="h:body">
<xsl:if test="./*"> <xsl:if test="./*">
...@@ -66,19 +67,216 @@ ...@@ -66,19 +67,216 @@
<!-- CanoProf's Activite TetM = hdoc's body/Section/Section = Opale's Grain --> <!-- CanoProf's Activite TetM = hdoc's body/Section/Section = Opale's Grain -->
<!-- CanoProf's Activite TetM (section) = hdoc's body/Section/Section/(Section...) = Opale's Grain (partie) --> <!-- CanoProf's Activite TetM (section) = hdoc's body/Section/Section/(Section...) = Opale's Grain (partie) -->
<!-- TODO :CanoProf's Seance = hdoc's body/Section = Opale's Division --> <!-- CanoProf's Seance = hdoc's body/Section = Opale's Division or Activite-->
<xsl:template match="h:body/h:section"> <xsl:template match="h:body/h:section">
<xsl:if test="./*">
<sp:session>
<cp:session>
<cp:sessionM>
<sp:title>
<xsl:if test="not(./h:header/h:h1/text())"> Untitled </xsl:if>
<xsl:value-of select="./h:header/h:h1"/>
</sp:title>
</cp:sessionM>
<xsl:apply-templates select="./*"/>
</cp:session>
</sp:session>
</xsl:if>
</xsl:template> </xsl:template>
<!-- TODO : if Section have only a dev (no sub section), we create a short activty to print text into a seance --> <!-- if Section have only a dev (no sub section), we create a short activty to print text into a seance -->
<xsl:template match="h:body/h:section/h:div"> <xsl:template match="h:body/h:section/h:div">
<sp:shortActivity>
<cp:shortActivity>
<cp:activityM>
<sp:title>
<xsl:value-of select="../h:header/h:h1"/>
</sp:title>
</cp:activityM>
<sp:body>
<cp:flow>
<xsl:apply-templates select="./*"/>
</cp:flow>
</sp:body>
</cp:shortActivity>
</sp:shortActivity>
</xsl:template>
<!-- Toutes les introductions de sections sont transformés en résumé. -->
<xsl:template match="h:header/h:div[@data-hdoc-type = 'introduction']">
<sp:abstract>
<cp:txtDesc>
<sc:para xml:space="preserve">
<xsl:value-of select="./text()"/>
</sc:para>
</cp:txtDesc>
</sp:abstract>
</xsl:template>
<!-- Text related templates -->
<!-- Text blocks (p, ul, ol). Only a block with no preceding p/ul/ol sibling
     produces output: the "blockloop" template it calls walks the block and its
     following siblings itself. -->
<xsl:template match="h:p | h:ul | h:ol">
  <xsl:if test="not(preceding-sibling::h:p or preceding-sibling::h:ul or preceding-sibling::h:ol)">
    <xsl:choose>
      <!-- Directly inside a div: the Canoprof text wrappers have to be added. -->
      <xsl:when test="parent::*[name() = 'div']">
        <sp:txt>
          <cp:txt>
            <xsl:call-template name="blockloop"/>
          </cp:txt>
        </sp:txt>
      </xsl:when>
      <!-- Anywhere else (for instance inside a list or a table): emitted as is. -->
      <xsl:otherwise>
        <xsl:call-template name="blockloop"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:if>
</xsl:template>
<xsl:template name="blockloop">
<xsl:for-each select=". | ./following-sibling::*">
<xsl:choose>
<!-- Paragraph -->
<xsl:when test="name() = 'p' and (./* | ./text())">
<sc:para xml:space="preserve">
<xsl:apply-templates select="./* | ./text()"/>
</sc:para>
</xsl:when>
<!-- Unordered list -->
<xsl:when test="name() = 'ul' and (./h:li/h:p/text())">
<sc:itemizedList>
<xsl:apply-templates select="./* | ./text()"/>
</sc:itemizedList>
</xsl:when>
<!-- Ordered list -->
<xsl:when test="name() = 'ol' and (./h:li/h:p/text())">
<sc:orderedList>
<xsl:apply-templates select="./* | ./text()"/>
</sc:orderedList>
</xsl:when>
</xsl:choose>
</xsl:for-each>
</xsl:template> </xsl:template>
<!-- TODO : CanoProf's Activite TetM = hdoc's body/Section/Section = Opale's Grain --> <xsl:template match="h:li">
<xsl:template match="h:body/h:section/h:section"> <sc:listItem>
<xsl:apply-templates select="./* | ./text()"/>
</sc:listItem>
</xsl:template>
<!-- Italic: a non-empty h:i becomes an inline style with role "specific". -->
<xsl:template match="h:i">
  <xsl:if test="* | text()">
    <sc:inlineStyle role="specific">
      <xsl:apply-templates select="* | text()"/>
    </sc:inlineStyle>
  </xsl:if>
</xsl:template>
<xsl:template match="h:em">
<xsl:if test="./* | ./text()">
<sc:inlineStyle role="emphasis">
<xsl:apply-templates select="./* | ./text()"/>
</sc:inlineStyle>
</xsl:if>
</xsl:template> </xsl:template>
<!-- Quotation: a non-empty h:q becomes a phrase with role "quote". -->
<xsl:template match="h:q">
  <xsl:if test="* | text()">
    <sc:phrase role="quote">
      <xsl:apply-templates select="* | text()"/>
    </sc:phrase>
  </xsl:if>
</xsl:template>

<!-- Subscript: a non-empty h:sub becomes a text leaf with role "ind". -->
<xsl:template match="h:sub">
  <xsl:if test="* | text()">
    <sc:textLeaf role="ind">
      <xsl:apply-templates select="* | text()"/>
    </sc:textLeaf>
  </xsl:if>
</xsl:template>

<!-- Superscript: a non-empty h:sup becomes a text leaf with role "exp". -->
<xsl:template match="h:sup">
  <xsl:if test="* | text()">
    <sc:textLeaf role="exp">
      <xsl:apply-templates select="* | text()"/>
    </sc:textLeaf>
  </xsl:if>
</xsl:template>
<!-- Hyperlink: an h:a that has element or text children becomes a phrase with role "url".
     The phrase holds a cp:link carrying the @href value in sp:url, then an sp:title
     when the anchor has a @title attribute, then the string value of the anchor. -->
<xsl:template match="h:a">
<xsl:if test="./* | ./text()">
<sc:phrase role="url">
<!-- NOTE(review): the sc/cp/sp prefixes are re-declared on cp:link; the stylesheet root is
     not visible here, so confirm they bind the same URIs before removing them. -->
<cp:link xmlns:sc="http://www.utc.fr/ics/scenari/v3/core"
xmlns:cp="canope.fr:canoprof"
xmlns:sp="http://www.utc.fr/ics/scenari/v3/primitive">
<sp:url>
<xsl:value-of select="./@href"/>
</sp:url>
</cp:link>
<!-- NOTE(review): sp:title is emitted as a sibling of cp:link, not inside it; check
     against the target schema. -->
<xsl:if test="./@title">
<sp:title>
<xsl:value-of select="./@title"/>
</sp:title>
</xsl:if>
<xsl:value-of select="."/>
</sc:phrase>
</xsl:if>
</xsl:template>
<!-- Table related templates -->
<!-- Table: converted to sc:table with an optional caption followed by its rows.
     A table that is a direct child of a div is additionally wrapped in the
     sp:txt / cp:txt text markup. -->
<xsl:template match="h:table">
  <xsl:choose>
    <!-- Not directly inside a div: the bare table is emitted. -->
    <xsl:when test="not(parent::*[name() = 'div'])">
      <sc:table>
        <xsl:if test="h:caption">
          <sc:caption>
            <xsl:value-of select="h:caption"/>
          </sc:caption>
        </xsl:if>
        <xsl:apply-templates select="h:tr"/>
      </sc:table>
    </xsl:when>
    <!-- Direct child of a div: same table, surrounded by the text markup. -->
    <xsl:otherwise>
      <sp:txt>
        <cp:txt>
          <sc:table>
            <xsl:if test="h:caption">
              <sc:caption>
                <xsl:value-of select="h:caption"/>
              </sc:caption>
            </xsl:if>
            <xsl:apply-templates select="h:tr"/>
          </sc:table>
        </cp:txt>
      </sp:txt>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>

<!-- Table row: one sc:row holding the converted td cells. -->
<xsl:template match="h:tr">
  <sc:row>
    <xsl:apply-templates select="h:td"/>
  </sc:row>
</xsl:template>

<!-- Table cell: one sc:cell holding the converted child elements of the td. -->
<xsl:template match="h:td">
  <sc:cell>
    <xsl:apply-templates select="*"/>
  </sc:cell>
</xsl:template>
<xsl:template match="h:header"/>
<!-- Its content is already used in <xsl:template match="h:section"> -->
<xsl:template match="h:h6"/>
<!-- Its content is already used in <xsl:template match="h:div"> -->
<xsl:template match="h:h1"/>
<!-- Its content is already used in <xsl:template match="h:section"> -->
</xsl:stylesheet> </xsl:stylesheet>
<
...@@ -18,27 +18,40 @@ Credits ...@@ -18,27 +18,40 @@ Credits
Presentation Presentation
------------ ------------