Commit caa2e203 authored by Gregory's avatar Gregory

Merge branch 'master' of https://gitlab.utc.fr/crozatst/hdoc

parents f194c1a3 349c6133
......@@ -6,8 +6,8 @@ http://www.gnu.org/licenses/gpl-3.0.txt
## Credits
- 2016
- - Etienne Chognard
- - Fabien Boucaud
- Etienne Chognard
- Fabien Boucaud
- 2015
- Jean-Côme Douteau
- Gabrielle Rit
......@@ -54,7 +54,14 @@ Example :
Currently available on: https://framemo.org/framapad_to_opale
See also : https://bimestriel.framapad.org/p/nf29_framapad_to_opale for the full documentation of our working process.
## TODO
- Nested Lists
- Indentation
- Titles (and therefore Structure of the doc)
- Coloured text
- Code
- Markdown
## Technical notes
......
<?xml version="1.0" encoding="UTF-8"?>
<project basedir="." name="myantce" default="main">
<property file="etherpad_to_hdoc.properties"/>
<property file="framapad_to_hdoc.properties"/>
<!-- import classes -->
<taskdef resource="net/sf/antcontrib/antlib.xml"/>
<taskdef name="htmlcleaner" classname="org.htmlcleaner.HtmlCleanerForAnt"/>
......
@echo off
set lib=lib
set ant=etherpad_to_hdoc.ant
set ant=framapad_to_hdoc.ant
set antparam=-Dprogram.param=%1
set scJarList=%lib%\*
......
#!/bin/sh
lib="lib"
ant="etherpad_to_hdoc.ant"
ant="framapad_to_hdoc.ant"
antparam="-Dprogram.param=$1"
#Recherche de java et controle que se soit une version SUN
......
<?xml version="1.0" encoding="UTF-8"?>
<project basedir="." name="myantce" default="main">
<property file="etherpad_to_opale.properties"/>
<property file="framapad_to_opale.properties"/>
<!-- CHECK FOR OS FAMILY -->
<condition property="is_windows">
<os family="windows"/>
......@@ -24,7 +24,7 @@
<include name="*.html"/>
</fileset>
</copy>
<exec dir="../etherpad_to_hdoc" executable="run.bat"/>
<exec dir="../framapad_to_hdoc" executable="run.bat"/>
</target>
<target name="to_hdoc_unix" if="${is_unix}">
......@@ -38,8 +38,8 @@
<include name="*.html"/>
</fileset>
</copy>
<exec executable="/bin/bash" dir="../etherpad_to_hdoc">
<arg value="../etherpad_to_hdoc/run.sh"/>
<exec executable="/bin/bash" dir="../framapad_to_hdoc">
<arg value="../framapad_to_hdoc/run.sh"/>
</exec>
</target>
......
......@@ -5,8 +5,8 @@ xsl = ${basedir}/xsl
lib = ${basedir}/lib
log = ${basedir}/log
eth_in = ../etherpad_to_hdoc/input
eth_out = ../etherpad_to_hdoc/output
eth_in = ../framapad_to_hdoc/input
eth_out = ../framapad_to_hdoc/output
opa_in = ../hdoc_to_opale/input
opa_out = ../hdoc_to_opale/output
rootfilename = content.xml
@echo off
set lib=lib
set ant=etherpad_to_opale.ant
set ant=framapad_to_opale.ant
set antparam=-Dprogram.param=%1
set scJarList=%lib%\*
......
#!/bin/sh
lib="lib"
ant="etherpad_to_opale.ant"
ant="framapad_to_opale.ant"
antparam="-Dprogram.param=$1"
#Recherche de java et controle que se soit une version SUN
......
Converter hdoc_to_basex
-----------------------
The purpose of this converter is to produce, from an HDOC file, an XML data file suitable for import into BaseX for further XQuery requests.
License GPL3.0
--------------
http://www.gnu.org/licenses/gpl-3.0.txt
Credits
-------
* Simei YIN
* Baptiste MONTANGE
Dependencies
----------
This project can be used alone if you want to import an HDOC file into basex.
## User stories
------------------
- Among a group of courses, user can search by title, author or keywords of the course.
- By searching a certain keyword, user can obtain the sections that contain it with their hierarchy levels in the course.
- In a certain section, by searching a keyword, user can obtain the paragraphs that contain it.
- User can get definitions related to a keyword
- User can get examples whose titles contain a keyword
Step by step :
[Step 1 : File transformation]
- Put the files .hdoc you want to deal with in the folder [input](https://gitlab.utc.fr/crozatst/hdoc/tree/master/hdoc_to_basex/input)
- Run the transformation program (Win : double click run.bat, Linux : execute run.sh)
[Step 2 : Create data base in basex]
- Download and install [BaseX](http://basex.org/products/download/all-downloads/)
- Run BasexGui
- In the Text Editor of BaseX, open the command script "createbd.bxs" in folder [/basex/command](https://gitlab.utc.fr/crozatst/hdoc/tree/master/hdoc_to_basex/basex/command).
Follow the instructions in the script, and then execute it.
[Step 3 : Make XQuery request]
-
\ No newline at end of file
# This script creates a BaseX database from the XML files located in the "output" folder.
# Before executing this script, make sure that the transformation (step 1 of the step-by-step guide in README.md) has finished successfully.
# Syntax : CREATE DB [name] ([input])
# Example (the path below is machine-specific: replace it with the location of the output folder on your own disk) :
CREATE DB myDB D:\School\UTC\GI04\NF29\Projet\hdoc\hdoc_to_basex\output
#CREATE DB myDB [..the directory of the project on your local disk..]/hdoc/hdoc_to_basex/output
# P.S. If you get the error message : Resource "..." not found, please check that the directory path is correct
\ No newline at end of file
(: Returns the documents that have at least one author whose name matches $name. :)
(: $name is a regular expression. :)
(: For example, $name := '^Baptiste Montangé$' searches for an exact author name. :)
(: For example, $name := 'Montangé' searches for documents having an author whose name contains 'Montangé'. :)
(: Remark : the lower-case accented letters listed in $accented are replaced by their :)
(: unaccented form in both the pattern and the author names before matching. :)
(: Each matching document is returned once, even when several of its authors match. :)
<documents>{
  let $accented := 'áàâäéèêëíìîïóòôöúùûü'
  let $plain := 'aaaaeeeeiiiioooouuuu'
  let $name := 'Montangé'
  let $name_noAcc := translate($name, $accented, $plain)
  for $doc in //document
  (: A quantified test yields each document at most once, so no grouping on the title is needed. :)
  where some $author in $doc/authors/author
        satisfies matches(translate($author, $accented, $plain), $name_noAcc)
  return $doc
}</documents>
\ No newline at end of file
(: Returns the documents whose title matches a keyword. :)
(: $name holds a regular expression that is compared case-insensitively with each title. :)
(: For example, $name := '^NF29_HdocEtherpad$' searches for an exact title. :)
(: For example, $name := 'NF29' searches for documents whose title contains 'NF29'. :)
<documents>{
  let $name := '^NF29_HdocEtherpad$'
  return //document[matches(titre, $name, "i")]
}</documents>
\ No newline at end of file
# Folder locations, all expressed relative to the Ant project's basedir.
# NOTE(review): presumably this is the build.properties file loaded by hdoc_to_basex.ant - confirm.
# Libraries folder
lib=${basedir}/lib
# Log folder
log=${basedir}/log
# XSL stylesheets folder
xsl=${basedir}/xsl
# Input folder
in=${basedir}/input
# Output folder
out=${basedir}/output
# Temporary working folder
tmp=${basedir}/tmp
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!--
	Converts every .hdoc archive found under ${in} into an XML data file
	(${out}/[file name]_data.xml) meant to be imported into BaseX later.
	The folder properties (in, out, tmp, log, xsl, lib) are loaded from build.properties.
-->
<project basedir="." name="myantce" default="convert">
	<taskdef resource="net/sf/antcontrib/antlib.xml"/>
	<property file="build.properties"/>

	<!-- Entry point: prepares the working folders, converts each input file, then cleans up. -->
	<target name="convert">
		<!-- Preparation: start from empty tmp, output and log folders.
		     tmp is removed first so that leftovers of an aborted run cannot leak into this one. -->
		<delete dir="${tmp}" failonerror="false"/>
		<mkdir dir="${tmp}"/>
		<delete dir="${out}" failonerror="false"/>
		<mkdir dir="${out}"/>
		<delete dir="${log}" failonerror="false"/>
		<mkdir dir="${log}"/>
		<echo message="DEBUT"/>

		<!-- Convert all the hdoc files under ${in}: for each file, the targets
		     "UnzipHdocFile" and "content" are called in turn. -->
		<for param="inputFile">
			<path>
				<fileset dir="${in}" includes="**/*.hdoc"/>
			</path>
			<sequential>
				<local name="filename"/>
				<basename property="filename" file="@{inputFile}"/>
				<antcall target="UnzipHdocFile">
					<param name="filename" value="${filename}"/>
					<!-- Full path of the archive: the fileset is recursive, so the file
					     is not necessarily located directly in ${in}. -->
					<param name="inputFile" value="@{inputFile}"/>
				</antcall>
				<antcall target="content">
					<param name="filename" value="${filename}"/>
				</antcall>
				<!-- Logged once per converted file. -->
				<echo message="FIN"/>
			</sequential>
		</for>

		<!-- Clean up the temporary folder. -->
		<delete dir="${tmp}" failonerror="false"/>
	</target>

	<!--
		Unzips one hdoc archive into ${tmp}/${filename}/decompressedHdoc; the rest of the
		build only refers to the archive through that folder.
		Parameters: filename (base name of the archive, required),
		            inputFile (full path of the archive, optional).
	-->
	<target name="UnzipHdocFile">
		<!-- Backward-compatible default: callers that only pass "filename" still read ${in}/${filename}.
		     Ant properties are immutable, so an inputFile passed by the caller is kept as is. -->
		<property name="inputFile" location="${in}/${filename}"/>
		<unzip src="${inputFile}" dest="${tmp}/${filename}/decompressedHdoc"/>
		<chmod dir="${tmp}/${filename}/decompressedHdoc" perm="777"/>
		<echo message="${tmp}/${filename}/decompressedHdoc"/>
	</target>

	<!--
		Transforms the content.xml of an unzipped hdoc archive into the data XML file
		${out}/${filename}_data.xml, using ${xsl}/transformation.xsl.
		Parameter: filename (base name of the archive).
	-->
	<target name="content">
		<xslt in="${tmp}/${filename}/decompressedHdoc/content.xml" out="${out}/${filename}_data.xml" style="${xsl}/transformation.xsl" processor="org.apache.tools.ant.taskdefs.optional.TraXLiaison">
			<param name="filename" expression="${filename}"/>
			<param name="lib" expression="${lib}"/>
		</xslt>
		<echo message="${filename}"/>
	</target>
</project>
\ No newline at end of file
@echo off
REM Launches the hdoc_to_basex Ant build with every jar of the lib folder on the classpath.
REM Usage: run.bat [param]   (param is forwarded to Ant as -Dprogram.param)
REM The lib folder and the build file are located from the folder that contains this script,
REM so the script also works when it is started from another working directory.
set lib=%~dp0lib
set ant=%~dp0hdoc_to_basex.ant
set antparam=-Dprogram.param=%1
set scJarList=%lib%\*
REM The build file path is quoted because the script folder may contain spaces.
java.exe -classpath "%scJarList%" -Xmx150m org.apache.tools.ant.Main -buildfile "%ant%" %antparam%
pause
REM start /MIN java.exe -classpath "%scJarList%" -Xmx150m org.apache.tools.ant.Main -buildfile "%ant%" %antparam%
#!/bin/sh
# Launches the hdoc_to_basex Ant build with a HotSpot or OpenJDK java.
# Usage: run.sh [param]   (param is forwarded to Ant as -Dprogram.param)

# Locate lib and the build file from the script's own directory, so that the
# script also works when it is started from another working directory.
vScriptDir=`dirname "$0"`
lib="$vScriptDir/lib"
ant="$vScriptDir/hdoc_to_basex.ant"
antparam="-Dprogram.param=$1"

# Look for java and check that it is a HotSpot or OpenJDK build
vJavaCmd="java"

# xCheckJava VARNAME
# VARNAME is the name of a variable holding a java command. The variable is
# overwritten with "false" when it is empty or when "<command> -version" does
# not report HotSpot or OpenJDK; otherwise it is left untouched.
xCheckJava () {
	# Indirect read of the variable whose name was passed in $1
	eval "vInputVarVal=\$$1"
	if [ -z "$vInputVarVal" ];then
		eval "$1=false"
		return
	fi
	# Quoted so that a java path containing spaces is run as a single command
	vSunJavaFound=`"$vInputVarVal" -version 2>&1 | grep -Eo -m 1 "(HotSpot)|(OpenJDK)"`
	if [ "$vSunJavaFound" != "HotSpot" ] && [ "$vSunJavaFound" != "OpenJDK" ] ; then
		eval "$1=false"
		return
	fi
}

# First try the java found on the PATH, then fall back to JAVA_HOME
xCheckJava vJavaCmd
if [ "$vJavaCmd" = "false" ]; then
	vJavaCmd="$JAVA_HOME/bin/java"
	xCheckJava vJavaCmd
	if [ "$vJavaCmd" = "false" ]; then
		echo "ERREUR: JRE de SUN introuvable. Veuillez déclarer la variable d'environnement JAVA_HOME."
		exit 1
	fi
fi

# Run the command (the trailing ':' of the classpath is kept from the original script)
scJarList="$lib/*"
"$vJavaCmd" -classpath "$scJarList:" -Xmx150m org.apache.tools.ant.Main -buildfile "$ant" "$antparam"
<?xml version="1.0" encoding="UTF-8"?>
<!--
	Turns the content.xml of an hdoc file into a flat data document:
	document / titre, authors / author, sections / section / (titresection, contenu / paragraphes / paragraphe).
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
	xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:h="http://www.utc.fr/ics/hdoc/xhtml"
	exclude-result-prefixes="xs" version="2.0">

	<!-- Root: one <document> per hdoc file; head and body are handled in mode "title". -->
	<xsl:template match="h:html">
		<document>
			<xsl:apply-templates mode="title"/>
		</document>
	</xsl:template>

	<!-- Head: document title, then the meta elements. -->
	<xsl:template match="h:head" mode="title">
		<titre>
			<xsl:value-of select="h:title"/>
		</titre>
		<xsl:apply-templates select="h:meta"/>
	</xsl:template>

	<!-- Author meta: only the part of @content located before the first ", <digit>"
	     is kept; it is then split on ", " into one <author> per name. -->
	<xsl:template match="h:meta[@name='author']">
		<authors>
			<xsl:for-each select="tokenize(tokenize(@content, ', \d')[1], ', ')">
				<author>
					<xsl:value-of select="."/>
				</author>
			</xsl:for-each>
		</authors>
	</xsl:template>

	<!-- Body: wraps the top-level sections. -->
	<xsl:template match="h:body" mode="title">
		<sections>
			<xsl:apply-templates select="h:section"/>
		</sections>
	</xsl:template>

	<!-- Section: its title (header/h1), the paragraphs of its divs, then its sub-sections. -->
	<xsl:template match="h:section">
		<section>
			<titresection>
				<xsl:value-of select="h:header/h:h1"/>
			</titresection>
			<contenu>
				<paragraphes>
					<xsl:apply-templates select="h:div"/>
				</paragraphes>
				<xsl:apply-templates select="h:section" mode="soussect"/>
			</contenu>
		</section>
	</xsl:template>

	<!-- Sub-section: currently emitted as an empty placeholder.
	     NOTE(review): the content of sub-sections is not exported; confirm whether this is intended. -->
	<xsl:template match="h:section" mode="soussect">
		<soussection/>
	</xsl:template>

	<!-- Div: when it contains at least one p, the whole text of the div becomes one <paragraphe>;
	     the items of its ul children are then emitted as additional paragraphs.
	     NOTE(review): for a div holding both p and ul, the list text therefore appears twice; confirm whether this is intended. -->
	<xsl:template match="h:div">
		<xsl:if test="h:p">
			<paragraphe>
				<xsl:value-of select="."/>
			</paragraphe>
		</xsl:if>
		<xsl:apply-templates select="h:ul"/>
	</xsl:template>

	<!-- Unordered list: one <paragraphe> per item, built from the p children of the item. -->
	<xsl:template match="h:ul">
		<xsl:for-each select="h:li">
			<paragraphe>
				<xsl:value-of select="h:p"/>
			</paragraphe>
		</xsl:for-each>
	</xsl:template>
</xsl:stylesheet>
......@@ -54,6 +54,7 @@
</sp:abstract>
</xsl:template>
<!-- Body related templates. -->
<xsl:template match="h:body">
<xsl:if test="./*">
......@@ -66,19 +67,216 @@
<!-- CanoProf's Activite TetM = hdoc's body/Section/Section = Opale's Grain -->
<!-- CanoProf's Activite TetM (section) = hdoc's body/Section/Section/(Section...) = Opale's Grain (partie) -->
<!-- TODO :CanoProf's Seance = hdoc's body/Section = Opale's Division -->
<!-- CanoProf's Seance = hdoc's body/Section = Opale's Division or Activite-->
<xsl:template match="h:body/h:section">
<xsl:if test="./*">
<sp:session>
<cp:session>
<cp:sessionM>
<sp:title>
<xsl:if test="not(./h:header/h:h1/text())"> Untitled </xsl:if>
<xsl:value-of select="./h:header/h:h1"/>
</sp:title>
</cp:sessionM>
<xsl:apply-templates select="./*"/>
</cp:session>
</sp:session>
</xsl:if>
</xsl:template>
<!-- TODO : if Section have only a dev (no sub section), we create a short activty to print text into a seance -->
<!-- If a section has only a div (no sub-section), we create a short activity to display its text within a session -->
<xsl:template match="h:body/h:section/h:div">
<sp:shortActivity>
<cp:shortActivity>
<cp:activityM>
<sp:title>
<xsl:value-of select="../h:header/h:h1"/>
</sp:title>
</cp:activityM>
<sp:body>
<cp:flow>
<xsl:apply-templates select="./*"/>
</cp:flow>
</sp:body>
</cp:shortActivity>
</sp:shortActivity>
</xsl:template>
<!-- Every section introduction is converted into an abstract. -->
<xsl:template match="h:header/h:div[@data-hdoc-type = 'introduction']">
<sp:abstract>
<cp:txtDesc>
<sc:para xml:space="preserve">
<xsl:value-of select="./text()"/>
</sc:para>
</cp:txtDesc>
</sp:abstract>
</xsl:template>
<!-- Text related templates -->
<xsl:template match="h:p | h:ul | h:ol">
<!-- Only a p/ul/ol that has no preceding p, ul or ol sibling produces output:
     the "blockloop" template it calls walks through the following siblings itself. -->
<xsl:if test="not(preceding-sibling::h:p | preceding-sibling::h:ul | preceding-sibling::h:ol)">
<xsl:choose>
<!-- Direct child of a div: the block must be wrapped in Canoprof's text markups -->
<xsl:when test="parent::*[name() = 'div']">
<sp:txt>
<cp:txt>
<xsl:call-template name="blockloop"/>
</cp:txt>
</sp:txt>
</xsl:when>
<!-- Anywhere else (for instance within a list or a table): the block is emitted as is -->
<xsl:otherwise>
<xsl:call-template name="blockloop"/>
</xsl:otherwise>
</xsl:choose>
</xsl:if>
</xsl:template>
<xsl:template name="blockloop">
<xsl:for-each select=". | ./following-sibling::*">
<xsl:choose>
<!-- Paragraph -->
<xsl:when test="name() = 'p' and (./* | ./text())">
<sc:para xml:space="preserve">
<xsl:apply-templates select="./* | ./text()"/>
</sc:para>
</xsl:when>
<!-- Unordered list -->
<xsl:when test="name() = 'ul' and (./h:li/h:p/text())">
<sc:itemizedList>
<xsl:apply-templates select="./* | ./text()"/>
</sc:itemizedList>
</xsl:when>
<!-- Ordered list -->
<xsl:when test="name() = 'ol' and (./h:li/h:p/text())">
<sc:orderedList>
<xsl:apply-templates select="./* | ./text()"/>
</sc:orderedList>
</xsl:when>
</xsl:choose>
</xsl:for-each>
</xsl:template>
<!-- TODO : CanoProf's Activite TetM = hdoc's body/Section/Section = Opale's Grain -->
<xsl:template match="h:body/h:section/h:section">
<xsl:template match="h:li">
<sc:listItem>
<xsl:apply-templates select="./* | ./text()"/>
</sc:listItem>
</xsl:template>
<xsl:template match="h:i">
<xsl:if test="./* | ./text()">
<sc:inlineStyle role="specific">
<xsl:apply-templates select="./* | ./text()"/>
</sc:inlineStyle>
</xsl:if>
</xsl:template>
<xsl:template match="h:em">
<xsl:if test="./* | ./text()">
<sc:inlineStyle role="emphasis">
<xsl:apply-templates select="./* | ./text()"/>
</sc:inlineStyle>
</xsl:if>
</xsl:template>
<xsl:template match="h:q">
<xsl:if test="./* | ./text()">
<sc:phrase role="quote">
<xsl:apply-templates select="./* | ./text()"/>
</sc:phrase>
</xsl:if>
</xsl:template>
<xsl:template match="h:sub">
<xsl:if test="./* | ./text()">
<sc:textLeaf role="ind">
<xsl:apply-templates select="./* | ./text()"/>
</sc:textLeaf>
</xsl:if>
</xsl:template>
<xsl:template match="h:sup">
<xsl:if test="./* | ./text()">
<sc:textLeaf role="exp">
<xsl:apply-templates select="./* | ./text()"/>
</sc:textLeaf>
</xsl:if>
</xsl:template>
<xsl:template match="h:a">
<xsl:if test="./* | ./text()">
<sc:phrase role="url">
<cp:link xmlns:sc="http://www.utc.fr/ics/scenari/v3/core"
xmlns:cp="canope.fr:canoprof"
xmlns:sp="http://www.utc.fr/ics/scenari/v3/primitive">
<sp:url>
<xsl:value-of select="./@href"/>
</sp:url>
</cp:link>
<xsl:if test="./@title">
<sp:title>
<xsl:value-of select="./@title"/>
</sp:title>
</xsl:if>
<xsl:value-of select="."/>
</sc:phrase>
</xsl:if>
</xsl:template>
<!-- Table related templates -->
<xsl:template match="h:table">
<!-- The table markup (optional caption, then the rows) is identical in both cases below, so it is built once. -->
<xsl:variable name="tableMarkup">
<sc:table>
<xsl:if test="./h:caption">
<sc:caption>
<xsl:value-of select="./h:caption"/>
</sc:caption>
</xsl:if>
<xsl:apply-templates select="./h:tr"/>
</sc:table>
</xsl:variable>
<xsl:choose>
<!-- A table that is a direct child of a div must be surrounded by the text markups. -->
<xsl:when test="parent::*[name() = 'div']">
<sp:txt>
<cp:txt>
<xsl:copy-of select="$tableMarkup"/>
</cp:txt>
</sp:txt>
</xsl:when>
<!-- Otherwise the table is emitted on its own. -->
<xsl:otherwise>
<xsl:copy-of select="$tableMarkup"/>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
<xsl:template match="h:tr">
<sc:row>
<xsl:apply-templates select="./h:td"/>
</sc:row>
</xsl:template>
<xsl:template match="h:td">
<sc:cell>
<xsl:apply-templates select="./*"/>
</sc:cell>
</xsl:template>
<xsl:template match="h:header"/>
<!-- Its content is already used in <xsl:template match="h:section"> -->
<xsl:template match="h:h6"/>
<!-- Its content is already used in <xsl:template match="h:div"> -->
<xsl:template match="h:h1"/>
<!-- Its content is already used in <xsl:template match="h:section"> -->
</xsl:stylesheet>
......@@ -18,27 +18,40 @@ Credits
Presentation
------------
"Hdoc to Epub" is an hdoc converter to epub files. It's a set of ANT scripts and XSL files
Dependencies
------------
There are no particular dependencies needed to run the converter.
User Documentation
------------------
### Scenario
La personne possède un hdoc et voudrait le convertir en epub. Il se dirige vers le site hdoc et télécharge le zip. Il extrait le zip et se rend vers le dossier hdoc_to_epub. Il lit le README.md et suit les directives pour obtenir son format epub. Pour cela, il doit coller son hdoc dans le dossier input et lancer l'exécutable "run". Enfin, il aura son epub dans le dossier output.
Unsupported
-----------
Known bugs
----------
Problème dans le hdoc : contient des images mais ne sont pas référencées.
Problème avec les keywords : le hdoc contient des keywords mais nous ne savons pas où les utiliser.
Todo list
---------
Validation de l'epub actuel
Mise en place de la première page
Validation
Étude compatibilité entre EPUB2 et EPUB3
Technical Notes
---------------
<?xml version="1.0"?>
<container xmlns="urn:oasis:names:tc:opendocument:xmlns:container" version="1.0">
<rootfiles>
<rootfile full-path="oebps/content.opf" media-type="application/oebps-package+xml"/>
</rootfiles>
</container>
application/epub+zip
<?xml version="1.0" encoding="UTF-8"?>
<!-- test.ant -->
<project>
<property name="p">Default</property>
<echo message="${p}"/>
<project name="hdoc_to_epub" basedir="." default="convert">
<