Commit e56195eb authored by Aghiles's avatar Aghiles

fix conflict

parents c7d43a1d 9e71c987
......@@ -7,6 +7,7 @@
<target name="convert">
<antcall target="init" />
<antcall target="unzip_scar" />
<antcall target="hdoc_structure"/>
<antcall target="process_canoscar"/>
<antcall target="not_canoscar"/>
<echo message="OK" file="${out}/testfile"/>
......@@ -15,6 +16,21 @@
</target>
<!-- Create the hdoc structure -->
<target name="hdoc_structure">
    <!--
        Builds the skeleton of the hdoc package under ${hdoc}:
          ${hdoc}/mimetype                 the package media type
          ${hdoc}/META-INF/container.xml   the pointer to the root content file
        NOTE(review): ${rootfilename} is not set in this target; it must be
        defined by the caller or by a loaded properties file.
    -->
    <!-- mkdir creates missing parent directories, so ${hdoc} itself is created here too. -->
    <mkdir dir="${hdoc}/META-INF"/>

    <!-- echo creates (or overwrites) the file, so no preliminary touch is needed. -->
    <echo file="${hdoc}/mimetype" message="application/x-hdoc+zip"/>

    <!-- echoxml likewise creates the file it writes to. -->
    <echoxml file="${hdoc}/META-INF/container.xml" namespacePolicy="elementsOnly">
        <container xmlns="urn:utc.fr:ics:hdoc:container" version="1.0">
            <rootfiles>
                <rootfile full-path="${rootfilename}" media-type="text/xml"/>
            </rootfiles>
        </container>
    </echoxml>
</target>
<!-- Initialize folders -->
<target name="init">
......@@ -43,8 +59,10 @@
<target name="process_canoscar"
        depends="check_scar"
        if="${canoprof.is_canoprof_scar}">
    <!--
        Runs only when ${canoprof.is_canoprof_scar} evaluates to true
        (presumably set by check_scar; confirm against that target).
    -->
    <!-- TODO process the file -->
    <echo message="C'est un scar canoprof"
          file="${out}/scar_cano_or_not_scar_cano.txt"/>

    <!-- TODO replace the hard-coded Test.programme input with the temporary file
         produced by merging all the XML files -->
    <xslt in="${unzipfolder}/Test/Test.programme"
          out="${hdoc}/content.xml"
          style="xsl/programmation.xsl"
          classpath="./lib/saxon9he.jar">
        <!-- Force Saxon as the transformer implementation. -->
        <factory name="net.sf.saxon.TransformerFactoryImpl"/>
    </xslt>
</target>
<target name="not_canoscar" depends="check_scar" unless="${canoprof.is_canoprof_scar}">
......@@ -63,6 +81,11 @@
<!-- Zip the generated hdoc tree into ${out}/${filename}.hdoc -->
<target name="zip">
    <!-- Candidate source archives. The id is kept so that existing references
         to fileset_scar keep working. -->
    <fileset id="fileset_scar" dir="input" casesensitive="yes">
        <include name="*.scar"/>
    </fileset>

    <!-- Only the first matching archive names the output. Converting the whole
         fileset with an empty path separator would glue several paths together
         and yield a meaningless basename as soon as two .scar files are present. -->
    <pathconvert property="scar.path">
        <first count="1">
            <fileset refid="fileset_scar"/>
        </first>
    </pathconvert>

    <!-- Strip directory and .scar extension to obtain the output file name. -->
    <basename property="filename" file="${scar.path}" suffix=".scar"/>

    <zip destfile="${out}/${filename}.hdoc" basedir="${hdoc}"/>
</target>
</project>
\ No newline at end of file
......@@ -4,6 +4,7 @@ tmp = ${basedir}/tmp
xsl = ${basedir}/xsl
lib = ${basedir}/lib
log = ${basedir}/log
hdoc = ${out}/hdoc
unzipfolder = ${basedir}/unzip
cano_in = ../canoprof_to_hdoc/input
......
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Converts a Canoprof text activity (sp:textActivity) into hdoc XHTML.

    Literal result elements are created in the hdoc XHTML namespace (default
    namespace below). The source vocabularies (sc, sp, op, cp) are listed in
    exclude-result-prefixes so that their declarations are not copied to the output.
-->
<xsl:stylesheet version="2.0"
    xmlns="http://www.utc.fr/ics/hdoc/xhtml"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:sc="http://www.utc.fr/ics/scenari/v3/core"
    xmlns:sp="http://www.utc.fr/ics/scenari/v3/primitive"
    xmlns:op="utc.fr:ics/opale3"
    xmlns:cp="canope.fr:canoprof"
    exclude-result-prefixes="xs sc sp op cp">

    <!-- Activity: one section holding the title, the blocks and a footer. -->
    <xsl:template match="sp:textActivity">
        <section>
            <header>
                <xsl:apply-templates select="cp:textActivity/cp:activityM/sp:title"/>
            </header>
            <!-- Blocks: emitted inside the section they belong to. -->
            <xsl:apply-templates select="cp:textActivity/sp:body/cp:blocks/*"/>
            <!-- TODO Parts -->
            <footer/>
        </section>
    </xsl:template>

    <!-- Titles -->
    <xsl:template match="cp:activityM/sp:title">
        <h1>
            <xsl:value-of select="."/>
        </h1>
    </xsl:template>

    <xsl:template match="cp:block/cp:blockM/sp:title">
        <h6>
            <xsl:value-of select="."/>
        </h6>
    </xsl:template>

    <!--
        Shared body of every block template: a div carrying the block title and
        the content of its flow. The context node is the block wrapper
        (sp:info, sp:def, ...).
        $type: value of the data-hdoc-type attribute; when empty, no attribute is written.
    -->
    <xsl:template name="block-div">
        <xsl:param name="type" as="xs:string?"/>
        <div>
            <xsl:if test="$type">
                <xsl:attribute name="data-hdoc-type" select="$type"/>
            </xsl:if>
            <xsl:apply-templates select="cp:block/cp:blockM/sp:title"/>
            <xsl:apply-templates select="cp:block/sp:body/cp:flow/*"/>
        </div>
    </xsl:template>

    <!-- Content information: plain div, no data-hdoc-type -->
    <xsl:template match="sp:body/cp:blocks/sp:info">
        <xsl:call-template name="block-div"/>
    </xsl:template>

    <!-- Content definition -->
    <xsl:template match="sp:body/cp:blocks/sp:def">
        <xsl:call-template name="block-div">
            <xsl:with-param name="type" select="'definition'"/>
        </xsl:call-template>
    </xsl:template>

    <!-- Content example -->
    <xsl:template match="sp:body/cp:blocks/sp:ex">
        <xsl:call-template name="block-div">
            <xsl:with-param name="type" select="'example'"/>
        </xsl:call-template>
    </xsl:template>

    <!-- Content hypothesis -->
    <xsl:template match="sp:body/cp:blocks/sp:hypothesis">
        <xsl:call-template name="block-div">
            <xsl:with-param name="type" select="'remark'"/>
        </xsl:call-template>
    </xsl:template>

    <!-- Content advice -->
    <xsl:template match="sp:body/cp:blocks/sp:advice">
        <xsl:call-template name="block-div">
            <xsl:with-param name="type" select="'advice'"/>
        </xsl:call-template>
    </xsl:template>

    <!-- Content warning -->
    <xsl:template match="sp:body/cp:blocks/sp:warning">
        <xsl:call-template name="block-div">
            <xsl:with-param name="type" select="'warning'"/>
        </xsl:call-template>
    </xsl:template>

    <!-- Content reminder. TODO change for reminder (currently mapped to complement) -->
    <xsl:template match="sp:body/cp:blocks/sp:reminder">
        <xsl:call-template name="block-div">
            <xsl:with-param name="type" select="'complement'"/>
        </xsl:call-template>
    </xsl:template>

    <!-- Content complement -->
    <xsl:template match="sp:body/cp:blocks/sp:extra">
        <xsl:call-template name="block-div">
            <xsl:with-param name="type" select="'complement'"/>
        </xsl:call-template>
    </xsl:template>

    <!-- Content method. TODO change for method (currently mapped to advice) -->
    <xsl:template match="sp:body/cp:blocks/sp:method">
        <xsl:call-template name="block-div">
            <xsl:with-param name="type" select="'advice'"/>
        </xsl:call-template>
    </xsl:template>

    <!-- Simple text -->
    <!-- NOTE(review): this pattern requires an op:res parent, while the block
         templates apply templates to children of cp:flow; confirm the intended parent. -->
    <xsl:template match="op:res/sp:txt">
        <xsl:apply-templates select="./cp:txt/*"/>
    </xsl:template>

    <!-- Paragraph: emits the text of the paragraph. TODO handle inline markup -->
    <xsl:template match="sc:para">
        <p>
            <xsl:value-of select="."/>
        </p>
    </xsl:template>
</xsl:stylesheet>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Converts a Canoprof programme (sc:item/cp:program) into an hdoc XHTML document.
    The source vocabularies (sc, sp, op, cp) are excluded from the result so that
    their namespace declarations do not end up on the generated html element.
-->
<xsl:stylesheet version="2.0"
    xmlns="http://www.utc.fr/ics/hdoc/xhtml"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:sc="http://www.utc.fr/ics/scenari/v3/core"
    xmlns:sp="http://www.utc.fr/ics/scenari/v3/primitive"
    xmlns:op="utc.fr:ics/opale3"
    xmlns:cp="canope.fr:canoprof"
    exclude-result-prefixes="xs sc sp op cp">

    <xsl:output indent="yes"/>

    <!-- Programme: document root -->
    <xsl:template match="sc:item/cp:program">
        <!-- RNG schema association for the oXygen editor -->
        <xsl:processing-instruction name="oxygen">
            <xsl:text>RNGSchema="http://hdoc.crzt.fr/schemas/xhtml/hdoc1-xhtml.rng" type="xml"</xsl:text>
        </xsl:processing-instruction>
        <!-- HTML -->
        <html>
            <head>
                <xsl:apply-templates select="cp:programM/sp:title"/>
                <meta charset="utf-8"/>
                <meta content="HdocConverter/Opale3.4" name="generator"/>
            </head>
            <body>
                <xsl:apply-templates select="sp:sequence/cp:sequence"/>
            </body>
        </html>
    </xsl:template>

    <!-- Sequence: one section per sequence -->
    <xsl:template match="sp:sequence/cp:sequence">
        <section>
            <header>
                <xsl:apply-templates select="cp:sequenceM/sp:title"/>
            </header>
            <!-- Sessions of THIS sequence only. An absolute //sp:session would
                 pull every session of the document into each sequence. -->
            <xsl:apply-templates select=".//sp:session"/>
        </section>
    </xsl:template>

    <!-- Session: not converted yet, produces no output -->
    <xsl:template match="sp:session"/>

    <!-- Titles -->
    <xsl:template match="cp:programM/sp:title">
        <title><xsl:value-of select="."/></title>
    </xsl:template>

    <xsl:template match="cp:sequenceM/sp:title">
        <h1><xsl:value-of select="."/></h1>
    </xsl:template>
</xsl:stylesheet>
\ No newline at end of file
......@@ -54,7 +54,14 @@ Example :
Currently available on: https://framemo.org/framapad_to_opale
See also : https://bimestriel.framapad.org/p/nf29_framapad_to_opale for the full documentation of our working process.
## TODO
- Nested Lists
- Indentation
- Titles (and therefore Structure of the doc)
- Coloured text
- Code
- Markdown
## Technical notes
......
......@@ -29,6 +29,17 @@ This project can be used alone if you want to import an HDOC file into basex.
- Within a given section, the user can search for a keyword and obtain the paragraphs that contain it.
- User can get definitions related to a keyword
- User can get examples whose titles contain a keyword
[Step 1]
- Unzip works, creation of a XML file with the title of the hdoc. (little problem, all of the document is in the output).
- the file created is in the tmp/{hdocfile} directory.
\ No newline at end of file
Step by step :
[Step 1 : File transformation]
- Put the files .hdoc you want to deal with in the folder [input](https://gitlab.utc.fr/crozatst/hdoc/tree/master/hdoc_to_basex/input)
- Run the transformation program (Windows: double-click run.bat, Linux: execute run.sh)
[Step 2 : Create data base in basex]
- Download and install [BaseX](http://basex.org/products/download/all-downloads/)
- Run BasexGui
- In the Text Editor of BaseX, open the command script "createbd.bxs" in the folder [/basex/command](https://gitlab.utc.fr/crozatst/hdoc/tree/master/hdoc_to_basex/basex/command).
Follow the instructions in the script, and then execute it.
[Step 3 : Make XQuery request]
-
\ No newline at end of file
# This script creates a BaseX database from the XML files in the "output" folder.
# Before executing it, make sure the transformation described in step 1 of the "user story" in README.md has completed successfully.
# Syntax : CREATE DB [name] ([input])
# Example (replace the path below with the output folder on your own machine) :
CREATE DB myDB D:\School\UTC\GI04\NF29\Projet\hdoc\hdoc_to_basex\output
#CREATE DB myDB [..path to the project on your local disk..]/hdoc/hdoc_to_basex/output
# P.S. If you get the error message : Resource "..." not found, check that the directory path above is correct.
\ No newline at end of file
(: Returns every document that has at least one author matching $name. :)
(: $name is a regular expression, evaluated with fn:matches (case-sensitive). :)
(: For example, $name := '^Baptiste Montangé$' matches that exact author name. :)
(: For example, $name := 'Montangé' matches every author whose name contains it. :)
(: Remark : the lower-case accented vowels listed in $accented are folded to their :)
(: base letter in both the pattern and the author names, so 'Montange' matches too. :)
<documents>{
  let $name := 'Montangé'
  let $accented := 'áàâäéèêëíìîïóòôöúùûü'
  let $plain := 'aaaaeeeeiiiioooouuuu'
  let $pattern := translate($name, $accented, $plain)
  for $doc in //document
  (: A quantified test returns each document once, however many of its authors match. :)
  where some $author in $doc/authors/author
        satisfies matches(translate($author, $accented, $plain), $pattern)
  return $doc
}</documents>
\ No newline at end of file
(: Returns the documents whose title (titre) matches $name. :)
(: $name is a regular expression; the "i" flag makes the match case-insensitive. :)
(: For example, $name := '^NF29_HdocEtherpad$' matches that whole title only. :)
(: For example, $name := 'NF29' matches every title containing 'NF29'. :)
<documents>{
  let $name := '^NF29_HdocEtherpad$'
  return //document[matches(titre, $name, "i")]
}</documents>
\ No newline at end of file
......@@ -40,6 +40,7 @@
<delete dir="${tmp}" failonerror="false"/>
</target>
<target name="UnzipHdocFile">
<!-- Unzip the input hdoc file. Decompressed folder is named "decompressedHdoc" : this name is the only one which
refers to the hdoc file furthermore in this project. -->
......@@ -49,6 +50,7 @@
</target>
<target name="content" >
<!-- Transformation of a xml file decompressed from hdoc file to data xml file to be imported to basex.
The transformation will be done in terms of the xsl file in ${xsl}.-->
<xslt in="${tmp}/${filename}/decompressedHdoc/content.xml" out="${out}/${filename}_data.xml" style="${xsl}/transformation.xsl" processor="org.apache.tools.ant.taskdefs.optional.TraXLiaison">
......@@ -56,6 +58,7 @@
<param name="lib" expression="${lib}"/>
</xslt>
<echo message="${filename}" />
</target>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:h="http://www.utc.fr/ics/hdoc/xhtml"
exclude-result-prefixes="xs"
version="2.0">
<xsl:template match="h:html">
<xsl:apply-templates mode="title"></xsl:apply-templates>
</xsl:template>
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:h="http://www.utc.fr/ics/hdoc/xhtml"
exclude-result-prefixes="xs" version="2.0">
<xsl:template match="h:html">
<document>
<xsl:apply-templates mode="title"/>
</document>
</xsl:template>
<xsl:template match="h:head" mode="title">
<titre><xsl:value-of select="./h:title"/></titre>
<titre>
<xsl:value-of select="./h:title"/>
</titre>
<xsl:apply-templates select="./h:meta"/>
</xsl:template>
<!--
    Authors: splits the content attribute into one author element per name.
    Only the part before the first ", <digit>" is kept; that part is then
    split on ", ".
-->
<xsl:template match="h:meta[@name='author']">
    <xsl:variable name="names" select="tokenize(@content, ', \d')[1]"/>
    <authors>
        <xsl:for-each select="tokenize($names, ', ')">
            <author><xsl:value-of select="."/></author>
        </xsl:for-each>
    </authors>
</xsl:template>
<xsl:template match="h:body" mode="title">
</xsl:template>
<sections>
<xsl:apply-templates select="./h:section"/>
</sections>
</xsl:template>
<!--
    Section: emits the h1 of the section header as titresection, the div
    children as paragraphes, and nested sections through the soussect mode.
-->
<xsl:template match="h:section">
    <section>
        <titresection><xsl:value-of select="h:header/h:h1"/></titresection>
        <contenu>
            <paragraphes>
                <xsl:apply-templates select="child::h:div"/>
            </paragraphes>
            <xsl:apply-templates select="child::h:section" mode="soussect"/>
        </contenu>
    </section>
</xsl:template>
<!-- Nested section: only an empty soussection marker is written for now. -->
<xsl:template mode="soussect" match="h:section">
    <soussection/>
</xsl:template>
<!--
    Div: one paragraphe per h:p child, and the list items of each h:ul child,
    in document order.
    Each h:p is written on its own. Taking the string value of the whole div
    would merge every paragraph together and repeat the list text that the
    h:ul template already emits.
-->
<xsl:template match="h:div">
    <xsl:for-each select="h:p | h:ul">
        <xsl:choose>
            <xsl:when test="self::h:p">
                <paragraphe>
                    <xsl:value-of select="."/>
                </paragraphe>
            </xsl:when>
            <xsl:otherwise>
                <!-- h:ul: delegated to its own template -->
                <xsl:apply-templates select="."/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:for-each>
</xsl:template>
<!-- List: every item becomes one paragraphe. -->
<xsl:template match="h:ul">
    <xsl:apply-templates select="h:li"/>
</xsl:template>

<!-- List item: the text of its h:p children. -->
<xsl:template match="h:li">
    <paragraphe>
        <xsl:value-of select="h:p"/>
    </paragraphe>
</xsl:template>
</xsl:stylesheet>
\ No newline at end of file
</xsl:stylesheet>
This diff is collapsed.
libdir=${basedir}/lib
xsldir=${basedir}/xsl
inputPath=${basedir}/input
outputPath=${basedir}/output
\ No newline at end of file
tmpHdoc=${basedir}/tmp_hdoc
outputPath=${basedir}/output
<project name="hdoc_to_elasticsearch" default="unzipSource">
<property file="build.properties"/>
<target name="unzipSource">
<project name="hdoc_to_elasticsearch" default="clean">
<property file="build.properties"/>
<target name="unzipSource">
<echo message="Conversion begin" />
<path id="hdocDirPath">
<fileset dir="${inputPath}" id="hdocFile">
<include name="*.hdoc"/>
</fileset>
<include name="*.hdoc"/>
</fileset>
</path>
<property name="hdocFile" refId="hdocDirPath"/>
<property name="hdocFile" refId="hdocDirPath"/>
<basename property="hdocFileName" file="${hdocFile}" suffix=".hdoc"/>
<echo>Entry file : ${hdocFileName}</echo>
<echo>Entry file : ${hdocFileName}</echo>
<!-- create subdirectory for each file -->
<mkdir dir="${outputPath}/${hdocFileName}"/>
<!--unzips the hdoc to be converted into the previously created directory-->
<unzip dest="${outputPath}/${hdocFileName}">
<fileset dir="${inputPath}" id="hdocFile">
<include name="${hdocFileName}.hdoc"/>
</fileset>
</unzip>
<echo message="Conversion end" />
</target>
<mkdir dir="${tmpHdoc}/${hdocFileName}"/>
<!--unzips the hdoc to be converted into the previously created directory-->
<unzip dest="${tmpHdoc}/${hdocFileName}">
<fileset dir="${inputPath}" id="hdocFile">
<include name="${hdocFileName}.hdoc"/>
</fileset>
</unzip>
</target>
<target name="jsonFiles" depends="unzipSource">
<echo message="JSON File creation" />
<!-- TODO : create JSON & extract data -->
</target>
<echo message="JSON File creation" />
<xslt in="${tmpHdoc}/${hdocFileName}/content.xml" out="${outputPath}/${hdocFileName}.json" style="xsl/content_extractor.xsl"/>
</target>
<!-- Clear tmp files -->
<target name="clean" depends="jsonFiles">
</target>
<!-- Clear tmp files -->
<target name="clean" depends="jsonFiles">
    <!-- Removes the temporary extraction directory once the JSON files are written. -->
    <echo message="Clean files"/>
    <!-- Deleting by dir removes the directory and everything below it in one
         step, and does not fail when the directory does not exist (a nested
         fileset would abort the build in that case). -->
    <delete dir="${tmpHdoc}" includeemptydirs="true" verbose="true"/>
    <echo message="Conversion end"/>
</target>
</project>
<echo message="Conversion end" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Extracts the metadata of an hdoc content file (head element) as one JSON object:
    title, keywords, and every author / date / rights meta found.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    xmlns:f="urn:local:content-extractor"
    xpath-default-namespace="http://www.utc.fr/ics/hdoc/xhtml"
    exclude-result-prefixes="xs f"
    version="2.0">

    <!-- Text output: with the default XML method, characters such as & and <
         would be written as XML entities inside the JSON.
         UTF-8 keeps accented characters intact. -->
    <xsl:output method="text" encoding="UTF-8"/>

    <!--
        Returns $value as a quoted JSON string literal.
        Backslashes are escaped first so that the escapes added afterwards are
        not escaped a second time. An empty sequence yields "".
    -->
    <xsl:function name="f:json-string" as="xs:string">
        <xsl:param name="value" as="xs:string?"/>
        <xsl:variable name="step1" select="replace(string($value), '\\', '\\\\')"/>
        <xsl:variable name="step2" select="replace($step1, '&quot;', '\\&quot;')"/>
        <xsl:variable name="step3" select="replace($step2, '\n', '\\n')"/>
        <xsl:variable name="step4" select="replace($step3, '\r', '\\r')"/>
        <xsl:variable name="step5" select="replace($step4, '\t', '\\t')"/>
        <xsl:sequence select="concat('&quot;', $step5, '&quot;')"/>
    </xsl:function>

    <!-- Document root: wraps the head members in a JSON object. -->
    <xsl:template match="/">
        <xsl:text>{&#10;</xsl:text>
        <xsl:apply-templates select="html/head"/>
        <xsl:text>&#10;}&#10;</xsl:text>
    </xsl:template>

    <!--
        Head: builds the list of members first and joins them afterwards, so the
        commas are correct whatever the order of the meta elements and whichever
        of them are present.
    -->
    <xsl:template match="head">
        <xsl:variable name="members" as="xs:string*">
            <xsl:sequence select="concat('&quot;title&quot; : ', f:json-string(string-join(title/text(), ' ')))"/>
            <xsl:sequence select="concat('&quot;keywords&quot; : [', string-join(for $k in meta[@name = 'keywords'] return f:json-string($k/@content), ', '), ']')"/>
            <xsl:sequence select="for $m in meta[@name = ('author', 'date', 'rights')] return concat('&quot;', $m/@name, '&quot; : ', f:json-string($m/@content))"/>
        </xsl:variable>
        <xsl:value-of select="$members" separator=",&#10;"/>
    </xsl:template>

    <!-- Body: nothing is extracted from it. -->
    <xsl:template match="body"/>
</xsl:stylesheet>
\ No newline at end of file
......@@ -35,15 +35,23 @@ La personne possède un hdoc et voudrait le convertir en epub. Il se dirige vers
Unsupported
-----------
Known bugs
----------
Problème dans le hdoc : il contient des images, mais elles ne sont pas référencées.
Problème avec les keywords : le hdoc contient des keywords mais nous ne savons pas où les utiliser.
Todo list
---------
Validation de l'epub actuel
Mise en place de la première page
Validation
Étude compatibilité entre EPUB2 et EPUB3
Technical Notes
---------------
<?xml version="1.0" encoding="UTF-8"?>
<!-- Container descriptor: points to the package document oebps/content.opf. -->
<container version="1.0"
           xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
    <rootfiles>
        <rootfile media-type="application/oebps-package+xml"
                  full-path="oebps/content.opf"/>
    </rootfiles>
</container>
application/epub+zip
......@@ -7,8 +7,12 @@
<!-- Working directories, all located directly under ${basedir}. -->
<!-- in / out: input and output folders; tmp: scratch folder deleted at the start of convert. -->
<property name="in" location="${basedir}/input"/>
<property name="out" location="${basedir}/output"/>
<property name="tmp" location="${basedir}/tmp"/>
<!-- xsl: stylesheets used by MakeOPF and MakeOPS. -->
<property name="xsl" location="${basedir}/xsl"/>
<!-- file: static files copied by MakeOCF (mimetype, container.xml). -->
<property name="file" location="${basedir}/file"/>
<!-- schema: presumably the schemas used by the validation targets; confirm. -->
<property name="schema" location="${basedir}/schema"/>
<!-- lib: jar files; saxon9he.jar is read from here for the xslt tasks. -->
<property name="lib" location="${basedir}/lib"/>
<!-- tmpRetour: folder under tmp where MakeOCF, MakeOPF and MakeOPS write their output. -->
<property name="tmpRetour" location="${basedir}/tmp/retour"/>
<target name="convert">
<delete dir="${tmp}" failonerror="false"/>
<sleep seconds="1"/>
......@@ -17,6 +21,8 @@
<delete dir="${out}" failonerror="false"/>
<sleep seconds="1"/>
<mkdir dir="${out}"/>
<mkdir dir="${tmpRetour}"/>
<for param="inputFile">
<path>
......@@ -26,7 +32,7 @@
<sequential>
<local name="filename"/>
<basename property="filename" file="@{inputFile}"/>
<antcall target="UnzipHdocFile">
<param name="filename" value="${filename}"/>
</antcall>
......@@ -34,6 +40,28 @@
<antcall target="ValidateInput">
<param name="filename" value="${filename}"/>
</antcall>
<antcall target="MakeOCF">
<param name="filename" value="${filename}"/>
</antcall>
<antcall target="MakeOPF">
<param name="filename" value="${filename}"/>
</antcall>
<antcall target="MakeOPS">
<param name="filename" value="${filename}"/>
</antcall>
<antcall target="ZipEpubFile">
<param name="filename" value="${filename}"/>
</antcall>
<antcall target="ValidateOutput">
<param name="filename" value="${filename}"/>
</antcall>
<antcall target="CleanTmp"/>
</sequential>
</for>
</target>
......@@ -57,4 +85,50 @@
</trycatch>
</target>
<target name="MakeOCF">
    <!-- Lays out the container skeleton in ${tmpRetour}: the META-INF and
         oebps folders, plus the static mimetype and container.xml files. -->
    <mkdir dir="${tmpRetour}/META-INF"/>
    <mkdir dir="${tmpRetour}/oebps"/>

    <copy todir="${tmpRetour}"
          file="${file}/mimetype"/>
    <copy todir="${tmpRetour}/META-INF"
          file="${file}/container.xml"/>
</target>
<target name="MakeOPF">
    <!-- Generates oebps/content.opf from the decompressed hdoc content.xml
         with the content.xsl stylesheet. -->
    <xslt style="${xsl}/content.xsl"
          classpath="${lib}/saxon9he.jar"
          in="${tmp}/${filename}/decompressedHdoc/content.xml"
          out="${tmpRetour}/oebps/content.opf"/>
</target>
<target name="MakeOPS">
    <!-- Fills the oebps folder: images, styles, chapters and the NCX table of contents. -->

    <!-- Resources of the hdoc ("re" folder). erroronmissingdir="false" lets an
         hdoc that ships no resource folder be converted instead of aborting the build. -->
    <mkdir dir="${tmpRetour}/oebps/images"/>
    <copy todir="${tmpRetour}/oebps/images">
        <fileset dir="${tmp}/${filename}/decompressedHdoc/re"
                 includes="**"
                 erroronmissingdir="false"/>
    </copy>

    <mkdir dir="${tmpRetour}/oebps/styles"/>
    <mkdir dir="${tmpRetour}/oebps/chapitres"/>

    <!-- Table of contents (NCX) -->
    <xslt in="${tmp}/${filename}/decompressedHdoc/content.xml"
          out="${tmpRetour}/oebps/tableDesMatieres.ncx"
          classpath="${lib}/saxon9he.jar"
          style="${xsl}/tdmncx.xsl"/>

    <!-- Chapters: run in directory mode on content.xml only. -->
    <xslt destdir="${tmpRetour}/oebps/chapitres"
          basedir="${tmp}/${filename}/decompressedHdoc"
          includes="content.xml"
          classpath="${lib}/saxon9he.jar"
          style="${xsl}/chapitre.xsl"/>

    <!-- content.html is removed from the chapters folder after the transformation;
         presumably it is the principal output of the directory-mode run and the
         chapters are written separately by chapitre.xsl (confirm). -->
    <delete file="${tmpRetour}/oebps/chapitres/content.html"/>
</target>