Commit caa2e203 authored by Gregory's avatar Gregory
Browse files

Merge branch 'master' of https://gitlab.utc.fr/crozatst/hdoc

parents f194c1a3 349c6133
......@@ -6,8 +6,8 @@ http://www.gnu.org/licenses/gpl-3.0.txt
## Credits
- 2016
- - Etienne Chognard
- - Fabien Boucaud
- Etienne Chognard
- Fabien Boucaud
- 2015
- Jean-Côme Douteau
- Gabrielle Rit
......@@ -54,7 +54,14 @@ Example :
Currently available on: https://framemo.org/framapad_to_opale
See also : https://bimestriel.framapad.org/p/nf29_framapad_to_opale for the full documentation of our working process.
## TODO
- Nested Lists
- Indentation
- Titles (and therefore Structure of the doc)
- Coloured text
- Code
- Markdown
## Technical notes
......@@ -85,4 +92,4 @@ Currently available on: https://framemo.org/framapad_to_opale
- br -> p
## Capitalisation
Using regular expression with xsl is a good way to parse a non xml file.
\ No newline at end of file
Using regular expression with xsl is a good way to parse a non xml file.
<?xml version="1.0" encoding="UTF-8"?>
<project basedir="." name="myantce" default="main">
<property file="etherpad_to_hdoc.properties"/>
<property file="framapad_to_hdoc.properties"/>
<!-- import classes -->
<taskdef resource="net/sf/antcontrib/antlib.xml"/>
<taskdef name="htmlcleaner" classname="org.htmlcleaner.HtmlCleanerForAnt"/>
......
@echo off
set lib=lib
set ant=etherpad_to_hdoc.ant
set ant=framapad_to_hdoc.ant
set antparam=-Dprogram.param=%1
set scJarList=%lib%\*
......
#!/bin/sh
lib="lib"
ant="etherpad_to_hdoc.ant"
ant="framapad_to_hdoc.ant"
antparam="-Dprogram.param=$1"
#Recherche de java et controle que se soit une version SUN
......
<?xml version="1.0" encoding="UTF-8"?>
<project basedir="." name="myantce" default="main">
<property file="etherpad_to_opale.properties"/>
<property file="framapad_to_opale.properties"/>
<!-- CHECK FOR OS FAMILY -->
<condition property="is_windows">
<os family="windows"/>
......@@ -24,7 +24,7 @@
<include name="*.html"/>
</fileset>
</copy>
<exec dir="../etherpad_to_hdoc" executable="run.bat"/>
<exec dir="../framapad_to_hdoc" executable="run.bat"/>
</target>
<target name="to_hdoc_unix" if="${is_unix}">
......@@ -38,8 +38,8 @@
<include name="*.html"/>
</fileset>
</copy>
<exec executable="/bin/bash" dir="../etherpad_to_hdoc">
<arg value="../etherpad_to_hdoc/run.sh"/>
<exec executable="/bin/bash" dir="../framapad_to_hdoc">
<arg value="../framapad_to_hdoc/run.sh"/>
</exec>
</target>
......@@ -90,4 +90,4 @@
<antcall target="to_opale_windows"/>
<antcall target="to_opale_end"/>
</target>
</project>
\ No newline at end of file
</project>
......@@ -5,8 +5,8 @@ xsl = ${basedir}/xsl
lib = ${basedir}/lib
log = ${basedir}/log
eth_in = ../etherpad_to_hdoc/input
eth_out = ../etherpad_to_hdoc/output
eth_in = ../framapad_to_hdoc/input
eth_out = ../framapad_to_hdoc/output
opa_in = ../hdoc_to_opale/input
opa_out = ../hdoc_to_opale/output
rootfilename = content.xml
\ No newline at end of file
rootfilename = content.xml
@echo off
set lib=lib
set ant=etherpad_to_opale.ant
set ant=framapad_to_opale.ant
set antparam=-Dprogram.param=%1
set scJarList=%lib%\*
......
#!/bin/sh
lib="lib"
ant="etherpad_to_opale.ant"
ant="framapad_to_opale.ant"
antparam="-Dprogram.param=$1"
#Recherche de java et controle que se soit une version SUN
......
Converter hdoc_to_basex
-----------------------
The purpose of this converter is to produce, from an HDOC file, an XML data file suitable for import into BaseX for further XQuery requests.
License GPL3.0
--------------
http://www.gnu.org/licenses/gpl-3.0.txt
Credits
-------
* Simei YIN
* Baptiste MONTANGE
Dependencies
----------
This project can be used alone if you want to import an HDOC file into basex.
## User stories
------------------
- Among a group of courses, user can search by title, author or keywords of the course.
- By searching a certain keyword, user can obtain the sections that contain it with their hierarchy levels in the course.
- In a certain section, by searching for a keyword, the user can obtain the paragraphs that contain it.
- User can get definitions related to a keyword
- User can get examples whose titles contain a keyword
Step by step :
[Step 1 : File transformation]
- Put the files .hdoc you want to deal with in the folder [input](https://gitlab.utc.fr/crozatst/hdoc/tree/master/hdoc_to_basex/input)
- Run the transformation program (Win : double click run.bat, Linux : execute run.sh)
[Step 2 : Create data base in basex]
- Download and install [BaseX](http://basex.org/products/download/all-downloads/)
- Run BasexGui
- In the Text Editor of BaseX, open the command script "createbd.bxs" in the folder [/basex/command](https://gitlab.utc.fr/crozatst/hdoc/tree/master/hdoc_to_basex/basex/command).
Follow the instructions in the script, and then execute it.
[Step 3 : Make XQuery request]
-
\ No newline at end of file
# This script creates a BaseX database from the XML files found in the "output" folder.
# Before executing it, make sure that the transformation described in step 1 of the README.md has completed successfully.
# Syntax : CREATE DB [name] ([input])
# Example (NOTE(review): the absolute path below is specific to one machine; adapt it to your local checkout before running):
CREATE DB myDB D:\School\UTC\GI04\NF29\Projet\hdoc\hdoc_to_basex\output
#CREATE DB myDB [..path to the project directory on your local disk..]/hdoc/hdoc_to_basex/output
# P.S. If you get the error message: Resource "..." not found, check that the directory path above is correct.
\ No newline at end of file
(: Returns every document that has at least one author matching a pattern. :)
(: Assign a regular expression to the variable $name: :)
(:   - $name := '^Baptiste Montangé$' searches for that exact author name; :)
(:   - $name := 'Montangé' searches for documents with an author whose name contains 'Montangé'. :)
(: Remark: lower-case accented vowels are replaced by their unaccented form in both :)
(: the pattern and the author names, so 'Montange' and 'Montangé' give the same result. :)
(: The comparison is case-sensitive (no "i" flag is passed to matches). :)
<documents>{
  let $name := 'Montangé'
  let $accented := 'áàâäéèêëíìîïóòôöúùûü'
  let $plain := 'aaaaeeeeiiiioooouuuu'
  let $name_noAcc := translate($name, $accented, $plain)
  for $doc in //document
  (: The quantified test is true as soon as one author matches, so a document :)
  (: with several matching authors is still returned only once. :)
  where some $author in $doc/authors/author
        satisfies matches(translate($author, $accented, $plain), $name_noAcc)
  return $doc
}</documents>
\ No newline at end of file
(: Title search: lists the documents whose title matches a pattern. :)
(: $name holds a regular expression, compared case-insensitively ("i" flag): :)
(:   - '^NF29_HdocEtherpad$' selects the documents whose title is exactly that name; :)
(:   - 'NF29' selects every document whose title contains 'NF29'. :)
<documents>{
  let $name := '^NF29_HdocEtherpad$'
  return //document[matches(titre, $name, "i")]
}</documents>
\ No newline at end of file
# Directory layout of the converter; every path is resolved from ${basedir}.
# NOTE(review): presumably the file loaded by the Ant build through <property file="build.properties"/> - confirm.
# Libraries directory
lib=${basedir}/lib
# Log directory
log=${basedir}/log
# XSL stylesheets directory
xsl=${basedir}/xsl
# Input directory
in=${basedir}/input
# Output directory
out=${basedir}/output
# Temporary working directory
tmp=${basedir}/tmp
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!-- Ant build that turns every .hdoc archive found in ${in} into an XML data file written to ${out}. -->
<project name="myantce" default="convert" basedir=".">

  <!-- ant-contrib tasks (the <for> loop below comes from this library) -->
  <taskdef resource="net/sf/antcontrib/antlib.xml"/>
  <!-- Expected to define the directory properties used below: tmp, out, log, in, xsl, lib -->
  <property file="build.properties"/>

  <!-- Entry point: resets the working folders, converts each input file, then removes the temporary folder. -->
  <target name="convert">
    <!-- Working folders: tmp is created if missing; out and log are recreated empty -->
    <mkdir dir="${tmp}"/>
    <delete failonerror="false" dir="${out}"/>
    <mkdir dir="${out}"/>
    <delete failonerror="false" dir="${log}"/>
    <mkdir dir="${log}"/>

    <echo message="DEBUT"/>

    <!-- For each .hdoc file below ${in}, call the targets "UnzipHdocFile" then "content" -->
    <for param="inputFile">
      <path>
        <fileset dir="${in}">
          <include name="**/*.hdoc"/>
        </fileset>
      </path>
      <sequential>
        <!-- "filename" is declared local so that it can receive a new value for every file of the loop -->
        <local name="filename"/>
        <basename file="@{inputFile}" property="filename"/>
        <antcall target="UnzipHdocFile">
          <param value="${filename}" name="filename"/>
        </antcall>
        <antcall target="content">
          <param value="${filename}" name="filename"/>
        </antcall>
        <echo message="FIN"/>
      </sequential>
    </for>

    <!-- Final clean-up of the temporary folder -->
    <delete failonerror="false" dir="${tmp}"/>
  </target>

  <!-- Unzips ${in}/${filename} into ${tmp}/${filename}/decompressedHdoc; the rest of the build
       reaches the archive content only through that "decompressedHdoc" folder. -->
  <target name="UnzipHdocFile">
    <unzip dest="${tmp}/${filename}/decompressedHdoc" src="${in}/${filename}"/>
    <chmod perm="777" dir="${tmp}/${filename}/decompressedHdoc"/>
    <echo message="${tmp}/${filename}/decompressedHdoc"/>
  </target>

  <!-- Applies ${xsl}/transformation.xsl to the decompressed content.xml and writes the result
       to ${out}/${filename}_data.xml, the data file to be imported into BaseX. -->
  <target name="content">
    <xslt
        style="${xsl}/transformation.xsl"
        in="${tmp}/${filename}/decompressedHdoc/content.xml"
        out="${out}/${filename}_data.xml"
        processor="org.apache.tools.ant.taskdefs.optional.TraXLiaison">
      <param expression="${filename}" name="filename"/>
      <param expression="${lib}" name="lib"/>
    </xslt>
    <echo message="${filename}"/>
  </target>

</project>
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment