Commit d49f64e7 authored by Tyrannas's avatar Tyrannas

Merge branch 'master' of https://gitlab.utc.fr/crozatst/hdoc

parents 909d5a50 2069b53b
......@@ -29,6 +29,17 @@ This project can be used alone if you want to import an HDOC file into basex.
- Within a given section, the user can search for a keyword to retrieve the paragraphs that contain it.
- User can get definitions related to a keyword
- User can get examples whose titles contain a keyword
[Step 1]
- Unzipping works, and an XML file containing the title of the hdoc is created (known issue: the whole document currently ends up in the output).
- The created file is placed in the tmp/{hdocfile} directory.
\ No newline at end of file
Step by step :
[Step 1 : File transformation]
- Put the .hdoc files you want to process in the [input](https://gitlab.utc.fr/crozatst/hdoc/tree/master/hdoc_to_basex/input) folder.
- Run the transformation program (Windows: double-click run.bat; Linux: execute run.sh).
[Step 2 : Create a database in BaseX]
- Download and install [BaseX](http://basex.org/products/download/all-downloads/).
- Run the BaseX GUI.
- In the BaseX Text Editor, open the command script "createbd.bxs" located in the folder [/basex/command](https://gitlab.utc.fr/crozatst/hdoc/tree/master/hdoc_to_basex/basex/command).
Follow the instructions in the script, and then execute it.
[Step 3 : Run an XQuery request]
-
\ No newline at end of file
# This script creates a database in BaseX from the XML files in the "output" folder.
# Before executing this script, make sure that you have successfully completed the transformation described in step 1 of the "user story" in README.md.
# Syntax : CREATE DB [name] ([input])
# Example :
CREATE DB myDB D:\School\UTC\GI04\NF29\Projet\hdoc\hdoc_to_basex\output
#CREATE DB myDB [..the directory of the project on your local disk..]/hdoc/hdoc_to_basex/output
# P.S. If you get the error message: Resource "..." not found, please check that the directory path above is correct.
\ No newline at end of file
(: Returns every document whose title (titre) matches the pattern stored in $name. :)
(: $name is interpreted as a regular expression and compared case-insensitively. :)
(: Example: $name := '^NF29_HdocEtherpad$' selects the document with exactly that name. :)
(: Example: $name := 'NF29' selects every document whose name contains 'NF29'. :)
let $name := '^NF29_HdocEtherpad$'
return //document[matches(titre, $name, "i")]
\ No newline at end of file
......@@ -40,6 +40,7 @@
<delete dir="${tmp}" failonerror="false"/>
</target>
<target name="UnzipHdocFile">
<!-- Unzip the input hdoc file. Decompressed folder is named "decompressedHdoc" : this name is the only one which
refers to the hdoc file furthermore in this project. -->
......@@ -49,6 +50,7 @@
</target>
<target name="content" >
<!-- Transformation of a xml file decompressed from hdoc file to data xml file to be imported to basex.
The transformation will be done in terms of the xsl file in ${xsl}.-->
<xslt in="${tmp}/${filename}/decompressedHdoc/content.xml" out="${out}/${filename}_data.xml" style="${xsl}/transformation.xsl" processor="org.apache.tools.ant.taskdefs.optional.TraXLiaison">
......@@ -56,6 +58,7 @@
<param name="lib" expression="${lib}"/>
</xslt>
<echo message="${filename}" />
</target>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:h="http://www.utc.fr/ics/hdoc/xhtml"
exclude-result-prefixes="xs"
version="2.0">
<xsl:template match="h:html">
<xsl:apply-templates mode="title"></xsl:apply-templates>
</xsl:template>
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:h="http://www.utc.fr/ics/hdoc/xhtml"
exclude-result-prefixes="xs" version="2.0">
<xsl:template match="h:html">
<document>
<xsl:apply-templates mode="title"/>
</document>
</xsl:template>
<xsl:template match="h:head" mode="title">
<titre><xsl:value-of select="./h:title"/></titre>
<titre>
<xsl:value-of select="./h:title"/>
</titre>
<xsl:apply-templates select="./h:meta"/>
</xsl:template>
<!-- Builds the <authors> list from the author meta element.
     Only the part of @content that precedes the first ", <digit>" is kept;
     that part is then split on ", " and each piece becomes one <author>. -->
<xsl:template match="h:meta[@name='author']">
  <xsl:variable name="names-part" select="tokenize(@content,', \d')[1]"/>
  <authors>
    <xsl:for-each select="tokenize($names-part,', ')">
      <author><xsl:value-of select="."/></author>
    </xsl:for-each>
  </authors>
</xsl:template>
<xsl:template match="h:body" mode="title">
</xsl:template>
<sections>
<xsl:apply-templates select="./h:section"/>
</sections>
</xsl:template>
<!-- Maps an hdoc section to a <section> element.
     The value of header/h1 is written to <titresection>; the section's div children
     are processed inside <paragraphes>; nested sections are processed in the
     "soussect" mode, after the paragraphs, inside <contenu>. -->
<xsl:template match="h:section">
<section>
<titresection>
<xsl:value-of select="./h:header/h:h1"/>
</titresection>
<contenu>
<paragraphes>
<xsl:apply-templates select="h:div"/>
</paragraphes>
<xsl:apply-templates select="h:section" mode="soussect"/>
</contenu>
</section>
</xsl:template>
<!-- Nested section in "soussect" mode: currently emits only an empty <soussection>;
     the content of the nested section is not copied to the output. -->
<xsl:template match="h:section" mode="soussect">
<soussection></soussection>
</xsl:template>
<!-- Converts an hdoc div into paragraph output.
     When the div has at least one p child, a single <paragraphe> holding the string
     value of the whole div is written. Any ul children are then handed to their own
     template (nothing happens when there are none). -->
<xsl:template match="h:div">
  <xsl:if test="exists(h:p)">
    <paragraphe><xsl:value-of select="."/></paragraphe>
  </xsl:if>
  <xsl:apply-templates select="h:ul"/>
</xsl:template>
<!-- Flattens an hdoc list: every li child yields one <paragraphe>
     whose content is the value of the p element(s) directly inside that item. -->
<xsl:template match="h:ul">
<xsl:for-each select="h:li">
<paragraphe>
<xsl:value-of select="h:p"/>
</paragraphe>
</xsl:for-each>
</xsl:template>
</xsl:stylesheet>
\ No newline at end of file
</xsl:stylesheet>
<project name="hdoc_to_optim" default="unzipSource">
<project name="hdoc_to_optim" default="main">
<property file="build.properties"/>
......@@ -22,9 +22,11 @@
<unzip dest="${hdocdir}" src ="${hdocFilePath}"/>
</target>
<target name="main" depends="unzipSource">
<xslt in="${hdocdir}/content.xml" out="${outputPath}/result.json" style="xsl/main.xsl"/>
</target>
<target name="clean" depends="unzipSource">
<delete file="get_content.ant"/>
<delete file="get_ressources.ant"/>
<delete dir="${hdocdir}" />
</target>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
[
1,
2,
3
]
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="array.xsl"?>
<root>
<ul>
<li value="1"/>
<li value="2"/>
<li value="3"/>
</ul>
</root>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xpath-default-namespace="http://www.utc.fr/ics/hdoc/xhtml"
exclude-result-prefixes="xs"
version="2.0">
<!-- Entry stylesheet: writes one text object with the members "title", "authors",
     "keywords" and "rights". Each value is produced by a named template that is
     presumably defined in the imported subroutine stylesheets below (verify). -->
<xsl:import href="subroutines/title.xsl"/>
<xsl:import href="subroutines/author.xsl"/>
<xsl:import href="subroutines/keyword.xsl"/>
<xsl:import href="subroutines/right.xsl"/>
<!-- The result is serialized as plain text, not as XML. -->
<xsl:output method="text" encoding="UTF-8" omit-xml-declaration="yes"/>
<!-- Root template: emits the braces and member names literally and delegates every
     value to the matching "*-main" named template, called with the document node
     as context. -->
<xsl:template match="/">
{
"title" : <xsl:call-template name="title-main"/>,
"authors" : <xsl:call-template name="author-main"/>,
"keywords" : <xsl:call-template name="keyword-main"/>,
"rights" : <xsl:call-template name="right-main"/>
}
</xsl:template>
<!-- Text nodes matched through template application produce no output. -->
<xsl:template match="text()"/>
</xsl:stylesheet>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<?target hdoc/content.xml ?>
<?main author-main?>
<xsl:stylesheet version="2.0" xpath-default-namespace="http://www.utc.fr/ics/hdoc/xhtml"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:h2m="http://www.utc.fr/hdoc/hdoc_to_mongo"
xmlns:h2ma="http://www.utc.fr/hdoc/hdoc_to_mongo/author">
<xsl:import href="../xsl-import/array.xsl"/>
<xsl:template match="text()"></xsl:template>
<xsl:template match="/html/head">
authors: "<xsl:value-of select="meta[@name='author']/@content"/>"
xmlns:h2m="http://www.utc.fr/hdoc/hdoc_to_mongo">
<xsl:import href="../xsl-import/string.xsl"/>
<xsl:template name="author-main">
<xsl:value-of select="h2m:escape-string(/html/head/meta[@name='author']/@content)"/>
</xsl:template>
</xsl:stylesheet>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<?target hdoc/content.xml ?>
<?main author-main?>
<xsl:stylesheet version="2.0" xpath-default-namespace="http://www.utc.fr/ics/hdoc/xhtml"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:h2m="http://www.utc.fr/hdoc/hdoc_to_mongo"
xmlns:h2ma="http://www.utc.fr/hdoc/hdoc_to_mongo/author">
xmlns:h2m="http://www.utc.fr/hdoc/hdoc_to_mongo">
<xsl:import href="../xsl-import/array.xsl"/>
<xsl:template match="text()"></xsl:template>
<xsl:template match="/html/head">
keywords: <xsl:value-of select="h2m:array(meta[@name='keywords']/@content)"/>
<xsl:template name="keyword-main">
<xsl:value-of select="h2m:array(/html/head/meta[@name='keywords']/@content)"/>
</xsl:template>
</xsl:stylesheet>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<?target hdoc/content.xml ?>
<xsl:stylesheet version="2.0" xpath-default-namespace="http://www.utc.fr/ics/hdoc/xhtml"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:h2m="http://www.utc.fr/hdoc/hdoc_to_mongo">
<!-- Subroutine for the "rights" value. h2m:escape-string is expected to come from
     the imported string.xsl. -->
<xsl:import href="../xsl-import/string.xsl"/>
<!-- right-main: outputs the @content of the head meta element named "rights",
     passed through h2m:escape-string. The path is absolute, so the result does not
     depend on the caller's context node. -->
<xsl:template name="right-main">
<xsl:value-of select="h2m:escape-string(/html/head/meta[@name='rights']/@content)"/>
</xsl:template>
</xsl:stylesheet>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<?target hdoc/content.xml ?>
<xsl:stylesheet version="2.0" xpath-default-namespace="http://www.utc.fr/ics/hdoc/xhtml"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:h2m="http://www.utc.fr/hdoc/hdoc_to_mongo">
<!-- Subroutines for the "title" value. h2m:escape-string is expected to come from
     the imported string.xsl. -->
<xsl:import href="../xsl-import/string.xsl"/>
<!-- title-main: dispatches on the caller's context node. When the parent of the
     context node has a section child, the section title is used; otherwise the
     document title is used. NOTE(review): the result therefore depends on where the
     template is called from; confirm the intended context with the callers. -->
<xsl:template name="title-main">
<xsl:choose>
<xsl:when test="../section">
<xsl:call-template name="title-section"></xsl:call-template>
</xsl:when>
<xsl:otherwise>
<xsl:call-template name="title-rootDocument"></xsl:call-template>
</xsl:otherwise>
</xsl:choose>
</xsl:template>
<!-- title-section: outputs the text children of header/h1, relative to the context
     node, passed through h2m:escape-string. -->
<xsl:template name="title-section">
<xsl:value-of select="h2m:escape-string(header/h1/text())"/>
</xsl:template>
<!-- title-rootDocument: outputs the text children of /html/head/title, passed
     through h2m:escape-string. -->
<xsl:template name="title-rootDocument">
<xsl:value-of select="h2m:escape-string(/html/head/title/text())"/>
</xsl:template>
</xsl:stylesheet>
\ No newline at end of file
......@@ -2,13 +2,14 @@
<xsl:stylesheet version="2.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:h2m="http://www.utc.fr/hdoc/hdoc_to_mongo">
<xsl:import href="../xsl-import/string.xsl"/>
<xsl:function name="h2m:array">
<xsl:param name="nodeset"/>
[
<xsl:for-each select="$nodeset[not(position() = last())]">
"<xsl:value-of select="."/>",
<xsl:value-of select="h2m:escape-string(.)"/>,
</xsl:for-each>
<xsl:value-of select="$nodeset[last()]"/>
<xsl:value-of select="h2m:escape-string($nodeset[last()])"/>
]
</xsl:function>
</xsl:stylesheet>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:h2m="http://www.utc.fr/hdoc/hdoc_to_mongo">
<!-- h2m:escape-string($value): returns $value wrapped in double quotes so that it can
     be written as a JSON string literal.
     Parameter: value, the string (or single node) to quote; an empty sequence
     yields an empty quoted string.
     Escaping order matters: backslashes are doubled first, so that the backslashes
     introduced by the later escapes are not doubled again. Double quotes, newline,
     carriage return and tab are escaped afterwards. Other control characters are
     left unchanged. -->
<xsl:function name="h2m:escape-string">
<xsl:param name="value"/>
<xsl:variable name="backslashes-escaped" select="replace($value, '\\', '\\\\')"/>
<xsl:variable name="quotes-escaped" select="replace($backslashes-escaped, '&quot;', '\\&quot;')"/>
<xsl:variable name="escaped" select="replace(replace(replace($quotes-escaped, '\n', '\\n'), '\r', '\\r'), '\t', '\\t')"/>
"<xsl:value-of select="$escaped"/>"
</xsl:function>
</xsl:stylesheet>
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment