Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
H
hdoc
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
2
Merge Requests
2
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Stephane Crozat
hdoc
Commits
25142265
Commit
25142265
authored
Jan 11, 2017
by
Vincent Keller
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Update readme
parent
25b43161
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
120 additions
and
94 deletions
+120
-94
hdoc_to_elasticSearch/README.md
hdoc_to_elasticSearch/README.md
+27
-1
hdoc_to_elasticSearch/xsl/content_extractor.xsl
hdoc_to_elasticSearch/xsl/content_extractor.xsl
+93
-93
No files found.
hdoc_to_elasticSearch/README.md
View file @
25142265
...
...
@@ -23,4 +23,30 @@ No depedencies
Pour utiliser ce convertisseur, veuillez suivre les étapes suivantes :
1 - Placer le(s) fichier(s) hdoc que vous souhaitez convertir dans le dossier input (des exemples sont fournis dans le dossier sample).
2 - Lancer l'un des exécutables run.bat ou run.sh en fonction du système d'exploitation de votre ordinateur.
3 - Récupérer le(s) résultat(s) dans le dossier output.
\ No newline at end of file
3 - Récupérer le(s) résultat(s) au format .json dans le dossier output.
##Conversion
------------
Ce module permet :
-
L'extraction des données du header
-
Titre
-
Auteurs
-
Mots clés
-
Droits
-
L'extraction des exercices conformément aux attentes de Kibana :
-
Récupération des exercices et affectation d'un ID unique.
-
Association des questions aux exercices grâce aux ID générés précédemment.
##TODO
------
-
Prendre en compte les futures modifications de la conversion opale_to_hdoc pour les exercices.
-
Ajouter la gestion des sections, sachant que Kibana ne considère pas les tableaux d'objets JSON. Pour cela, la solution que nous avons retenue :
-
Générer des ID via la fonction generate-id() pour produire une clé de la forme section_ID.
-
Ajouter une référence vers l'ID de la section mère dans chacun des fils. (Ex : "parent" : "section_ID")
##Technical notes
-----------------
Les tabulations lors de la conversion en JSON sont déconseillées car elles posent problème avec Kibana.
\ No newline at end of file
hdoc_to_elasticSearch/xsl/content_extractor.xsl
View file @
25142265
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet
xmlns:xsl=
"http://www.w3.org/1999/XSL/Transform"
xmlns:xs=
"http://www.w3.org/2001/XMLSchema"
xpath-default-namespace=
"http://www.utc.fr/ics/hdoc/xhtml"
exclude-result-prefixes=
"xs"
version=
"2.0"
>
<xsl:output
omit-xml-declaration=
"yes"
encoding=
"UTF-8"
>
<!-- Encodage UTF-8 pour caractères spéciaux (accents...) -->
</xsl:output>
<!-- On remplace les quotes dans le texte pour éviter les problèmes de JSON -->
<xsl:param
name=
"pPattern"
>
"
</xsl:param>
<xsl:param
name=
"pReplacement"
>
\\"
</xsl:param>
<!-- -->
<xsl:template
match=
"/"
>
{
<xsl:apply-templates
select=
"html"
/>
}
</xsl:template>
<!-- Extraction des informations du head -->
<xsl:template
match=
"head"
>
"title" : "
<xsl:value-of
select=
"title/text(
)"
/>
",
"keywords" : [
<xsl:for-each
select=
"meta[@name='keywords']"
>
"
<xsl:value-of
select=
"@content"
/>
"
<xsl:if
test=
"position() != last()"
>
,
</xsl:if>
</xsl:for-each>
],
<xsl:apply-templates
select=
"meta"
/>
</xsl:template>
<!-- Extraction des informations du body -->
<xsl:template
match=
"body"
>
<!-- Extraction des sections -->
<xsl:apply-templates
select=
"*/section[@data-hdoc-type='exercise']"
/>
</xsl:template>
<xsl:template
match=
"meta[@name='author']"
>
"author" : "
<xsl:value-of
select=
"@content"
/>
",
</xsl:template>
<xsl:template
match=
"meta[@name='date']"
>
"date" : "
<xsl:value-of
select=
"@content"
/>
",
</xsl:template>
<xsl:template
match=
"meta[@name='rights']"
>
"rights" : "
<xsl:value-of
select=
"@content"
/>
",
</xsl:template>
<!-- Traitement des sections -->
<xsl:template
match=
"section[not(@data-hdoc-type='exercise')]"
/>
<xsl:template
match=
"section[@data-hdoc-type='exercise']"
>
"exercice_
<xsl:value-of
select=
"generate-id()"
/>
" : {
"titre" : "
<xsl:value-of
select=
"normalize-space(replace(header/h1/text(),$pPattern,$pReplacement))"
/>
",
<xsl:apply-templates
select=
"div[@data-hdoc-type='description']"
/>
}
<xsl:if
test=
". != (//section[@data-hdoc-type='exercise'])[last()] or div[@data-hdoc-type='question']"
>
,
</xsl:if>
<!-- On test si c'est le dernier exercice et qu'il n'a pas de question -->
<xsl:apply-templates
select=
"div[@data-hdoc-type='question']"
>
<xsl:with-param
name=
"prev_id"
select=
"generate-id()"
/>
<xsl:with-param
name=
"is_not_last"
select=
". != (//section[@data-hdoc-type='exercise'])[last()]"
/>
</xsl:apply-templates>
</xsl:template>
<!-- Traitement des question -->
<xsl:template
match=
"div[@data-hdoc-type='question']"
>
<xsl:param
name=
"prev_id"
/>
<xsl:param
name=
"is_not_last"
/>
"question_
<xsl:value-of
select=
"generate-id()"
/>
" : {
"parent" : "exercice_
<xsl:value-of
select=
"$prev_id"
/>
",
<xsl:apply-templates
select=
"div[@data-hdoc-type='description']"
/>
<xsl:apply-templates
select=
"div[@data-hdoc-type='solution']"
/>
}
<xsl:if
test=
"((position() != last()) or $is_not_last)"
>
,
</xsl:if>
<!-- On test si c'est le dernier exercice et la dernière question -->
</xsl:template>
<!-- Traitement de l'élément description -->
<xsl:template
match=
"div[@data-hdoc-type='description']"
>
"description" : "
<xsl:value-of
select=
"normalize-space(replace(./text(),$pPattern,$pReplacement))"
/>
"
</xsl:template>
<!-- Traitement de l'élément solution -->
<xsl:template
match=
"div[@data-hdoc-type='solution']"
>
,"solution" : "
<xsl:value-of
select=
"normalize-space(replace(./text(),$pPattern,$pReplacement))"
/>
"
</xsl:template>
xmlns:xs=
"http://www.w3.org/2001/XMLSchema"
xpath-default-namespace=
"http://www.utc.fr/ics/hdoc/xhtml"
exclude-result-prefixes=
"xs"
version=
"2.0"
>
<xsl:output
omit-xml-declaration=
"yes"
encoding=
"UTF-8"
>
<!-- Output is encoded as UTF-8 so special characters (accents, etc.) are preserved; the XML declaration is omitted from the result -->
</xsl:output>
<!-- Double quotes found in extracted text are replaced so they do not break the generated JSON.
     pPattern is the text to match and pReplacement is the replacement; the templates of this
     stylesheet pass both to replace(). -->
<xsl:param
name=
"pPattern"
>
"
</xsl:param>
<xsl:param
name=
"pReplacement"
>
\\"
</xsl:param>
<!-- Root template: wraps whatever the html element produces in the outer braces of a single JSON object -->
<xsl:template
match=
"/"
>
{
<xsl:apply-templates
select=
"html"
/>
}
</xsl:template>
<!-- Head extraction: emits the "title" and "keywords" JSON members, then applies the meta
     templates (author, date, rights).
     Double quotes in the title and in each keyword are escaped with $pPattern / $pReplacement,
     exactly as the exercise, description and solution templates do, so that a quote in the
     metadata cannot terminate the JSON string early.
     string-join() guards replace() against a title made of several text nodes, which would
     otherwise raise a type error in XSLT 2.0.
     Literal text is kept adjacent to xsl:value-of so no line break ends up inside a JSON string. -->
<xsl:template match="head">
    "title" : "<xsl:value-of select="normalize-space(replace(string-join(title/text(), ''), $pPattern, $pReplacement))"/>",
    "keywords" : [
    <xsl:for-each select="meta[@name='keywords']">
        "<xsl:value-of select="replace(string(@content), $pPattern, $pReplacement)"/>"
        <!-- Separator between keywords, none after the last one -->
        <xsl:if test="position() != last()">,</xsl:if>
    </xsl:for-each>
    ],
    <xsl:apply-templates select="meta"/>
</xsl:template>
<!-- Body extraction: only exercise sections are processed -->
<xsl:template
match=
"body"
>
<!-- Selects the section elements typed 'exercise' that are grandchildren of body -->
<xsl:apply-templates
select=
"*/section[@data-hdoc-type='exercise']"
/>
</xsl:template>
<!-- Author metadata: emits an "author" JSON member from the whitespace-normalized content attribute, followed by a comma -->
<xsl:template
match=
"meta[@name='author']"
>
"author" : "
<xsl:value-of
select=
"normalize-space(@content)"
/>
",
</xsl:template>
<!-- Date metadata: emits a "date" JSON member from the whitespace-normalized content attribute, followed by a comma -->
<xsl:template
match=
"meta[@name='date']"
>
"date" : "
<xsl:value-of
select=
"normalize-space(@content)"
/>
",
</xsl:template>
<!-- Rights metadata: emits a "rights" JSON member from the whitespace-normalized content attribute, followed by a comma -->
<xsl:template
match=
"meta[@name='rights']"
>
"rights" : "
<xsl:value-of
select=
"normalize-space(@content)"
/>
",
</xsl:template>
<!-- Section handling: sections that are not exercises produce no output (empty template) -->
<xsl:template
match=
"section[not(@data-hdoc-type='exercise')]"
/>
<!-- Exercise section: emits one "exercice_<id>" JSON object holding the title and the optional
     description, then one sibling "question_<id>" object per question of the exercise.
     The id comes from generate-id() and is passed to the question template as prev_id so each
     question can reference its parent exercise. -->
<xsl:template match="section[@data-hdoc-type='exercise']">
    <!-- Node identity ("is") is used instead of "!=": "!=" compares string values, so two
         exercises with the same text content would both be treated as the last one and the
         separating comma would be dropped. -->
    <xsl:variable name="is_not_last" select="not(. is (//section[@data-hdoc-type='exercise'])[last()])"/>
    <xsl:variable name="description" select="div[@data-hdoc-type='description']"/>
    "exercice_<xsl:value-of select="generate-id()"/>" : {
    <!-- string-join() keeps replace() from failing with a type error when h1 holds several text nodes -->
    "titre" : "<xsl:value-of select="normalize-space(replace(string-join(header/h1/text(), ''), $pPattern, $pReplacement))"/>"
    <!-- The comma after the title is only emitted when a description member follows -->
    <xsl:if test="$description">,</xsl:if>
    <xsl:apply-templates select="$description"/>
    }
    <!-- A comma follows the exercise unless it is the last exercise and it has no question -->
    <xsl:if test="$is_not_last or div[@data-hdoc-type='question']">,</xsl:if>
    <xsl:apply-templates select="div[@data-hdoc-type='question']">
        <xsl:with-param name="prev_id" select="generate-id()"/>
        <xsl:with-param name="is_not_last" select="$is_not_last"/>
    </xsl:apply-templates>
</xsl:template>
<!-- Question handling: emits one "question_<id>" JSON object that references its exercise.
     Parameters:
       prev_id     - generated id of the parent exercise, used to build the "parent" value
       is_not_last - true when the parent exercise is not the last exercise of the document -->
<xsl:template match="div[@data-hdoc-type='question']">
    <xsl:param name="prev_id"/>
    <xsl:param name="is_not_last"/>
    "question_<xsl:value-of select="generate-id()"/>" : {
    "parent" : "exercice_<xsl:value-of select="$prev_id"/>"
    <!-- The comma after "parent" is only emitted when a description follows; the solution
         template writes its own leading comma, so a question without a description no longer
         produces a dangling or doubled comma. -->
    <xsl:if test="div[@data-hdoc-type='description']">,</xsl:if>
    <xsl:apply-templates select="div[@data-hdoc-type='description']"/>
    <xsl:apply-templates select="div[@data-hdoc-type='solution']"/>
    }
    <!-- No comma only after the last question of the last exercise -->
    <xsl:if test="position() != last() or $is_not_last">,</xsl:if>
</xsl:template>
<!-- Description element: emits a "description" JSON member built from the element's own text
     nodes, with double quotes escaped via $pPattern / $pReplacement and whitespace normalized.
     string-join() concatenates the text nodes first: passing several text nodes directly to
     replace() (mixed content such as text around an inline element) is a type error in XSLT 2.0. -->
<xsl:template match="div[@data-hdoc-type='description']">
    "description" : "<xsl:value-of select="normalize-space(replace(string-join(text(), ''), $pPattern, $pReplacement))"/>"
</xsl:template>
<!-- Solution element: emits a "solution" JSON member, preceded by its own comma, built from the
     element's own text nodes, with double quotes escaped via $pPattern / $pReplacement and
     whitespace normalized.
     string-join() concatenates the text nodes first: passing several text nodes directly to
     replace() (mixed content such as text around an inline element) is a type error in XSLT 2.0. -->
<xsl:template match="div[@data-hdoc-type='solution']">
    ,"solution" : "<xsl:value-of select="normalize-space(replace(string-join(text(), ''), $pPattern, $pReplacement))"/>"
</xsl:template>
</xsl:stylesheet>
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment