Commit f194c1a3 authored by Gregory's avatar Gregory

Begin JSON transformations.

Update hdoc_to_elasticSearch to delete temp files and create a JSON file.
Add first XSL transformations to extract <head> data (author, rights,
date, keywords).
parent 6218c009
libdir=${basedir}/lib libdir=${basedir}/lib
xsldir=${basedir}/xsl xsldir=${basedir}/xsl
inputPath=${basedir}/input inputPath=${basedir}/input
outputPath=${basedir}/output tmpHdoc=${basedir}/tmp_hdoc
\ No newline at end of file outputPath=${basedir}/output
<project name="hdoc_to_elasticsearch" default="unzipSource"> <project name="hdoc_to_elasticsearch" default="clean">
<property file=""/> <property file=""/>
<target name="unzipSource">
<target name="unzipSource">
<echo message="Conversion begin" /> <echo message="Conversion begin" />
<path id="hdocDirPath"> <path id="hdocDirPath">
<fileset dir="${inputPath}" id="hdocFile"> <fileset dir="${inputPath}" id="hdocFile">
<include name="*.hdoc"/> <include name="*.hdoc"/>
</fileset> </fileset>
</path> </path>
<property name="hdocFile" refId="hdocDirPath"/>
<property name="hdocFile" refId="hdocDirPath"/>
<basename property="hdocFileName" file="${hdocFile}" suffix=".hdoc"/> <basename property="hdocFileName" file="${hdocFile}" suffix=".hdoc"/>
<echo>Entry file : ${hdocFileName}</echo>
<echo>Entry file : ${hdocFileName}</echo>
<!-- create subdirectory for each file --> <!-- create subdirectory for each file -->
<mkdir dir="${outputPath}/${hdocFileName}"/> <mkdir dir="${tmpHdoc}/${hdocFileName}"/>
<!--unzips the hdoc to be converted into the previously created directory--> <!--unzips the hdoc to be converted into the previously created directory-->
<unzip dest="${outputPath}/${hdocFileName}"> <unzip dest="${tmpHdoc}/${hdocFileName}">
<fileset dir="${inputPath}" id="hdocFile"> <fileset dir="${inputPath}" id="hdocFile">
<include name="${hdocFileName}.hdoc"/> <include name="${hdocFileName}.hdoc"/>
</fileset> </fileset>
</unzip> </unzip>
<echo message="Conversion end" />
</target> </target>
<target name="jsonFiles" depends="unzipSource"> <target name="jsonFiles" depends="unzipSource">
<echo message="JSON File creation" /> <echo message="JSON File creation" />
<!-- TODO : create JSON & extract data --> <xslt in="${tmpHdoc}/${hdocFileName}/content.xml" out="${outputPath}/${hdocFileName}.json" style="xsl/content_extractor.xsl"/>
</target> </target>
<!-- Clear tmp files --> <!-- Clear tmp files -->
<target name="clean" depends="jsonFiles"> <target name="clean" depends="jsonFiles">
<echo message="Clean files" />
</target> <!-- -->
<delete includeemptydirs="true" verbose="true">
<fileset dir="${tmpHdoc}"> </fileset>
<dirset dir="${tmpHdoc}" includes="**/*" />
<echo message="Conversion end" />
<echo message="Conversion end" />
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl=""
<xsl:output omit-xml-declaration="yes"
<!-- UTF-8 encoding for special characters (accents, etc.) -->
<!-- -->
<!-- Entry point: matches the document root and applies templates to its html child. -->
<xsl:template match="/" >
<xsl:apply-templates select="html"/>
<!-- Extract the head information: writes a "title" entry from the text of the
     title element, then a "keywords" list holding the content attribute of each
     meta[@name='keywords'] child (a comma follows every entry except the last),
     and finally applies templates to the meta children.
     NOTE(review): values are copied as-is between double quotes; a quote or
     backslash in the source would presumably break the generated JSON (confirm). -->
<xsl:template match="head">
"title" : "<xsl:value-of select="title/text()"/>"
"keywords" : [
<xsl:for-each select="meta[@name='keywords']">
"<xsl:value-of select="@content"/>"<xsl:if test="position() != last()">,</xsl:if>
<xsl:apply-templates select="meta"/>
<!-- Extract the body information: this empty template produces no output for body. -->
<xsl:template match="body" />
<!-- Writes an "author" entry from the content attribute of meta[@name='author']. -->
<xsl:template match="meta[@name='author']">
"author" : "<xsl:value-of select="@content"/>"
<!-- Writes a "date" entry from the content attribute of meta[@name='date']. -->
<xsl:template match="meta[@name='date']">
"date" : "<xsl:value-of select="@content"/>"
<!-- Writes a "rights" entry from the content attribute of meta[@name='rights']. -->
<xsl:template match="meta[@name='rights']">
"rights" : "<xsl:value-of select="@content"/>"
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment