Commit f194c1a3 authored by Gregory's avatar Gregory

Begin JSON transformations.

Update hdoc_to_elasticSearch to delete temp files and create a JSON
file.
Add first XSL transformations to extract <head> data (author, rights,
date, keywords)
parent 6218c009
libdir=${basedir}/lib
xsldir=${basedir}/xsl
inputPath=${basedir}/input
outputPath=${basedir}/output
\ No newline at end of file
tmpHdoc=${basedir}/tmp_hdoc
outputPath=${basedir}/output
<project name="hdoc_to_elasticsearch" default="unzipSource">
<property file="build.properties"/>
<target name="unzipSource">
<project name="hdoc_to_elasticsearch" default="clean">
<property file="build.properties"/>
<target name="unzipSource">
<echo message="Conversion begin" />
<path id="hdocDirPath">
<fileset dir="${inputPath}" id="hdocFile">
<include name="*.hdoc"/>
</fileset>
<include name="*.hdoc"/>
</fileset>
</path>
<property name="hdocFile" refId="hdocDirPath"/>
<property name="hdocFile" refId="hdocDirPath"/>
<basename property="hdocFileName" file="${hdocFile}" suffix=".hdoc"/>
<echo>Entry file : ${hdocFileName}</echo>
<echo>Entry file : ${hdocFileName}</echo>
<!-- create subdirectory for each file -->
<mkdir dir="${outputPath}/${hdocFileName}"/>
<!--unzips the hdoc to be converted into the previously created directory-->
<unzip dest="${outputPath}/${hdocFileName}">
<fileset dir="${inputPath}" id="hdocFile">
<include name="${hdocFileName}.hdoc"/>
</fileset>
</unzip>
<echo message="Conversion end" />
</target>
<mkdir dir="${tmpHdoc}/${hdocFileName}"/>
<!--unzips the hdoc to be converted into the previously created directory-->
<unzip dest="${tmpHdoc}/${hdocFileName}">
<fileset dir="${inputPath}" id="hdocFile">
<include name="${hdocFileName}.hdoc"/>
</fileset>
</unzip>
</target>
<!-- jsonFiles: runs after unzipSource (see depends). In this version it only logs a message; the JSON creation itself is still to be written. -->
<target name="jsonFiles" depends="unzipSource">
<echo message="JSON File creation" />
<!-- TODO: create the JSON file and extract the data -->
</target>
<echo message="JSON File creation" />
<xslt in="${tmpHdoc}/${hdocFileName}/content.xml" out="${outputPath}/${hdocFileName}.json" style="xsl/content_extractor.xsl"/>
</target>
<!-- Clear tmp files: runs after jsonFiles (see depends). This version of the target contains no tasks, so nothing is deleted yet. -->
<target name="clean" depends="jsonFiles">
</target>
<!-- Clear tmp files: last step of the chain, runs after jsonFiles (see depends). -->
<target name="clean" depends="jsonFiles">
<echo message="Clean files" />
<!-- Delete the files found under ${tmpHdoc} (fileset) together with the directories beneath it (dirset);
     verbose="true" asks Ant to log what is deleted.
     NOTE(review): the ${tmpHdoc} directory itself is presumably left in place; confirm against the Ant delete task documentation. -->
<delete includeemptydirs="true" verbose="true">
<fileset dir="${tmpHdoc}"> </fileset>
<dirset dir="${tmpHdoc}" includes="**/*" />
</delete>
<echo message="Conversion end" />
</target>
</project>
<echo message="Conversion end" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Extracts the <head> metadata of an hdoc content file (title, keywords,
  author, date, rights) and writes it as a single JSON object.

  Input : an XML document whose root is <html> in the
          http://www.utc.fr/ics/hdoc/xhtml namespace.
  Output: UTF-8 text of the form
            {
              "title" : "...",
              "keywords" : ["...", "..."],
              "author" : "...",
              "date" : "...",
              "rights" : "..."
            }
          "title" and "keywords" are always present; the other members are
          written only when the matching <meta> element exists.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:f="urn:hdoc-to-elasticsearch:functions"
                xpath-default-namespace="http://www.utc.fr/ics/hdoc/xhtml"
                exclude-result-prefixes="xs f"
                version="2.0">

  <!-- The result is JSON, not XML: the text method stops the serializer from
       turning characters such as & and < into XML entities.
       UTF-8 keeps special characters (accents, ...) intact. -->
  <xsl:output method="text" encoding="UTF-8"/>

  <!--
    Returns $value as a quoted JSON string literal.
    Backslash, double quote, line feed, carriage return and tab are escaped;
    these are the only characters an XML 1.0 document can contain that JSON
    forbids inside a string. An empty sequence yields "".
  -->
  <xsl:function name="f:json-string" as="xs:string">
    <xsl:param name="value" as="xs:string?"/>
    <!-- The backslash must be escaped first, otherwise the backslashes added
         by the later replacements would be doubled. -->
    <xsl:variable name="backslashEscaped"
                  select="replace(string($value), '\\', '\\\\')"/>
    <xsl:variable name="quoteEscaped"
                  select="replace($backslashEscaped, '&quot;', '\\&quot;')"/>
    <xsl:variable name="controlEscaped"
                  select="replace(replace(replace($quoteEscaped, '\n', '\\n'), '\r', '\\r'), '\t', '\\t')"/>
    <xsl:sequence select="concat('&quot;', $controlEscaped, '&quot;')"/>
  </xsl:function>

  <!-- Entry point: wraps everything produced for <html> in one JSON object. -->
  <xsl:template match="/">
    <xsl:text>{&#10;</xsl:text>
    <xsl:apply-templates select="html"/>
    <xsl:text>}&#10;</xsl:text>
  </xsl:template>

  <!-- Processes <head> then <body> explicitly, so that the whitespace text
       nodes between them are not copied to the output. -->
  <xsl:template match="html">
    <xsl:apply-templates select="head"/>
    <xsl:apply-templates select="body"/>
  </xsl:template>

  <!-- Extraction of the head information. -->
  <xsl:template match="head">
    <xsl:text>  "title" : </xsl:text>
    <xsl:value-of select="f:json-string(string-join(title/text(), ' '))"/>
    <xsl:text>,&#10;  "keywords" : [</xsl:text>
    <xsl:for-each select="meta[@name = 'keywords']">
      <!-- Separator goes before every item except the first. -->
      <xsl:if test="position() != 1">
        <xsl:text>, </xsl:text>
      </xsl:if>
      <xsl:value-of select="f:json-string(@content)"/>
    </xsl:for-each>
    <xsl:text>]</xsl:text>
    <!-- Each template below starts with its own comma, which is safe because
         "title" and "keywords" are always written before them.
         NOTE(review): a name that appears on several <meta> elements is
         emitted as a repeated key; confirm whether hdoc allows that. -->
    <xsl:apply-templates select="meta[@name = ('author', 'date', 'rights')]"/>
    <xsl:text>&#10;</xsl:text>
  </xsl:template>

  <!-- Extraction of the body information: nothing is extracted yet. -->
  <xsl:template match="body"/>

  <xsl:template match="meta[@name = 'author']">
    <xsl:text>,&#10;  "author" : </xsl:text>
    <xsl:value-of select="f:json-string(@content)"/>
  </xsl:template>

  <xsl:template match="meta[@name = 'date']">
    <xsl:text>,&#10;  "date" : </xsl:text>
    <xsl:value-of select="f:json-string(@content)"/>
  </xsl:template>

  <xsl:template match="meta[@name = 'rights']">
    <xsl:text>,&#10;  "rights" : </xsl:text>
    <xsl:value-of select="f:json-string(@content)"/>
  </xsl:template>

</xsl:stylesheet>
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment