Commit 88018ddf authored by Jean Vintache

clean input and deal with br tags

parent 4a3dabab
......@@ -2,6 +2,18 @@
<project basedir="." name="myantce" default="main">
<property file="build.properties"/>
<!-- Registers the <htmlcleaner> Ant task: class org.htmlcleaner.HtmlCleanerForAnt, loaded from lib/htmlcleaner-2.16.jar. -->
<taskdef name="htmlcleaner" classname="org.htmlcleaner.HtmlCleanerForAnt">
<classpath>
<pathelement location="lib/htmlcleaner-2.16.jar"/>
</classpath>
</taskdef>
<!-- Registers the <jing> Ant task: class com.thaiopensource.relaxng.util.JingTask, loaded from lib/jing.jar. -->
<!-- Declared here, ahead of the targets, rather than inside the target that uses it. -->
<taskdef name="jing" classname="com.thaiopensource.relaxng.util.JingTask">
<classpath>
<pathelement location="lib/jing.jar"/>
</classpath>
</taskdef>
<target name="start">
<delete dir="${tmp}" failonerror="false"/>
<mkdir dir="${tmp}"/>
......@@ -18,6 +30,9 @@
<!-- Apply Xslt -->
<!--
  apply-xslt: converts the pad export into the hdoc content file.
    1. htmlcleaner reads ${in}/pad.html and writes ${tmp}/aaaa.xml.
    2. html2xhtml.xsl writes ${tmp}/tmpPad.xhtml.
    3. xhtml2hdoc.xsl reads ${tmp}/tmpPad.xhtml and writes ${tmp}/tozip/content.xml.
-->
<target name="apply-xslt">
  <!-- Earlier regex-based rewrite of br tags, left disabled. -->
  <!-- <replaceregexp file="${in}/pad.html" match="&lt;br&gt;" replace="&lt;br /&gt;" flags="g" /> -->
  <htmlcleaner src="${in}/pad.html" dest="${tmp}/aaaa.xml"/>
  <!-- NOTE(review): the stylesheet is passed as its own input document; presumably it loads the real data itself (e.g. via unparsed-text) and the "in" file only triggers the run. Confirm against html2xhtml.xsl. -->
  <!-- The classpath uses a forward slash, like the other lib/ paths in this build file, so it is not tied to the Windows separator. -->
  <xslt classpath="lib/saxon9he.jar" in="${xsl}/html2xhtml.xsl" out="${tmp}/tmpPad.xhtml" style="${xsl}/html2xhtml.xsl" />
  <xslt in="${tmp}/tmpPad.xhtml" out="${tmp}/tozip/content.xml" style="${xsl}/xhtml2hdoc.xsl" />
</target>
......@@ -42,13 +57,7 @@
<echo message="application/x-hdoc+zip" file="${tmp}/tozip/mimetype" />
</target>
<target name="jing-hdoc">
<taskdef name="jing" classname="com.thaiopensource.relaxng.util.JingTask">
<classpath>
<pathelement location="lib/jing.jar"/>
</classpath>
</taskdef>
<target name="jing-hdoc">
<jing file="${tmp}/tozip/content.xml" rngfile="schema/xhtml/hdoc1-xhtml.rng"></jing>
<echo>content.xml respecte le schema hdoc1-xhtml.rng</echo>
......@@ -59,8 +68,12 @@
<zip basedir="${tmp}/tozip/" destfile="${out}/output.hdoc" />
</target>
<target name="main" depends="start,apply-xslt,construct_hdoc,jing-hdoc,zip">
<!-- Default target of the project: runs start, apply-xslt, construct_hdoc, jing-hdoc and zip in that order, each through its own antcall. -->
<!-- NOTE(review): antcall runs every step in a fresh project context, unlike a depends list; confirm that this isolation is intended. -->
<target name="main">
<antcall target="start"></antcall>
<antcall target="apply-xslt"></antcall>
<antcall target="construct_hdoc"></antcall>
<antcall target="jing-hdoc"></antcall>
<antcall target="zip"></antcall>
</target>
</project>
\ No newline at end of file
......@@ -8,11 +8,11 @@
<xsl:strip-space elements="*"/>
<xsl:param name="text-encoding" as="xs:string" select="'utf-8'"/>
<xsl:param name="text-uri" as="xs:string" select="'../input/pad.html'"/>
<xsl:param name="text-uri" as="xs:string" select="'../tmp/aaaa.xml'"/>
<xsl:template name="text2xml">
<xsl:variable name="text" select="unparsed-text($text-uri, $text-encoding)"/>
<xsl:analyze-string select="$text" regex=".*title.(.*).title.\n.*\n.*\n.*\n.*\n.*\n.*.body.(.*br.*)..body.">
<xsl:analyze-string select="$text" flags="s" regex=".*title.(.*?)./title>.*.style.*/style.*body.(.*)./body">
<xsl:matching-substring>
<head>
<title>
......@@ -22,10 +22,11 @@
</head>
<body>
<div>
<xsl:value-of select="replace(normalize-space(regex-group(2)), 'br', 'br/')" disable-output-escaping="yes"/>
<p>
<xsl:value-of select="replace(normalize-space(regex-group(2)), 'br /', 'br /')" disable-output-escaping="yes"/>
</p>
</div>
</body>
</xsl:matching-substring>
</xsl:analyze-string>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment