Commit e11606ad authored by lhommeni's avatar lhommeni

Fonctionnement avec antce de wikipedia_to_hdoc

parent d2db9473
<?xml version="1.0" encoding="UTF-8"?>
<project name="wikiToHdoc" default="main" basedir="..">
  <!--
    Builds an hdoc archive from a Wikipedia page that is already saved as an XML file.
    Pipeline: prepare, prepareHdocStructure, transformWikiToHdoc, zip, clean.
    Relative paths used below (ant/saxon9he.jar, ant/${inputFile}) are resolved
    against basedir, which is declared as the parent of this build file's folder.
  -->

  <!-- inputFile is the only parameter a caller is expected to override; it is read from the ant/ folder -->
  <property name="inputFile" value="source.xml"/>
  <!-- filename = inputFile without its folder and without the .xml suffix -->
  <basename property="filename" file="${inputFile}" suffix=".xml"/>

  <!-- Folder paths -->
  <!-- tmp paths (removed at the end of the process) -->
  <property name="tmpFolderPath" value="${basedir}/tmp"/>
  <property name="tmpHdocResultFolderPath" value="${tmpFolderPath}/hdoc"/>
  <!-- result and xslt paths -->
  <property name="resultFolderPath" value="${basedir}/result"/>
  <property name="xsltFolderPath" value="${basedir}/xslt"/>
  <!-- result path: one folder per converted page -->
  <property name="hdocResultPath" value="${resultFolderPath}/${filename}"/>

  <!-- Main target: the steps are chained with "depends" so they run once, in this order, in a single project -->
  <target name="main" depends="prepare,prepareHdocStructure,transformWikiToHdoc,zip,clean"/>

  <target name="prepare">
    <!-- Create the tmp working folder and the result folder if they do not exist yet -->
    <mkdir dir="${tmpHdocResultFolderPath}"/>
    <mkdir dir="${hdocResultPath}"/>
    <!-- Write a temporary copy of the Wikipedia XML without the elements that are not useful -->
    <xslt classpath="ant/saxon9he.jar"
          in="ant/${inputFile}"
          out="${tmpFolderPath}/${filename}_prepared.xml"
          style="${xsltFolderPath}/prepare_wiki_to_hdoc.xsl">
      <factory name="net.sf.saxon.TransformerFactoryImpl"/>
    </xslt>
  </target>

  <!-- Create the hdoc structure of the new file: mimetype + META-INF/container.xml -->
  <target name="prepareHdocStructure">
    <mkdir dir="${tmpHdocResultFolderPath}"/>
    <mkdir dir="${tmpHdocResultFolderPath}/META-INF"/>
    <!-- echo creates the file itself, so no prior touch is needed -->
    <echo message="application/x-hdoc+zip" file="${tmpHdocResultFolderPath}/mimetype"/>
    <!-- Raw container description; the stylesheet below rewrites it into the hdoc container namespace -->
    <echoxml file="${tmpFolderPath}/containerTmp.xml">
      <container version="1.0" xmlns="urn:utc.fr:ics:hdoc:container">
        <rootfiles>
          <rootfile full-path="content.xml" media-type="text/xml"/>
        </rootfiles>
      </container>
    </echoxml>
    <!--
      container.xml is no longer pre-created with touch: an empty output file stamped at
      (or after) the input's timestamp could make the xslt up-to-date check skip the
      transformation. force="true" always regenerates it, and the explicit Saxon factory
      matches the other xslt calls of this file.
    -->
    <xslt classpath="ant/saxon9he.jar"
          force="true"
          in="${tmpFolderPath}/containerTmp.xml"
          out="${tmpHdocResultFolderPath}/META-INF/container.xml"
          style="${xsltFolderPath}/addNamespaceToContainer.xsl">
      <factory name="net.sf.saxon.TransformerFactoryImpl"/>
    </xslt>
  </target>

  <target name="transformWikiToHdoc">
    <!-- Applying wikipedia to hdoc transformation -->
    <xslt classpath="ant/saxon9he.jar"
          in="${tmpFolderPath}/${filename}_prepared.xml"
          out="${hdocResultPath}/${filename}.html"
          style="${xsltFolderPath}/wiki_to_hdoc.xsl">
      <factory name="net.sf.saxon.TransformerFactoryImpl"/>
    </xslt>
    <!-- The hdoc archive needs the content as "content.xml" (the root file declared in container.xml), not as an html file.
         The html file is kept in the result folder because it can be useful to the user. -->
    <copy file="${hdocResultPath}/${filename}.html" tofile="${tmpHdocResultFolderPath}/content.xml"/>
  </target>

  <!-- Make hdoc file: zip the content of the tmp hdoc folder -->
  <target name="zip">
    <zip destfile="${hdocResultPath}/${filename}.hdoc" basedir="${tmpHdocResultFolderPath}"/>
  </target>

  <!-- Delete tmp directory -->
  <target name="clean">
    <delete dir="${tmpFolderPath}"/>
  </target>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project name="wikiToHdoc" default="main" basedir=".">
  <!--
    Fetches a Wikipedia page and builds an hdoc archive from it.
    Pipeline: prepare (download + clean-up), prepareHdocStructure, transformWikiToHdoc, zip, clean.
  -->

  <!-- wikipediaUrl and filename are the only required parameters (both default to empty) -->
  <property name="wikipediaUrl" value=""/>
  <property name="filename" value=""/>

  <!-- Folder paths -->
  <!-- tmp paths (removed at the end of the process) -->
  <property name="tmpFolderPath" value="${basedir}/tmp"/>
  <property name="tmpHdocResultFolderPath" value="${tmpFolderPath}/hdoc"/>
  <!-- "in" is not referenced by any target of this file; kept so existing callers are not affected -->
  <property name="in" location="${basedir}/input"/>
  <property name="resultFolderPath" value="${basedir}/result"/>
  <property name="xsltFolderPath" value="${basedir}/xslt"/>
  <!-- result path: one folder per converted page -->
  <property name="hdocResultPath" value="${resultFolderPath}/${filename}"/>
  <!-- Downloaded page; FetchWikipediaXml writes to result/<filename>/<filename>.xml -->
  <property name="inputFile" value="${hdocResultPath}/${filename}.xml"/>

  <!-- Main target: the steps are chained with "depends" so they run once, in this order, in a single project -->
  <target name="main" depends="prepare,prepareHdocStructure,transformWikiToHdoc,zip,clean"/>

  <target name="prepare">
    <!-- Create the tmp working folder and the result folder if they do not exist yet
         (the result folder must exist before the fetcher writes into it) -->
    <mkdir dir="${tmpHdocResultFolderPath}"/>
    <mkdir dir="${hdocResultPath}"/>
    <!-- Fetching html from wikipedia; a failure of the fetcher stops the build -->
    <!-- NOTE(review): the fetcher writes to a path relative to its working directory; no "dir" is set here,
         so this relies on the forked JVM starting in the project base directory - confirm -->
    <java fork="true" failonerror="true" jar="${basedir}/javaSources/FetchWikipediaXml.jar">
      <arg value="${wikipediaUrl}"/>
      <arg value="${filename}"/>
    </java>
    <!-- Write a temporary copy of the Wikipedia XML without the elements that are not useful -->
    <xslt classpath="lib/saxon9he.jar"
          in="${inputFile}"
          out="${tmpFolderPath}/${filename}_prepared.xml"
          style="${xsltFolderPath}/prepare_wiki_to_hdoc.xsl">
      <factory name="net.sf.saxon.TransformerFactoryImpl"/>
    </xslt>
  </target>

  <!-- Create the hdoc structure of the new file: mimetype + META-INF/container.xml -->
  <target name="prepareHdocStructure">
    <mkdir dir="${tmpHdocResultFolderPath}"/>
    <mkdir dir="${tmpHdocResultFolderPath}/META-INF"/>
    <!-- echo creates the file itself, so no prior touch is needed -->
    <echo message="application/x-hdoc+zip" file="${tmpHdocResultFolderPath}/mimetype"/>
    <!-- Raw container description; the stylesheet below rewrites it into the hdoc container namespace -->
    <echoxml file="${tmpFolderPath}/containerTmp.xml">
      <container version="1.0" xmlns="urn:utc.fr:ics:hdoc:container">
        <rootfiles>
          <rootfile full-path="content.xml" media-type="text/xml"/>
        </rootfiles>
      </container>
    </echoxml>
    <!--
      container.xml is no longer pre-created with touch: an empty output file stamped at
      (or after) the input's timestamp could make the xslt up-to-date check skip the
      transformation. force="true" always regenerates it, and the explicit Saxon factory
      matches the other xslt calls of this file.
    -->
    <xslt classpath="lib/saxon9he.jar"
          force="true"
          in="${tmpFolderPath}/containerTmp.xml"
          out="${tmpHdocResultFolderPath}/META-INF/container.xml"
          style="${xsltFolderPath}/addNamespaceToContainer.xsl">
      <factory name="net.sf.saxon.TransformerFactoryImpl"/>
    </xslt>
  </target>

  <target name="transformWikiToHdoc">
    <!-- Applying wikipedia to hdoc transformation -->
    <xslt classpath="lib/saxon9he.jar"
          in="${tmpFolderPath}/${filename}_prepared.xml"
          out="${hdocResultPath}/${filename}.html"
          style="${xsltFolderPath}/wiki_to_hdoc.xsl">
      <factory name="net.sf.saxon.TransformerFactoryImpl"/>
    </xslt>
    <!-- The hdoc archive needs the content as "content.xml" (the root file declared in container.xml), not as an html file.
         The html file is kept in the result folder because it can be useful to the user. -->
    <copy file="${hdocResultPath}/${filename}.html" tofile="${tmpHdocResultFolderPath}/content.xml"/>
  </target>

  <!-- Make hdoc file: zip the content of the tmp hdoc folder -->
  <target name="zip">
    <zip destfile="${hdocResultPath}/${filename}.hdoc" basedir="${tmpHdocResultFolderPath}"/>
  </target>

  <!-- Delete tmp directory -->
  <target name="clean">
    <delete dir="${tmpFolderPath}"/>
  </target>
</project>
<?xml version="1.0" encoding="UTF-8"?>
<project name="wikiToOpale" default="main" basedir="..">
  <!-- Converts a Wikipedia XML file to an Opale archive in two steps: wiki to hdoc, then hdoc to Opale -->

  <!-- inputFile is the only parameter a caller is expected to override -->
  <property name="inputFile" value="source.xml"/>
  <!-- filename = inputFile without its folder and without the .xml suffix -->
  <basename property="filename" file="${inputFile}" suffix=".xml"/>

  <!-- Folder paths -->
  <!-- tmp paths (removed at the end of the process) -->
  <property name="tmpFolderPath" value="${basedir}/tmp"/>
  <property name="tmpHdocResultFolderPath" value="${tmpFolderPath}/hdoc"/>
  <!-- ant and xslt paths -->
  <property name="resultFolderPath" value="${basedir}/result"/>
  <property name="antFolderPath" value="${basedir}/ant"/>
  <property name="xsltFolderPath" value="${basedir}/xslt"/>
  <!-- result path: folder that receives the .hdoc and .scar files -->
  <property name="hdocResultPath" value="${resultFolderPath}/${filename}"/>

  <target name="main">
    <!-- Step 1: wiki to hdoc -->
    <ant antfile="${antFolderPath}/wiki_to_hdoc.ant"/>
    <!-- Step 2: hdoc to opale. The dir attribute gives the called build its own base directory, which it needs to work -->
    <ant antfile="${basedir}/hdoc_to_opale/hdoc_to_opale.ant"
         dir="${basedir}/hdoc_to_opale">
      <property name="InputPath" value="${hdocResultPath}/${filename}.hdoc"/>
      <property name="OutputPath" value="${hdocResultPath}/${filename}.scar"/>
    </ant>
  </target>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project name="wikiToOpale" default="main" basedir="..">
  <!-- Fetches a Wikipedia page and converts it to an Opale archive in two steps: wiki to hdoc, then hdoc to Opale -->

  <!-- wikipediaUrl and filename are the only required parameters (both default to empty) -->
  <property name="wikipediaUrl" value=""/>
  <property name="filename" value=""/>

  <!-- Folder paths -->
  <!-- tmp paths (removed at the end of the process) -->
  <property name="tmpFolderPath" value="${basedir}/tmp"/>
  <property name="tmpHdocResultFolderPath" value="${tmpFolderPath}/hdoc"/>
  <!-- ant and xslt paths -->
  <property name="resultFolderPath" value="${basedir}/result"/>
  <property name="antFolderPath" value="${basedir}/ant"/>
  <property name="xsltFolderPath" value="${basedir}/xslt"/>
  <!-- result path: folder that receives the .hdoc and .scar files -->
  <property name="hdocResultPath" value="${resultFolderPath}/${filename}"/>

  <target name="main">
    <!-- Step 1: wiki to hdoc (download included) -->
    <ant antfile="${antFolderPath}/wiki_to_hdoc_fetcher.ant"/>
    <!-- Step 2: hdoc to opale. The dir attribute gives the called build its own base directory, which it needs to work -->
    <ant antfile="${basedir}/hdoc_to_opale/hdoc_to_opale.ant"
         dir="${basedir}/hdoc_to_opale">
      <property name="InputPath" value="${hdocResultPath}/${filename}.hdoc"/>
      <property name="OutputPath" value="${hdocResultPath}/${filename}.scar"/>
      <property name="OutputPathDivided" value="${hdocResultPath}/${filename}_divided.scar"/>
    </ant>
  </target>
</project>
\ No newline at end of file
hdoc_to_opale @ 5b6cedd1
Subproject commit 5b6cedd19b2bf98a4a78c27402102f0cc3fc0099
import java.io.BufferedInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
public class FetchWikipediaXml {
public static void main(String[] args) throws IOException {
URL url = new URL(args[0]);
String destination = args[1];
System.out.println(destination);
BufferedInputStream in = null;
FileOutputStream fout = null;
in = new BufferedInputStream(url.openStream());
fout = new FileOutputStream("test.xml");
final byte data[] = new byte[1024];
int count;
while ((count = in.read(data, 0, 1024)) != -1) {
fout.write(data, 0, count);
}
if (in != null) {
in.close();
}
if (fout != null) {
fout.close();
}
}
}
import java.io.BufferedInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
/**
 * Command-line tool that downloads a Wikipedia page into
 * {@code result/<destination>/<destination>.xml}, relative to the current
 * working directory. The target folder is not created here and must exist.
 *
 * <p>Arguments: {@code args[0]} is the page URL (must contain
 * {@code wikipedia.org/wiki}); {@code args[1]} is the non-empty destination
 * name.
 */
public class FetchWikipediaXml {
    /**
     * Validates the arguments and copies the page content to the result file.
     *
     * @param args page URL, then destination name
     * @throws Exception with message "Received invalid parameters" when an
     *                   argument is missing, the destination is empty, or the
     *                   URL is not a Wikipedia article URL; I/O failures are
     *                   propagated as well
     */
    public static void main(String[] args) throws Exception {
        // Report missing arguments with the same error as other invalid input
        // instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            throw new Exception("Received invalid parameters");
        }

        URL url = new URL(args[0]);
        String destination = args[1];
        System.out.println("URL "+args[0]);
        System.out.println("File "+args[1]);
        if (destination.length() == 0 || !args[0].contains("wikipedia.org/wiki")) {
            throw new Exception("Received invalid parameters");
        }

        String target = "result/" + destination + "/" + destination + ".xml";
        // try-with-resources closes both streams even when read/write throws,
        // which the previous manual close() calls did not guarantee.
        try (BufferedInputStream in = new BufferedInputStream(url.openStream());
             FileOutputStream fout = new FileOutputStream(target)) {
            final byte[] data = new byte[1024];
            int count;
            while ((count = in.read(data, 0, data.length)) != -1) {
                fout.write(data, 0, count);
            }
        }
    }
}
<?xml version="1.0" encoding="UTF-8"?>
<project name="myantce" basedir=".">
  <!-- Skeleton build file: declares the standard working folders and prints a greeting.
       It defines no target; the echo below is a top-level task. -->

  <!-- Working folders, all located under the base directory -->
  <property name="in"  location="${basedir}/input"/>
  <property name="out" location="${basedir}/output"/>
  <property name="tmp" location="${basedir}/tmp"/>
  <property name="xsl" location="${basedir}/xsl"/>
  <property name="lib" location="${basedir}/lib"/>
  <property name="log" location="${basedir}/log"/>

  <echo message="Hello World !"/>
</project>
\ No newline at end of file
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
@echo off
REM Windows launcher: runs the ant/wiki_to_opale_fetcher.ant build with Ant loaded from the jars in lib\.
REM Usage: <this script> <wikipediaUrl> <filename>
set lib=lib
set ant=ant/wiki_to_opale_fetcher.ant
REM First argument: value passed to the build as wikipediaUrl; second argument: value passed as filename
set url=%1
set filename=%2
REM Classpath wildcard covering every jar of the lib folder (org.apache.tools.ant.Main must be among them)
set scJarList=%lib%\*
REM NOTE(review): properties are passed as "-Dname value" rather than "-Dname=value" - presumably to avoid
REM cmd.exe splitting arguments on "="; confirm that the Ant version in lib accepts this form.
java.exe -classpath "%scJarList%" -Xmx150m org.apache.tools.ant.Main -buildfile %ant% -DwikipediaUrl %url% -Dfilename %filename%
REM Keep the console window open so the build output can be read
pause
REM Alternative launch in a minimized window (disabled):
REM start /MIN java.exe -classpath "%scJarList%" -Xmx150m org.apache.tools.ant.Main -buildfile %ant% %antparam%
#!/bin/sh
# Launcher: runs Ant (org.apache.tools.ant.Main) on the myantce.ant build file with the jars of lib/ on the classpath.
# The first command-line argument is forwarded to the build as the program.param property.
# NOTE(review): the Windows launcher shown alongside this script targets ant/wiki_to_opale_fetcher.ant with
# wikipediaUrl/filename properties, whereas this script points at myantce.ant - confirm this is intended.
lib="lib"
ant="myantce.ant"
antparam="-Dprogram.param=$1"

# Locate java and check that it is a Sun (HotSpot) or OpenJDK build
vJavaCmd="java"

# xCheckJava VARNAME
# Validates the java command stored in the variable whose name is given as $1.
# The variable is set to "false" when it is empty or when "<command> -version" mentions
# neither HotSpot nor OpenJDK; otherwise it is left unchanged.
xCheckJava () {
# Build the text "$VARNAME", then let eval/expr expand it to read the variable indirectly
vInputVarName=\$"$1"
vInputVarVal=`eval "expr \"$vInputVarName\" "`
if [ -z "$vInputVarVal" ];then
eval "$1=false"
return
fi
# java prints its version on stderr, hence 2>&1; keep only the first HotSpot/OpenJDK match
vSunJavaFound=`$vInputVarVal -version 2>&1 | grep -Eo -m 1 "(HotSpot)|(OpenJDK)"`
if [ "$vSunJavaFound" != "HotSpot" ] && [ "$vSunJavaFound" != "OpenJDK" ] ; then
eval "$1=false"
return
fi
}

# Try the java found on the PATH first, then fall back to $JAVA_HOME/bin/java
xCheckJava vJavaCmd
if [ "$vJavaCmd" = "false" ]; then
vJavaCmd="$JAVA_HOME/bin/java"
xCheckJava vJavaCmd
if [ "$vJavaCmd" = "false" ]; then
echo "ERREUR: JRE de SUN introuvable. Veuillez déclarer la variable d'environnement JAVA_HOME."
exit 1
fi
fi

# Run the command
# The classpath is the lib/* wildcard followed by ':' (an extra empty entry - presumably meant to add the current directory; confirm)
scJarList="$lib/*"
$vJavaCmd -classpath "$scJarList:" -Xmx150m org.apache.tools.ant.Main -buildfile $ant $antparam
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns="urn:utc.fr:ics:hdoc:container"
                exclude-result-prefixes="xs">

  <!-- Rebuilds every element of the input under this stylesheet's default namespace
       (urn:utc.fr:ics:hdoc:container) and puts an oxygen schema hint in front of the container element. -->

  <xsl:output method="xml" indent="yes"/>

  <!-- Identity transformation: fallback for the nodes not handled by a more specific template below -->
  <xsl:template match="node()|@*">
    <xsl:copy>
      <xsl:apply-templates select="node()|@*"/>
    </xsl:copy>
  </xsl:template>

  <!-- Suppress the processing instructions of the input -->
  <xsl:template match="processing-instruction()" priority="1"/>

  <!-- Namespace substitution: recreate each element from its local name only,
       so that it picks up the default namespace declared on this stylesheet -->
  <xsl:template match="*" priority="1">
    <xsl:element name="{local-name()}">
      <xsl:apply-templates select="node()|@*"/>
    </xsl:element>
  </xsl:template>

  <!-- Same substitution for the container element, preceded by the RelaxNG schema association for oxygen -->
  <xsl:template match="container" priority="2">
    <xsl:processing-instruction name="oxygen">RNGSchema="http://scenari.utc.fr/hdoc/schemas/container/hdoc1-container.rng" type="xml"</xsl:processing-instruction>
    <xsl:element name="{local-name()}">
      <xsl:apply-templates select="node()|@*"/>
    </xsl:element>
  </xsl:template>

</xsl:stylesheet>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="2.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                exclude-result-prefixes="xs">

  <!-- Copies the input document unchanged, except for the elements dropped by the empty templates below -->

  <xsl:output method="xml" indent="yes"/>

  <!-- Identity transformation -->
  <xsl:template match="node()|@*">
    <xsl:copy>
      <xsl:apply-templates select="node()|@*"/>
    </xsl:copy>
  </xsl:template>

  <!-- NOTE(review): the two patterns below name elements without a namespace; if the fetched page
       declares the XHTML namespace they would match nothing - confirm against a real input -->

  <!-- Scripts are not useful to us -->
  <xsl:template match="script"/>

  <!-- Drop the div elements at this depth: they are not useful and might interfere with the actual wiki to hdoc transformation -->
  <xsl:template match="/html/body/div/div/div/div"/>

</xsl:stylesheet>
\ No newline at end of file
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment