<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema" exclude-result-prefixes="xs"
    xmlns:regexp="http://exslt.org/regular-expressions" extension-element-prefixes="regexp">
    
    
    <!-- Serialize the result tree as indented XML. -->
    <xsl:output method="xml" indent="yes" />
    <!-- Drop whitespace-only text nodes from every element of the source document. -->
    <xsl:strip-space elements="*"/>

    <!-- Encoding handed to unparsed-text() / unparsed-text-available() when the input is read. -->
    <xsl:param name="text-encoding" as="xs:string" select="'utf-8'"/>
    <!-- URI of the text file to convert; callers may override the default path. -->
    <xsl:param name="text-uri" as="xs:string" select="'../tmp/pad-clean.xml'"/>
    
    <!--
        text2xml: reads the file at $text-uri as plain text (using $text-encoding)
        and rebuilds it as a <head> and a <body> element in the result tree.

        The text is matched against one regular expression that captures
          group 1: the text between the title tags (whitespace-normalized into <title>)
          group 2: the text between the body tags (written, unescaped, into <body>/<div>/<p>)

        When the text does not match the expression nothing is added to the
        result tree; a diagnostic is sent through xsl:message so the empty
        result is not silent.

        Reads the global parameters $text-uri and $text-encoding.
    -->
    <xsl:template name="text2xml">
        <xsl:variable name="text" select="unparsed-text($text-uri, $text-encoding)"/>
        <!-- The dots next to the tag names presumably stand in for the angle
             brackets so that no entity escaping is needed inside the pattern;
             used with flag "s" so that "." also matches newlines. -->
        <xsl:variable name="page-regex" as="xs:string"
            select="'.*title.(.*?)./title>.*.style.*/style.*body.(.*)./body'"/>

        <!-- Report input that does not have the expected title/style/body layout;
             the result tree is left exactly as it would be without this check. -->
        <xsl:if test="not(matches($text, $page-regex, 's'))">
            <xsl:message>
                <xsl:text>No title/style/body structure found in "</xsl:text>
                <xsl:value-of select="$text-uri"/>
                <xsl:text>"; no head or body was generated.</xsl:text>
            </xsl:message>
        </xsl:if>

        <xsl:analyze-string select="$text" flags="s" regex="{$page-regex}">
            <xsl:matching-substring>
                <head>
                    <title>
                        <xsl:value-of select="normalize-space(regex-group(1))"/>
                    </title>
                    <meta charset="utf-8"/>
                </head>
                <body>
                    <div>
                        <p>
                            <!-- disable-output-escaping asks the serializer to write the
                                 captured body text as-is, so markup inside it is emitted
                                 as markup rather than as escaped text. -->
                            <!-- NOTE(review): the pattern and the replacement passed to
                                 replace() are the same string ('br /'), so as written the
                                 call changes nothing. Confirm the intended pattern. -->
                            <xsl:value-of select="replace(normalize-space(regex-group(2)), 'br /', 'br /')" disable-output-escaping="yes"/>
                        </p>
                    </div>
                </body>
            </xsl:matching-substring>
        </xsl:analyze-string>
    </xsl:template>
    
    
    <!--
        Entry point: wraps the conversion result in <html lang="en">.

        If the file at $text-uri can be read with $text-encoding, the named
        template text2xml supplies the content. Otherwise an error text naming
        the URI and the encoding is sent through xsl:message and also written
        as the text content of the <html> element.
    -->
    <xsl:template match="/">
        <xsl:variable name="source-readable" as="xs:boolean"
            select="unparsed-text-available($text-uri, $text-encoding)"/>
        <html lang="en">
            <xsl:choose>
                <xsl:when test="not($source-readable)">
                    <!-- Build the diagnostic once; it is used for both the message and the output. -->
                    <xsl:variable name="failure" as="xs:string"
                        select="concat('Error reading &quot;', $text-uri, '&quot; (encoding &quot;', $text-encoding, '&quot;).')"/>
                    <xsl:message select="$failure"/>
                    <xsl:value-of select="$failure"/>
                </xsl:when>
                <xsl:otherwise>
                    <xsl:call-template name="text2xml"/>
                </xsl:otherwise>
            </xsl:choose>
        </html>
    </xsl:template>
    
</xsl:stylesheet>