html2xhtml.xsl 2.36 KB
Newer Older
1
2
3
4
5
6
<?xml version="1.0" encoding="utf-8"?>
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema" exclude-result-prefixes="xs"
    xmlns:regexp="http://exslt.org/regular-expressions" extension-element-prefixes="regexp">
    
    
jcomedouteau's avatar
jcomedouteau committed
7
    <!-- Serialize the result tree as indented XML. -->
    <xsl:output method="xml" indent="yes" />

    <!-- Drop whitespace-only text nodes from every element of the source document. -->
    <xsl:strip-space elements="*"/>

    <!-- Character encoding used to decode the text file named by $text-uri. -->
    <xsl:param name="text-encoding" as="xs:string" select="'utf-8'"/>

    <!-- URI of the HTML file that is read as plain text and converted.
         NOTE(review): a relative value is presumably resolved against the stylesheet's
         base URI by unparsed-text(); confirm with the processor in use. -->
    <xsl:param name="text-uri" as="xs:string" select="'../input/pad.html'"/>
    
    <!--
        text2xml: reads the file at $text-uri (decoded as $text-encoding) as plain text and
        rebuilds a head and a body element from it with a regular expression.

        Output when the text matches:
          head/title : whitespace-normalized content of the source title element
          head/meta  : charset="utf-8"
          body/div/p : whitespace-normalized content of the source body element, with every
                       <br> rewritten as <br/>

        When the text does not match, nothing is added to the result tree and a warning is
        written to the message output so the empty result is not silent.
    -->
    <xsl:template name="text2xml">
        <xsl:variable name="text" select="unparsed-text($text-uri, $text-encoding)"/>

        <!-- Group 1 = title text, group 2 = body markup.
             The tags are matched literally and the gaps between them are skipped with
             reluctant quantifiers, so words such as "title", "style" or "body" inside the
             document text cannot be mistaken for the tags and truncate the captured groups.
             Group 2 stays greedy so that it runs up to the last closing body tag.
             Attributes on the opening body tag are tolerated and discarded. -->
        <xsl:variable name="pattern" as="xs:string"
            select="'&lt;title&gt;(.*?)&lt;/title&gt;.*?&lt;style.*?&lt;/style&gt;.*?&lt;body[^&gt;]*&gt;(.*)&lt;/body&gt;'"/>

        <!-- Report input that does not have the expected title / style / body sequence. -->
        <xsl:if test="not(matches($text, $pattern, 's'))">
            <xsl:message>
                <xsl:text>Warning: no title/style/body structure found in "</xsl:text>
                <xsl:value-of select="$text-uri"/>
                <xsl:text>"; head and body are omitted.</xsl:text>
            </xsl:message>
        </xsl:if>

        <!-- flags="s": "." also matches line breaks, so one match can span the whole file. -->
        <xsl:analyze-string select="$text" flags="s" regex="{$pattern}">
            <xsl:matching-substring>
                <head>
                    <title>
                        <xsl:value-of select="normalize-space(regex-group(1))"/>
                    </title>
                    <meta charset="utf-8"/>
                </head>
                <body>
                    <div>
                        <p>
                            <!-- The body markup is written as raw text (output escaping disabled)
                                 so its inline tags reach the serialized result as tags.
                                 Only <br> is made well-formed here; any other unclosed tag in
                                 the source is passed through unchanged. -->
                            <xsl:value-of select="replace(normalize-space(regex-group(2)), '&lt;br&gt;', '&lt;br/&gt;')" disable-output-escaping="yes"/>
                        </p>
                    </div>
                </body>
            </xsl:matching-substring>
        </xsl:analyze-string>

    </xsl:template>
    
    
    <!--
        Entry point: wraps the conversion result in an html element.

        If the file at $text-uri can be read with $text-encoding, the text2xml template
        supplies the content. Otherwise an error line naming the URI and the encoding is
        sent to the message output and also written as the text content of html.
    -->
    <xsl:template match="/">
        <xsl:variable name="source-readable" as="xs:boolean"
            select="unparsed-text-available($text-uri, $text-encoding)"/>
        <html lang="en">
            <xsl:choose>
                <xsl:when test="not($source-readable)">
                    <!-- Build the error line once; it is used for both the message and the output. -->
                    <xsl:variable name="failure" as="xs:string"
                        select="concat('Error reading &quot;', $text-uri, '&quot; (encoding &quot;', $text-encoding, '&quot;).')"/>
                    <xsl:message>
                        <xsl:value-of select="$failure"/>
                    </xsl:message>
                    <xsl:value-of select="$failure"/>
                </xsl:when>
                <xsl:otherwise>
                    <xsl:call-template name="text2xml"/>
                </xsl:otherwise>
            </xsl:choose>
        </html>
    </xsl:template>
    
</xsl:stylesheet>