-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathexpand_bidix.xsl
112 lines (104 loc) · 3.86 KB
/
expand_bidix.xsl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
<?xml version="1.0"?><!-- -*- nxml -*- -->
<!--
Automatically add entries to eng-deu bidix
1) for all nouns a lowercased variant is added to allow decompounding.
Given:
<e r="RL"><p><l>area<s n="n"/></l><r>Bereich<s n="n"/><s n="m"/></r></p></e>
Will be added:
<e r="RL"><p><l>area<s n="n"/></l><r>bereich<s n="n"/><s n="m"/></r></p></e>
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="xml" encoding="UTF-8"/>
<!--
  Parallel character tables used with translate() to lowercase text:
  translate(text, $uppercase, $lowercase) replaces every character found in
  $uppercase with the character at the same position in $lowercase.
  The two strings must therefore stay aligned character for character.
-->
<xsl:variable name="lowercase" select="'abcdefghijklmnopqrstuvwxyzàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿžšœ'" />
<xsl:variable name="uppercase" select="'ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞŸŽŠŒ'" />
<!-- The <alphabet> and <sdefs> elements are deep-copied to the output unchanged. -->
<xsl:template match="sdefs | alphabet">
  <xsl:copy-of select="self::node()"/>
</xsl:template>
<!--
  Generic pass-through for structural elements: the element itself and its
  attributes are copied, and its children are processed recursively so that
  nested elements can be handled by their own templates.

  <e> is deliberately NOT listed in this pattern. The stylesheet has a
  dedicated match="e" template; listing "e" here as well creates two rules
  of equal priority for the same node, which XSLT 1.0 treats as an error
  (processors merely recover by picking the last rule).
-->
<xsl:template match="section|p|l|r|s|b|g|re">
  <!-- xsl:copy always copies the current node; it takes no select attribute in XSLT 1.0 -->
  <xsl:copy>
    <xsl:copy-of select="@*"/>   <!-- copy the element attributes -->
    <xsl:apply-templates/>       <!-- recurse into child nodes -->
  </xsl:copy>
</xsl:template>
<!--
  Entry template.

  Every <e> is first copied to the output verbatim. In addition, when the
  right side (p/r) carries the noun tag <s n="n"/> and no <s n="acr"/> tag
  (presumably "acronym"; confirm against the tag set), a second entry is
  emitted with r="RL" whose right-side text is lowercased via
  translate($uppercase, $lowercase). The generated entry is followed by an
  XML comment marking it as added by this stylesheet.

  Entries are NOT duplicated when the right side is a phrase, contains
  '-' or '.', or when the entry is restricted with r="LR".
-->
<xsl:template match="e">
  <xsl:copy-of select="."/>
  <xsl:if test="p/r/s[@n='n'] and not(p/r/s[@n='acr'])">
    <xsl:choose>
      <xsl:when test="count(p/r/text()) &gt; 1 or p/r/b or p/r/g">
        <!--
          skip the cases where the R part is a phrase:
            adoptiertes<b/>Kind
            Paar<g><b/>Handschuhe
        -->
      </xsl:when>
      <xsl:when test="contains(p/r/text(), '-') or contains(p/r/text(), '.')">
        <!--
          skip words with punctuation inside:
            US-Präsident
          TODO: CO2, Na+
          Unfortunately, xsltproc does not support XPath 2.0 regular expressions.
        -->
      </xsl:when>
      <xsl:when test="@r='LR'">
        <!--
          skip entries with the LR restriction (valid only for English to German):
            ex: eventuality/Möglichkeit
        -->
      </xsl:when>
      <xsl:otherwise>
        <!--
          hopefully, here we have only words /^[[:upper:]][[:lower:]]+$/
          TODO: the following should also be excluded
            BH, CD, DDR, DJ, GI, GSoC, http, https, kg, BahnCard, SMS, TV
        -->
        <!-- line break plus indentation in front of the generated entry -->
        <xsl:text>&#10; </xsl:text>
        <!--
          TODO: r="RL" is temporary, for deu-eng only. The attributes of the
          source <e> are intentionally not copied onto the generated entry.
        -->
        <e r="RL">
          <p>
            <xsl:copy-of select="p/@*"/>              <!-- attributes of the P element -->
            <xsl:apply-templates select="p/l"/>       <!-- L element, unchanged -->
            <r>
              <xsl:copy-of select="p/r/@*"/>          <!-- attributes of the R element -->
              <!-- the branch conditions above guarantee at most one text node here -->
              <xsl:value-of select="translate(p/r/text(), $uppercase, $lowercase)"/>
              <xsl:apply-templates select="p/r/s"/>   <!-- tags of the R element -->
              <!-- TODO: adding <s n="lower"/> here breaks deu-eng -->
            </r>
          </p>
        </e>
        <!--
          Mark the added entry. xsl:comment creates a real comment node, which
          works regardless of whether the processor honours
          disable-output-escaping.
        -->
        <xsl:comment> added by expand_bidix.xsl </xsl:comment>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:if>
</xsl:template>
<!--
  Root template: writes an "autogenerated" banner comment, then re-creates
  the <dictionary> element and processes all of its children.

  The banner is produced with xsl:comment rather than hand-built markup with
  disable-output-escaping, and line breaks are written as &#10; inside
  xsl:text: a literal newline inside an attribute value (as in
  select="string('...')") is normalized to a space by the XML parser.
-->
<xsl:template match="dictionary">
  <xsl:text>&#10;</xsl:text>
  <xsl:comment>
    <xsl:text>&#10;THIS IS AN AUTOGENERATED FILE. DO NOT EDIT.&#10;</xsl:text>
  </xsl:comment>
  <xsl:text>&#10;</xsl:text>
  <dictionary>
    <xsl:text>&#10;</xsl:text>
    <xsl:apply-templates/>
  </dictionary>
</xsl:template>
<!--
Default rule that catches elements for which there is no explicit XSLT rule.
Such elements are written to the output as a comment labeled ERROR.
If any ERRORs are present, this XSL file must be extended by adding rules for
the untranslated nodes.
-->
<!--
  Catch-all for elements that no other template matches.

  The element is not copied into the dictionary. Instead an XML comment
  labeled ERROR, naming the element and its parent, is written in its place,
  and the same text is reported through xsl:message so the problem is
  visible when the transformation runs. Descendants of the unmatched element
  are not processed.

  Only names are written into the comment: a comment node cannot contain
  element or attribute nodes, so the element itself cannot be reproduced
  inside it.
-->
<xsl:template match="*">
  <xsl:message>
    <xsl:text>ERROR: untranslated node: </xsl:text>
    <xsl:value-of select="name()"/>
    <xsl:text> (child of </xsl:text>
    <xsl:value-of select="name(..)"/>
    <xsl:text>)</xsl:text>
  </xsl:message>
  <xsl:text>&#10;</xsl:text>
  <xsl:comment>
    <xsl:text>&#10;ERROR: untranslated node: </xsl:text>
    <xsl:value-of select="name()"/>
    <xsl:text> (child of </xsl:text>
    <xsl:value-of select="name(..)"/>
    <xsl:text>)&#10;</xsl:text>
  </xsl:comment>
  <xsl:text>&#10;</xsl:text>
</xsl:template>
</xsl:stylesheet>