-
Notifications
You must be signed in to change notification settings - Fork 2
/
codiicsa.py
275 lines (228 loc) · 9.52 KB
/
codiicsa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# codiicsa.py
#
# Copyright 2011 Lex Trotman <lex@fred5>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
#
#
from xml.etree.ElementTree import ElementTree
import sys
# Underline characters for two-line titles, indexed by section nesting level
# (index 0 is used for the document title / outermost level).
_title_ul = list( '=-~^+' )
class Out( object ) :
    """ Extendable list using + as operator, never extends with None.

    Used to accumulate output fragments.  Adding None on either side is a
    no-op, so callers can add possibly-missing values (for example an
    element's text or tail) without testing them first.

    Note that + does not build a new object: the Out operand is modified in
    place and returned. """

    def __init__( self, i = None ) :
        """ If i is a list initial value is the list otherwise it is [ i ].

        None gives an empty list.  A list argument is stored as-is, not
        copied, so later additions are visible through the caller's list. """
        if i is None :
            self.lst = []
        elif isinstance( i, list ) :
            self.lst = i
        else :
            self.lst = [ i ]

    def __add__( self, i ) :
        """ self + i: if i is a list or an Out extend self, else append i.

        None is ignored.  Returns self. """
        if i is None :
            return self
        if isinstance( i, list ) :
            self.lst.extend( i )
        elif isinstance( i, Out ) :
            self.lst.extend( i.lst )
        else :
            self.lst.append( i )
        return self

    def __radd__( self, i ) :
        """ i + self: if i is a list prepend its elements (keeping their
        order), else prepend i.

        None is ignored.  Returns self. """
        if i is None :
            return self
        if isinstance( i, list ) :
            # Slice assignment inserts the whole list at the front in order.
            # (list.reverse() returns None, so it cannot be iterated over.)
            self.lst[ 0:0 ] = i
        else :
            self.lst.insert( 0, i )
        return self

    def __iter__( self ) :
        """ Iterate over the stored items. """
        return iter( self.lst )

    def __len__( self ) :
        """ Number of stored items; an empty Out is false in a boolean test. """
        return len( self.lst )

    def __getitem__( self, i ) :
        """ Index or slice the stored items. """
        return self.lst[ i ]
class Processing ( object ) :
    """ Base class for converters: turns elements of an ElementTree into
    lists of output strings (Out objects).

    Process() looks for a method named exactly after the element's tag and
    calls it; elements without such a method are passed through as literal
    <tag>...</tag> markup.  The helper methods defined here have capitalised
    names; tag handlers are supplied by subclasses. """

    def __init__( self, tree, all_ids = False ) :
        """ tree = ElementTree to be converted
        all_ids = if true Pre() emits an anchor for every id, otherwise ids
                  starting with '_' are skipped """
        self.all_ids = all_ids
        self._section_level = -1        # incremented on entering each section
        self._list_level = 0            # itemizedlist nesting depth
        self._var_list_level = 1        # variablelist nesting depth + 1, number of ':' after a term
        self._ordered_list_level = 0    # orderedlist nesting depth
        # Elements carry no parent pointer, so build a child -> parent map.
        # getiterator() no longer exists in current ElementTree versions,
        # prefer iter() and fall back only where iter() is missing.
        walk = getattr( tree, 'iter', None ) or tree.getiterator
        self.Parent = dict( ( c, p ) for p in walk() for c in p )

    def Pre( self, elem, inline = False, attrs = [] ) :
        """ Return the lines that precede an element: an [[id]] anchor and an
        [attrs, role=...] attribute list, each only if applicable.

        elem = element being converted
        inline = if true no newlines are added around the anchor and
                 attribute list
        attrs = list of strings placed at the start of the attribute list
                (only read, never modified) """
        out = Out()
        sep_before = '' if inline else '\n\n'
        sep_after = '' if inline else '\n'
        i = elem.get( 'id' )
        if i is not None and ( self.all_ids or not i.startswith( '_' ) ) :
            out += sep_before + '[[' + i + ']]' + sep_after
        r = elem.get( 'role' )
        if r or attrs :
            a = ', '.join( attrs )
            out += '[' + a
            if a and r :
                out += ', '
            if r :
                out += 'role=' + r
            out += ']' + sep_after
        return out

    def Process( self, elem ) :
        """ process the specified element, returns list of strings

        The handler is the attribute of self named by elem.tag.  With no
        handler the element is copied through as <tag>text children</tag>
        followed by its tail. """
        p = getattr( self, elem.tag, None )
        if p is None :
            out = self.Pre( elem ) + "<" + elem.tag + ">" + elem.text + self.Children( elem ) \
                + "</" + elem.tag + ">" + elem.tail
        else :
            out = p( elem )
        return out

    def Children( self, elem, do = None, dont = set() ) :
        """ Process children, return resulting string list
        elem = element whose children are processed
        do = set of strings naming tags of children to process, default None = all children
        dont = set of strings naming tags of children to not process, default = empty set
        tag names in both do and dont sets will be processed, ie explicit do overrides dont
        a single tag name can be passed to do and dont as a string """
        out = Out()
        # Wrap a bare tag name in a set so the tests below compare whole tag
        # names; testing against the string itself would be a substring match.
        doset = set( [ do ] ) if isinstance( do, str ) else do
        dontset = set( [ dont ] ) if isinstance( dont, str ) else dont
        # Iterating the element yields its direct children.
        for e in elem :
            if doset is None :
                if e.tag not in dontset :
                    out += self.Process( e )
            elif e.tag in doset :
                out += self.Process( e )
        return out

    def Strip( self, lst, ch = None ) :
        """ Strip the characters in string ch from both sides of the string
        resulting from the iterable of strings lst.
        ch = None strips whitespace.
        Returns an Out containing a single string """
        if not isinstance( lst, Out ) :
            lst = Out( lst )
        return Out( ''.join( lst ).strip( ch ) )

    def Stripl( self, lst, ch = None ) :
        """ Strip the characters in string ch from the left side of the string
        resulting from the iterable of strings lst.
        ch = None strips whitespace.
        Returns an Out containing a single string """
        if not isinstance( lst, Out ) :
            lst = Out( lst )
        return Out( ''.join( lst ).lstrip( ch ) )

    def Stripr( self, lst, ch = None ) :
        """ Strip the characters in string ch from the right side of the string
        resulting from the iterable of strings lst.
        ch = None strips whitespace.
        Returns an Out containing a single string """
        if not isinstance( lst, Out ) :
            lst = Out( lst )
        return Out( ''.join( lst ).rstrip( ch ) )

    def Underline_title( self, elem ) :
        """ Create an underlined title for the current section level from the
        <title> children of elem.

        The underline is as long as the stripped title text.  With no <title>
        the title text and the underline are both empty. """
        out = self.Strip( self.Children( elem, 'title' ), None )
        l = len( out[0] )
        # Only len( _title_ul ) underline characters exist; sections nested
        # deeper than that reuse the last one instead of raising IndexError.
        level = min( self._section_level, len( _title_ul ) - 1 )
        return out + '\n' + ( _title_ul[ level ] * l ) + '\n\n'

    def Block_title( self, elem ) :
        """ If elem contains a <title> tag generate a block title ('.' followed
        by the title), otherwise just a blank line """
        out = self.Children( elem, do = 'title' )
        if out :
            out = '\n\n.' + out
        else :
            out = Out( '\n\n' )
        return out
class docbook_common( Processing ) :
    """ Handlers for DocBook elements that are not specific to one document type.

    Each method is named after the tag it converts, takes the element and
    returns the output strings for it.  Inline handlers finish with the
    element's tail so the text following the element is kept. """

    def __init__( self, *args, **kwargs ) :
        """ Arguments are passed unchanged to Processing; keyword arguments
        are accepted as well as positional ones. """
        Processing.__init__( self, *args, **kwargs )

    def section( self, elem ) :
        """ Section: underlined title one level deeper than the enclosing one,
        then the content. """
        self._section_level += 1
        out = self.Pre( elem ) + self.Underline_title( elem ) + elem.text + self.Children( elem, dont = 'title' )
        self._section_level -= 1
        return out

    def simpara( self, elem ) :
        """ Paragraph: content surrounded by blank lines. """
        return self.Pre( elem ) + '\n\n' + elem.text + self.Children( elem ) + '\n\n'

    def sidebar( self, elem ) :
        """ Sidebar: optional block title and content between ******** lines. """
        return self.Pre( elem ) + self.Block_title( elem ) + '\n********' + elem.text \
            + self.Children( elem, dont = 'title' ) + '\n********\n\n' + elem.tail

    def literal( self, elem ) :
        """ Inline literal: content between backquotes. """
        return Out( '`' ) + elem.text + self.Children( elem ) + '`' + elem.tail

    def emphasis( self, elem ) :
        """ Inline emphasis: content between single quotes. """
        return Out( "'" ) + elem.text + self.Children( elem ) + "'" + elem.tail

    def itemizedlist( self, elem ) :
        """ Bulleted list: optional block title, then the items one bullet
        level deeper. """
        self._list_level += 1
        out = self.Pre( elem ) + self.Block_title( elem ) + self.Children( elem, dont = 'title' )
        self._list_level -= 1
        return out

    def listitem( self, elem ) :
        """ List item, formatted according to the kind of list that contains it.

        itemizedlist = new line, one '*' per nesting level, then the content
        varlistentry = the content with leading newlines removed
        orderedlist = new line, one '.' per nesting level, then the content
        Any other parent produces no output. """
        parent = self.Parent[ elem ].tag
        if parent == 'itemizedlist' :
            return '\n' + '*' * self._list_level + ' ' + self.Stripl( self.Children( elem ) )
        if parent == 'varlistentry' :
            return self.Stripl( self.Children( elem ), '\n' )
        if parent == 'orderedlist' :
            return '\n' + '.' * self._ordered_list_level + ' ' + self.Stripl( self.Children( elem ) )
        return Out()

    def link( self, elem ) :
        """ Internal cross reference: <<linkend,content>>. """
        return Out( '<<' ) + elem.get( 'linkend' ) + ',' + elem.text + self.Children( elem ) + '>>' + elem.tail

    def variablelist( self, elem ) :
        """ Labelled list: optional block title, then the entries one level deeper. """
        self._var_list_level += 1
        out = self.Pre( elem ) + self.Block_title( elem ) + self.Children( elem, dont = 'title' )
        self._var_list_level -= 1
        return out

    def varlistentry( self, elem ) :
        """ Labelled list entry: just the converted children (term and listitem). """
        return self.Pre( elem ) + self.Children( elem )

    def term( self, elem ) :
        """ Labelled list term: the content without trailing whitespace followed
        by one ':' per _var_list_level ('::' directly inside one variablelist). """
        return self.Pre( elem ) + self.Stripr( elem.text + self.Children( elem ) ) + ( ':' * self._var_list_level )

    def important( self, elem ) :
        """ IMPORTANT admonition: [IMPORTANT] attribute and content between ======== lines. """
        return self.Pre( elem, attrs = [ 'IMPORTANT' ] ) + '\n========' + self.Children( elem ) + '========\n'

    def footnote( self, elem ) :
        """ Inline footnote: footnote:[content], content stripped of surrounding
        whitespace. """
        # A footnote sits inside running text, so the tail (the text that
        # follows it in the paragraph) must be emitted as for other inlines.
        return 'footnote:[' + self.Strip( self.Children( elem ) ) + ']' + elem.tail

    def literallayout( self, elem ) :
        """ Literal paragraph: every line of the element's text indented by a space.

        Only elem.text is used, child elements are not converted. """
        # elem.text is None when the element is empty or starts with a child.
        text = elem.text or ''
        return Out( '\n ' ) + text.replace( '\n', '\n ' ) + '\n'

    def orderedlist( self, elem ) :
        """ Numbered list: optional block title, then the items one level deeper. """
        self._ordered_list_level += 1
        out = self.Pre( elem ) + self.Block_title( elem ) + self.Children( elem, dont = 'title' )
        self._ordered_list_level -= 1
        return out

    def note( self, elem ) :
        """ NOTE admonition: [NOTE] attribute and content between ======== lines. """
        return self.Pre( elem, attrs = [ 'NOTE' ] ) + '\n========' + self.Children( elem ) + '========\n'

    def screen( self, elem ) :
        """ Screen listing: content between -------- lines. """
        return self.Pre( elem ) + '\n--------\n' + elem.text + self.Children( elem ) + '\n--------\n'

    def anchor( self, elem ) :
        """ Inline anchor: [[id]] with no surrounding newlines. """
        # The anchor is inline, keep the text that follows it.
        return self.Pre( elem, True ) + elem.tail

    def title( self, elem ) :
        """ Title: the bare content, decoration is added by the caller
        (Underline_title or Block_title). """
        return Out( elem.text ) + self.Children( elem )

    def blockquote( self, elem ) :
        """ Block quote: the converted children with no quoting added. """
        return self.Pre( elem ) + self.Children( elem )
class docbook_article ( docbook_common ) :
    """ Converter for documents whose root element is <article>. """

    def __init__( self, *args, **kwargs ) :
        """ Arguments are forwarded unchanged to the base class; keyword
        arguments are accepted so that options can be given by name. """
        docbook_common.__init__( self, *args, **kwargs )

    def article( self, elem ) :
        """ Root element: reset the section level to 0 and convert the children. """
        self._section_level = 0
        return self.Children( elem )

    def articleinfo( self, elem ) :
        """ Article header: the <title> as an underlined title at the current
        section level, followed by the other children. """
        return self.Underline_title( elem ) + self.Children( elem, dont = 'title' )
# Converter class used by convert() for each known root element tag
# when the caller does not supply one.
_defaults = dict( article = docbook_article )
def convert( infile, outfile, dbclass = None, cargs = [], kcargs = {}, cwsl = True ) :
    """ convert the infile in docbook to the outfile in asciidoc
    using the specified docbook convert object or a default one for the type of document

    infile = name of the DocBook XML file to read
    outfile = name of the file to write, always encoded as UTF-8
    dbclass = converter class, called as dbclass( tree, *cargs, **kcargs ) and
              then asked to Process() the root element; None selects the class
              registered in _defaults for the root tag
    cargs, kcargs = extra positional / keyword arguments for the converter
                    class (only read, never modified)
    cwsl = if true, newlines at the start and end of each output fragment are
           collapsed so that no more than two newlines are written in a row
           between pieces of text; newlines inside a fragment are untouched

    Returns None.  If dbclass is None and the root tag is unknown an error
    message is printed and nothing is written. """
    import io

    t = ElementTree( file = infile )
    root = t.getroot()
    if dbclass is None :
        dt = root.tag
        dts = _defaults.get( dt, None )
        if not dts :
            print( "Error: Unknown document type %s" % dt )
            return
        db = dts( t, *cargs, **kcargs )
    else :
        db = dbclass( t, *cargs, **kcargs )
    out = db.Process( root )

    def as_text( s ) :
        # The file is opened in text mode and does the encoding itself, so it
        # must only be given text; byte strings (plain literals on Python 2)
        # are decoded first.
        return s.decode( 'utf-8' ) if isinstance( s, bytes ) else s

    newline = as_text( '\n' )
    nn = 0      # number of newlines written since the last non-newline text
    with io.open( outfile, 'w', encoding = 'utf-8' ) as f :
        for i in out :
            i = as_text( i )
            if not cwsl :
                f.write( i )
                continue
            # Split the fragment into leading newlines, body, trailing newlines.
            l = i.lstrip( '\n' ); ln = len( i ) - len( l )
            r = l.rstrip( '\n' ); rn = len( l ) - len( r )
            # Leading newlines, limited to what is left of the allowance of two.
            b = min( ln, 2 - nn ); nn += b
            f.write( newline * b )
            if r :
                f.write( r )
                nn = 0
            # Trailing newlines, limited the same way.
            b = min( rn, 2 - nn ); nn += b
            f.write( newline * b )
def main():
    """ Command line entry point: codiicsa infile outfile

    Converts the DocBook file named by the first argument to the AsciiDoc file
    named by the second.
    Returns 0 after running the conversion, or 2 (after writing a usage
    message to stderr) if the number of arguments is wrong. """
    if len( sys.argv ) != 3 :
        # A usage error is a failure: report it on stderr with a non-zero
        # status so that calling scripts can detect it.
        sys.stderr.write( "Usage: codiicsa infile outfile\n" )
        return 2
    convert( sys.argv[1], sys.argv[2] )
    return 0
if __name__ == '__main__':
    # Use the value returned by main() as the process exit status.
    sys.exit( main() )