-
Notifications
You must be signed in to change notification settings - Fork 0
/
xconverter.py
224 lines (196 loc) · 7.39 KB
/
xconverter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
#! python
# -*- coding: utf-8 -*-
#Import relevant libraries including Markdown
import codecs
import sys
import os
markdown_directory = os.path.join(os.path.dirname(__file__), 'markdown')
sys.path.append(markdown_directory)
import markdown
#Functions
def stripped(str):
    """Return *str* without leading indentation or trailing line breaks.

    When the module-level ``tab_spaced`` flag is set, only leading tab
    characters are removed; otherwise every kind of leading whitespace is
    removed. Trailing newline and carriage-return characters are dropped in
    both cases.
    """
    # Passing None makes lstrip() use its default "any whitespace" set.
    leading_chars = '\t' if tab_spaced else None
    return str.lstrip(leading_chars).rstrip('\n\r')
def indented(str, i):
    """Return *str* prefixed with tabs and terminated by a newline.

    The number of tabs is the current length of the module-level
    ``unclosed_tags`` stack minus *i*.
    """
    depth = len(unclosed_tags) - i
    prefix = "\t" * depth
    return prefix + str + "\n"
def write_indented(str, i, writearray):
    """Append ``indented(str, i)`` to *writearray*.

    The return value is whatever ``writearray.append`` returns (``None``
    for a list).
    """
    rendered = indented(str, i)
    return writearray.append(rendered)
def close_tags():
    """Emit closing tags when the current line is not indented deeper.

    Compares the last two entries of the module-level ``whitespace_array``
    (the newest entry against the one before it). If the newest value is
    less than or equal to the previous one, and an indentation unit
    (``whitespacing``) has been detected, it closes one tag plus one more
    for every indentation unit of difference. Each closing tag is appended
    to ``html_to_write`` and its name is popped from ``unclosed_tags``.
    """
    if whitespacing <= 0 or whitespace_array[-1] > whitespace_array[-2]:
        return
    dedent = whitespace_array[-2] - whitespace_array[-1]
    levels = 1 + int(round(dedent / float(whitespacing)))
    # range() exists on both Python 2 and 3 (xrange is Python 2 only).
    for _ in range(levels):
        # Irregular indentation can ask for more closes than there are open
        # tags; stop instead of failing on an empty stack.
        if not unclosed_tags:
            break
        write_indented("</" + unclosed_tags[-1] + ">", 1, html_to_write)
        unclosed_tags.pop()
def last_tag_index():
    """Return a negative index into ``html_to_write`` for the open tag line.

    Scans ``html_to_write`` from the end and returns the position, as a
    negative index, of the first line whose stripped text starts with
    ``'<'`` followed by the name on top of ``unclosed_tags``. This is a
    prefix match, so ``<p`` also matches a line beginning ``<pre``.
    Returns 0 when no line matches.
    """
    candidates = (
        -1 - offset
        for offset, entry in enumerate(reversed(html_to_write))
        if stripped(entry).startswith('<' + unclosed_tags[-1])
    )
    return next(candidates, 0)
def current_ws():
    """Return how many leading characters of the global ``line`` are indent.

    Once an indentation unit is known (``whitespacing`` is non-zero) and the
    file is tab indented, only tabs, newlines and carriage returns count as
    leading characters. In every other case all leading whitespace counts.
    """
    if whitespacing != 0 and tab_spaced:
        remainder = line.lstrip('\t\n\r')
    else:
        remainder = line.lstrip()
    return len(line) - len(remainder)
#initialise variables
# Module-level state shared by the helper functions above and the
# conversion loop below.
html_to_write = []          # output lines, each already indented and newline-terminated
unclosed_tags = []          # stack of tag names that have been opened but not closed
debug = []                  # not referenced elsewhere in the visible code
# First command-line argument: path of the vivid source file.
vividfilename = sys.argv[1]
# Optional second argument: output path. Otherwise it is derived from the input.
if len(sys.argv) > 2:
    htmlfilename = sys.argv[2]
else:
    # splitext() removes only the final extension, so dots in directory
    # names or earlier in the file name (e.g. "./page.vivid", "a.b.vivid")
    # no longer truncate the output path.
    htmlfilename = os.path.splitext(vividfilename)[0] + '.html'
whitespacing = 0            # width of one indentation level; 0 until the first indented line
whitespace_array = [-1, 0]  # history of indentation widths, seeded with two sentinel values
tab_spaced = False          # set once the first indented line is found to start with a tab
last_type_tag = False       # written by the conversion loop; not read in the visible code
multi_line_text = False     # True while inside an unterminated '##' text block
multi_line_property = False # True while inside a property whose value spans lines
is_markdown = False         # True while inside an unterminated '#markdown#' block
markdown_array = []         # lines collected for the current markdown block
#open the file
# Translate the vivid source, one line at a time, into html_to_write.
# Each line is classified in this order: continuation of a multi-line
# construct (text, markdown, property), '#' text, 'name: value' property,
# 'tag#text' shorthand and finally a bare tag name.
# NOTE(review): this listing had lost its indentation; the nesting of
# `tab_spaced = True`, `multi_line_text = True` and the markdown
# `whitespace_array[-1] -= 1` was inferred from the surrounding comments.
# Confirm against the original file.
# The 'U' flag is omitted from the mode: codecs.open() opens encoded files in
# binary mode, so it had no effect, and newer Python 3 releases reject it.
with codecs.open(vividfilename, 'r', "utf-8-sig") as vividfile:
    for line in vividfile:
        #work out how many whitespaces at start
        whitespace_array.append(current_ws())
        #For first line with whitespace, work out the whitespacing (eg tab vs 4-space)
        if whitespacing == 0 and whitespace_array[-1] > 0:
            whitespacing = whitespace_array[-1]
            if line[0] == '\t':
                tab_spaced = True
        #strip out whitespace at start and end
        stripped_line = stripped(line)

        #deal with multiline things:
        #deal with unclosed multiline text
        if multi_line_text:
            #work out if it's been closed on this line, close if it has, append appropriately
            if stripped_line.endswith('##'):
                if len(stripped_line) > 2:
                    write_indented(stripped_line[:-2], 0, html_to_write)
                #if it's just a closing '##', get rid of the <br /> on the previous line
                else:
                    #"<br />\n" is 7 characters long
                    html_to_write.append(html_to_write[-1][:-7] + "\n")
                    html_to_write.pop(-2)
                multi_line_text = False
            else:
                write_indented(stripped_line + "<br />", 0, html_to_write)
            #text lines do not take part in the tag indentation history
            whitespace_array.pop()
            continue

        #deal with unclosed markdown
        if is_markdown:
            #deal with a line that closes here, then join markdown, format and send to html array
            if stripped_line.lower().endswith('#markdown#'):
                markdown_array.append(stripped_line[:-10])
                markdown_to_write = markdown.markdown('\n\r'.join(markdown_array))
                #separate loop variable so the file's current `line` is not rebound
                for markdown_line in markdown_to_write.split('\n'):
                    write_indented(markdown_line, 0, html_to_write)
                is_markdown = False
            else:
                markdown_array.append(stripped_line)
            # NOTE(review): every other non-tag path pops this entry; here it
            # is kept and decremented by one. Confirm this is intended.
            whitespace_array[-1] -= 1
            continue

        #deal with multiline properties
        if multi_line_property:
            #drop the last two characters of the tag line written so far
            #(' >' right after the property was opened, '">' afterwards)
            tagstart = html_to_write[last_tag_index()].strip()[:-2]
            propertyvalue = stripped_line.strip()
            #a trailing ';' ends the property value
            if stripped_line.endswith(';'):
                html_to_write[last_tag_index()] = indented(tagstart + propertyvalue[:-1] + "\">", 1)
                multi_line_property = False
            else:
                html_to_write[last_tag_index()] = indented(tagstart + propertyvalue + " \">", 1)
            whitespace_array.pop()
            continue

        #deal with text ie things starting with '#'
        if stripped_line.startswith('#'):
            #if the current tag has less whitespace than the last, close all tags up to this one
            close_tags()
            #deal with start of multiline text (first '##')
            if stripped_line.startswith('##'):
                #deal with when it starts and ends on the same line
                if stripped_line.endswith('##') and len(stripped_line) > 2:
                    write_indented(stripped_line[2:-2], 0, html_to_write)
                    # NOTE(review): this path skips whitespace_array.pop(),
                    # unlike single-line '#' text below. Confirm intended.
                    continue
                #if it just starts with '##', append as necessary
                if len(stripped_line) > 2:
                    write_indented(stripped_line[2:] + "<br />", 0, html_to_write)
                multi_line_text = True
            #deal with start of markdown (first '#markdown')
            elif stripped_line.lower().startswith('#markdown#'):
                #deal with when it starts and ends on the same line
                if stripped_line.lower().endswith('#markdown#') and len(stripped_line) > 10:
                    write_indented(markdown.markdown(stripped_line[10:-10]), 0, html_to_write)
                    continue
                #if it just starts with '#markdown#', add to markdown array and skip to next line
                else:
                    markdown_array = [stripped_line[10:]]
                    is_markdown = True
                    continue
            #deal with single line text
            else:
                write_indented(stripped_line[1:], 0, html_to_write)
            #skip the rest of the cycle.
            whitespace_array.pop()
            continue

        #deal with properties eg class:20
        if ":" in stripped_line:
            #if the current tag has less whitespace than the last, close all tags up to this one
            close_tags()
            #the open tag line without its closing '>'
            tagstart = html_to_write[last_tag_index()].strip()[:-1]
            #deal with multiline properties- add 'property ="' to the last unclosed tag
            if stripped_line.endswith(':'):
                multi_line_property = True
                property_name = stripped_line.strip()[:-1]
                html_to_write[last_tag_index()] = indented(tagstart + " " + property_name + "=\" >", 1)
            else:
                #split at the first ':' only, so a value that itself contains
                #':' (a url, an inline style) is kept intact
                propertysplit = stripped_line.strip().split(":", 1)
                property_name = propertysplit[0].strip()
                propertyvalue = propertysplit[1].lstrip()
                html_to_write[last_tag_index()] = indented(tagstart + " " + property_name + "=\"" + propertyvalue + "\">", 1)
            #set the last type to not be a tag
            last_type_tag = False
            #properties do not take part in the tag indentation history
            whitespace_array.pop()
            continue

        #deal with everything else:
        #deal with single line tag#text
        if "#" in stripped_line:
            close_tags()
            #everything before the first '#' is the tag, the rest is its text
            textsplit = stripped_line.split('#')
            tag = textsplit[0]
            text_to_write = "#".join(textsplit[1:])
            write_indented("<" + tag + ">", 0, html_to_write)
            #-1 indents the text one level deeper than the tag line
            write_indented(text_to_write, -1, html_to_write)
            unclosed_tags.append(tag)
            #set the last type to be a tag
            last_type_tag = True
        #deal with tags eg div
        else:
            #if the current tag has less whitespace than the last, close all tags up to this one
            close_tags()
            #append the current tag
            write_indented("<" + stripped_line.strip() + ">", 0, html_to_write)
            unclosed_tags.append(stripped_line.strip())
            #set the last type to be a tag
            last_type_tag = True
#close any unclosed tags at the end
# Walk the stack from the innermost tag outwards; the stack itself is left
# untouched, so the growing offset is what moves each closing tag one
# indentation level further out.
for depth, open_tag in enumerate(reversed(unclosed_tags), 1):
    closing = "</" + open_tag + ">"
    write_indented(closing, depth, html_to_write)
#nicer formatting for tags with no children or only single line text
# Pulls "<tag>" + "</tag>" and "<tag>" + one text line + "</tag>" onto a
# single output line by trimming the whitespace and newlines between them.
for key, item in enumerate(html_to_write):
    if not item.lstrip().startswith('</'):
        continue
    #opening form of this closing tag, without '>' so a tag with attributes still matches
    startendtag = "<" + stripped(item)[2:-1]
    #the key >= 1 and key >= 2 checks stop the look-behind from wrapping
    #round to the end of the list through a negative index
    #opening tag directly above: no children at all
    if key >= 1 and stripped(html_to_write[key-1]).startswith(startendtag):
        html_to_write[key-1] = html_to_write[key-1].rstrip()
        html_to_write[key] = html_to_write[key].lstrip()
    #opening tag two lines above: a single line in between
    if key >= 2 and stripped(html_to_write[key-2]).startswith(startendtag):
        html_to_write[key-2] = html_to_write[key-2].rstrip()
        html_to_write[key-1] = stripped(html_to_write[key-1])
        html_to_write[key] = html_to_write[key].lstrip()
# Join the buffered output lines and write the document as UTF-8.
s = ''.join(html_to_write)
# Plain 'w': the universal-newline flag 'U' only applies to reading, and
# combining it with 'w' is rejected by Python 3's open().
with codecs.open(htmlfilename, 'w', 'utf-8') as htmlfile:
    htmlfile.write(s)