-
Notifications
You must be signed in to change notification settings - Fork 1
/
untidy.py
224 lines (175 loc) · 6.73 KB
/
untidy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
'''
untidy.py
Copyright 2006 Andres Riancho
This file is part of untidy, untidy.sourceforge.net .
untidy is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation version 2 of the License.
untidy is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with untidy; if not, write to the Free Software
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
'''
# Debug switch read by xmlFuzzer._getFuzzFunctions(): when True, the fuzzer
# forces its repetitions to [ 5, 28 ] and asks fuzzingFunctions for its test
# fuzz functions ( _getTestFuzzFunctions() ) instead of the regular ones.
DEBUG = False
import itertools
import re

import fuzzingFunctions
class xmlFuzzer:
    '''
    This class fuzzes an xml string.

    The xml is split into shallow tokens (tags and text runs), every token is
    handed to each fuzzing function, and the fuzzed documents are produced by
    combining one variant per token.

    @author: Andres Riancho ( [email protected] )
    '''

    def __init__(self):
        self._stopfuzz = False
        self._output = ''
        self._current = 0
        self._startFrom = []
        # Alternative, larger repetition set kept for reference:
        #self._repetitions = [ 2, 3, 7, 8, 9, 15, 16, 17, 31, 32, 33, 63, 64, 65,
        #127, 128, 129, 254, 255, 256, 511, 512, 513, 1023, 1024, 1025, 2047,
        #2048, 2049, 4095, 4096, 4097, 10000, 10000 ]
        self._repetitions = [ 30, 34 ]
        self._ff = fuzzingFunctions.fuzzingFunctions()

    def setRepetitions( self , rep ):
        '''
        Replace the repetitions that are passed to every fuzzing function.

        @parameter rep: The new repetitions value; it is stored as is and
        handed unchanged to the fuzzing functions by fuzz().
        '''
        self._repetitions = rep

    def _toList( self, xmlString ):
        '''
        This method separates an xml in a "line by line" form.
        Example:
        xmlString: "<xml a="b">f00</xml>"
        result: [ '<xml a="b">' , 'f00', '</xml>']

        Items that consist of exactly one newline character are dropped from
        the result.

        REX/Python
        Based on Robert D. Cameron's REX/Perl 1.0.
        Original copyright notice follows:
        REX/Perl 1.0
        Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
        Technical Report TR 1998-17, School of Computing Science, Simon Fraser
        University, November, 1998.
        Copyright (c) 1998, Robert D. Cameron.
        The following code may be freely used and distributed provided that
        this copyright and citation notice remains intact and that modifications
        or additions are clearly identified.

        @parameter xmlString: A string representation of the xml
        @return: A list of strings.
        '''
        # The shallow-parsing expression is assembled bottom-up from small
        # fragments. Every group is non-capturing, so re.findall() returns the
        # complete text of each match instead of group contents.
        TextSE = "[^<]+"
        UntilHyphen = "[^-]*-"
        Until2Hyphens = UntilHyphen + "(?:[^-]" + UntilHyphen + ")*-"
        CommentCE = Until2Hyphens + ">?"
        UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+"
        CDATA_CE = UntilRSBs + "(?:[^\\]>]" + UntilRSBs + ")*>"
        S = "[ \\n\\t\\r]+"
        NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]"
        NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]"
        Name = "(?:" + NameStrt + ")(?:" + NameChar + ")*"
        QuoteSE = "\"[^\"]*\"|'[^']*'"
        DT_IdentSE = S + Name + "(?:" + S + "(?:" + Name + "|" + QuoteSE +"))*"
        MarkupDeclCE = "(?:[^\\]\"'><]+|" + QuoteSE + ")*>"
        S1 = "[\\n\\r\\t ]"
        UntilQMs = "[^?]*\\?+"
        PI_Tail = "\\?>|" + S1 + UntilQMs + "(?:[^>?]" + UntilQMs + ")*>"
        DT_ItemSE = "<(?:!(?:--" + Until2Hyphens + ">|[^-]" + MarkupDeclCE + ")|\\?" + Name + "(?:" + PI_Tail + "))|%" + Name + ";|" + S
        DocTypeCE = DT_IdentSE + "(?:" + S + ")?(?:\\[(?:" + DT_ItemSE + ")*](?:" + S + ")?)?>?"
        DeclCE = "--(?:" + CommentCE + ")?|\\[CDATA\\[(?:" + CDATA_CE + ")?|DOCTYPE(?:" + DocTypeCE + ")?"
        PI_CE = Name + "(?:" + PI_Tail + ")?"
        EndTagCE = Name + "(?:" + S + ")?>?"
        AttValSE = "\"[^<\"]*\"|'[^<']*'"
        ElemTagCE = Name + "(?:" + S + Name + "(?:" + S + ")?=(?:" + S + ")?(?:" + AttValSE + "))*(?:" + S + ")?/?>?"
        MarkupSPE = "<(?:!(?:" + DeclCE + ")?|\\?(?:" + PI_CE + ")?|/(?:" + EndTagCE + ")?|(?:" + ElemTagCE + ")?)"
        XML_SPE = TextSE + "|" + MarkupSPE

        # Only items that are exactly '\n' are removed; any other whitespace
        # run between tags is kept as a text item.
        return [ item for item in re.findall( XML_SPE, xmlString ) if item != '\n' ]

    def _getFuzzFunctions( self ):
        '''
        Select the fuzzing functions to apply.

        When the module level DEBUG flag is set, the repetitions are
        overwritten with [ 5, 28 ] (discarding any value given to
        setRepetitions()) and the test fuzz functions are used.

        @return: The fuzzing functions provided by self._ff; fuzz() iterates
        over them and calls each one with ( xmlItem, repetitions ).
        '''
        if DEBUG:
            self._repetitions = [ 5, 28 ]
            return self._ff._getTestFuzzFunctions()
        return self._ff._getFuzzFunctions()

    def _generateIterator( self, fuzzedItems ):
        '''
        Build the iterator over all the fuzzed xml documents.

        Every document is made of one variant per item, joined in order. The
        variants of the last item change fastest, as in an odometer.

        If fuzzedItems is empty a single empty string is produced; if any
        item has no variants at all, no document is produced.

        @parameter fuzzedItems: A list with one entry per xml item; each entry
        is the list of fuzzed strings generated for that item.
        @return: An iterator object that returns a string representation of a
        fuzzed xml on every call to next()
        '''
        # itertools.product() walks the combinations in odometer order and
        # copes with the empty cases that an index based counter cannot.
        return ( ''.join( combination )
                 for combination in itertools.product( *fuzzedItems ) )

    def fuzz( self, xmlString ):
        '''
        This method does all the work: it splits the xml, fuzzes every item
        with every fuzzing function and combines the results.

        @parameter xmlString: A string representation of the xml to fuzz
        @return: An iterator object that returns a fuzzed xml on every call to next()
        '''
        fuzzedXmlItems = []
        for xmlItem in self._toList( xmlString ):
            fuzzedItems = []
            for fuzzFunction in self._getFuzzFunctions():
                # The repetitions indicate how many times the fuzzing function
                # will repeat the pattern it is configured to repeat.
                fuzzedItem = fuzzFunction( xmlItem, self._repetitions )
                # A fuzzing function may return None; such results are skipped.
                if fuzzedItem is not None:
                    fuzzedItems.extend( fuzzedItem )
            fuzzedXmlItems.append( fuzzedItems )
        return self._generateIterator( fuzzedXmlItems )
if __name__ == '__main__' :
    # Small demo: print every fuzzed variant of a sample document.
    sample = '<xml attr1="a"><a>f00!</a></xml>'
    fuzzer = xmlFuzzer()
    #fuzzer.setRepetitions( [3,30,60] )
    for fuzzedXml in fuzzer.fuzz( sample ):
        print(fuzzedXml)