@@ -62,12 +62,14 @@ def special_tokens(self) -> Iterable[str]:
62
62
# Splits on runs of characters that are neither word characters,
# apostrophes (straight or curly), nor any character listed in
# BASE64_NONWORD (defined elsewhere in this file).
FIRST_SPLIT_RE = re.compile(rf"([^\w'’{BASE64_NONWORD}]+)")
# One or more of the non-alphanumeric base64 characters: + / =
BASE64_NONWORD_RE = re.compile("[+/=]+")

# Character class for a single hexadecimal digit (either case); reused
# below so every hex-related pattern agrees on what a hex digit is.
_HEX = "[0-9a-fA-F]"
# Matches a string made up solely of hex digits.
ALL_HEX_RE = re.compile(f"^{_HEX}+$")
# Exactly two hex digits; the doubled braces emit a literal "{2}"
# quantifier from inside the f-string.
_TWOHEX = f"{_HEX}{{2}}"
TWOHEX_RE = re.compile(_TWOHEX)
# "x" followed by two hex digits, or "u" followed by four.
# NOTE(review): presumably the part of a JavaScript escape after the
# backslash -- confirm against the call site.
JS_CHAR_ESCAPE_RE = re.compile(f"(?:x|u{_TWOHEX}){_TWOHEX}")
# NOTE(review): presumably the leading characters of an HTML entity
# ("&name;" / "&#NNN;") -- confirm against the call site.
ENTITY_STARTS = {"&", "&#"}
# Any single character followed by "&", "%" or a backslash; the second
# character is captured.
ESCAPE_START_RE = re.compile(r".([&%\\])")
# "0x"/"0X" prefix, one or more hex digits, then an optional trailing run
# of "+", "/" or "=", captured as three separate groups.
PREFIXED_HEX_RE = re.compile(rf"^(0[xX])({_HEX}+)([+/=]*)$", re.I)

# XXX older bits
MAXWORDLEN = 32
@@ -461,20 +463,32 @@ def _split_base64_utf8(self, text, encoded):
461
463
return [self .base64_token , "json" ]
462
464
except json .JSONDecodeError :
463
465
pass
464
- if self .oracle .first_is_better (encoded , text ):
465
- return None # encoded is better
466
+ #with open("base64.matches", "a") as fp:
467
+ # print("text", encoded, file=fp)
468
+ #if self.oracle.first_is_better(encoded, text):
469
+ # return None # encoded is better
466
470
return [self .base64_token , "text" ]
467
471
468
472
def _split_base64_binary (self , data , encoded ):
473
+ if len (encoded ) < 9 : # XXX review
474
+ return None
469
475
filetype = sniff_bytes (data )
470
- if not filetype :
471
- if self .oracle .is_texty (encoded ):
472
- return None
473
- return [self .base64_token , "data" ]
474
- return [self .base64_token , filetype .name .lower ()]
476
+ if filetype :
477
+ return [self .base64_token , filetype .name .lower ()]
478
+ #with open("unsniffed.matches", "a") as fp:
479
+ # print(len(encoded), encoded, file=fp)
480
+ #if self.oracle.is_texty(encoded):
481
+ # return None
482
+ return [self .base64_token , "data" ]
483
+ raise NotImplementedError (encoded )
475
484
476
485
# XXX junk?
477
486
487
+ #all uppercase
488
+ #all lowercase
489
+ #all hex digits
490
+ #CamelCase
491
+
478
492
def _sub_base64 (self , splits , cursor ):
479
493
curr = splits [cursor ]
480
494
try :
0 commit comments