diff --git a/src/dom_tokenizers/pre_tokenizers/splitter.py b/src/dom_tokenizers/pre_tokenizers/splitter.py
index ef2bca2..5abcc1f 100644
--- a/src/dom_tokenizers/pre_tokenizers/splitter.py
+++ b/src/dom_tokenizers/pre_tokenizers/splitter.py
@@ -67,7 +67,7 @@ def special_tokens(self) -> Iterable[str]:
     # XXX older bits
     MAXWORDLEN = 32
     WORD_RE = re.compile(r"(?:\w+['’]?)+")
-    HEX_RE = re.compile(r"^(?:0x|[0-9a-f]{2})[0-9a-f]{6,}$")
+    HEX_RE = re.compile(r"^(?:0x|[0-9a-f]{2})[0-9a-f]{6,}$", re.I)  # re.I: upper-case hex digits (and "0X") now match too
     DIGIT_RE = re.compile(r"\d")
     LONGEST_URLISH = 1024  # XXX?
     URLISH_LOOKBACK = 5
@@ -85,6 +85,7 @@ def special_tokens(self) -> Iterable[str]:
     LONGEST_PHITEST = 85
     BASE64_RE = base64_matcher()
     B64_PNG_RE = re.compile(r"iVBORw0KGg[o-r]")
+    B64_HEX_RE = re.compile(r"^(0x)?([0-9a-f]+)$", re.I)  # group 1: optional "0x" prefix (None if absent); group 2: the hex digits
     XML_HDR_RE = re.compile(r"<([a-z]{3,})\s+[a-z]+")
 
     def split(self, text: str) -> Iterable[str]:
@@ -161,6 +162,23 @@ def split(self, text: str) -> Iterable[str]:
 
             # Are we looking at something that might be base64?
             if self.BASE64_RE.match(curr):
+                if curr.isdecimal():  # digit-only strings get here too; don't treat them as base64
+                    if VERBOSE:  # pragma: no cover
+                        debug("it's a decimal number")
+                    cursor += 1  # leave this split untouched and move on to the next one
+                    continue
+
+                match = self.B64_HEX_RE.match(curr)  # whole split is hex digits, optionally "0x"-prefixed (case-insensitive)
+                if match:
+                    if VERBOSE:  # pragma: no cover
+                        debug("it's hex")
+                    new_splits = match.groups()  # (prefix or None, hex digits)
+                    if new_splits[0] is not None:  # a "0x" prefix was present
+                        splits[cursor:cursor+1] = new_splits  # replace the one split with two: prefix, digits
+                        cursor += 1  # extra step to account for the inserted prefix split
+                    cursor += 1  # step past the hex digits
+                    continue  # skip the _sub_base64 handling below
+
                 cursor = self._sub_base64(splits, cursor)
                 continue