|
4 | 4 |
|
5 | 5 |
|
6 | 6 | @pytest.mark.parametrize(
|
7 |
| - ("text,expect_splits"), |
| 7 | + "text,expect_splits", |
8 | 8 | (("hello world", ["hello", " ", "world"]),
|
9 | 9 | ("$hello world", ["", "$", "hello", " ", "world"]),
|
10 | 10 | ("hello-world", ["hello", "-", "world"]),
|
@@ -55,7 +55,7 @@ def test_first_split_re(text, expect_splits):
|
55 | 55 |
|
56 | 56 |
|
57 | 57 | @pytest.mark.parametrize(
|
58 |
| - ("text,expect_tokens"), |
| 58 | + "text,expect_tokens", |
59 | 59 | (("hello world", ["hello", "world"]),
|
60 | 60 | ("hello-world", ["hello", "world"]),
|
61 | 61 | ("hello_world", ["hello", "world"]),
|
@@ -181,7 +181,28 @@ def test_decoding(text, expect_tokens):
|
181 | 181 |
|
182 | 182 |
|
183 | 183 | @pytest.mark.parametrize(
|
184 |
| - ("text,expect_tokens"), |
| 184 | + "text,expect_tokens", |
| 185 | + (("0x0", ["0x", "0"]), |
| 186 | + ("0x1234", ["0x", "1234"]), |
| 187 | + ("0x71c765", ["0x", "71c765"]), |
| 188 | + ("0xdeadbeef", |
| 189 | + ["0x", "[LONG]", "hex", "digits"]), |
| 190 | + ("0xdeadbeefL", ["0xdeadbeefL"]), |
| 191 | + ("0x4AAAAAAAAjq6WYeRDKmebM", |
| 192 | + ["0x4AAAAAAAAjq6WYeRDKmebM"]), |
| 193 | + ("0XPmYE28fJingEYE1hThk7F4SZFf1EVe2PxVNsmv", |
| 194 | + ["[BASE64]"]), |
| 195 | + ("0XBEA020C3BD417F30DE4D6BD05B0ED310AC586CC0", |
| 196 | + ["0X", "[LONG]", "hex", "digits"]), |
| 197 | + )) |
| 198 | +def test_prefixed_hex(text, expect_tokens): |
| 199 | + """Ensure prefixed hex constants are recognized and split. |
| 200 | + """ |
| 201 | + assert list(TextSplitter().split(text)) == expect_tokens |
| 202 | + |
| 203 | + |
| 204 | +@pytest.mark.parametrize( |
| 205 | + "text,expect_tokens", |
185 | 206 | (("That\u2019s all we know.",
|
186 | 207 | ["That's", "all", "we", "know"]),
|
187 | 208 | ("Page=Login&Action=Login\';\n\t\t\treturn",
|
|
0 commit comments