From dff7e7dadba55e387a8eab860e5164ddf7dc25be Mon Sep 17 00:00:00 2001 From: Mattias Wadman Date: Mon, 7 Mar 2022 22:05:00 +0100 Subject: [PATCH] interp: Cleanup binary regexp overloading and add explode Add explode for binary that explode into an array of the binary unit tobits Remove scan_toend as it feels a bit weird and one can use tobytes | .[match(...):] instead Some general cleanup also --- .vscode/settings.json | 1 + doc/usage.md | 3 +- internal/difftest/difftest.go | 29 +-- pkg/interp/binary.jq | 92 ++++++- pkg/interp/interp.go | 1 - pkg/interp/interp.jq | 1 - pkg/interp/match.jq | 99 ------- .../testdata/{buffer.fqtest => binary.fqtest} | 244 +++++++++++++++++- pkg/interp/testdata/match.fqtest | 204 ++------------- 9 files changed, 362 insertions(+), 312 deletions(-) delete mode 100644 pkg/interp/match.jq rename pkg/interp/testdata/{buffer.fqtest => binary.fqtest} (69%) diff --git a/.vscode/settings.json b/.vscode/settings.json index 48e265448..c668173a8 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -171,6 +171,7 @@ }, "[jq]": { "editor.tabSize": 2, + "files.trimTrailingWhitespace": true }, "fracturedjsonvsc.MaxInlineLength": 160, } \ No newline at end of file diff --git a/doc/usage.md b/doc/usage.md index 379e89639..364924071 100644 --- a/doc/usage.md +++ b/doc/usage.md @@ -382,7 +382,8 @@ unary uses input and if more than one argument all as arguments ignoring the inp - For `capture` the `.string` value is a binary. - If pattern is a binary it will be matched literally and not as a regexp. - If pattern is a binary or flags include "b" each input byte will be read as separate code points - - `scan_toend($v)`, `scan_toend($v; $flags)` works the same as `scan` but output binary are from start of match to + - String functions are not overloaded to support binary for now as some of them have behaviours that might be confusing. + - `explode` is overloaded to work with binary. Will explode into array of the unit of the binary. end of binary. 
instead of possibly multi-byte UTF-8 codepoints. This allows to match raw bytes. Ex: `match("\u00ff"; "b")` will match the byte `0xff` and not the UTF-8 encoded codepoint for 255, `match("[^\u00ff]"; "b")` will match diff --git a/internal/difftest/difftest.go b/internal/difftest/difftest.go index 9a7979082..786a0504c 100644 --- a/internal/difftest/difftest.go +++ b/internal/difftest/difftest.go @@ -1,9 +1,8 @@ -// Package difftest implement test based on serialized string output +// Package difftest implements tests based on diffing serialized string output // -// User provides a function that gets a input string. It returns a output string -// based on the input somehow and a output path to file with content to compare it -// to or to write to if in write mode. -// If there is a difference test will fail with a diff. +// User provides a function that gets an input path and input string and returns an +// output path and output string. Content of output path and output string are compared +// and if there is a difference the test fails with a diff. // // Test inputs are read from files matching Pattern from Path. 
// @@ -31,16 +30,6 @@ const green = "\x1b[32m" const red = "\x1b[31m" const reset = "\x1b[0m" -type Fn func(t *testing.T, path string, input string) (string, string, error) - -type Options struct { - Path string - Pattern string - ColorDiff bool - WriteOutput bool - Fn Fn -} - func testDeepEqual(t tf, color bool, printfFn func(format string, args ...interface{}), expected string, actual string) { t.Helper() @@ -111,6 +100,16 @@ func Fatal(t tf, expected string, actual string) { testDeepEqual(t, false, t.Fatalf, expected, actual) } +type Fn func(t *testing.T, path string, input string) (string, string, error) + +type Options struct { + Path string + Pattern string + ColorDiff bool + WriteOutput bool + Fn Fn +} + func TestWithOptions(t *testing.T, opts Options) { t.Helper() diff --git a/pkg/interp/binary.jq b/pkg/interp/binary.jq index 4866e5f2b..086dcda1a 100644 --- a/pkg/interp/binary.jq +++ b/pkg/interp/binary.jq @@ -5,4 +5,94 @@ def tobytesrange: _tobits(8; true; 0); def tobits($pad): _tobits(1; false; $pad); def tobytes($pad): _tobits(8; false; $pad); def tobitsrange($pad): _tobits(1; true; $pad); -def tobytesrange($pad): _tobits(8; true; $pad); \ No newline at end of file +def tobytesrange($pad): _tobits(8; true; $pad); + +# same as regexp.QuoteMeta +def _re_quote_meta: + gsub("(?[\\.\\+\\*\\?\\(\\)\\|\\[\\]\\{\\}\\^\\$\\)])"; "\\\(.c)"); + +# TODO: +# maybe implode, join. but what would it mean? +# "abc" | tobits | explode | implode would not work + +# helper for overloading regex/string functions to support binary +def _binary_or_orig(bfn; fn): + ( _exttype as $exttype + | if . == null or $exttype == "string" then fn + elif $exttype == "binary" then bfn + else + ( . 
as $s + | try + (tobytesrange | bfn) + catch ($s | fn) + ) + end + ); + +def _orig_explode: explode; +def explode: _binary_or_orig([.[range(.size)]]; _orig_explode); + +def _orig_splits($val): splits($val); +def _orig_splits($regex; $flags): splits($regex; $flags); +def _splits_binary($regex; $flags): + ( . as $b + # last null output is to do a last iteration that output from end of last match to end of binary + | foreach (_match_binary($regex; $flags), null) as $m ( + {prev: null, curr: null}; + ( .prev = .curr + | .curr = $m + ); + if .prev == null then $b[0:.curr.offset] + elif .curr == null then $b[.prev.offset+.prev.length:] + else $b[.prev.offset+.prev.length:.curr.offset+.curr.length] + end + ) + ); +def splits($val): _binary_or_orig(_splits_binary($val; "g"); _orig_splits($val)); +def splits($regex; $flags): _binary_or_orig(_splits_binary($regex; "g"+$flags); _orig_splits($regex; $flags)); + +def _orig_split($val): split($val); +def _orig_split($regex; $flags): split($regex; $flags); +# split/1 splits on string not regexp +def split($val): [splits($val | _re_quote_meta)]; +def split($regex; $flags): [splits($regex; $flags)]; + +def _orig_test($val): test($val); +def _orig_test($regex; $flags): test($regex; $flags); +def _test_binary($regex; $flags): + ( isempty(_match_binary($regex; $flags)) + | not + ); +def test($val): _binary_or_orig(_test_binary($val; ""); _orig_test($val)); +def test($regex; $flags): _binary_or_orig(_test_binary($regex; $flags); _orig_test($regex; $flags)); + +def _orig_match($val): match($val); +def _orig_match($regex; $flags): match($regex; $flags); +def match($val): _binary_or_orig(_match_binary($val); _orig_match($val)); +def match($regex; $flags): _binary_or_orig(_match_binary($regex; $flags); _orig_match($regex; $flags)); + +def _orig_capture($val): capture($val); +def _orig_capture($regex; $flags): capture($regex; $flags); +def _capture_binary($regex; $flags): + ( . 
as $b + | _match_binary($regex; $flags) + | .captures + | map( + ( select(.name) + | {key: .name, value: .string} + ) + ) + | from_entries + ); +def capture($val): _binary_or_orig(_capture_binary($val; ""); _orig_capture($val)); +def capture($regex; $flags): _binary_or_orig(_capture_binary($regex; $flags); _orig_capture($regex; $flags)); + +def _orig_scan($val): scan($val); +def _orig_scan($regex; $flags): scan($regex; $flags); +def _scan_binary($regex; $flags): + ( . as $b + | _match_binary($regex; $flags) + | $b[.offset:.offset+.length] + ); +def scan($val): _binary_or_orig(_scan_binary($val; "g"); _orig_scan($val)); +def scan($regex; $flags): _binary_or_orig(_scan_binary($regex; "g"+$flags); _orig_scan($regex; $flags)); diff --git a/pkg/interp/interp.go b/pkg/interp/interp.go index b37978d76..ed3b491de 100644 --- a/pkg/interp/interp.go +++ b/pkg/interp/interp.go @@ -41,7 +41,6 @@ import ( //go:embed ansi.jq //go:embed binary.jq //go:embed decode.jq -//go:embed match.jq //go:embed funcs.jq //go:embed grep.jq //go:embed args.jq diff --git a/pkg/interp/interp.jq b/pkg/interp/interp.jq index f43c7b751..c65be2c0b 100644 --- a/pkg/interp/interp.jq +++ b/pkg/interp/interp.jq @@ -2,7 +2,6 @@ include "internal"; include "options"; include "binary"; include "decode"; -include "match"; include "funcs"; include "grep"; include "args"; diff --git a/pkg/interp/match.jq b/pkg/interp/match.jq deleted file mode 100644 index e534da10c..000000000 --- a/pkg/interp/match.jq +++ /dev/null @@ -1,99 +0,0 @@ -include "binary"; - -def _binary_fn(f): - ( . as $c - | tobytesrange - | f - ); - -def _binary_try_orig(bfn; fn): - ( . 
as $c - | if type == "string" then fn - else - ( $c - | tobytesrange - | bfn - ) - end - ); - -# overloads to support binary - -def _orig_test($val): test($val); -def _orig_test($regex; $flags): test($regex; $flags); -def _test_binary($regex; $flags): - ( isempty(_match_binary($regex; $flags)) - | not - ); -def test($val): _binary_try_orig(_test_binary($val; ""); _orig_test($val)); -def test($regex; $flags): _binary_try_orig(_test_binary($regex; $flags); _orig_test($regex; $flags)); - -def _orig_match($val): match($val); -def _orig_match($regex; $flags): match($regex; $flags); -def match($val): _binary_try_orig(_match_binary($val); _orig_match($val)); -def match($regex; $flags): _binary_try_orig(_match_binary($regex; $flags); _orig_match($regex; $flags)); - -def _orig_capture($val): capture($val); -def _orig_capture($regex; $flags): capture($regex; $flags); -def _capture_binary($regex; $flags): - ( . as $b - | _match_binary($regex; $flags) - | .captures - | map( - ( select(.name) - | {key: .name, value: .string} - ) - ) - | from_entries - ); -def capture($val): _binary_try_orig(_capture_binary($val; ""); _orig_capture($val)); -def capture($regex; $flags): _binary_try_orig(_capture_binary($regex; $flags); _orig_capture($regex; $flags)); - -def _orig_scan($val): scan($val); -def _orig_scan($regex; $flags): scan($regex; $flags); -def _scan_binary($regex; $flags): - ( . as $b - | _match_binary($regex; $flags) - | $b[.offset:.offset+.length] - ); -def scan($val): _binary_try_orig(_scan_binary($val; "g"); _orig_scan($val)); -def scan($regex; $flags): _binary_try_orig(_scan_binary($regex; "g"+$flags); _orig_scan($regex; $flags)); - -def _orig_splits($val): splits($val); -def _orig_splits($regex; $flags): splits($regex; $flags); -def _splits_binary($regex; $flags): - ( . 
as $b - # last null output is to do a last iteration that output from end of last match to end of binary - | foreach (_match_binary($regex; $flags), null) as $m ( - {prev: null, curr: null}; - ( .prev = .curr - | .curr = $m - ); - if .prev == null then $b[0:.curr.offset] - elif .curr == null then $b[.prev.offset+.prev.length:] - else $b[.prev.offset+.prev.length:.curr.offset+.curr.length] - end - ) - ); -def splits($val): _binary_try_orig(_splits_binary($val; "g"); _orig_splits($val)); -def splits($regex; $flags): _binary_try_orig(_splits_binary($regex; "g"+$flags); _orig_splits($regex; $flags)); - -# same as regexp.QuoteMeta -def _quote_meta: - gsub("(?[\\.\\+\\*\\?\\(\\)\\|\\[\\]\\{\\}\\^\\$\\)])"; "\\\(.c)"); - -def _orig_split($val): split($val); -def _orig_split($regex; $flags): split($regex; $flags); -# split/1 splits on string not regexp -def split($val): [splits($val | _quote_meta)]; -def split($regex; $flags): [splits($regex; $flags)]; - -# TODO: rename -# same as scan but outputs binary from start of match to end of binary -def _scan_toend($regex; $flags): - ( . 
as $b - | _match_binary($regex; $flags) - | $b[.offset:] - ); -def scan_toend($val): _binary_fn(_scan_toend($val; "g")); -def scan_toend($regex; $flags): _binary_fn(_scan_toend($regex; "g"+$flags)); diff --git a/pkg/interp/testdata/buffer.fqtest b/pkg/interp/testdata/binary.fqtest similarity index 69% rename from pkg/interp/testdata/buffer.fqtest rename to pkg/interp/testdata/binary.fqtest index 364d37a64..4ec4a53de 100644 --- a/pkg/interp/testdata/buffer.fqtest +++ b/pkg/interp/testdata/binary.fqtest @@ -236,9 +236,28 @@ mp3> .frames[1] | tobytesrange | ., .start, .stop, .size, .[4:17], (tobits, toby |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0xe0| 00 00 0a 2c 43 2e 55 94 80| ...,C.U..|.: raw bits 0xe7-0xf3.7 (13) 0xf0|01 80 93 6b |...k | -mp3> "fq" | tobits | [.[range(.size)]] | map(tobits) | tobytes | tostring +mp3> .frames[1].data | tobytes | match([0x33, 0x85]), first(scan([0x33, 0x85]) | hex), first(splits([0x33, 0x85]) | hex) +{ + "captures": [], + "length": 2, + "offset": 4, + "string": "3\ufffd" +} +"3385" +"07aac38e" +mp3> scan("") + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| + | | |.: raw bits 0x0-NA (0) +mp3> .frames[1] | tobytes | mp3_frame | ., ((.header.bitrate | tobitsrange) as $v | tobitsrange | [.[:$v.start], (0xf | tobits), .[$v.start+$v.size:]] | mp3_frame) | .header.bitrate + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x0| 50 | P |.header.bitrate: 64000 (5) + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x0| f0 | . 
|.header.bitrate: 15 (bad) +mp3> ^D +$ fq -i +null> "fq" | tobits | [.[range(.size)]] | map(tobits) | tobytes | tostring "fq" -mp3> "fq" | tobits | chunk(range(17)+1) | tobytes | tostring +null> "fq" | tobits | chunk(range(17)+1) | tobytes | tostring "fq" "fq" "fq" @@ -256,7 +275,7 @@ mp3> "fq" | tobits | chunk(range(17)+1) | tobytes | tostring "fq" "fq" "fq" -mp3> 1 | tobits(range(10)) | hex +null> 1 | tobits(range(10)) | hex "80" "80" "40" @@ -267,13 +286,13 @@ mp3> 1 | tobits(range(10)) | hex "02" "01" "0080" -mp3> 1 | tobytes(range(5)) | hex +null> 1 | tobytes(range(5)) | hex "01" "01" "0001" "000001" "00000001" -mp3> range(17) | [range(.) | 1 | tobits] | tobits | hex +null> range(17) | [range(.) | 1 | tobits] | tobits | hex "" "80" "c0" @@ -291,7 +310,7 @@ mp3> range(17) | [range(.) | 1 | tobits] | tobits | hex "fffc" "fffe" "ffff" -mp3> range(17) | [range(.) | 1 | tobits] | tobytes | hex +null> range(17) | [range(.) | 1 | tobits] | tobytes | hex "" "01" "03" @@ -309,15 +328,218 @@ mp3> range(17) | [range(.) | 1 | tobits] | tobytes | hex "3fff" "7fff" "ffff" -mp3> "c9dfdac2f6ef68e5db666b6fbeee66d9c7deda66bebfbfe860bfbfbfe9d1636bbfbebf" | hex | tobits | reduce chunk(8)[] as $c ({h:[],g:[]}; .h += [(0|tobits), $c[0:7]] | .g |= . + [if length % 8 == 0 then (0|tobits) else empty end, $c[7:8]]) | .h, .g | tobytes +null> "c9dfdac2f6ef68e5db666b6fbeee66d9c7deda66bebfbfe860bfbfbfe9d1636bbfbebf" | hex | tobits | reduce chunk(8)[] as $c ({h:[],g:[]}; .h += [(0|tobits), $c[0:7]] | .g |= . 
+ [if length % 8 == 0 then (0|tobits) else empty end, $c[7:8]]) | .h, .g | tobytes |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x00|64 6f 6d 61 7b 77 34 72 6d 33 35 37 5f 77 33 6c|doma{w4rm357_w3l|.: raw bits 0x0-0x22.7 (35) * |until 0x22.7 (end) (35) | | |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0|62 6c 30 67 7d| |bl0g}| |.: raw bits 0x0-0x4.7 (5) -mp3> .frames[1] | tobytes | mp3_frame | ., ((.header.bitrate | tobitsrange) as $v | tobitsrange | [.[:$v.start], (0xf | tobits), .[$v.start+$v.size:]] | mp3_frame) | .header.bitrate +null> "åäö" | tobytes | explode, (tobits | explode) +[ + 195, + 165, + 195, + 164, + 195, + 182 +] +[ + 1, + 1, + 0, + 0, + 0, + 0, + 1, + 1, + 1, + 0, + 1, + 0, + 0, + 1, + 0, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 1, + 1, + 1, + 0, + 1, + 0, + 0, + 1, + 0, + 0, + 1, + 1, + 0, + 0, + 0, + 0, + 1, + 1, + 1, + 0, + 1, + 1, + 0, + 1, + 1, + 0 +] +null> "ååå" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcååå" | ., "orig", split($p), "binary_rune", (tobytes | split($p)), "binary_byte", (tobytes | split("ååå" | tobytes; "b")) +"cbbcåååccåååcbc" +"orig" +[ + "cbbc", + "cc", + "cbc" +] +"binary_rune" +[ + "cbbc", + "ccååå", + "cbc" +] +"binary_byte" +[ + "cbbc", + "ccååå", + "cbc" +] +"åååcbbc" +"orig" +[ + "", + "cbbc" +] +"binary_rune" +[ + "", + "cbbc" +] +"binary_byte" +[ + "", + "cbbc" +] +"cbbcååå" +"orig" +[ + "cbbc", + "" +] +"binary_rune" +[ + "cbbc", + "" +] +"binary_byte" +[ + "cbbc", + "" +] +null> "å(?å)(å)" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcååå" | ., "orig", splits($p), "binary_rune", (tobytes | splits($p)), "binary_byte", (tobytes | splits("ååå" | tobytes; "b")) +"cbbcåååccåååcbc" +"orig" +"cbbc" +"cc" +"cbc" +"binary_rune" |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0| 50 | P |.header.bitrate: 64000 (5) +0x0|63 62 62 63 |cbbc |.: raw bits 0x0-0x3.7 (4) + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x00| 63 63 c3 a5 c3 a5| 
cc....|.: raw bits 0xa-0x11.7 (8) +0x10|c3 a5 |.. | + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x10| 63 62 63| | cbc| |.: raw bits 0x12-0x14.7 (3) +"binary_byte" |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0| f0 | . |.header.bitrate: 15 (bad) -mp3> ^D +0x0|63 62 62 63 |cbbc |.: raw bits 0x0-0x3.7 (4) + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x00| 63 63 c3 a5 c3 a5| cc....|.: raw bits 0xa-0x11.7 (8) +0x10|c3 a5 |.. | + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x10| 63 62 63| | cbc| |.: raw bits 0x12-0x14.7 (3) +"åååcbbc" +"orig" +"" +"cbbc" +"binary_rune" + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| + | | |.: raw bits 0x0-NA (0) + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x0| 63 62 62 63| | cbbc| |.: raw bits 0x6-0x9.7 (4) +"binary_byte" + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| + | | |.: raw bits 0x0-NA (0) + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x0| 63 62 62 63| | cbbc| |.: raw bits 0x6-0x9.7 (4) +"cbbcååå" +"orig" +"cbbc" +"" +"binary_rune" + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x0|63 62 62 63 |cbbc |.: raw bits 0x0-0x3.7 (4) + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| + | | |.: raw bits 0xa-NA (0) +"binary_byte" + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x0|63 62 62 63 |cbbc |.: raw bits 0x0-0x3.7 (4) + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| + | | |.: raw bits 0xa-NA (0) +null> "å(?å)(å)" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcååå" | ., "orig", scan($p), "binary_rune", (tobytes | scan($p)), "binary_byte", (tobytes | scan("ååå" | tobytes; "b")) +"cbbcåååccåååcbc" +"orig" +[ + "å", + "å" +] +[ + "å", + "å" +] +"binary_rune" + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x0| c3 a5 c3 a5 c3 a5 | ...... 
|.: raw bits 0x4-0x9.7 (6) + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x00| c3 a5 c3 a5| ....|.: raw bits 0xc-0x11.7 (6) +0x10|c3 a5 |.. | +"binary_byte" + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x0| c3 a5 c3 a5 c3 a5 | ...... |.: raw bits 0x4-0x9.7 (6) + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x00| c3 a5 c3 a5| ....|.: raw bits 0xc-0x11.7 (6) +0x10|c3 a5 |.. | +"åååcbbc" +"orig" +[ + "å", + "å" +] +"binary_rune" + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x0|c3 a5 c3 a5 c3 a5 |...... |.: raw bits 0x0-0x5.7 (6) +"binary_byte" + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x0|c3 a5 c3 a5 c3 a5 |...... |.: raw bits 0x0-0x5.7 (6) +"cbbcååå" +"orig" +[ + "å", + "å" +] +"binary_rune" + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x0| c3 a5 c3 a5 c3 a5| | ......| |.: raw bits 0x4-0x9.7 (6) +"binary_byte" + |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| +0x0| c3 a5 c3 a5 c3 a5| | ......| |.: raw bits 0x4-0x9.7 (6) +null> ^D diff --git a/pkg/interp/testdata/match.fqtest b/pkg/interp/testdata/match.fqtest index 03cdba45b..17a8bee4b 100644 --- a/pkg/interp/testdata/match.fqtest +++ b/pkg/interp/testdata/match.fqtest @@ -6,19 +6,6 @@ mp3> .frames[1].data | tobytes | match("3\u0085"; "b") "offset": 4, "string": "3\ufffd" } -mp3> .frames[1].data | tobytes | match([0x33, 0x85]), first(scan([0x33, 0x85]) | hex), first(splits([0x33, 0x85]) | hex), first(scan_toend([0x33, 0x85]) | hex) -{ - "captures": [], - "length": 2, - "offset": 4, - "string": "3\ufffd" -} -"3385" -"07aac38e" 
-"3385d364f1a1c1081c581f5e1f181c46041e89e5b32e5a0fa83b136bf0f860501404030282440c4e68d1a36c1f7880100431383fc1074e74f9ce5fce72feee9f772e0f83e0f87c100c4a00c1fd2001fffffe79e79e784fb10f29075ce5375160d622866a961a7ea355336e2fe126d1e00a24261b3d0470547b4aad19361e88268b7fef149c0b0d627fffefb821463947fa099f2a1a0a057d1e05044883677ee4f2a0211342412fffe3904983d6b5090440c0994aa08811" -mp3> scan("") - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| - | | |.: raw bits 0x0-NA (0) mp3> ^D $ fq -ni null> "64ff65ff66" | hex | match("\u00ff"; "b") @@ -109,7 +96,7 @@ true "cbbcååå" true true -null> "å(?å)(å)" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcååå" | ., "orig", match($p), "buffer_rune", (tobytes | match($p)), "buffer_byte", (tobytes | match("ååå" | tobytes; "b")) +null> "å(?å)(å)" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcååå" | ., "orig", match($p), "binary_rune", (tobytes | match($p)), "binary_byte", (tobytes | match("ååå" | tobytes; "b")) "cbbcåååccåååcbc" "orig" { @@ -131,7 +118,7 @@ null> "å(?å)(å)" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcåå "offset": 4, "string": "ååå" } -"buffer_rune" +"binary_rune" { "captures": [ { @@ -151,7 +138,7 @@ null> "å(?å)(å)" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcåå "offset": 4, "string": "ååå" } -"buffer_byte" +"binary_byte" { "captures": [], "length": 6, @@ -179,7 +166,7 @@ null> "å(?å)(å)" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcåå "offset": 0, "string": "ååå" } -"buffer_rune" +"binary_rune" { "captures": [ { @@ -199,7 +186,7 @@ null> "å(?å)(å)" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcåå "offset": 0, "string": "ååå" } -"buffer_byte" +"binary_byte" { "captures": [], "length": 6, @@ -227,7 +214,7 @@ null> "å(?å)(å)" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcåå "offset": 4, "string": "ååå" } -"buffer_rune" +"binary_rune" { "captures": [ { @@ -247,206 +234,57 @@ null> "å(?å)(å)" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcåå "offset": 4, "string": "ååå" } -"buffer_byte" +"binary_byte" { "captures": [], "length": 6, 
"offset": 4, "string": "ååå" } -null> "å(?å)(å)" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcååå" | ., "orig", capture($p), "buffer_rune", (tobytes | capture($p)), "buffer_byte", (tobytes | capture("ååå" | tobytes; "b")) +null> "å(?å)(å)" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcååå" | ., "orig", capture($p), "binary_rune", (tobytes | capture($p)), "binary_byte", (tobytes | capture("ååå" | tobytes; "b")) "cbbcåååccåååcbc" "orig" { "n": "å" } -"buffer_rune" +"binary_rune" { "n": "å" } -"buffer_byte" +"binary_byte" {} "åååcbbc" "orig" { "n": "å" } -"buffer_rune" +"binary_rune" { "n": "å" } -"buffer_byte" +"binary_byte" {} "cbbcååå" "orig" { "n": "å" } -"buffer_rune" +"binary_rune" { "n": "å" } -"buffer_byte" +"binary_byte" {} -null> "å(?å)(å)" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcååå" | ., "orig", scan($p), "buffer_rune", (tobytes | scan($p)), "buffer_byte", (tobytes | scan("ååå" | tobytes; "b")) -"cbbcåååccåååcbc" -"orig" -[ - "å", - "å" -] -[ - "å", - "å" -] -"buffer_rune" - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0| c3 a5 c3 a5 c3 a5 | ...... |.: raw bits 0x4-0x9.7 (6) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x00| c3 a5 c3 a5| ....|.: raw bits 0xc-0x11.7 (6) -0x10|c3 a5 |.. | -"buffer_byte" - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0| c3 a5 c3 a5 c3 a5 | ...... |.: raw bits 0x4-0x9.7 (6) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x00| c3 a5 c3 a5| ....|.: raw bits 0xc-0x11.7 (6) -0x10|c3 a5 |.. | -"åååcbbc" -"orig" -[ - "å", - "å" -] -"buffer_rune" - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|c3 a5 c3 a5 c3 a5 |...... |.: raw bits 0x0-0x5.7 (6) -"buffer_byte" - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|c3 a5 c3 a5 c3 a5 |...... 
|.: raw bits 0x0-0x5.7 (6) -"cbbcååå" -"orig" -[ - "å", - "å" -] -"buffer_rune" - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0| c3 a5 c3 a5 c3 a5| | ......| |.: raw bits 0x4-0x9.7 (6) -"buffer_byte" - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0| c3 a5 c3 a5 c3 a5| | ......| |.: raw bits 0x4-0x9.7 (6) -null> "å(?å)(å)" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcååå" | ., "orig", splits($p), "buffer_rune", (tobytes | splits($p)), "buffer_byte", (tobytes | splits("ååå" | tobytes; "b")) -"cbbcåååccåååcbc" -"orig" -"cbbc" -"cc" -"cbc" -"buffer_rune" - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|63 62 62 63 |cbbc |.: raw bits 0x0-0x3.7 (4) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x00| 63 63 c3 a5 c3 a5| cc....|.: raw bits 0xa-0x11.7 (8) -0x10|c3 a5 |.. | - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x10| 63 62 63| | cbc| |.: raw bits 0x12-0x14.7 (3) -"buffer_byte" - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|63 62 62 63 |cbbc |.: raw bits 0x0-0x3.7 (4) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x00| 63 63 c3 a5 c3 a5| cc....|.: raw bits 0xa-0x11.7 (8) -0x10|c3 a5 |.. 
| - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x10| 63 62 63| | cbc| |.: raw bits 0x12-0x14.7 (3) -"åååcbbc" -"orig" -"" -"cbbc" -"buffer_rune" - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| - | | |.: raw bits 0x0-NA (0) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0| 63 62 62 63| | cbbc| |.: raw bits 0x6-0x9.7 (4) -"buffer_byte" - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| - | | |.: raw bits 0x0-NA (0) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0| 63 62 62 63| | cbbc| |.: raw bits 0x6-0x9.7 (4) -"cbbcååå" -"orig" -"cbbc" -"" -"buffer_rune" - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|63 62 62 63 |cbbc |.: raw bits 0x0-0x3.7 (4) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| - | | |.: raw bits 0xa-NA (0) -"buffer_byte" - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| -0x0|63 62 62 63 |cbbc |.: raw bits 0x0-0x3.7 (4) - |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| - | | |.: raw bits 0xa-NA (0) -null> "ååå" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcååå" | ., "orig", split($p), "buffer_rune", (tobytes | split($p)), "buffer_byte", (tobytes | split("ååå" | tobytes; "b")) -"cbbcåååccåååcbc" -"orig" -[ - "cbbc", - "cc", - "cbc" -] -"buffer_rune" -[ - "cbbc", - "ccååå", - "cbc" -] -"buffer_byte" -[ - "cbbc", - "ccååå", - "cbc" -] -"åååcbbc" -"orig" -[ - "", - "cbbc" -] -"buffer_rune" -[ - "", - "cbbc" -] -"buffer_byte" -[ - "", - "cbbc" -] -"cbbcååå" -"orig" -[ - "cbbc", - "" -] -"buffer_rune" -[ - "cbbc", - "" -] -"buffer_byte" -[ - "cbbc", - "" -] -null> "å(?å)(å)" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcååå" | ., "buffer_rune", (tobytes | scan_toend($p)), "buffer_byte", (tobytes | scan_toend("ååå" | tobytes; "b")) +null> "å(?å)(å)" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcååå" | ., "binary_rune", (tobytes | .[match($p; "g").offset:]), 
"binary_byte", (tobytes | .[match("ååå" | tobytes; "gb").offset:]) "cbbcåååccåååcbc" -"buffer_rune" +"binary_rune" |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x00| c3 a5 c3 a5 c3 a5 63 63 c3 a5 c3 a5| ......cc....|.: raw bits 0x4-0x14.7 (17) 0x10|c3 a5 63 62 63| |..cbc| | |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x00| c3 a5 c3 a5| ....|.: raw bits 0xc-0x14.7 (9) 0x10|c3 a5 63 62 63| |..cbc| | -"buffer_byte" +"binary_byte" |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x00| c3 a5 c3 a5 c3 a5 63 63 c3 a5 c3 a5| ......cc....|.: raw bits 0x4-0x14.7 (17) 0x10|c3 a5 63 62 63| |..cbc| | @@ -454,17 +292,17 @@ null> "å(?å)(å)" as $p | "cbbcåååccåååcbc", "åååcbbc", "cbbcåå 0x00| c3 a5 c3 a5| ....|.: raw bits 0xc-0x14.7 (9) 0x10|c3 a5 63 62 63| |..cbc| | "åååcbbc" -"buffer_rune" +"binary_rune" |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0|c3 a5 c3 a5 c3 a5 63 62 62 63| |......cbbc| |.: raw bits 0x0-0x9.7 (10) -"buffer_byte" +"binary_byte" |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0|c3 a5 c3 a5 c3 a5 63 62 62 63| |......cbbc| |.: raw bits 0x0-0x9.7 (10) "cbbcååå" -"buffer_rune" +"binary_rune" |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0| c3 a5 c3 a5 c3 a5| | ......| |.: raw bits 0x4-0x9.7 (6) -"buffer_byte" +"binary_byte" |00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef| 0x0| c3 a5 c3 a5 c3 a5| | ......| |.: raw bits 0x4-0x9.7 (6) null> ^D