Skip to content

Commit

Permalink
interp: Cleanup binary regexp overloading and add explode
Browse files Browse the repository at this point in the history
Add explode for binary that explode into an array of the binary unit tobits
Remove scan_toend as it feels a bit weird and one can use tobytes | .[match(...):] instead
Some general cleanup also
  • Loading branch information
wader committed Mar 9, 2022
1 parent c298ed7 commit dff7e7d
Show file tree
Hide file tree
Showing 9 changed files with 362 additions and 312 deletions.
1 change: 1 addition & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@
},
"[jq]": {
"editor.tabSize": 2,
"files.trimTrailingWhitespace": true
},
"fracturedjsonvsc.MaxInlineLength": 160,
}
3 changes: 2 additions & 1 deletion doc/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,8 @@ unary uses input and if more than one argument all as arguments ignoring the inp
- For `capture` the `.string` value is a binary.
- If pattern is a binary it will be matched literally and not as a regexp.
- If pattern is a binary or flags include "b" each input byte will be read as separate code points
- `scan_toend($v)`, `scan_toend($v; $flags)` works the same as `scan` but output binary are from start of match to
- String functions are not overloaded to support binary for now, as some of them have behaviours that might be confusing.
- `explode` is overloaded to work with binary. It will explode a binary into an array of the units of the binary.
end of binary.
instead of possibly multi-byte UTF-8 codepoints. This allows to match raw bytes. Ex: `match("\u00ff"; "b")`
will match the byte `0xff` and not the UTF-8 encoded codepoint for 255, `match("[^\u00ff]"; "b")` will match
Expand Down
29 changes: 14 additions & 15 deletions internal/difftest/difftest.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
// Package difftest implement test based on serialized string output
// Package difftest implements tests based on diffing serialized string output
//
// User provides a function that gets a input string. It returns a output string
// based on the input somehow and a output path to file with content to compare it
// to or to write to if in write mode.
// If there is a difference test will fail with a diff.
// User provides a function that gets an input path and input string and returns an
// output path and output string. Content of output path and output string is compared
// and if there is a difference the test fails with a diff.
//
// Test inputs are read from files matching Pattern from Path.
//
Expand Down Expand Up @@ -31,16 +30,6 @@ const green = "\x1b[32m"
const red = "\x1b[31m"
const reset = "\x1b[0m"

// Fn is the signature of the user-provided test function. It receives the
// test handle, a path and an input string, and returns two strings and an
// error. Per the package comment the two returned strings are an output path
// and an output string.
type Fn func(t *testing.T, path string, input string) (string, string, error)

// Options is the configuration passed to TestWithOptions.
type Options struct {
// Path and Pattern select the test inputs: per the package comment, inputs
// are read from files matching Pattern from Path.
Path string
Pattern string
// NOTE(review): presumably enables colored diff output — confirm against testDeepEqual.
ColorDiff bool
// NOTE(review): presumably selects write mode, where output is written
// instead of compared — confirm against TestWithOptions.
WriteOutput bool
// Fn is the user-provided function; presumably called for each input — TODO confirm.
Fn Fn
}

func testDeepEqual(t tf, color bool, printfFn func(format string, args ...interface{}), expected string, actual string) {
t.Helper()

Expand Down Expand Up @@ -111,6 +100,16 @@ func Fatal(t tf, expected string, actual string) {
testDeepEqual(t, false, t.Fatalf, expected, actual)
}

// Fn is the signature of the user-provided test function. It receives the
// test handle, a path and an input string, and returns two strings and an
// error. Per the package comment the two returned strings are an output path
// and an output string.
type Fn func(t *testing.T, path string, input string) (string, string, error)

// Options is the configuration passed to TestWithOptions.
type Options struct {
// Path and Pattern select the test inputs: per the package comment, inputs
// are read from files matching Pattern from Path.
Path string
Pattern string
// NOTE(review): presumably enables colored diff output — confirm against testDeepEqual.
ColorDiff bool
// NOTE(review): presumably selects write mode, where output is written
// instead of compared — confirm against TestWithOptions.
WriteOutput bool
// Fn is the user-provided function; presumably called for each input — TODO confirm.
Fn Fn
}

func TestWithOptions(t *testing.T, opts Options) {
t.Helper()

Expand Down
92 changes: 91 additions & 1 deletion pkg/interp/binary.jq
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,94 @@ def tobytesrange: _tobits(8; true; 0);
def tobits($pad): _tobits(1; false; $pad);
def tobytes($pad): _tobits(8; false; $pad);
def tobitsrange($pad): _tobits(1; true; $pad);
def tobytesrange($pad): _tobits(8; true; $pad);
def tobytesrange($pad): _tobits(8; true; $pad);

# Quote all regexp meta characters in the input string so that the result
# matches the input literally when used as a pattern.
# Same character set as Go's regexp.QuoteMeta: \ . + * ? ( ) | [ ] { } ^ $
# Each meta character is captured as "c" and re-emitted prefixed with a backslash.
def _re_quote_meta:
  gsub("(?<c>[\\\\\\.\\+\\*\\?\\(\\)\\|\\[\\]\\{\\}\\^\\$])"; "\\\(.c)");

# TODO:
# maybe implode, join. but what would it mean?
# "abc" | tobits | explode | implode would not work

# Dispatch helper used to overload regex/string functions for binary input.
#   bfn: implementation to run when the input is (or can be turned into) a binary
#   fn:  original implementation
# null and string input always use fn, binary input always uses bfn. Any other
# input is first converted with tobytesrange and given to bfn; if that raises
# an error the untouched input is handed to fn instead.
def _binary_or_orig(bfn; fn):
  ( . as $input
  | _exttype as $type
  | if $input == null or $type == "string" then fn
    elif $type == "binary" then bfn
    else
      # inside catch the input is the error, so restore the original input for fn
      try (tobytesrange | bfn)
      catch ($input | fn)
    end
  );

# keep a handle on the previously defined explode before the name is shadowed below
def _orig_explode: explode;
# explode overloaded for binary: a binary becomes an array with the element
# at each index from 0 up to (not including) .size, everything else goes
# through the dispatch in _binary_or_orig.
def explode:
  _binary_or_orig(
    [range(.size) as $i | .[$i]];
    _orig_explode
  );

# keep handles on the previously defined splits/1 and splits/2 before the names are shadowed below
def _orig_splits($val): splits($val);
def _orig_splits($regex; $flags): splits($regex; $flags);
# Output the pieces of the input binary that lie between the matches
# produced by _match_binary($regex; $flags).
# The foreach state keeps the previous and current match so that each piece
# can be sliced from the end of the previous match to the start of the
# current one.
def _splits_binary($regex; $flags):
  ( . as $b
  # last null output is to do a last iteration that output from end of last match to end of binary
  | foreach (_match_binary($regex; $flags), null) as $m (
      {prev: null, curr: null};
      ( .prev = .curr
      | .curr = $m
      );
      # first iteration: from start of binary to start of first match
      if .prev == null then $b[0:.curr.offset]
      # last iteration: from end of last match to end of binary
      elif .curr == null then $b[.prev.offset+.prev.length:]
      # between two matches: stop at the start of the current match so the
      # separator itself is not included in the output
      else $b[.prev.offset+.prev.length:.curr.offset]
      end
    )
  );
# splits always matches globally, so "g" is prepended to any given flags
def splits($val): _binary_or_orig(_splits_binary($val; "g"); _orig_splits($val));
def splits($regex; $flags): _binary_or_orig(_splits_binary($regex; "g"+$flags); _orig_splits($regex; $flags));

# keep handles on the previously defined split/1 and split/2 before the names are shadowed below
def _orig_split($val): split($val);
def _orig_split($regex; $flags): split($regex; $flags);
# split/1 separates on a literal string and not a regexp, so regexp meta
# characters in $val are quoted before it is handed to splits
def split($val):
  [ ($val | _re_quote_meta) as $quoted
  | splits($quoted)
  ];
# split/2 separates on a regexp and collects all pieces from splits/2
def split($regex; $flags):
  [ splits($regex; $flags) ];

# keep handles on the previously defined test/1 and test/2 before the names are shadowed below
def _orig_test($val): test($val);
def _orig_test($regex; $flags): test($regex; $flags);
# true if _match_binary outputs at least one match for the input, otherwise false
def _test_binary($regex; $flags):
  if isempty(_match_binary($regex; $flags)) then false
  else true
  end;
# test overloaded via _binary_or_orig, test/1 uses empty flags for the binary case
def test($val):
  _binary_or_orig(
    _test_binary($val; "");
    _orig_test($val)
  );
def test($regex; $flags):
  _binary_or_orig(
    _test_binary($regex; $flags);
    _orig_test($regex; $flags)
  );

# keep handles on the previously defined match/1 and match/2 before the names are shadowed below
def _orig_match($val): match($val);
def _orig_match($regex; $flags): match($regex; $flags);
# match overloaded via _binary_or_orig: _match_binary is used as the binary
# implementation and the original match as the fallback.
# NOTE(review): match/1 calls _match_binary/1 while the other overloads in this
# file pass "" as flags to the two argument form — confirm both behave the same.
def match($val): _binary_or_orig(_match_binary($val); _orig_match($val));
def match($regex; $flags): _binary_or_orig(_match_binary($regex; $flags); _orig_match($regex; $flags));

# keep handles on the previously defined capture/1 and capture/2 before the names are shadowed below
def _orig_capture($val): capture($val);
def _orig_capture($regex; $flags): capture($regex; $flags);
# For each match output by _match_binary, build an object from the match's
# named captures: capture name as key and the capture's .string as value.
# Captures without a name are dropped.
def _capture_binary($regex; $flags):
  ( _match_binary($regex; $flags)
  | .captures
  | map(
      ( select(.name)
      | {key: .name, value: .string}
      )
    )
  | from_entries
  );
# capture overloaded via _binary_or_orig, capture/1 uses empty flags for the binary case
def capture($val): _binary_or_orig(_capture_binary($val; ""); _orig_capture($val));
def capture($regex; $flags): _binary_or_orig(_capture_binary($regex; $flags); _orig_capture($regex; $flags));

# keep handles on the previously defined scan/1 and scan/2 before the names are shadowed below
def _orig_scan($val): scan($val);
def _orig_scan($regex; $flags): scan($regex; $flags);
# For each match output by _match_binary, output the slice of the input
# binary that the match covers (offset up to offset+length).
def _scan_binary($regex; $flags):
  ( . as $input
  | _match_binary($regex; $flags) as $m
  | $input[$m.offset:$m.offset+$m.length]
  );
# scan always matches globally, so "g" is prepended to any given flags
def scan($val):
  _binary_or_orig(
    _scan_binary($val; "g");
    _orig_scan($val)
  );
def scan($regex; $flags):
  _binary_or_orig(
    _scan_binary($regex; "g"+$flags);
    _orig_scan($regex; $flags)
  );
1 change: 0 additions & 1 deletion pkg/interp/interp.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ import (
//go:embed ansi.jq
//go:embed binary.jq
//go:embed decode.jq
//go:embed match.jq
//go:embed funcs.jq
//go:embed grep.jq
//go:embed args.jq
Expand Down
1 change: 0 additions & 1 deletion pkg/interp/interp.jq
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ include "internal";
include "options";
include "binary";
include "decode";
include "match";
include "funcs";
include "grep";
include "args";
Expand Down
99 changes: 0 additions & 99 deletions pkg/interp/match.jq

This file was deleted.

Loading

0 comments on commit dff7e7d

Please sign in to comment.