Skip to content

Commit

Permalink
Merge pull request #188 from wader/binary-regexp-explode-clenaup
Browse files Browse the repository at this point in the history
interp: Cleanup binary regexp overloading and add explode
  • Loading branch information
wader authored Mar 9, 2022
2 parents c298ed7 + dff7e7d commit b0c255f
Show file tree
Hide file tree
Showing 9 changed files with 362 additions and 312 deletions.
1 change: 1 addition & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@
},
"[jq]": {
"editor.tabSize": 2,
"files.trimTrailingWhitespace": true
},
"fracturedjsonvsc.MaxInlineLength": 160,
}
3 changes: 2 additions & 1 deletion doc/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,8 @@ unary uses input and if more than one argument all as arguments ignoring the inp
- For `capture` the `.string` value is a binary.
- If pattern is a binary it will be matched literally and not as a regexp.
- If pattern is a binary or flags include "b" each input byte will be read as separate code points
- `scan_toend($v)`, `scan_toend($v; $flags)` works the same as `scan` but output binary are from start of match to
- String functions are not overloaded to support binary for now as some of them have behaviours that might be confusing.
- `explode` is overloaded to work with binary. It will explode into an array of the units of the binary.
end of binary.
instead of possibly multi-byte UTF-8 codepoints. This allows to match raw bytes. Ex: `match("\u00ff"; "b")`
will match the byte `0xff` and not the UTF-8 encoded codepoint for 255, `match("[^\u00ff]"; "b")` will match
Expand Down
29 changes: 14 additions & 15 deletions internal/difftest/difftest.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
// Package difftest implement test based on serialized string output
// Package difftest implements tests based on diffing serialized string output
//
// User provides a function that gets a input string. It returns a output string
// based on the input somehow and a output path to file with content to compare it
// to or to write to if in write mode.
// If there is a difference test will fail with a diff.
// User provides a function that gets an input path and input string and returns an
// output path and output string. The content of the output path and the output string
// are compared and if there is a difference the test fails with a diff.
//
// Test inputs are read from files matching Pattern from Path.
//
Expand Down Expand Up @@ -31,16 +30,6 @@ const green = "\x1b[32m"
const red = "\x1b[31m"
const reset = "\x1b[0m"

// Fn is the user provided test function. It is called with the test, a path
// and an input string and returns two strings and an error. According to the
// package comment the two strings are an output path and an output string.
type Fn func(t *testing.T, path string, input string) (string, string, error)

// Options configures a test run, see TestWithOptions.
type Options struct {
// Path and Pattern select the test inputs: per the package comment, inputs
// are read from files matching Pattern from Path.
Path string
Pattern string
// ColorDiff presumably enables colored diff output - TODO confirm against TestWithOptions.
ColorDiff bool
// WriteOutput presumably makes the test write the output instead of
// comparing it - TODO confirm against TestWithOptions.
WriteOutput bool
// Fn is the function under test.
Fn Fn
}

func testDeepEqual(t tf, color bool, printfFn func(format string, args ...interface{}), expected string, actual string) {
t.Helper()

Expand Down Expand Up @@ -111,6 +100,16 @@ func Fatal(t tf, expected string, actual string) {
testDeepEqual(t, false, t.Fatalf, expected, actual)
}

// Fn is the user provided test function. It is called with the test, a path
// and an input string and returns two strings and an error. According to the
// package comment the two strings are an output path and an output string.
type Fn func(t *testing.T, path string, input string) (string, string, error)

// Options configures a test run, see TestWithOptions.
type Options struct {
// Path and Pattern select the test inputs: per the package comment, inputs
// are read from files matching Pattern from Path.
Path string
Pattern string
// ColorDiff presumably enables colored diff output - TODO confirm against TestWithOptions.
ColorDiff bool
// WriteOutput presumably makes the test write the output instead of
// comparing it - TODO confirm against TestWithOptions.
WriteOutput bool
// Fn is the function under test.
Fn Fn
}

func TestWithOptions(t *testing.T, opts Options) {
t.Helper()

Expand Down
92 changes: 91 additions & 1 deletion pkg/interp/binary.jq
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,94 @@ def tobytesrange: _tobits(8; true; 0);
# Variants taking an explicit $pad argument, all thin wrappers around _tobits
# (defined elsewhere). Judging by the names, the first _tobits argument looks
# like the unit size in bits (1 for bits, 8 for bytes) and the second selects
# the "range" flavour - TODO confirm against the _tobits implementation.
def tobits($pad): _tobits(1; false; $pad);
def tobytes($pad): _tobits(8; false; $pad);
def tobitsrange($pad): _tobits(1; true; $pad);
# NOTE(review): the next definition appears twice in this view, most likely
# the removed and added side of an end-of-file newline change.
def tobytesrange($pad): _tobits(8; true; $pad);
def tobytesrange($pad): _tobits(8; true; $pad);

# Same as Go's regexp.QuoteMeta: prefix every regexp metacharacter in the
# input string with a backslash so the result matches the input literally.
# The quoted set is \ . + * ? ( ) | [ ] { } ^ $
# The backslash itself has to be part of the set, otherwise an input such as
# "\d" would still be interpreted as a character class by the regexp engine.
def _re_quote_meta:
  gsub("(?<c>[\\\\\\.\\+\\*\\?\\(\\)\\|\\[\\]\\{\\}\\^\\$])"; "\\\(.c)");

# TODO:
# maybe implode, join. but what would it mean?
# "abc" | tobits | explode | implode would not work

# Dispatch helper used to overload regex/string functions so that they also
# accept binary. bfn is the binary implementation, fn the original one:
# - null or string input runs fn
# - binary input runs bfn
# - any other input is converted with tobytesrange and run through bfn, if
#   that raises an error fn is run on the unconverted input instead
def _binary_or_orig(bfn; fn):
  ( . as $input
  | _exttype as $kind
  | if $input == null or $kind == "string" then fn
    elif $kind == "binary" then bfn
    else
      # inside catch the input is the error, so restore the original input
      try (tobytesrange | bfn)
      catch ($input | fn)
    end
  );

# keep a reference to the previously defined explode, must come before the
# overloaded definition below
def _orig_explode: explode;
# explode that also accepts binary: for binary input outputs an array with
# the elements at index 0 up to, but not including, .size
def explode:
  _binary_or_orig(
    [range(.size) as $i | .[$i]];
    _orig_explode
  );

# References to the splits/1 and splits/2 that are in scope at this point.
# They have to be defined before any later definition with the same name and
# arity so that the previous implementation stays reachable.
def _orig_splits($val): splits($val);
def _orig_splits($regex; $flags): splits($regex; $flags);
# Split the input binary on each match of $regex and output the pieces
# between the matches, the matched separators themselves are not part of the
# output. Matching is done by _match_binary($regex; $flags), whose outputs are
# expected to have .offset and .length.
# Outputs:
# - start of binary up to the first match
# - for each following match, end of previous match up to start of the match
# - end of last match up to end of binary
# If there are no matches the only iteration has both .prev and .curr null and
# outputs $b[0:null]; presumably that is the whole binary - TODO confirm.
def _splits_binary($regex; $flags):
  ( . as $b
  # last null output is to do a last iteration that outputs from end of last
  # match to end of binary
  | foreach (_match_binary($regex; $flags), null) as $m (
      {prev: null, curr: null};
      ( .prev = .curr
      | .curr = $m
      );
      if .prev == null then $b[0:.curr.offset]
      elif .curr == null then $b[.prev.offset+.prev.length:]
      # stop where the current match starts, ending at offset+length would
      # include the separator in the piece
      else $b[.prev.offset+.prev.length:.curr.offset]
      end
    )
  );
# splits overloaded to also work on binary. The binary variant always adds
# the "g" flag to the flags passed to _splits_binary.
def splits($val):
  _binary_or_orig(
    _splits_binary($val; "g");
    _orig_splits($val)
  );
def splits($regex; $flags):
  _binary_or_orig(
    _splits_binary($regex; "g"+$flags);
    _orig_splits($regex; $flags)
  );

# references to the previously defined split/1 and split/2
def _orig_split($val): split($val);
def _orig_split($regex; $flags): split($regex; $flags);
# split/1 splits on a string and not on a regexp, so the value is quoted
# before it is handed to the regexp based splits
def split($val):
  ( ($val | _re_quote_meta) as $quoted
  | [splits($quoted)]
  );
# split/2 collects the outputs of splits/2 into an array
def split($regex; $flags):
  [ splits($regex; $flags)
  ];

# references to the previously defined test/1 and test/2
def _orig_test($val): test($val);
def _orig_test($regex; $flags): test($regex; $flags);
# true if _match_binary outputs at least one match, otherwise false
def _test_binary($regex; $flags):
  isempty(_match_binary($regex; $flags)) | not;
# test overloaded to also work on binary
def test($val):
  _binary_or_orig(
    _test_binary($val; "");
    _orig_test($val)
  );
def test($regex; $flags):
  _binary_or_orig(
    _test_binary($regex; $flags);
    _orig_test($regex; $flags)
  );

# references to the previously defined match/1 and match/2
def _orig_match($val): match($val);
def _orig_match($regex; $flags): match($regex; $flags);
# match overloaded to also work on binary, the binary variants are handled
# by _match_binary which is defined elsewhere
def match($val):
  _binary_or_orig(
    _match_binary($val);
    _orig_match($val)
  );
def match($regex; $flags):
  _binary_or_orig(
    _match_binary($regex; $flags);
    _orig_match($regex; $flags)
  );

# references to the previously defined capture/1 and capture/2
def _orig_capture($val): capture($val);
def _orig_capture($regex; $flags): capture($regex; $flags);
# For each match output by _match_binary, output an object that maps the name
# of every named capture to its .string value. Captures without a name are
# left out.
def _capture_binary($regex; $flags):
  ( _match_binary($regex; $flags)
  | [ .captures[]
    | select(.name)
    | {key: .name, value: .string}
    ]
  | from_entries
  );
# capture overloaded to also work on binary
def capture($val):
  _binary_or_orig(
    _capture_binary($val; "");
    _orig_capture($val)
  );
def capture($regex; $flags):
  _binary_or_orig(
    _capture_binary($regex; $flags);
    _orig_capture($regex; $flags)
  );

# references to the previously defined scan/1 and scan/2
def _orig_scan($val): scan($val);
def _orig_scan($regex; $flags): scan($regex; $flags);
# For each match output by _match_binary, output the slice of the input that
# the match covers, from .offset and .length long.
def _scan_binary($regex; $flags):
  ( _match_binary($regex; $flags) as $m
  | .[$m.offset:$m.offset+$m.length]
  );
# scan overloaded to also work on binary. The binary variant always adds the
# "g" flag to the flags passed to _scan_binary.
def scan($val):
  _binary_or_orig(
    _scan_binary($val; "g");
    _orig_scan($val)
  );
def scan($regex; $flags):
  _binary_or_orig(
    _scan_binary($regex; "g"+$flags);
    _orig_scan($regex; $flags)
  );
1 change: 0 additions & 1 deletion pkg/interp/interp.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ import (
//go:embed ansi.jq
//go:embed binary.jq
//go:embed decode.jq
//go:embed match.jq
//go:embed funcs.jq
//go:embed grep.jq
//go:embed args.jq
Expand Down
1 change: 0 additions & 1 deletion pkg/interp/interp.jq
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ include "internal";
include "options";
include "binary";
include "decode";
include "match";
include "funcs";
include "grep";
include "args";
Expand Down
99 changes: 0 additions & 99 deletions pkg/interp/match.jq

This file was deleted.

Loading

0 comments on commit b0c255f

Please sign in to comment.