Skip to content

Commit

Permalink
interp: Add buffer match support to find and grep
Browse files Browse the repository at this point in the history
  • Loading branch information
wader committed Oct 16, 2021
1 parent 984ba1a commit 7298a4c
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 22 deletions.
15 changes: 10 additions & 5 deletions doc/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,11 +142,16 @@ notable is support for arbitrary-precision integers.
- `format_root/0` return root value of format for value
- `parent/0` return parent value
- `parents/0` output parents of value
- `grep/1`, `grep/2` recursively match value and buffer
- `vgrep/1`, `vgrep/2` recursively match value
- `bgrep/1`, `bgrep/2` recursively match buffer
- `fgrep/1`, `fgrep/2` recursively match field name
- `find/1`, `find/2` match in buffer and output match buffers
- `find` and `grep` all take 1 or 2 arguments. The first is a scalar to match, where a string is
treated as a regexp. A buffer will be matched as exact bytes. The second argument is regexp
flags, with the addition of "b", which treats each byte in the input buffer as a rune. This
makes it possible to match exact bytes, e.g. `find("\u00ff"; "b")` will match the byte `0xff` and not
the UTF-8 encoded codepoint `0xff`.
- `find/1`, `find/2` match in buffer and output match buffers
- `grep/1`, `grep/2` recursively match value and buffer
- `vgrep/1`, `vgrep/2` recursively match value
- `bgrep/1`, `bgrep/2` recursively match buffer
- `fgrep/1`, `fgrep/2` recursively match field name
- `open` open file for reading
- `probe` or `decode` probe format and decode
- `mp3`, `matroska`, ..., `<name>`, `decode([name])` force decode as format
Expand Down
1 change: 1 addition & 0 deletions pkg/bitio/bitio.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ func Copy(dst BitWriter, src BitReader) (n int64, err error) {
return CopyBuffer(dst, src, nil)
}

// BitsByteCount returns the smallest number of bytes needed to fit nBits bits.
func BitsByteCount(nBits int64) int64 {
n := nBits / 8
if nBits%8 != 0 {
Expand Down
20 changes: 16 additions & 4 deletions pkg/interp/funcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -817,12 +817,24 @@ func (i *Interp) find(c interface{}, a []interface{}) gojq.Iter {
}

var re string
re, ok = a[0].(string)
if !ok {
return gojq.NewIter(gojqextra.FuncTypeError{Name: "find", Typ: "string"})
var flags string

switch a0 := a[0].(type) {
case string:
re = a0
default:
reBuf, err := toBytes(a0)
if err != nil {
return gojq.NewIter(err)
}
var reRs []rune
for _, b := range reBuf {
reRs = append(reRs, rune(b))
}
flags = "b"
re = string(reRs)
}

var flags string
if len(a) > 1 {
flags, ok = a[1].(string)
if !ok {
Expand Down
21 changes: 14 additions & 7 deletions pkg/interp/grep.jq
Original file line number Diff line number Diff line change
Expand Up @@ -15,46 +15,53 @@ def _value_grep_string_cond($v; $flags):
else false
end
)? // false;

# Match condition used by the value greps for the non-string case: converts the
# input with _tovalue and tests it for equality with $v.
# Any error raised while evaluating is suppressed by `?`, and `// false` makes
# the condition yield false in that case (and when the comparison is false).
# $flags is accepted for signature parity but is not used in the body.
def _value_grep_other_cond($v; $flags):
( _tovalue
| . == $v
)? // false;

# vgrep/2: grep on values. Delegates to _grep (defined elsewhere) passing, in
# order: the match value $v, the _is_scalar filter, the string condition
# _value_grep_string_cond and the fallback condition _value_grep_other_cond.
# NOTE(review): presumably _grep walks the input recursively, uses the second
# argument to select candidates and picks the string or other condition based
# on the type of $v; confirm against the definition of _grep.
def vgrep($v; $flags):
_grep(
$v;
_is_scalar;
_value_grep_string_cond($v; $flags);
_value_grep_other_cond($v; $flags)
);

# vgrep/1: same as vgrep/2 with an empty flags string.
def vgrep($v): vgrep($v; "");

def _buf_grep_string_cond($v; $flags):
def _buf_grep_any_cond($v; $flags):
(isempty(find($v; $flags)) | not)? // false;
def bgrep($v; $flags):
_grep(
$v;
_is_scalar;
_buf_grep_string_cond($v; $flags);
empty
_buf_grep_any_cond($v; $flags);
_buf_grep_any_cond($v; $flags)
);

# bgrep/1: same as bgrep/2 with an empty flags string.
def bgrep($v): bgrep($v; "");

def grep($v; $flags):
_grep(
$v;
_is_scalar;
_buf_grep_string_cond($v; $flags) or _value_grep_string_cond($v; $flags);
_value_grep_other_cond($v; $flags)
_buf_grep_any_cond($v; $flags) or _value_grep_string_cond($v; $flags);
_buf_grep_any_cond($v; $flags) or _value_grep_other_cond($v; $flags)
);

# grep/1: same as grep/2 with an empty flags string.
def grep($v): grep($v; "");

def _field_grep_string_cond($v; $flags):
(has("_name") and (._name | test($v; $flags)))? // false;
(._name | test($v; $flags))? // false;

def fgrep($v; $flags):
_grep(
$v;
true;
_is_decode_value;
_field_grep_string_cond($v; $flags);
empty
);

# fgrep/1: same as fgrep/2 with an empty flags string.
def fgrep($v): fgrep($v; "");
10 changes: 8 additions & 2 deletions pkg/interp/interp.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ func toBigInt(v interface{}) (*big.Int, error) {

func toBytes(v interface{}) ([]byte, error) {
switch v := v.(type) {
// TODO: remove?
case []byte:
return v, nil
default:
Expand Down Expand Up @@ -307,10 +308,15 @@ func toBufferEx(v interface{}, inArray bool) (*bitio.Buffer, error) {
}

if inArray {
b := [1]byte{byte(bi.Uint64())}
if bi.Cmp(big.NewInt(255)) > 0 || bi.Cmp(big.NewInt(0)) < 0 {
return nil, fmt.Errorf("buffer byte list must be bytes (0-255) got %v", bi)
}
n := bi.Uint64()
b := [1]byte{byte(n)}
return bitio.NewBufferFromBytes(b[:], -1), nil
}

// TODO: how should this work? "0xf | tobytes" 4bits or 8bits? now 4
padBefore := (8 - (bi.BitLen() % 8)) % 8
bb, err := bitio.NewBufferFromBytes(bi.Bytes(), -1).BitBufRange(int64(padBefore), int64(bi.BitLen()))
if err != nil {
Expand All @@ -319,7 +325,7 @@ func toBufferEx(v interface{}, inArray bool) (*bitio.Buffer, error) {
return bb, nil
case []interface{}:
var rr []bitio.BitReadAtSeeker
// TODO: optimize byte array case
// TODO: optimize byte array case, flatten into one slice
for _, e := range vv {
eBB, eErr := toBufferEx(e, true)
if eErr != nil {
Expand Down
15 changes: 15 additions & 0 deletions pkg/interp/testdata/buffer.fqtest
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,18 @@ $ fq -d mp3 '.frames[0].padding | ("", "md5", "base64", "snippet") as $f | toval
"ca9c491ac66b2c62500882e93f3719a8"
"AAAAAAA="
"<5>AAAAAAA="
$ fq -d mp3 -i . /test.mp3
mp3> [1, 2, 3] | tobytes
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0|01 02 03| |...| |.: none 0x0-0x2.7 (3)
mp3> [1, 2, 3, [1, 2, 3]] | tobytes
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0|01 02 03 01 02 03| |......| |.: none 0x0-0x5.7 (6)
mp3> [1, 2, 3, [1, 2, 3], .headers[0].magic] | tobytes
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0|01 02 03 01 02 03 49 44 33| |......ID3| |.: none 0x0-0x8.7 (9)
mp3> [-1] | tobytes
error: buffer byte list must be bytes (0-255) got -1
mp3> [256] | tobytes
error: buffer byte list must be bytes (0-255) got 256
mp3> ^D
17 changes: 13 additions & 4 deletions pkg/interp/testdata/grep.fqtest
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
$ fq -i -d mp3 . /test.mp3
mp3> grep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
mp3> grep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff", [0x49, 0x44])
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x20| 40| @|.frames[0].header.sample_rate: 44100
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
Expand All @@ -14,7 +14,9 @@ mp3> grep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info"
mp3> vgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
mp3> vgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff", [0x49, 0x44])
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x20| 40| @|.frames[0].header.sample_rate: 44100
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
Expand All @@ -29,10 +31,10 @@ mp3> vgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info"
mp3> fgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
mp3> fgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff", [0x49, 0x44])
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
mp3> bgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
mp3> bgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff", [0x49, 0x44])
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
Expand All @@ -41,6 +43,8 @@ mp3> bgrep(44100, "ID", "^ID3$", "^ID.?$", "Info", "magic", "\u00ff")
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x40| 49 6e 66 6f | Info |.frames[0].xing.header: "Info"
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0|49 44 33 |ID3 |.headers[0].magic: "ID3" (Correct)
mp3> "64ff65ff66" | hex | bgrep("\u00ff"; "b")
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0|64 ff 65 ff 66| |d.e.f| |.: none 0x0-0x4.7 (5)
Expand All @@ -59,4 +63,9 @@ mp3> "aöaöa" | find("\u00c3"; "b")
0x0| c3 b6 61 c3 b6 61| | ..a..a| |.: none 0x1-0x6.7 (6)
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0| c3 b6 61| | ..a| |.: none 0x4-0x6.7 (3)
mp3> "aöaöa" | find([0xc3])
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0| c3 b6 61 c3 b6 61| | ..a..a| |.: none 0x1-0x6.7 (6)
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0| c3 b6 61| | ..a| |.: none 0x4-0x6.7 (3)
mp3> ^D

0 comments on commit 7298a4c

Please sign in to comment.