Skip to content

Commit

Permalink
Add *grep/1/2 and find/1/2
Browse files Browse the repository at this point in the history
  • Loading branch information
wader committed Oct 12, 2021
1 parent 3044fef commit e86b45b
Show file tree
Hide file tree
Showing 15 changed files with 437 additions and 121 deletions.
5 changes: 5 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@
"APIC",
"Arity",
"BCDU",
"bgrep",
"bzip",
"CCIT",
"chzyer",
"CLIUNICODE",
"CLLID",
"coef",
"colorjson",
"cond",
"cpus",
"ctxreadseeker",
"ctxstack",
Expand All @@ -30,6 +32,7 @@
"Exif",
"Exiter",
"FALLID",
"fgrep",
"fpbits",
"fqtest",
"ftyp",
Expand Down Expand Up @@ -60,6 +63,7 @@
"ipco",
"iprint",
"iprp",
"isempty",
"itchyny",
"ldflags",
"libavformat",
Expand Down Expand Up @@ -131,6 +135,7 @@
"unparam",
"Unsychronized",
"UTCID",
"vgrep",
"WEBP",
"Xiph",
"xrange"
Expand Down
4 changes: 4 additions & 0 deletions doc/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@ notable is support for arbitrary-precision integers.
- `format_root/0` return root value of format for value
- `parent/0` return parent value
- `parents/0` output parents of value
- `grep/1`, `grep/2` recursively match value and buffer
- `vgrep/1`, `vgrep/2` recursively match value
- `bgrep/1`, `bgrep/2` recursively match buffer
- `fgrep/1`, `fgrep/2` recursively match field name
- `open` open file for reading
- `probe` or `decode` probe format and decode
- `mp3`, `matroska`, ..., `<name>`, `decode([name])` force decode as format
Expand Down
28 changes: 28 additions & 0 deletions internal/gojqextra/regexp.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package gojqextra

import (
"fmt"
"regexp"
"strings"
)

// CompileRegexp compiles re into a *regexp.Regexp, applying jq-style flags.
//
// Every character of flags must also appear in allowedFlags; otherwise an
// "unsupported regular expression flag" error is returned. Before compiling,
// each "(?<" in re is rewritten to "(?P<", the "i" flag prepends "(?i)" and
// the "m" flag prepends "(?s)". A compilation failure is wrapped with %w and
// reports the rewritten expression.
//
// Adapted from gojq, see https://github.com/itchyny/gojq/blob/main/LICENSE
func CompileRegexp(re, allowedFlags, flags string) (*regexp.Regexp, error) {
	for _, flag := range flags {
		if !strings.ContainsRune(allowedFlags, flag) {
			return nil, fmt.Errorf("unsupported regular expression flag: %q", flags)
		}
	}

	expr := strings.ReplaceAll(re, "(?<", "(?P<")

	// Order matters: a later entry ends up in front of an earlier one.
	prefixes := []struct {
		flag   rune
		prefix string
	}{
		{'i', "(?i)"},
		{'m', "(?s)"},
	}
	for _, p := range prefixes {
		if strings.ContainsRune(flags, p.flag) {
			expr = p.prefix + expr
		}
	}

	compiled, err := regexp.Compile(expr)
	if err != nil {
		return nil, fmt.Errorf("invalid regular expression %q: %w", expr, err)
	}
	return compiled, nil
}
88 changes: 88 additions & 0 deletions internal/ioextra/runereader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
package ioextra

import (
"io"
"math/bits"
"unicode/utf8"
)

// ByteRuneReader reads each byte from an io.ReadSeeker as a single rune.
// Example: when used with regexp, the code point \u00ff matches the raw byte
// 0xff and not the UTF-8 encoded form of U+00FF.
type ByteRuneReader struct {
	RS io.ReadSeeker
}

// ReadRune reads exactly one byte from RS and returns it as a rune of size 1.
// If the read fails, the error is returned together with a zero rune and size.
func (brr ByteRuneReader) ReadRune() (r rune, size int, err error) {
	var single [1]byte
	if _, readErr := io.ReadFull(brr.RS, single[:]); readErr != nil {
		return 0, 0, readErr
	}
	return rune(single[0]), 1, nil
}

// Seek forwards to the Seek method of the wrapped RS.
func (brr ByteRuneReader) Seek(offset int64, whence int) (int64, error) {
	pos, err := brr.RS.Seek(offset, whence)
	return pos, err
}

// RuneReadSeeker decodes UTF-8 encoded runes from an io.ReadSeeker.
type RuneReadSeeker struct {
	RS io.ReadSeeker
}

// utf8Bytes returns the total number of bytes in the UTF-8 sequence that the
// lead byte b announces, or -1 if b cannot start a sequence.
func utf8Bytes(b byte) int {
	// The number of leading one bits selects the sequence length:
	// 0b0xxxxxxx 1 byte
	// 0b110xxxxx 2 bytes
	// 0b1110xxxx 3 bytes
	// 0b11110xxx 4 bytes
	// A single leading one is a continuation byte and five or more are not
	// valid lead bytes.
	switch ones := bits.LeadingZeros8(^b); ones {
	case 0:
		return 1
	case 2, 3, 4:
		return ones
	default:
		return -1
	}
}

// ReadRune reads a single UTF-8 encoded rune from RS and returns it together
// with the number of bytes it occupies.
//
// Bytes that do not form a valid encoding yield utf8.RuneError with size 1 and
// the read position is left directly after that one byte, so decoding resumes
// at the following byte. This also covers a multi-byte sequence that is cut
// short by the end of the input: the lead byte is reported as utf8.RuneError
// instead of surfacing io.ErrUnexpectedEOF and losing the trailing bytes.
//
// An error is returned, with a zero rune and size, when no byte could be read
// (io.EOF at end of input), when reading continuation bytes fails for a reason
// other than end of input, or when seeking back fails.
func (rrs RuneReadSeeker) ReadRune() (r rune, size int, err error) {
	var b [utf8.UTFMax]byte

	if _, err = io.ReadFull(rrs.RS, b[0:1]); err != nil {
		return 0, 0, err
	}
	if b[0] < utf8.RuneSelf {
		return rune(b[0]), 1, nil
	}

	ss := utf8Bytes(b[0])
	if ss < 0 {
		return utf8.RuneError, 1, nil
	}

	n, err := io.ReadFull(rrs.RS, b[1:ss])
	if err != nil {
		if err != io.EOF && err != io.ErrUnexpectedEOF {
			return 0, 0, err
		}
		// The input ended inside the sequence. Give back the continuation
		// bytes that were consumed so they are decoded on their own, and
		// report only the lead byte as invalid.
		if n > 0 {
			if _, err := rrs.Seek(int64(-n), io.SeekCurrent); err != nil {
				return 0, 0, err
			}
		}
		return utf8.RuneError, 1, nil
	}

	r, s := utf8.DecodeRune(b[0:ss])
	// DecodeRune consumes fewer than ss bytes when the sequence is invalid, in
	// that case rewind so the unconsumed bytes are read again.
	// TODO: better way that does not require seeking back? buffer? one at a time?
	if d := ss - s; d > 0 {
		if _, err := rrs.Seek(int64(-d), io.SeekCurrent); err != nil {
			return 0, 0, err
		}
	}

	return r, s, nil
}

// Seek forwards to the Seek method of the wrapped RS.
func (rrs RuneReadSeeker) Seek(offset int64, whence int) (int64, error) {
	return rrs.RS.Seek(offset, whence)
}
12 changes: 7 additions & 5 deletions pkg/bitio/bitio.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
package bitio

// TODO: should return int64?
// TODO: document len(p)/nBits, should be +1 for when not aligned

import (
"errors"
"io"
Expand Down Expand Up @@ -346,8 +343,8 @@ func (r *SectionBitReader) Read(p []byte) (n int, err error) {
}

// Seek is the byte-oriented counterpart of SeekBits. offset is given in bytes
// and is multiplied by 8 before being passed to SeekBits together with
// whence. The position SeekBits reports is divided by 8 so the returned
// position is in bytes again (assumes SeekBits reports a position in bits,
// TODO confirm against SeekBits).
func (r *SectionBitReader) Seek(offset int64, whence int) (int64, error) {
	seekBitsPos, err := r.SeekBits(offset*8, whence)
	return seekBitsPos / 8, err
}

// TODO: smart, track index?
Expand Down Expand Up @@ -443,3 +440,8 @@ func (m *MultiBitReader) Read(p []byte) (n int, err error) {

return int(BitsByteCount(int64(n))), nil
}

// Seek is the byte-oriented counterpart of SeekBits. offset is given in bytes
// and is multiplied by 8 before being passed to SeekBits together with
// whence. The position SeekBits reports is divided by 8 before it is returned.
func (m *MultiBitReader) Seek(offset int64, whence int) (int64, error) {
	bitPos, err := m.SeekBits(offset*8, whence)
	bytePos := bitPos / 8
	return bytePos, err
}
14 changes: 6 additions & 8 deletions pkg/bitio/buffer.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
package bitio

// not concurrency safe as bitsBuf is reused

// TODO:
// cache pos, len
// inline for speed?
// F -> FLT?
// UTF16/UTF32

import (
"bytes"
"errors"
Expand All @@ -20,6 +12,7 @@ import (
type Buffer struct {
br interface {
io.Reader // both Reader and SectionBitReader implement io.Reader
io.Seeker
BitReadSeeker
BitReader
BitReaderAt
Expand Down Expand Up @@ -51,6 +44,7 @@ func NewBufferFromReadSeeker(rs io.ReadSeeker) (*Buffer, error) {

func NewBufferFromBitReadSeeker(br interface {
io.Reader
io.Seeker
BitReadSeeker
BitReaderAt
}) (*Buffer, error) {
Expand Down Expand Up @@ -175,6 +169,10 @@ func (b *Buffer) Read(p []byte) (n int, err error) {
return b.br.Read(p)
}

// Seek forwards offset and whence to the Seek method of the underlying bit
// reader and returns its result unchanged.
func (b *Buffer) Seek(offset int64, whence int) (int64, error) {
	pos, err := b.br.Seek(offset, whence)
	return pos, err
}

// BytesRange reads nBytes bytes starting at bit position start
// Does not update current position.
// TODO: swap args
Expand Down
1 change: 1 addition & 0 deletions pkg/interp/buffer.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ func (bv BufferView) JQValueLength() interface{} {
// JQValueSliceLen returns the length to use when slicing the view, which is
// the same value JQValueLength reports.
func (bv BufferView) JQValueSliceLen() interface{} {
	length := bv.JQValueLength()
	return length
}

func (bv BufferView) JQValueIndex(index int) interface{} {
if index < 0 {
return ""
Expand Down

0 comments on commit e86b45b

Please sign in to comment.