Skip to content

Commit

Permalink
decode: Rework use of TryFieldReaderRangeFormat
Browse files Browse the repository at this point in the history
Handle failure better
  • Loading branch information
wader committed Nov 24, 2021
1 parent 4f0bf92 commit 9d116df
Show file tree
Hide file tree
Showing 7 changed files with 58 additions and 54 deletions.
63 changes: 32 additions & 31 deletions format/bzip2/bzip2.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ package bzip2
import (
"compress/bzip2"
"encoding/binary"
"errors"
"hash/crc32"
"io"
"math/bits"
Expand Down Expand Up @@ -108,40 +107,42 @@ func bzip2Decode(d *decode.D, in interface{}) interface{} {

compressedStart := d.Pos()

readCompressedSize, uncompressedBB, dv, _, err := d.TryFieldReaderRangeFormat("uncompressed", 0, d.Len(), bzip2.NewReader, probeGroup, nil)
if dv == nil && errors.As(err, &decode.FormatsError{}) {
d.FieldRootBitBuf("uncompressed", uncompressedBB)
}
readCompressedSize, uncompressedBB, dv, _, _ := d.TryFieldReaderRangeFormat("uncompressed", 0, d.Len(), bzip2.NewReader, probeGroup, nil)
if uncompressedBB != nil {
if dv == nil {
d.FieldRootBitBuf("uncompressed", uncompressedBB)
}

blockCRC32W := crc32.NewIEEE()
if _, err := d.Copy(blockCRC32W, bitFlipReader{uncompressedBB.Clone()}); err != nil {
d.IOPanic(err)
}
blockCRC32N := bits.Reverse32(binary.BigEndian.Uint32(blockCRC32W.Sum(nil)))
_ = blockCRCValue.TryScalarFn(d.ValidateU(uint64(blockCRC32N)))
streamCRCN = blockCRC32N ^ ((streamCRCN << 1) | (streamCRCN >> 31))

// HACK: bzip2.NewReader will read from start of whole buffer and then we figure out compressedSize ourself
// "It is important to note that none of the fields within a StreamBlock or StreamFooter are necessarily byte-aligned"
const footerByteSize = 10
compressedSize := (readCompressedSize - compressedStart) - footerByteSize*8
for i := 0; i < 8; i++ {
d.SeekAbs(compressedStart + compressedSize)
if d.PeekBits(48) == footerMagic {
break
blockCRC32W := crc32.NewIEEE()
if _, err := d.Copy(blockCRC32W, bitFlipReader{uncompressedBB.Clone()}); err != nil {
d.IOPanic(err)
}
compressedSize--
}
d.SeekAbs(compressedStart)
blockCRC32N := bits.Reverse32(binary.BigEndian.Uint32(blockCRC32W.Sum(nil)))
_ = blockCRCValue.TryScalarFn(d.ValidateU(uint64(blockCRC32N)))
streamCRCN = blockCRC32N ^ ((streamCRCN << 1) | (streamCRCN >> 31))

// HACK: bzip2.NewReader will read from start of whole buffer and then we figure out compressedSize ourselves
// "It is important to note that none of the fields within a StreamBlock or StreamFooter are necessarily byte-aligned"
const footerByteSize = 10
compressedSize := (readCompressedSize - compressedStart) - footerByteSize*8
for i := 0; i < 8; i++ {
d.SeekAbs(compressedStart + compressedSize)
if d.PeekBits(48) == footerMagic {
break
}
compressedSize--
}
d.SeekAbs(compressedStart)

d.FieldRawLen("compressed", compressedSize)
d.FieldRawLen("compressed", compressedSize)

d.FieldStruct("footer", func(d *decode.D) {
d.FieldU48("magic", d.AssertU(footerMagic), d.Hex)
// TODO: crc of block crcs
d.FieldU32("crc", d.Hex, d.ValidateU(uint64(streamCRCN)))
d.FieldRawLen("padding", int64(d.ByteAlignBits()))
})
d.FieldStruct("footer", func(d *decode.D) {
d.FieldU48("magic", d.AssertU(footerMagic), d.Hex)
// TODO: crc of block crcs
d.FieldU32("crc", d.Hex, d.ValidateU(uint64(streamCRCN)))
d.FieldRawLen("padding", int64(d.ByteAlignBits()))
})
}

// moreStreams = false
// }
Expand Down
24 changes: 12 additions & 12 deletions format/gzip/gzip.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ package gz

import (
"compress/flate"
"errors"
"hash/crc32"
"io"

Expand Down Expand Up @@ -106,18 +105,19 @@ func gzDecode(d *decode.D, in interface{}) interface{} {
rFn = func(r io.Reader) io.Reader { return flate.NewReader(r) }
}

readCompressedSize, uncompressedBB, dv, _, err := d.TryFieldReaderRangeFormat("uncompressed", d.Pos(), d.BitsLeft(), rFn, probeFormat, nil)
if dv == nil && errors.As(err, &decode.FormatsError{}) {
d.FieldRootBitBuf("uncompressed", uncompressedBB)
readCompressedSize, uncompressedBB, dv, _, _ := d.TryFieldReaderRangeFormat("uncompressed", d.Pos(), d.BitsLeft(), rFn, probeFormat, nil)
if uncompressedBB != nil {
if dv == nil {
d.FieldRootBitBuf("uncompressed", uncompressedBB)
}
d.FieldRawLen("compressed", readCompressedSize)
crc32W := crc32.NewIEEE()
if _, err := io.Copy(crc32W, uncompressedBB.Clone()); err != nil {
d.IOPanic(err)
}
d.FieldU32("crc32", d.ValidateUBytes(crc32W.Sum(nil)), d.Hex)
d.FieldU32("isize")
}
d.FieldRawLen("compressed", readCompressedSize)

crc32W := crc32.NewIEEE()
if _, err := io.Copy(crc32W, uncompressedBB.Clone()); err != nil {
d.IOPanic(err)
}
d.FieldU32("crc32", d.ValidateUBytes(crc32W.Sum(nil)), d.Hex)
d.FieldU32("isize")

return nil
}
10 changes: 7 additions & 3 deletions format/json/json.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ import (
"github.com/wader/fq/pkg/decode"
)

// TODO: should read multiple json values or just one?
// TODO: if root is not array/struct, how to add unknown gaps?
// TODO: ranges do not end up correct
// TODO: use jd.InputOffset() * 8?

func init() {
registry.MustRegister(decode.Format{
Name: format.JSON,
Expand All @@ -31,10 +36,9 @@ func decodeJSON(d *decode.D, in interface{}) interface{} {
default:
d.Fatalf("root not object or array")
}
// TODO: root not array/struct how to add unknown gaps?
// TODO: ranges not end up correct

d.Value.V = s
d.Value.Range.Len = jd.InputOffset() * 8
d.Value.Range.Len = d.Len()

return nil
}
4 changes: 2 additions & 2 deletions format/json/testdata/json.fqtest
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ $ fq . /json.gz
0x00| 65 0a 08 61 | e..a | mtime: 1627916901
0x00| 00 | . | extra_flags: 0
0x00| 03 | . | os: "Unix" (3)
0x0|7b 22 61 22 3a 20 31 32 33 7d |{"a": 123} | uncompressed: {} (json)
0x0|7b 22 61 22 3a 20 31 32 33 7d 0a| |{"a": 123}.| | uncompressed: {} (json)
0x00| ab 56 4a 54 b2 52| .VJT.R| compressed: raw bits
0x10|30 34 32 ae e5 02 00 |042.... |
0x10| 20 ac d2 9c | ... | crc32: 0x9cd2ac20 (valid)
Expand Down Expand Up @@ -57,4 +57,4 @@ $ fq tovalue /json.gz
}
$ fq .uncompressed /json.gz
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|
0x0|7b 22 61 22 3a 20 31 32 33 7d |{"a": 123} |.uncompressed: {} (json)
0x0|7b 22 61 22 3a 20 31 32 33 7d 0a| |{"a": 123}.| |.uncompressed: {} (json)
5 changes: 2 additions & 3 deletions format/vorbis/vorbis_comment.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package vorbis

import (
"encoding/base64"
"errors"
"io"
"strings"

Expand Down Expand Up @@ -44,8 +43,8 @@ func commentDecode(d *decode.D, in interface{}) interface{} {

rFn := func(r io.Reader) io.Reader { return base64.NewDecoder(base64.StdEncoding, r) }

_, uncompressedBB, dv, _, err := d.TryFieldReaderRangeFormat("picture", userCommentStart+base64Offset, base64Len, rFn, flacPicture, nil)
if dv == nil && errors.As(err, &decode.FormatsError{}) {
_, uncompressedBB, dv, _, _ := d.TryFieldReaderRangeFormat("picture", userCommentStart+base64Offset, base64Len, rFn, flacPicture, nil)
if dv == nil && uncompressedBB != nil {
d.FieldRootBitBuf("picture", uncompressedBB)
}
}
Expand Down
5 changes: 2 additions & 3 deletions format/zip/zip.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ package zip
import (
"bytes"
"compress/flate"
"errors"
"io"

"github.com/wader/fq/format"
Expand Down Expand Up @@ -290,8 +289,8 @@ func zipDecode(d *decode.D, in interface{}) interface{} {
}

if rFn != nil {
readCompressedSize, uncompressedBB, dv, _, err := d.TryFieldReaderRangeFormat("uncompressed", d.Pos(), compressedLimit, rFn, probeFormat, nil)
if dv == nil && errors.As(err, &decode.FormatsError{}) {
readCompressedSize, uncompressedBB, dv, _, _ := d.TryFieldReaderRangeFormat("uncompressed", d.Pos(), compressedLimit, rFn, probeFormat, nil)
if dv == nil && uncompressedBB != nil {
d.FieldRootBitBuf("uncompressed", uncompressedBB)
}
if compressedSize == 0 {
Expand Down
1 change: 1 addition & 0 deletions pkg/decode/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,7 @@ func (d *D) FieldFormatReaderLen(name string, nBits int64, fn func(r io.Reader)
return d.FieldFormatBitBuf(name, zbb, group, nil)
}

// TODO: too many return values
func (d *D) TryFieldReaderRangeFormat(name string, startBit int64, nBits int64, fn func(r io.Reader) io.Reader, group Group, inArg interface{}) (int64, *bitio.Buffer, *Value, interface{}, error) {
bitLen := nBits
if bitLen == -1 {
Expand Down

0 comments on commit 9d116df

Please sign in to comment.