Skip to content

Commit

Permalink
decode: Fix bitbuf root handling a bit
Browse files: browse the repository at this point in the history
  • Loading branch information
wader committed Nov 18, 2021
1 parent c083a9e commit 6fba1a8
Show file tree
Hide file tree
Showing 11 changed files with 131 additions and 104 deletions.
50 changes: 35 additions & 15 deletions format/bzip2/bzip2.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,17 @@ func init() {
Name: format.BZIP2,
Description: "bzip2 compression",
Groups: []string{format.PROBE},
DecodeFn: gzDecode,
DecodeFn: bzip2Decode,
Dependencies: []decode.Dependency{
{Names: []string{format.PROBE}, Group: &probeGroup},
},
})
}

func gzDecode(d *decode.D, in interface{}) interface{} {
const blockMagic = 0x31_41_59_26_53_59
const footerMagic = 0x17_72_45_38_50_90

func bzip2Decode(d *decode.D, in interface{}) interface{} {
// moreStreams := true

// d.FieldArray("streams", func(d *decode.D) {
Expand All @@ -43,13 +46,12 @@ func gzDecode(d *decode.D, in interface{}) interface{} {
d.FieldU8("hundred_k_blocksize")

d.FieldStruct("block", func(d *decode.D) {
const blockHeaderMagic = 0x31_41_59_26_53_59
// if d.PeekBits(48) != blockHeaderMagic {
// moreStreams = false
// return
// }
d.FieldU48("compressed_magic", d.AssertU(blockHeaderMagic), d.Hex)
d.FieldU32("crc")
d.FieldU48("magic", d.AssertU(blockMagic), d.Hex)
d.FieldU32("crc", d.Hex)
d.FieldU1("randomised")
d.FieldU24("origptr")
d.FieldU16("syncmapl1")
Expand Down Expand Up @@ -87,11 +89,13 @@ func gzDecode(d *decode.D, in interface{}) interface{} {
})
})

compressedStart := d.Pos()

compressedBB := d.BitBufRange(0, d.Len())
deflateR := bzip2.NewReader(compressedBB)
uncompressed := &bytes.Buffer{}
crc32W := crc32.NewIEEE()
if _, err := decode.Copy(d, io.MultiWriter(uncompressed, crc32W), deflateR); err != nil {
if _, err := d.Copy(io.MultiWriter(uncompressed, crc32W), deflateR); err != nil {
d.Fatalf(err.Error())
}
// calculatedCRC32 := crc32W.Sum(nil)
Expand All @@ -101,16 +105,32 @@ func gzDecode(d *decode.D, in interface{}) interface{} {
d.FieldRootBitBuf("uncompressed", uncompressedBB)
}

// if calculatedCRC32 != nil {
// d.FieldChecksumLen("crc32", 32, calculatedCRC32, decode.LittleEndian)
// } else {
// d.FieldU32LE("crc32")
// }
p, err := compressedBB.Pos()
if err != nil {
d.IOPanic((err))
}

// d.FieldU48("footer_magic")
// d.FieldU32("crc")
// byte align padding
// })
// TODO: compressedSize is a horrible hack for now
// "It is important to note that none of the fields within a StreamBlock or StreamFooter are necessarily byte-aligned"
const footerByteSize = 10
compressedSize := (p - compressedStart) - footerByteSize*8
for i := 0; i < 8; i++ {
d.SeekAbs(compressedStart + compressedSize)
if d.PeekBits(48) == footerMagic {
break
}
compressedSize--
}
d.SeekAbs(compressedStart)

d.FieldRawLen("compressed", compressedSize)

d.FieldStruct("footer", func(d *decode.D) {
d.FieldU48("magic", d.AssertU(footerMagic), d.Hex)
// TODO: crc of block crcs
d.FieldU32("crc", d.Hex)
d.FieldRawLen("padding", int64(d.ByteAlignBits()))
})

// moreStreams = false

Expand Down
14 changes: 9 additions & 5 deletions format/bzip2/testdata/test.fqtest
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,11 @@
$ fq -d bzip2 verbose /test.bz2
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|.: {} /test.bz2 (bzip2) 0x0-0x2c.7 (45)
0x00|42 5a |BZ | magic: "BZ" (valid) 0x0-0x1.7 (2)
0x0|74 65 73 74 0a| |test.| | uncompressed: raw bits 0x0-0x4.7 (5)
0x00| 68 | h | version: 104 0x2-0x2.7 (1)
0x00| 39 | 9 | hundred_k_blocksize: 57 0x3-0x3.7 (1)
| | | block: {} 0x4-0x1c.7 (25)
0x00| 31 41 59 26 53 59 | 1AY&SY | compressed_magic: 0x314159265359 (valid) 0x4-0x9.7 (6)
0x00| cc c3 71 d4 | ..q. | crc: 3435360724 0xa-0xd.7 (4)
0x00| 31 41 59 26 53 59 | 1AY&SY | magic: 0x314159265359 (valid) 0x4-0x9.7 (6)
0x00| cc c3 71 d4 | ..q. | crc: 0xccc371d4 0xa-0xd.7 (4)
0x00| 00 | . | randomised: 0 0xe-0xe (0.1)
0x00| 00 00| ..| origptr: 4 0xe.1-0x11 (3)
0x10|02 41 |.A |
Expand All @@ -20,5 +19,10 @@ $ fq -d bzip2 verbose /test.bz2
| | | trees: [2] 0x1b.4-0x1c.7 (1.4)
0x10| 21 9a | !. | [0]: tree 3 0x1b.4-0x1c.1 (0.6)
0x10| 9a | . | [1]: tree 13 0x1c.2-0x1c.7 (0.6)
0x10| 68 33 4d| h3M| unknown0: raw bits 0x1d-0x2c.7 (16)
0x20|19 97 8b b9 22 9c 28 48 66 61 b8 ea 00| |....".(Hfa...| |
0x0|74 65 73 74 0a| |test.| | uncompressed: raw bits 0x0-0x4.7 (5)
0x10| 68 33 4d| h3M| compressed: raw bits 0x1d-0x22 (5.1)
0x20|19 97 8b |... |
| | | footer: {} 0x22.1-0x2c.7 (10.7)
0x20| 8b b9 22 9c 28 48 66 | ..".(Hf | magic: 0x177245385090 (valid) 0x22.1-0x28 (6)
0x20| 66 61 b8 ea 00| | fa...| | crc: 0xccc371d4 0x28.1-0x2c (4)
0x20| 00| | .| | padding: raw bits 0x2c.1-0x2c.7 (0.7)
10 changes: 4 additions & 6 deletions format/flac/testdata/picture_seek_gain.fqtest
Original file line number Diff line number Diff line change
Expand Up @@ -169,9 +169,8 @@ $ fq -d flac verbose /picture_seek_gain.flac
0x01f0| 45 | E | private: false 0x1f6.3-0x1f6.3 (0.1)
0x01f0| 58 | X | reserved: true 0x1f7.3-0x1f7.3 (0.1)
0x01f0| 74 | t | safe_to_copy: true 0x1f8.3-0x1f8.3 (0.1)
0x01f0| 64 61 74 65 3a 63 72| date:cr| keyword: "date:create" 0x1f9-0x203.7 (11)
0x0200|65 61 74 65 |eate |
0x0200| 00 | . | null: "\x00" 0x204-0x204.7 (1)
0x01f0| 64 61 74 65 3a 63 72| date:cr| keyword: "date:create" 0x1f9-0x204.7 (12)
0x0200|65 61 74 65 00 |eate. |
0x0200| 32 30 32 31 2d 30 32 2d 32 37 54| 2021-02-27T| text: "2021-02-27T22:59:28+00:00" 0x205-0x21d.7 (25)
0x0210|32 32 3a 35 39 3a 32 38 2b 30 30 3a 30 30 |22:59:28+00:00 |
0x0210| 3b 7f| ;.| crc: "3b7fd305" (raw bits) (valid) 0x21e-0x221.7 (4)
Expand All @@ -183,9 +182,8 @@ $ fq -d flac verbose /picture_seek_gain.flac
0x0220| 45 | E | private: false 0x227.3-0x227.3 (0.1)
0x0220| 58 | X | reserved: true 0x228.3-0x228.3 (0.1)
0x0220| 74 | t | safe_to_copy: true 0x229.3-0x229.3 (0.1)
0x0220| 64 61 74 65 3a 6d| date:m| keyword: "date:modify" 0x22a-0x234.7 (11)
0x0230|6f 64 69 66 79 |odify |
0x0230| 00 | . | null: "\x00" 0x235-0x235.7 (1)
0x0220| 64 61 74 65 3a 6d| date:m| keyword: "date:modify" 0x22a-0x235.7 (12)
0x0230|6f 64 69 66 79 00 |odify. |
0x0230| 32 30 32 31 2d 30 32 2d 32 37| 2021-02-27| text: "2021-02-27T22:59:28+00:00" 0x236-0x24e.7 (25)
0x0240|54 32 32 3a 35 39 3a 32 38 2b 30 30 3a 30 30 |T22:59:28+00:00 |
0x0240| 4a| J| crc: "4a226bb9" (raw bits) (valid) 0x24f-0x252.7 (4)
Expand Down
2 changes: 1 addition & 1 deletion format/gzip/testdata/test.fqtest
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
$ fq -d gzip verbose /test.gz
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|.: {} /test.gz (gzip) 0x0-0x18.7 (25)
0x00|1f 8b |.. | identification: raw bits (valid) 0x0-0x1.7 (2)
0x0|74 65 73 74 0a| |test.| | uncompressed: raw bits 0x0-0x4.7 (5)
0x00| 08 | . | compression_method: "deflate" (8) 0x2-0x2.7 (1)
| | | flags: {} 0x3-0x3.7 (1)
0x00| 00 | . | text: false 0x3-0x3 (0.1)
Expand All @@ -16,5 +15,6 @@ $ fq -d gzip verbose /test.gz
0x00| 03 | . | os: "Unix" (3) 0x9-0x9.7 (1)
0x00| 2b 49 2d 2e e1 02| +I-...| compressed: raw bits 0xa-0x10.7 (7)
0x10|00 |. |
0x0|74 65 73 74 0a| |test.| | uncompressed: raw bits 0x0-0x4.7 (5)
0x10| c6 35 b9 3b | .5.; | crc32: "c635b93b" (raw bits) (valid) 0x11-0x14.7 (4)
0x10| 05 00 00 00| | ....| | isize: 5 0x15-0x18.7 (4)
10 changes: 4 additions & 6 deletions format/id3/testdata/utf16-apic.fqtest
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,8 @@ $ fq -d id3v2 verbose /utf16-apic
0x0c0| 45 | E | private: false 0xc6.3-0xc6.3 (0.1)
0x0c0| 58 | X | reserved: true 0xc7.3-0xc7.3 (0.1)
0x0c0| 74 | t | safe_to_copy: true 0xc8.3-0xc8.3 (0.1)
0x0c0| 64 61 74 65 3a 63 72| date:cr| keyword: "date:create" 0xc9-0xd3.7 (11)
0x0d0|65 61 74 65 |eate |
0x0d0| 00 | . | null: "\x00" 0xd4-0xd4.7 (1)
0x0c0| 64 61 74 65 3a 63 72| date:cr| keyword: "date:create" 0xc9-0xd4.7 (12)
0x0d0|65 61 74 65 00 |eate. |
0x0d0| 32 30 32 31 2d 30 35 2d 32 30 54| 2021-05-20T| text: "2021-05-20T20:53:36+00:00" 0xd5-0xed.7 (25)
0x0e0|32 30 3a 35 33 3a 33 36 2b 30 30 3a 30 30 |20:53:36+00:00 |
0x0e0| 67 53| gS| crc: "6753fe7a" (raw bits) (valid) 0xee-0xf1.7 (4)
Expand All @@ -134,9 +133,8 @@ $ fq -d id3v2 verbose /utf16-apic
0x0f0| 45 | E | private: false 0xf7.3-0xf7.3 (0.1)
0x0f0| 58 | X | reserved: true 0xf8.3-0xf8.3 (0.1)
0x0f0| 74 | t | safe_to_copy: true 0xf9.3-0xf9.3 (0.1)
0x0f0| 64 61 74 65 3a 6d| date:m| keyword: "date:modify" 0xfa-0x104.7 (11)
0x100|6f 64 69 66 79 |odify |
0x100| 00 | . | null: "\x00" 0x105-0x105.7 (1)
0x0f0| 64 61 74 65 3a 6d| date:m| keyword: "date:modify" 0xfa-0x105.7 (12)
0x100|6f 64 69 66 79 00 |odify. |
0x100| 32 30 32 31 2d 30 35 2d 32 30| 2021-05-20| text: "2021-05-20T20:53:36+00:00" 0x106-0x11e.7 (25)
0x110|54 32 30 3a 35 33 3a 33 36 2b 30 30 3a 30 30 |T20:53:36+00:00 |
0x110| 16| .| crc: "160e46c6" (raw bits) (valid) 0x11f-0x122.7 (4)
Expand Down
2 changes: 1 addition & 1 deletion format/json/testdata/json.fqtest
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@ $ fq .b[1] /test.json
$ fq . /json.gz
|00 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f|0123456789abcdef|.: {} /json.gz (gzip)
0x00|1f 8b |.. | identification: raw bits (valid)
0x0|7b 22 61 22 3a 20 31 32 33 7d |{"a": 123} | uncompressed: {} (json)
0x00| 08 | . | compression_method: "deflate" (8)
0x00| 00 | . | flags: {}
0x00| 65 0a 08 61 | e..a | mtime: 1627916901
0x00| 00 | . | extra_flags: 0
0x00| 03 | . | os: "Unix" (3)
0x00| ab 56 4a 54 b2 52| .VJT.R| compressed: raw bits
0x10|30 34 32 ae e5 02 00 |042.... |
0x0|7b 22 61 22 3a 20 31 32 33 7d |{"a": 123} | uncompressed: {} (json)
0x10| 20 ac d2 9c | ... | crc32: "20acd29c" (raw bits) (valid)
0x10| 0b 00 00 00| | ....|| isize: 11
$ fq tovalue /json.gz
Expand Down
51 changes: 20 additions & 31 deletions format/png/png.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ import (
"github.com/wader/fq/format"
"github.com/wader/fq/format/registry"
"github.com/wader/fq/pkg/decode"
"github.com/wader/fq/pkg/ranges"
)

var iccProfileFormat decode.Group
Expand Down Expand Up @@ -101,49 +100,39 @@ func pngDecode(d *decode.D, in interface{}) interface{} {
1: "Adam7 interlace",
}))
case "tEXt":
// TODO: latin1
keywordLen := int(d.PeekFindByte(0, 80))
d.FieldUTF8("keyword", keywordLen)
d.FieldUTF8("null", 1)
d.FieldUTF8("text", chunkLength-keywordLen-1)
d.FieldUTF8Null("keyword")
d.FieldUTF8("text", int(d.BitsLeft())/8)
case "zTXt":
// TODO: latin1
keywordLen := int(d.PeekFindByte(0, 80))
d.FieldUTF8("keyword", keywordLen)
d.FieldUTF8("null", 1)
d.FieldUTF8Null("keyword")
compressionMethod := d.FieldU8("compression_method", d.MapUToStrSym(compressionNames))
// +2 to skip null and compression_method
dataLen := (chunkLength - (keywordLen + 2)) * 8
dataLen := d.BitsLeft()

// TODO: make nicer
d.FieldRawLen("compressed", dataLen)
d.SeekRel(-dataLen)

switch compressionMethod {
case compressionDeflate:
dd := d.FieldStruct("data", func(d *decode.D) {
d.FieldFormatReaderLen("uncompressed", int64(dataLen), zlib.NewReader, decode.FormatFn(func(d *decode.D, in interface{}) interface{} {
d.FieldUTF8("text", int(d.BitsLeft()/8))
return nil
}))
})
// TODO: depends on isRoot in postProcess
dd.Value.Range = ranges.Range{Start: d.Pos() - int64(dataLen), Len: int64(dataLen)}
d.FieldFormatReaderLen("uncompressed", int64(dataLen), zlib.NewReader, decode.FormatFn(func(d *decode.D, in interface{}) interface{} {
d.FieldUTF8("text", int(d.BitsLeft()/8))
return nil
}))
default:
d.FieldRawLen("data", int64(dataLen))
d.FieldRawLen("data", dataLen)
}
case "iCCP":
profileNameLen := int(d.PeekFindByte(0, 80))
d.FieldUTF8("profile_name", profileNameLen)
d.FieldUTF8("null", 1)
d.FieldUTF8Null("profile_name")
compressionMethod := d.FieldU8("compression_method", d.MapUToStrSym(compressionNames))
// +2 to skip null and compression_method
dataLen := (chunkLength - (profileNameLen + 2)) * 8
dataLen := d.BitsLeft()

d.FieldRawLen("compressed", dataLen)
d.SeekRel(-dataLen)

switch compressionMethod {
case compressionDeflate:
dd := d.FieldStruct("data", func(d *decode.D) {
d.FieldFormatReaderLen("uncompressed", int64(dataLen), zlib.NewReader, iccProfileFormat)
})
dd.Value.Range = ranges.Range{Start: d.Pos() - int64(dataLen), Len: int64(dataLen)}
d.FieldFormatReaderLen("uncompressed", dataLen, zlib.NewReader, iccProfileFormat)
default:
d.FieldRawLen("data", int64(dataLen))
d.FieldRawLen("data", dataLen)
}
case "pHYs":
d.FieldU32("x_pixels_per_unit")
Expand Down

0 comments on commit 6fba1a8

Please sign in to comment.