Skip to content

Commit

Permalink
gzip,bzip2: Calculate CRC
Browse files Browse the repository at this point in the history
  • Loading branch information
wader committed Nov 21, 2021
1 parent 606c0b6 commit ee611a4
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 15 deletions.
36 changes: 28 additions & 8 deletions format/bzip2/bzip2.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@ package bzip2

import (
"compress/bzip2"
"encoding/binary"
"errors"
"hash/crc32"
"io"
"math/bits"

"github.com/wader/fq/format"
"github.com/wader/fq/format/registry"
Expand All @@ -31,13 +35,28 @@ func init() {
const blockMagic = 0x31_41_59_26_53_59
const footerMagic = 0x17_72_45_38_50_90

// bitFlipReader wraps an io.Reader and reverses the bit order of every byte
// that passes through it (bit 0 becomes bit 7, bit 1 becomes bit 6, and so on).
type bitFlipReader struct {
	// r is the underlying source the bytes are read from.
	r io.Reader
}

// Read reads from the wrapped reader into p and then bit-reverses, in place,
// each of the n bytes that were actually read. Bytes of p beyond n are left
// untouched. The count and error from the underlying Read are returned as-is.
func (bfr bitFlipReader) Read(p []byte) (n int, err error) {
	n, err = bfr.r.Read(p)
	// Flip the bytes even when err is non-nil: a reader may return data
	// together with an error, and that data still has to be transformed.
	for pos := 0; pos < n; pos++ {
		b := &p[pos]
		*b = bits.Reverse8(*b)
	}
	return n, err
}

func bzip2Decode(d *decode.D, in interface{}) interface{} {
// moreStreams := true

// d.FieldArray("streams", func(d *decode.D) {
// for moreStreams {
// d.FieldStruct("stream", func(d *decode.D) {

var blockCRCValue *decode.Value
var streamCRCN uint32

d.FieldUTF8("magic", 2, d.AssertStr("BZ"))
d.FieldU8("version")
d.FieldU8("hundred_k_blocksize")
Expand All @@ -49,6 +68,7 @@ func bzip2Decode(d *decode.D, in interface{}) interface{} {
// }
d.FieldU48("magic", d.AssertU(blockMagic), d.Hex)
d.FieldU32("crc", d.Hex)
blockCRCValue = d.FieldGet("crc")
d.FieldU1("randomised")
d.FieldU24("origptr")
d.FieldU16("syncmapl1")
Expand Down Expand Up @@ -93,12 +113,13 @@ func bzip2Decode(d *decode.D, in interface{}) interface{} {
d.FieldRootBitBuf("uncompressed", uncompressedBB)
}

// uncompressed := &bytes.Buffer{}
// crc32W := crc32.NewIEEE()
// if _, err := d.Copy(io.MultiWriter(uncompressed, crc32W), deflateR); err != nil {
// d.Fatalf(err.Error())
// }
// // calculatedCRC32 := crc32W.Sum(nil)
blockCRC32W := crc32.NewIEEE()
if _, err := d.Copy(blockCRC32W, bitFlipReader{uncompressedBB.Copy()}); err != nil {
d.IOPanic(err)
}
blockCRC32N := bits.Reverse32(binary.BigEndian.Uint32(blockCRC32W.Sum(nil)))
_ = blockCRCValue.TryScalarFn(d.ValidateU(uint64(blockCRC32N)))
streamCRCN = blockCRC32N ^ ((streamCRCN << 1) | (streamCRCN >> 31))

// HACK: bzip2.NewReader will read from start of whole buffer and then we figure out compressedSize ourselves
// "It is important to note that none of the fields within a StreamBlock or StreamFooter are necessarily byte-aligned"
Expand All @@ -118,12 +139,11 @@ func bzip2Decode(d *decode.D, in interface{}) interface{} {
d.FieldStruct("footer", func(d *decode.D) {
d.FieldU48("magic", d.AssertU(footerMagic), d.Hex)
// TODO: crc of block crcs
d.FieldU32("crc", d.Hex)
d.FieldU32("crc", d.Hex, d.ValidateU(uint64(streamCRCN)))
d.FieldRawLen("padding", int64(d.ByteAlignBits()))
})

// moreStreams = false

// }
// })

Expand Down
4 changes: 2 additions & 2 deletions format/bzip2/testdata/test.fqtest
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ $ fq -d bzip2 verbose /test.bz2
0x00| 39 | 9 | hundred_k_blocksize: 57 0x3-0x3.7 (1)
| | | block: {} 0x4-0x1c.7 (25)
0x00| 31 41 59 26 53 59 | 1AY&SY | magic: 0x314159265359 (valid) 0x4-0x9.7 (6)
0x00| cc c3 71 d4 | ..q. | crc: 0xccc371d4 0xa-0xd.7 (4)
0x00| cc c3 71 d4 | ..q. | crc: 0xccc371d4 (valid) 0xa-0xd.7 (4)
0x00| 00 | . | randomised: 0 0xe-0xe (0.1)
0x00| 00 00| ..| origptr: 4 0xe.1-0x11 (3)
0x10|02 41 |.A |
Expand All @@ -24,5 +24,5 @@ $ fq -d bzip2 verbose /test.bz2
0x20|19 97 8b |... |
| | | footer: {} 0x22.1-0x2c.7 (10.7)
0x20| 8b b9 22 9c 28 48 66 | ..".(Hf | magic: 0x177245385090 (valid) 0x22.1-0x28 (6)
0x20| 66 61 b8 ea 00| | fa...| | crc: 0xccc371d4 0x28.1-0x2c (4)
0x20| 66 61 b8 ea 00| | fa...| | crc: 0xccc371d4 (valid) 0x28.1-0x2c (4)
0x20| 00| | .| | padding: raw bits 0x2c.1-0x2c.7 (0.7)
4 changes: 2 additions & 2 deletions format/gzip/gzip.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ package gz

import (
"compress/flate"
"encoding/binary"
"errors"
"hash/crc32"
"io"

"github.com/wader/fq/format"
"github.com/wader/fq/format/registry"
"github.com/wader/fq/pkg/bitio"
"github.com/wader/fq/pkg/decode"
)

Expand Down Expand Up @@ -115,7 +115,7 @@ func gzDecode(d *decode.D, in interface{}) interface{} {
if _, err := io.Copy(crc32W, uncompressedBB.Copy()); err != nil {
d.IOPanic(err)
}
d.FieldRawLen("crc32", 32, d.ValidateBitBuf(bitio.ReverseBytes(crc32W.Sum(nil))), d.RawHex)
d.FieldU32("crc32", d.ValidateU(uint64(binary.LittleEndian.Uint32(crc32W.Sum(nil)))), d.Hex)
d.FieldU32LE("isize")

return nil
Expand Down
2 changes: 1 addition & 1 deletion format/gzip/testdata/test.fqtest
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ $ fq -d gzip verbose /test.gz
0x0|74 65 73 74 0a| |test.| | uncompressed: raw bits 0x0-0x4.7 (5)
0x00| 2b 49 2d 2e e1 02| +I-...| compressed: raw bits 0xa-0x10.7 (7)
0x10|00 |. |
0x10| c6 35 b9 3b | .5.; | crc32: "c635b93b" (raw bits) (valid) 0x11-0x14.7 (4)
0x10| c6 35 b9 3b | .5.; | crc32: 0xc635b93b (valid) 0x11-0x14.7 (4)
0x10| 05 00 00 00| | ....| | isize: 5 0x15-0x18.7 (4)
4 changes: 2 additions & 2 deletions format/json/testdata/json.fqtest
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@ $ fq . /json.gz
0x0|7b 22 61 22 3a 20 31 32 33 7d |{"a": 123} | uncompressed: {} (json)
0x00| ab 56 4a 54 b2 52| .VJT.R| compressed: raw bits
0x10|30 34 32 ae e5 02 00 |042.... |
0x10| 20 ac d2 9c | ... | crc32: "20acd29c" (raw bits) (valid)
0x10| 20 ac d2 9c | ... | crc32: 0x20acd29c (valid)
0x10| 0b 00 00 00| | ....|| isize: 11
$ fq tovalue /json.gz
{
"compressed": "<13>q1ZKVLJSMDQyruUCAA==",
"compression_method": "deflate",
"crc32": "20acd29c",
"crc32": 548197020,
"extra_flags": 0,
"flags": {
"comment": false,
Expand Down

0 comments on commit ee611a4

Please sign in to comment.