Skip to content

Commit

Permalink
cbor: Add decoder
Browse files Browse the repository at this point in the history
Does not decode semantic tag types
Also fixes broken float16 support

Fixes #71
  • Loading branch information
wader committed Jan 12, 2022
1 parent b667294 commit 0b0f28e
Show file tree
Hide file tree
Showing 14 changed files with 1,894 additions and 10 deletions.
1 change: 1 addition & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"bson",
"bxor",
"bzip",
"cbor",
"CCIT",
"chzyer",
"CLIUNICODE",
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ go run fq.go

[./formats_list.jq]: sh-start

aac_frame, adts, adts_frame, apev2, ar, av1_ccr, av1_frame, av1_obu, avc_annexb, avc_au, avc_dcr, avc_nalu, avc_pps, avc_sei, avc_sps, bencode, bsd_loopback_frame, bson, bzip2, dns, dns_tcp, elf, ether8023_frame, exif, flac, flac_frame, flac_metadatablock, flac_metadatablocks, flac_picture, flac_streaminfo, gif, gzip, hevc_annexb, hevc_au, hevc_dcr, hevc_nalu, icc_profile, icmp, id3v1, id3v11, id3v2, ipv4_packet, jpeg, json, matroska, mp3, mp3_frame, mp4, mpeg_asc, mpeg_es, mpeg_pes, mpeg_pes_packet, mpeg_spu, mpeg_ts, msgpack, ogg, ogg_page, opus_packet, pcap, pcapng, png, protobuf, protobuf_widevine, pssh_playready, raw, sll2_packet, sll_packet, tar, tcp_segment, tiff, udp_datagram, vorbis_comment, vorbis_packet, vp8_frame, vp9_cfm, vp9_frame, vpx_ccr, wav, webp, xing, zip
aac_frame, adts, adts_frame, apev2, ar, av1_ccr, av1_frame, av1_obu, avc_annexb, avc_au, avc_dcr, avc_nalu, avc_pps, avc_sei, avc_sps, bencode, bsd_loopback_frame, bson, bzip2, cbor, dns, dns_tcp, elf, ether8023_frame, exif, flac, flac_frame, flac_metadatablock, flac_metadatablocks, flac_picture, flac_streaminfo, gif, gzip, hevc_annexb, hevc_au, hevc_dcr, hevc_nalu, icc_profile, icmp, id3v1, id3v11, id3v2, ipv4_packet, jpeg, json, matroska, mp3, mp3_frame, mp4, mpeg_asc, mpeg_es, mpeg_pes, mpeg_pes_packet, mpeg_spu, mpeg_ts, msgpack, ogg, ogg_page, opus_packet, pcap, pcapng, png, protobuf, protobuf_widevine, pssh_playready, raw, sll2_packet, sll_packet, tar, tcp_segment, tiff, udp_datagram, vorbis_comment, vorbis_packet, vp8_frame, vp9_cfm, vp9_frame, vpx_ccr, wav, webp, xing, zip

[#]: sh-end

Expand Down
1 change: 1 addition & 0 deletions doc/formats.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
|`bsd_loopback_frame` |BSD&nbsp;loopback&nbsp;frame |<sub>`ipv4_packet`</sub>|
|`bson` |Binary&nbsp;JSON |<sub></sub>|
|`bzip2` |bzip2&nbsp;compression |<sub>`probe`</sub>|
|`cbor` |Concise&nbsp;Binary&nbsp;Object&nbsp;Representation |<sub></sub>|
|`dns` |DNS&nbsp;packet |<sub></sub>|
|`dns_tcp` |DNS&nbsp;packet&nbsp;(TCP) |<sub></sub>|
|`elf` |Executable&nbsp;and&nbsp;Linkable&nbsp;Format |<sub></sub>|
Expand Down
24 changes: 15 additions & 9 deletions doc/formats.svg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions format/all/all.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
_ "github.com/wader/fq/format/bencode"
_ "github.com/wader/fq/format/bson"
_ "github.com/wader/fq/format/bzip2"
_ "github.com/wader/fq/format/cbor"
_ "github.com/wader/fq/format/dns"
_ "github.com/wader/fq/format/elf"
_ "github.com/wader/fq/format/flac"
Expand Down
267 changes: 267 additions & 0 deletions format/cbor/cbor.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
package cbor

// https://en.wikipedia.org/wiki/CBOR
// https://www.rfc-editor.org/rfc/rfc8949.html

// TODO: streaming bytes test?
// TODO: decode some semantic tags

import (
"bytes"
"embed"
"strings"

"github.com/wader/fq/format"
"github.com/wader/fq/format/registry"
"github.com/wader/fq/pkg/bitio"
"github.com/wader/fq/pkg/decode"
"github.com/wader/fq/pkg/scalar"
)

//go:embed *.jq
var cborFS embed.FS

func init() {
registry.MustRegister(decode.Format{
Name: format.CBOR,
Description: "Concise Binary Object Representation",
DecodeFn: decodeCBOR,
Files: cborFS,
ToRepr: "_cbor_torepr",
})
}

type (
	// majorTypeEntry describes one CBOR major type: the scalar used to
	// present the major_type field and the function that decodes the
	// item's payload.
	majorTypeEntry struct {
		// s replaces the major_type scalar when this entry matches.
		s scalar.S
		// d decodes the payload. shortCount is the raw 5 bit short count
		// and count is the resolved count (equal to shortCount unless a
		// variable_count field was read). It returns a Go value for the
		// item, or nil.
		d func(d *decode.D, shortCount uint64, count uint64) interface{}
	}

	// majorTypeEntries maps a major type number to its entry.
	majorTypeEntries map[uint64]majorTypeEntry
)

// MapScalar looks up the unsigned actual value of s among the major type
// entries. On a match the entry's scalar is returned with Actual set to that
// value; otherwise s is returned unchanged. The error is always nil.
func (mts majorTypeEntries) MapScalar(s scalar.S) (scalar.S, error) {
	actual := s.ActualU()
	entry, found := mts[actual]
	if !found {
		return s, nil
	}
	mapped := entry.s
	mapped.Actual = actual
	return mapped, nil
}

// Short count values that do not hold the count themselves: 24-27 select the
// width of a following variable_count field and 31 marks an indefinite
// length item.
const (
	shortCountVariable8Bit  = 24
	shortCountVariable16Bit = 25
	shortCountVariable32Bit = 26
	shortCountVariable64Bit = 27
	shortCountIndefinite    = 31
)

// Short count values of the special/float major type that select a fixed
// special value.
const (
	shortCountSpecialFalse     = 20
	shortCountSpecialTrue      = 21
	shortCountSpecialNull      = 22
	shortCountSpecialUndefined = 23
)

// Short count values of the special/float major type that select the width
// of a following float.
const (
	shortCountSpecialFloat16Bit = 25
	shortCountSpecialFloat32Bit = 26
	shortCountSpecialFloat64Bit = 27
)

var (
	// shortCountMap gives symbolic names to the short count values that
	// do not hold the count themselves.
	shortCountMap = scalar.UToSymStr{
		shortCountVariable8Bit:  "8bit",
		shortCountVariable16Bit: "16bit",
		shortCountVariable32Bit: "32bit",
		shortCountVariable64Bit: "64bit",
		shortCountIndefinite:    "indefinite",
	}

	// tagMap gives symbolic names to known semantic tag numbers.
	tagMap = scalar.UToSymStr{
		0:     "date_time",
		1:     "epoch_date_time",
		2:     "unsigned_bignum",
		3:     "negative_bignum",
		4:     "decimal_fraction",
		5:     "bigfloat",
		21:    "base64url",
		22:    "base64",
		23:    "base16",
		24:    "encoded_cbor",
		32:    "uri",
		33:    "base64url",
		34:    "base64",
		36:    "mime_message",
		55799: "self_described_cbor",
	}
)

// CBOR major types, the value of the 3 bit major_type field that starts
// every item.
const (
	majorTypePositiveInt = iota
	majorTypeNegativeInt
	majorTypeBytes
	majorTypeUTF8
	majorTypeArray
	majorTypeMap
	majorTypeSematic
	majorTypeSpecialFloat
)

// breakMarker is the byte that terminates an indefinite length item.
const breakMarker = 0xff

// decodeCBORValue decodes one CBOR data item at the current position, adding
// its fields (major_type, short_count, an optional variable_count and the
// major type specific fields) to d.
//
// The return value is a Go representation used by callers that need the
// content of a nested item:
//   - positive_int, negative_int and semantic return the raw count (uint64)
//   - definite length bytes return []byte
//   - definite length utf8 returns the value returned by d.FieldUTF8
//   - everything else, including all indefinite length items, returns nil
//
// Indefinite length byte and text strings rely on this: each chunk must
// return []byte or a string respectively, anything else (including a nested
// indefinite length string, which returns nil) is a fatal decode error.
func decodeCBORValue(d *decode.D) interface{} {
	// Built per call as the entries recurse into decodeCBORValue. Named
	// majorTypes so it does not shadow the majorTypeMap constant.
	majorTypes := majorTypeEntries{
		majorTypePositiveInt: {s: scalar.S{Sym: "positive_int"}, d: func(d *decode.D, shortCount uint64, count uint64) interface{} {
			d.FieldValueU("value", count)
			return count
		}},
		majorTypeNegativeInt: {s: scalar.S{Sym: "negative_int"}, d: func(d *decode.D, shortCount uint64, count uint64) interface{} {
			// The encoded value is -1 - count which is ^count in two's
			// complement.
			// NOTE(review): counts above math.MaxInt64 do not fit an int64
			// and wrap around.
			d.FieldValueS("value", int64(^count))
			return count
		}},
		majorTypeBytes: {s: scalar.S{Sym: "bytes"}, d: func(d *decode.D, shortCount uint64, count uint64) interface{} {
			if shortCount == shortCountIndefinite {
				bb := &bytes.Buffer{}
				d.FieldArray("items", func(d *decode.D) {
					for d.PeekBits(8) != breakMarker {
						d.FieldStruct("item", func(d *decode.D) {
							switch v := decodeCBORValue(d).(type) {
							case []byte:
								bb.Write(v)
							default:
								d.Fatalf("non-bytes in bytes stream %v", v)
							}
						})
					}
				})
				// Consume the break byte that ended the loop above so it is
				// not mistaken for the start of the next item.
				d.FieldU8("break")
				d.FieldRootBitBuf("value", bitio.NewBufferFromBytes(bb.Bytes(), -1))
				// nil, nested indefinite bytes is not allowed
				return nil
			}

			bib := d.FieldRawLen("value", int64(count)*8)
			bs, err := bib.Bytes()
			if err != nil {
				d.IOPanic(err, "bytes bb.Bytes")
			}
			return bs
		}},
		majorTypeUTF8: {s: scalar.S{Sym: "utf8"}, d: func(d *decode.D, shortCount uint64, count uint64) interface{} {
			if shortCount == shortCountIndefinite {
				sb := &strings.Builder{}
				d.FieldArray("items", func(d *decode.D) {
					for d.PeekBits(8) != breakMarker {
						d.FieldStruct("item", func(d *decode.D) {
							switch v := decodeCBORValue(d).(type) {
							case string:
								sb.WriteString(v)
							default:
								d.Fatalf("non-string in string stream %v", v)
							}
						})
					}
				})
				// Consume the break byte that ended the loop above so it is
				// not mistaken for the start of the next item.
				d.FieldU8("break")
				d.FieldValueStr("value", sb.String())
				// nil, nested indefinite string is not allowed
				return nil
			}

			return d.FieldUTF8("value", int(count))
		}},
		majorTypeArray: {s: scalar.S{Sym: "array"}, d: func(d *decode.D, shortCount uint64, count uint64) interface{} {
			decodeCBORItems(d, "elements", shortCount, count, func(d *decode.D) {
				d.FieldStruct("element", func(d *decode.D) { decodeCBORValue(d) })
			})
			return nil
		}},
		majorTypeMap: {s: scalar.S{Sym: "map"}, d: func(d *decode.D, shortCount uint64, count uint64) interface{} {
			decodeCBORItems(d, "pairs", shortCount, count, func(d *decode.D) {
				d.FieldStruct("pair", func(d *decode.D) {
					d.FieldStruct("key", func(d *decode.D) { decodeCBORValue(d) })
					d.FieldStruct("value", func(d *decode.D) { decodeCBORValue(d) })
				})
			})
			return nil
		}},
		majorTypeSematic: {s: scalar.S{Sym: "semantic"}, d: func(d *decode.D, shortCount uint64, count uint64) interface{} {
			// count is the tag number, the tagged item follows
			d.FieldValueU("tag", count, tagMap)
			d.FieldStruct("value", func(d *decode.D) { decodeCBORValue(d) })
			return count
		}},
		majorTypeSpecialFloat: {s: scalar.S{Sym: "special_float"}, d: func(d *decode.D, shortCount uint64, count uint64) interface{} {
			switch shortCount {
			// TODO: 0-19
			case shortCountSpecialFalse:
				d.FieldValueBool("value", false)
			case shortCountSpecialTrue:
				d.FieldValueBool("value", true)
			case shortCountSpecialNull:
				// TODO: null
			case shortCountSpecialUndefined:
				// TODO: undefined
			case 24:
				// TODO: future
			case shortCountSpecialFloat16Bit:
				d.FieldF16("value")
			case shortCountSpecialFloat32Bit:
				d.FieldF32("value")
			case shortCountSpecialFloat64Bit:
				d.FieldF64("value")
			case 28, 29, 30:
				// TODO: future
			}
			return nil
		}},
	}

	typ := d.FieldU3("major_type", majorTypes)
	shortCount := d.FieldU5("short_count", shortCountMap)
	count := shortCount
	// For special_float the short count selects a value or float width and
	// is interpreted by the entry itself, so no variable_count is read.
	if typ != majorTypeSpecialFloat {
		switch count {
		// 0-23 value in shortCount
		case shortCountVariable8Bit:
			count = d.FieldU8("variable_count")
		case shortCountVariable16Bit:
			count = d.FieldU16("variable_count")
		case shortCountVariable32Bit:
			count = d.FieldU32("variable_count")
		case shortCountVariable64Bit:
			count = d.FieldU64("variable_count")
		case 28, 29, 30:
			d.Fatalf("incorrect shortCount %d", count)
		}
	}

	if mt, ok := majorTypes[typ]; ok {
		if mt.d != nil {
			return mt.d(d, shortCount, count)
		}
		return nil
	}

	// major_type is 3 bits and all 8 values have an entry
	panic("unreachable")
}

// decodeCBORItems adds an array field called name and calls decodeItem once
// per contained item. A definite length container has exactly count items.
// An indefinite length container runs until the break byte, which is then
// added as a "break" field; count is ignored in that case as it only holds
// the short count value 31 and not a real item count.
func decodeCBORItems(d *decode.D, name string, shortCount uint64, count uint64, decodeItem func(d *decode.D)) {
	indefinite := shortCount == shortCountIndefinite
	d.FieldArray(name, func(d *decode.D) {
		for i := uint64(0); ; i++ {
			if indefinite {
				if d.PeekBits(8) == breakMarker {
					break
				}
			} else if i >= count {
				break
			}
			decodeItem(d)
		}
	})
	if indefinite {
		d.FieldU8("break")
	}
}

// decodeCBOR is the DecodeFn of the CBOR format. It decodes a single CBOR
// data item starting at the current position of d. The in argument is not
// used, the decoded item's Go value is discarded and nil is always returned.
func decodeCBOR(d *decode.D, in interface{}) interface{} {
	_ = decodeCBORValue(d)
	return nil
}
13 changes: 13 additions & 0 deletions format/cbor/cbor.jq
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Convert a decoded cbor item into the plain value it represents:
# - map: object built from the converted key/value pairs
# - array: array of the converted elements
# - bytes: .value converted with tostring
# - semantic: the conversion of the tagged item in .value (the tag itself is
#   dropped); .value is a nested cbor item so it has to go through _f again
#   instead of being dumped as a raw decode structure
# - everything else: .value as a plain value
def _cbor_torepr:
  def _f:
    ( if .major_type == "map" then
        ( .pairs
        | map({key: (.key | _f), value: (.value | _f)})
        | from_entries
        )
      elif .major_type == "array" then .elements | map(_f)
      elif .major_type == "bytes" then .value | tostring
      elif .major_type == "semantic" then .value | _f
      else .value | tovalue
      end
    );
  _f;

0 comments on commit 0b0f28e

Please sign in to comment.