-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add impl for int encoder/decoder. Adds protos. Adds generics.
- Loading branch information
Bo Du
committed
Dec 7, 2018
1 parent
f49aae4
commit c86852a
Showing
30 changed files
with
1,614 additions
and
75 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
package encoding | ||
|
||
import ( | ||
"io" | ||
|
||
bitstream "github.com/dgryski/go-bitstream" | ||
) | ||
|
||
// negativeSign is the sign-bit value; presumably it marks a negative delta on
// the encoding side. NOTE(review): it is not referenced by the iterator code
// in this file - confirm where it is used.
const negativeSign = 1
|
||
// DeltaIntIterator iterates over a stream of delta encoded data. | ||
type DeltaIntIterator struct { | ||
bitReader *bitstream.BitReader | ||
bitsPerEncodedValue int64 | ||
negativeBit uint64 | ||
curr int | ||
err error | ||
closed bool | ||
} | ||
|
||
// NewDeltaIntIterator returns a new delta encoded int iterator. | ||
func NewDeltaIntIterator( | ||
reader io.Reader, | ||
bitsPerEncodedValue int64, | ||
deltaStart int64, | ||
) *DeltaIntIterator { | ||
return &DeltaIntIterator{ | ||
bitReader: bitstream.NewReader(reader), | ||
bitsPerEncodedValue: bitsPerEncodedValue, | ||
negativeBit: 1 << uint(bitsPerEncodedValue), | ||
curr: int(deltaStart), | ||
} | ||
} | ||
|
||
// Next iteration. | ||
func (d *DeltaIntIterator) Next() bool { | ||
if d.closed || d.err != nil { | ||
return false | ||
} | ||
|
||
// Read in an extra bit for the sign. | ||
var ( | ||
delta uint64 | ||
) | ||
delta, d.err = d.bitReader.ReadBits(int(d.bitsPerEncodedValue) + 1) | ||
if d.err != nil { | ||
return false | ||
} | ||
// Check if negative bit is set. | ||
isNegative := delta&d.negativeBit == d.negativeBit | ||
if isNegative { | ||
// Zero out the negative bit. | ||
delta &^= d.negativeBit | ||
d.curr -= int(delta) | ||
} else { | ||
d.curr += int(delta) | ||
} | ||
|
||
return true | ||
} | ||
|
||
// Current returns the current int. | ||
func (d *DeltaIntIterator) Current() int { | ||
return d.curr | ||
} | ||
|
||
// Err returns any error recorded while iterating. | ||
func (d *DeltaIntIterator) Err() error { | ||
return d.err | ||
} | ||
|
||
// Close the iterator. | ||
func (d *DeltaIntIterator) Close() error { | ||
d.closed = true | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
package encoding | ||
|
||
import ( | ||
"github.com/xichen2020/eventdb/generated/proto/encodingpb" | ||
"github.com/xichen2020/eventdb/x/io" | ||
"github.com/xichen2020/eventdb/x/proto" | ||
|
||
bitstream "github.com/dgryski/go-bitstream" | ||
) | ||
|
||
// DictionaryBasedIntIterator iterates through a dict encoded stream of ints. | ||
type DictionaryBasedIntIterator struct { | ||
bitReader *bitstream.BitReader | ||
bytesPerDictionaryValue int64 | ||
bitsPerEncodedValue int64 | ||
extBuf *[]byte | ||
dict []byte | ||
curr int | ||
err error | ||
closed bool | ||
} | ||
|
||
// NewDictionaryBasedIntIterator returns a new dictionary based int iterator. | ||
func NewDictionaryBasedIntIterator( | ||
reader io.Reader, | ||
extProto *encodingpb.IntDictionary, // extProto is an external proto for memory re-use. | ||
extBuf *[]byte, // extBuf is an external byte buffer for memory re-use. | ||
bytesPerDictionaryValue int64, | ||
bitsPerEncodedValue int64, | ||
) (*DictionaryBasedIntIterator, error) { | ||
if err := proto.DecodeIntDictionary(extProto, extBuf, reader); err != nil { | ||
return nil, err | ||
} | ||
// Zero out extBuf so we can re-use it during iteration. | ||
endianness.PutUint64(*extBuf, uint64(0)) | ||
return &DictionaryBasedIntIterator{ | ||
bitReader: bitstream.NewReader(reader), | ||
bytesPerDictionaryValue: bytesPerDictionaryValue, | ||
bitsPerEncodedValue: bitsPerEncodedValue, | ||
extBuf: extBuf, | ||
dict: extProto.Data, | ||
}, nil | ||
} | ||
|
||
// Next iteration. | ||
func (d *DictionaryBasedIntIterator) Next() bool { | ||
if d.closed || d.err != nil { | ||
return false | ||
} | ||
|
||
// Read the idx into the dict first. | ||
var dictIdx uint64 | ||
dictIdx, d.err = d.bitReader.ReadBits(int(d.bitsPerEncodedValue)) | ||
if d.err != nil { | ||
return false | ||
} | ||
|
||
// Use idx to fetch value. | ||
start := int64(dictIdx) * d.bytesPerDictionaryValue | ||
copy((*d.extBuf)[:d.bytesPerDictionaryValue], d.dict[start:start+d.bytesPerDictionaryValue]) | ||
d.curr = int(endianness.Uint64(*d.extBuf)) | ||
return true | ||
} | ||
|
||
// Current returns the current int. | ||
func (d *DictionaryBasedIntIterator) Current() int { | ||
return d.curr | ||
} | ||
|
||
// Err returns any error recorded while iterating. | ||
func (d *DictionaryBasedIntIterator) Err() error { | ||
return d.err | ||
} | ||
|
||
// Close the iterator. | ||
func (d *DictionaryBasedIntIterator) Close() error { | ||
d.closed = true | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,15 @@ | ||
package encoding | ||
|
||
import ( | ||
"io" | ||
"encoding/binary" | ||
) | ||
|
||
// Reader combines bulk reads (io.Reader) with single-byte reads
// (io.ByteReader) in one interface.
type Reader interface {
	io.Reader
	io.ByteReader
}
// endianness is the byte order used when serializing data: little endian.
var endianness = binary.LittleEndian
|
||
// uint64SizeBytes is the number of bytes in a uint64; buffers of at least
// this size can hold a serialized uint64 value.
const uint64SizeBytes = 8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,75 @@ | ||
package encoding | ||
|
||
import ( | ||
"fmt" | ||
|
||
"github.com/xichen2020/eventdb/generated/proto/encodingpb" | ||
"github.com/xichen2020/eventdb/x/io" | ||
"github.com/xichen2020/eventdb/x/proto" | ||
) | ||
|
||
// IntDecoder decodes int values. | ||
type IntDecoder interface { | ||
// Decode decodes ints from reader. | ||
Decode(reader Reader) (ForwardIntIterator, error) | ||
Decode(reader io.Reader) (ForwardIntIterator, error) | ||
|
||
// Reset resets the decoder. | ||
Reset() | ||
} | ||
|
||
// IntDec is a int Decoder. | ||
type IntDec struct { | ||
dictionaryProto encodingpb.IntDictionary | ||
metaProto encodingpb.IntMeta | ||
buf []byte | ||
} | ||
|
||
// NewIntDecoder creates a new int Decoder. | ||
func NewIntDecoder() *IntDec { | ||
return &IntDec{ | ||
// Make buf at least big enough to hold Uint64 values. | ||
buf: make([]byte, uint64SizeBytes), | ||
} | ||
} | ||
|
||
// Decode encoded int data in a streaming fashion. | ||
func (dec *IntDec) Decode(reader io.Reader) (ForwardIntIterator, error) { | ||
// Decode metadata first. | ||
if err := proto.DecodeIntMeta(&dec.metaProto, &dec.buf, reader); err != nil { | ||
return nil, err | ||
} | ||
|
||
var ( | ||
iter ForwardIntIterator | ||
err error | ||
) | ||
switch dec.metaProto.Encoding { | ||
case encodingpb.EncodingType_DELTA: | ||
iter = dec.decodeDelta(reader, dec.metaProto.BitsPerEncodedValue, dec.metaProto.DeltaStart) | ||
case encodingpb.EncodingType_DICTIONARY: | ||
iter, err = dec.decodeDictionary(reader, dec.metaProto.BytesPerDictionaryValue, dec.metaProto.BitsPerEncodedValue) | ||
default: | ||
return nil, fmt.Errorf("Invalid encoding type: %v", dec.metaProto.Encoding) | ||
} | ||
|
||
return iter, err | ||
} | ||
|
||
// Reset the int decoder. | ||
func (dec *IntDec) Reset() {} | ||
|
||
func (dec *IntDec) decodeDelta( | ||
reader io.Reader, | ||
bitsPerEncodedValue int64, | ||
deltaStart int64, | ||
) *DeltaIntIterator { | ||
return NewDeltaIntIterator(reader, bitsPerEncodedValue, deltaStart) | ||
} | ||
|
||
func (dec *IntDec) decodeDictionary( | ||
reader io.Reader, | ||
bytesPerDictionaryValue int64, | ||
bitsPerEncodedValue int64, | ||
) (*DictionaryBasedIntIterator, error) { | ||
return NewDictionaryBasedIntIterator(reader, &dec.dictionaryProto, &dec.buf, bytesPerDictionaryValue, bitsPerEncodedValue) | ||
} |
Oops, something went wrong.