Skip to content

Commit

Permalink
Add method ImportElementsFrom on inverse bloom filter
Browse files Browse the repository at this point in the history
ReadFrom and GobDecode methods allowed a bloom filter to be
exported and then recreated as part of a new array. The new
ImportElementsFrom method uses the filter.Add method to add
the elements written out by one filter to the array of another
filter.

This allows for use cases such as converting from one
filter size to another. For example, rolling a size 1,000
filter into a size 1,000,000 filter.
  • Loading branch information
sam-lowenkamp committed Feb 6, 2017
1 parent 6cd71b8 commit dee13ba
Show file tree
Hide file tree
Showing 3 changed files with 147 additions and 21 deletions.
4 changes: 2 additions & 2 deletions classic.go
Expand Up @@ -144,7 +144,7 @@ func (b *BloomFilter) WriteTo(stream io.Writer) (int64, error) {
return 0, err
}

return writtenSize + int64(3 * binary.Size(uint64(0))), err
return writtenSize + int64(3*binary.Size(uint64(0))), err
}

// ReadFrom reads a binary representation of BloomFilter (such as might
Expand Down Expand Up @@ -176,7 +176,7 @@ func (b *BloomFilter) ReadFrom(stream io.Reader) (int64, error) {
b.m = uint(m)
b.k = uint(k)
b.buckets = &buckets
return readSize + int64(3 * binary.Size(uint64(0))), nil
return readSize + int64(3*binary.Size(uint64(0))), nil
}

// GobEncode implements gob.GobEncoder interface.
Expand Down
63 changes: 47 additions & 16 deletions inverse.go
Expand Up @@ -181,19 +181,62 @@ func (i *InverseBloomFilter) WriteTo(stream io.Writer) (int64, error) {
}

// ReadFrom reads a binary representation of InverseBloomFilter (such as might
// have been written by WriteTo()) from an i/o stream. It returns the number
// have been written by WriteTo()) from an i/o stream. ReadFrom replaces the
// array of its filter with the one read from disk. It returns the number
// of bytes read.
func (i *InverseBloomFilter) ReadFrom(stream io.Reader) (int64, error) {
	elements, capacity, payloadSize, err := i.decodeToArray(stream)
	if err != nil {
		return 0, err
	}

	// make() zero-fills the slice with nil pointers, so only the populated
	// slots need to be pointed at their decoded element.
	slots := make([]*[]byte, capacity)
	for idx := range slots {
		if len(elements[idx]) != 0 {
			slots[idx] = &elements[idx]
		}
	}

	// Swap in the decoded state, replacing whatever the filter held before.
	i.array = slots
	i.capacity = uint(capacity)

	// Bytes consumed = encoded payload + the two uint64 header fields
	// (capacity and size) read by decodeToArray.
	headerSize := int64(2 * binary.Size(uint64(0)))
	return int64(payloadSize) + headerSize, nil
}

// ImportElementsFrom reads a binary representation of InverseBloomFilter (such as might
// have been written by WriteTo()) from an i/o stream into a new bloom filter using the
// Add() method (skipping empty elements, if any). It returns the number of
// elements decoded from disk.
func (i *InverseBloomFilter) ImportElementsFrom(stream io.Reader) (int, error) {
	// The capacity and size stored in the stream are deliberately discarded;
	// only the decoded elements are used.
	elements, _, _, err := i.decodeToArray(stream)
	if err != nil {
		return 0, err
	}

	// Feed every non-empty decoded slot to this filter through Add.
	for _, element := range elements {
		if len(element) == 0 {
			continue
		}
		i.Add(element)
	}

	// The count covers every decoded slot, including the empty ones skipped above.
	return len(elements), nil
}

// decodeToArray decodes an inverse bloom filter from an i/o stream into a 2-d byte slice.
func (i *InverseBloomFilter) decodeToArray(stream io.Reader) ([][]byte, uint64, uint64, error) {
var capacity, size uint64

err := binary.Read(stream, binary.BigEndian, &capacity)
if err != nil {
return 0, err
return nil, 0, 0, err
}

err = binary.Read(stream, binary.BigEndian, &size)
if err != nil {
return 0, err
return nil, 0, 0, err
}

// Read the encoded slice and decode into [][]byte
Expand All @@ -204,19 +247,7 @@ func (i *InverseBloomFilter) ReadFrom(stream io.Reader) (int64, error) {
decoded := make([][]byte, capacity)
dec.Decode(&decoded)

// Create []*[]byte and point to each item in decoded
decodedWithPointers := make([]*[]byte, capacity)
for p := range decodedWithPointers {
if len(decoded[p]) == 0 {
decodedWithPointers[p] = nil
} else {
decodedWithPointers[p] = &decoded[p]
}
}

i.array = decodedWithPointers
i.capacity = uint(capacity)
return int64(len(encoded)) + int64(2*binary.Size(uint64(0))), nil
return decoded, capacity, size, nil
}

// GobEncode implements gob.GobEncoder interface.
Expand Down
101 changes: 98 additions & 3 deletions inverse_test.go
Expand Up @@ -4,6 +4,7 @@ import (
"bytes"
"encoding/gob"
"github.com/d4l3k/messagediff"
"os"
"strconv"
"testing"
)
Expand Down Expand Up @@ -70,6 +71,56 @@ func TestInverseTestAndAdd(t *testing.T) {
}
}

// TestInverseBloomFilter_ReadFrom ensures that an InverseBloomFilter written
// to a file with WriteTo can be restored from that file with ReadFrom.
func TestInverseBloomFilter_ReadFrom(t *testing.T) {
	const path = "TestInverseBloomFilter_ReadFrom.dat"

	d, err := os.Create(path)
	if err != nil {
		// Without a file there is nothing to test; stop before using a nil handle.
		t.Fatalf("Failed to create test file: %v", err)
	}
	// Remove the fixture even when the test bails out early.
	defer os.Remove(path)

	// Write a filter
	f := NewInverseBloomFilter(10000)
	for i := 0; i < 1000; i++ {
		f.Add([]byte(strconv.Itoa(i)))
	}

	_, err = f.WriteTo(d)
	if cerr := d.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		t.Fatalf("Failed to write filter: %v", err)
	}

	// Read the filter into a new one
	f2 := NewInverseBloomFilter(10000)
	d, err = os.Open(path)
	if err != nil {
		t.Fatalf("Failed to open test file: %v", err)
	}
	// Deferred calls run last-in-first-out, so the file is closed before it
	// is removed.
	defer d.Close()

	read, err := f2.ReadFrom(d)
	if err != nil {
		t.Fatal(err)
	}

	// NOTE(review): 12814 is presumably the gob-encoded payload for this
	// fixture plus the two uint64 header fields; confirm if the encoding changes.
	if read != 12814 {
		t.Errorf("Expected to read 12814 bytes, read %v", read)
	}

	if f.capacity != f2.capacity {
		t.Error("Different capacities")
	}

	if len(f.array) != len(f2.array) {
		t.Error("Different data")
	}

	if diff, equal := messagediff.PrettyDiff(f.array, f2.array); !equal {
		t.Errorf("BloomFilter WriteTo and ReadFrom = %+v; not %+v\n%s", f2, f, diff)
	}

	// Both filters must answer identically, including for keys never added.
	for i := 0; i < 100000; i++ {
		key := []byte(strconv.Itoa(i))
		if f.Test(key) != f2.Test(key) {
			t.Errorf("Expected both filters to Test the same for %d", i)
		}
	}
}

// Tests that an InverseBloomFilter can be encoded and decoded properly without error
func TestInverseBloomFilter_Encode(t *testing.T) {
f := NewInverseBloomFilter(10000)
Expand All @@ -89,11 +140,11 @@ func TestInverseBloomFilter_Encode(t *testing.T) {
}

if f.capacity != f2.capacity {
t.Errorf("Different capacities")
t.Errorf("Expected capacity %v is different from actual capacity %v", f.capacity, f2.capacity)
}

if len(f.array) != len(f2.array) {
t.Errorf("Different data")
t.Error("Different data between filter 1 and filter 2.")
}

if diff, equal := messagediff.PrettyDiff(f.array, f2.array); !equal {
Expand All @@ -102,11 +153,55 @@ func TestInverseBloomFilter_Encode(t *testing.T) {

for i := 0; i < 100000; i++ {
if f.Test([]byte(strconv.Itoa(i))) != f2.Test([]byte(strconv.Itoa(i))) {
t.Errorf("Expected both filters to Test the same for %i", i)
t.Errorf("Expected both filters to test the same for %d", i)
}
}
}

// TestInverseBloomFilter_ImportElementsFrom ensures that elements written out
// by one filter can be imported into a filter of a different capacity with
// ImportElementsFrom, and that the importing filter keeps its own capacity.
func TestInverseBloomFilter_ImportElementsFrom(t *testing.T) {
	const path = "TestInverseBloomFilter_ImportElementsFrom.dat"
	elements := [][]byte{[]byte(`a`), []byte(`b`), []byte(`c`)}

	// Write out a bloom filter of size 3
	f1 := NewInverseBloomFilter(3)
	for _, b := range elements {
		f1.Add(b)
	}

	d, err := os.Create(path)
	if err != nil {
		// Stop here rather than writing through a nil file handle.
		t.Fatalf("Failed to create test file: %v", err)
	}
	// Remove the fixture even when the test bails out early.
	defer os.Remove(path)

	_, err = f1.WriteTo(d)
	if cerr := d.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		t.Fatalf("Failed to write filter: %v", err)
	}

	// Read the data into a new filter of size 5
	f2 := NewInverseBloomFilter(5)
	d, err = os.Open(path)
	if err != nil {
		t.Fatalf("Failed to open test file: %v", err)
	}
	// Deferred calls run last-in-first-out, so the file is closed before it
	// is removed.
	defer d.Close()

	if _, err := f2.ImportElementsFrom(d); err != nil {
		t.Fatalf("Failed to import elements: %v", err)
	}

	for _, b := range elements {
		if !f2.TestAndAdd(b) {
			t.Errorf("f2 should have '%s' but returned false", b)
		}
	}

	// Assert that the new filter is still of the new size
	if len(f2.array) != 5 {
		t.Errorf("Expected len of f2.array to be 5, instead found %v", len(f2.array))
	}
}

func BenchmarkInverseAdd(b *testing.B) {
b.StopTimer()
f := NewInverseBloomFilter(100000)
Expand Down

0 comments on commit dee13ba

Please sign in to comment.