diff --git a/classic.go b/classic.go index 2c0ba2b..1ede1ff 100644 --- a/classic.go +++ b/classic.go @@ -144,7 +144,7 @@ func (b *BloomFilter) WriteTo(stream io.Writer) (int64, error) { return 0, err } - return writtenSize + int64(3 * binary.Size(uint64(0))), err + return writtenSize + int64(3*binary.Size(uint64(0))), err } // ReadFrom reads a binary representation of BloomFilter (such as might @@ -176,7 +176,7 @@ func (b *BloomFilter) ReadFrom(stream io.Reader) (int64, error) { b.m = uint(m) b.k = uint(k) b.buckets = &buckets - return readSize + int64(3 * binary.Size(uint64(0))), nil + return readSize + int64(3*binary.Size(uint64(0))), nil } // GobEncode implements gob.GobEncoder interface. diff --git a/inverse.go b/inverse.go index a24df64..23e8446 100644 --- a/inverse.go +++ b/inverse.go @@ -181,19 +181,62 @@ func (i *InverseBloomFilter) WriteTo(stream io.Writer) (int64, error) { } // ReadFrom reads a binary representation of InverseBloomFilter (such as might -// have been written by WriteTo()) from an i/o stream. It returns the number +// have been written by WriteTo()) from an i/o stream. ReadFrom replaces the +// array and capacity of its filter with those read from the stream. It returns the number // of bytes read. 
func (i *InverseBloomFilter) ReadFrom(stream io.Reader) (int64, error) { + decoded, capacity, size, err := i.decodeToArray(stream) + if err != nil { + return int64(0), err + } + + // Build the []*[]byte array: nil for an empty slot, otherwise a pointer into decoded + decodedWithPointers := make([]*[]byte, capacity) + for p := range decodedWithPointers { + if len(decoded[p]) == 0 { + decodedWithPointers[p] = nil + } else { + decodedWithPointers[p] = &decoded[p] + } + } + + i.array = decodedWithPointers + i.capacity = uint(capacity) + return int64(size) + int64(2*binary.Size(uint64(0))), nil +} + +// ImportElementsFrom reads a binary representation of InverseBloomFilter (such as might +// have been written by WriteTo()) from an i/o stream and inserts its elements into this +// filter using the Add() method (skipping empty elements, if any). It returns the number +// of slots decoded from the stream, empty ones included. +func (i *InverseBloomFilter) ImportElementsFrom(stream io.Reader) (int, error) { + decoded, _, _, err := i.decodeToArray(stream) + if err != nil { + return 0, err + } + + // Add every non-empty decoded element to this filter + for p := range decoded { + if len(decoded[p]) > 0 { + i.Add(decoded[p]) + } + } + + return len(decoded), nil +} + +// decodeToArray decodes an inverse bloom filter from an i/o stream into a 2-d byte slice. 
+func (i *InverseBloomFilter) decodeToArray(stream io.Reader) ([][]byte, uint64, uint64, error) { var capacity, size uint64 err := binary.Read(stream, binary.BigEndian, &capacity) if err != nil { - return 0, err + return nil, 0, 0, err } err = binary.Read(stream, binary.BigEndian, &size) if err != nil { - return 0, err + return nil, 0, 0, err } // Read the encoded slice and decode into [][]byte @@ -204,19 +247,7 @@ func (i *InverseBloomFilter) ReadFrom(stream io.Reader) (int64, error) { decoded := make([][]byte, capacity) dec.Decode(&decoded) - // Create []*[]byte and point to each item in decoded - decodedWithPointers := make([]*[]byte, capacity) - for p := range decodedWithPointers { - if len(decoded[p]) == 0 { - decodedWithPointers[p] = nil - } else { - decodedWithPointers[p] = &decoded[p] - } - } - - i.array = decodedWithPointers - i.capacity = uint(capacity) - return int64(len(encoded)) + int64(2*binary.Size(uint64(0))), nil + return decoded, capacity, size, nil } // GobEncode implements gob.GobEncoder interface. 
diff --git a/inverse_test.go b/inverse_test.go index faa5401..a7443ac 100644 --- a/inverse_test.go +++ b/inverse_test.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/gob" "github.com/d4l3k/messagediff" + "os" "strconv" "testing" ) @@ -70,6 +71,56 @@ func TestInverseTestAndAdd(t *testing.T) { } } +// Ensures an InverseBloomFilter written with WriteTo() can be restored with ReadFrom() +func TestInverseBloomFilter_ReadFrom(t *testing.T) { + d, err := os.Create("TestInverseBloomFilter_ReadFrom.dat") + + // Write a filter + f := NewInverseBloomFilter(10000) + + for i := 0; i < 1000; i++ { + f.Add([]byte(strconv.Itoa(i))) + } + + if _, err := f.WriteTo(d); err != nil { + t.Error(err) + } + d.Close() + + // Read the filter into a new one + f2 := NewInverseBloomFilter(10000) + d, err = os.Open("TestInverseBloomFilter_ReadFrom.dat") + read, err := f2.ReadFrom(d) + if err != nil { + t.Error(err) + } + d.Close() + + if read != 12814 { + t.Errorf("Expected to read 12814 bytes, read %v", read) + } + + if f.capacity != f2.capacity { + t.Error("Different capacities") + } + + if len(f.array) != len(f2.array) { + t.Error("Different data") + } + + if diff, equal := messagediff.PrettyDiff(f.array, f2.array); !equal { + t.Errorf("BloomFilter WriteTo and ReadFrom = %+v; not %+v\n%s", f2, f, diff) + } + + for i := 0; i < 100000; i++ { + if f.Test([]byte(strconv.Itoa(i))) != f2.Test([]byte(strconv.Itoa(i))) { + t.Errorf("Expected both filters to Test the same for %d", i) + } + } + + os.Remove("TestInverseBloomFilter_ReadFrom.dat") +} + // Tests that an InverseBloomFilter can be encoded and decoded properly without error func TestInverseBloomFilter_Encode(t *testing.T) { f := NewInverseBloomFilter(10000) @@ -89,11 +140,11 @@ func TestInverseBloomFilter_Encode(t *testing.T) { } if f.capacity != f2.capacity { - t.Errorf("Different capacities") + t.Errorf("Expected capacity %v is different from actual capacity %v", f.capacity, f2.capacity) } if len(f.array) != len(f2.array) { - t.Errorf("Different data") + 
t.Error("Different data between filter 1 and filter 2.") } if diff, equal := messagediff.PrettyDiff(f.array, f2.array); !equal { @@ -102,11 +153,55 @@ func TestInverseBloomFilter_Encode(t *testing.T) { for i := 0; i < 100000; i++ { if f.Test([]byte(strconv.Itoa(i))) != f2.Test([]byte(strconv.Itoa(i))) { - t.Errorf("Expected both filters to Test the same for %i", i) + t.Errorf("Expected both filters to test the same for %d", i) } } } +func TestInverseBloomFilter_ImportElementsFrom(t *testing.T) { + // Write out a bloom filter of size 3 + f1 := NewInverseBloomFilter(3) + for _, b := range [][]byte{[]byte(`a`), []byte(`b`), []byte(`c`)} { + f1.Add(b) + } + + d, err := os.Create("TestInverseBloomFilter_ImportElementsFrom.dat") + if err != nil { + t.Errorf("Failed to create test file: %v", err) + } + + f1.WriteTo(d) + d.Close() + + // Read the data into a new filter of size 5 + f2 := NewInverseBloomFilter(5) + d, err = os.Open("TestInverseBloomFilter_ImportElementsFrom.dat") + if err != nil { + t.Errorf("Failed to open test file: %v", err) + } + + f2.ImportElementsFrom(d) + + if f2.TestAndAdd([]byte(`a`)) != true { + t.Error("f2 should have 'a' but returned false") + } + + if f2.TestAndAdd([]byte(`b`)) != true { + t.Error("f2 should have 'b' but returned false") + } + + if f2.TestAndAdd([]byte(`c`)) != true { + t.Error("f2 should have 'c' but returned false") + } + + // Assert that the receiving filter kept its own array length (5), not the imported one (3) + if len(f2.array) != 5 { + t.Errorf("Expected len of f2.array to be 5, instead found %v", len(f2.array)) + } + + os.Remove("TestInverseBloomFilter_ImportElementsFrom.dat") +} + func BenchmarkInverseAdd(b *testing.B) { b.StopTimer() f := NewInverseBloomFilter(100000)