Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-1848 Bug: HNSW index fails if initial insert has doc id > 24999 #1851

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
56 changes: 40 additions & 16 deletions adapters/repos/db/vector/hnsw/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
package hnsw

import (
"context"
"testing"

"github.com/semi-technologies/weaviate/adapters/repos/db/vector/hnsw/distancer"
Expand All @@ -20,22 +21,7 @@ import (
)

func TestHnswIndex(t *testing.T) {
// mock out commit logger before adding data so we don't leave a disk
// footprint. Commit logging and deserializing from a (condensed) commit log
// is tested in a separate integration test that takes care of providing and
// cleaning up the correct place on disk to write test files
makeCL := MakeNoopCommitLogger
index, err := New(Config{
RootPath: "doesnt-matter-as-committlogger-is-mocked-out",
ID: "unittest",
MakeCommitLoggerThunk: makeCL,
DistanceProvider: distancer.NewCosineProvider(),
VectorForIDThunk: testVectorForID,
}, UserConfig{
MaxConnections: 30,
EFConstruction: 60,
})
require.Nil(t, err)
index := createEmptyHnswIndexForTests(t, testVectorForID)

for i, vec := range testVectors {
err := index.Add(uint64(i), vec)
Expand Down Expand Up @@ -74,3 +60,41 @@ func TestHnswIndex(t *testing.T) {
}, res)
})
}

// TestHnswIndexGrow verifies that the very first insert into an empty index
// succeeds even when its doc id lies beyond the initially allocated size.
func TestHnswIndexGrow(t *testing.T) {
	vec := []float32{0.1, 0.2}
	// every id resolves to the same vector, which is all this test requires
	index := createEmptyHnswIndexForTests(t,
		func(ctx context.Context, id uint64) ([]float32, error) {
			return vec, nil
		})

	t.Run("should grow initial empty index", func(t *testing.T) {
		// Adding an id larger than the default initial size to an index that
		// does not grow in its initial state used to fail with:
		//   panic: runtime error: index out of range [25001] with length 25000
		// insertInitialElement now grows the index before storing the first
		// node, so this Add must succeed.
		firstID := uint64(initialSize + 1)
		require.Nil(t, index.Add(firstID, vec))
	})
}

// createEmptyHnswIndexForTests constructs an empty hnsw index for unit tests.
// vecForIDFn is installed as the index's VectorForIDThunk. The test is failed
// immediately (via require) if the index cannot be constructed, so the
// returned index is always usable.
func createEmptyHnswIndexForTests(t *testing.T, vecForIDFn VectorForID) *hnsw {
	// mark this function as a test helper so that a failed construction is
	// reported at the calling test's line rather than inside this function
	t.Helper()

	// mock out commit logger before adding data so we don't leave a disk
	// footprint. Commit logging and deserializing from a (condensed) commit log
	// is tested in a separate integration test that takes care of providing and
	// cleaning up the correct place on disk to write test files
	makeCL := MakeNoopCommitLogger
	index, err := New(Config{
		RootPath:              "doesnt-matter-as-committlogger-is-mocked-out",
		ID:                    "unittest",
		MakeCommitLoggerThunk: makeCL,
		DistanceProvider:      distancer.NewCosineProvider(),
		VectorForIDThunk:      vecForIDFn,
	}, UserConfig{
		MaxConnections: 30,
		EFConstruction: 60,
	})
	require.Nil(t, err)
	return index
}
5 changes: 5 additions & 0 deletions adapters/repos/db/vector/hnsw/insert.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ func (h *hnsw) insertInitialElement(node *vertex, nodeVec []float32) error {
return err
}

err := h.growIndexToAccomodateNode(node.id, h.logger)
if err != nil {
return errors.Wrapf(err, "grow HNSW index to accommodate node %d", node.id)
}

h.nodes[node.id] = node

// go h.insertHook(node.id, 0, node.connections)
Expand Down