-
Notifications
You must be signed in to change notification settings - Fork 682
/
compress.go
109 lines (96 loc) · 2.74 KB
/
compress.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: hello@weaviate.io
//
package hnsw
import (
"context"
"errors"
"fmt"
"github.com/weaviate/weaviate/adapters/repos/db/vector/compressionhelpers"
"github.com/weaviate/weaviate/entities/storobj"
ent "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
)
// calculateOptimalSegments derives a PQ segment count from the vector
// dimensionality. The first matching rule wins:
//
//   - dims >= 2048 and divisible by 8: dims / 8
//   - dims >= 768 and divisible by 6:  dims / 6
//   - dims >= 256 and divisible by 4:  dims / 4
//   - dims divisible by 2:             dims / 2
//
// If no rule matches, dims itself is returned (one segment per dimension).
func (h *hnsw) calculateOptimalSegments(dims int) int {
	switch {
	case dims >= 2048 && dims%8 == 0:
		return dims / 8
	case dims >= 768 && dims%6 == 0:
		return dims / 6
	case dims >= 256 && dims%4 == 0:
		return dims / 4
	case dims%2 == 0:
		return dims / 2
	default:
		return dims
	}
}
// compress builds a vector compressor for the index and moves the cached
// vectors into it. It returns nil immediately when neither cfg.PQ nor cfg.BQ
// is enabled; otherwise compressActionLock is held until the function
// returns.
//
// With PQ enabled the index must not be empty. A non-positive
// cfg.PQ.Segments is replaced by calculateOptimalSegments (and mirrored into
// h.pqConfig), the vectors used for fitting are fetched through the cache,
// and the resulting compressor's fields are appended to the commit log.
// Otherwise (BQ enabled) a BQ compressor is created.
//
// In both cases every non-nil entry of the cache dump is then preloaded into
// the compressor, the index is flagged as compressed and the cache is
// dropped.
func (h *hnsw) compress(cfg ent.UserConfig) error {
	if !(cfg.PQ.Enabled || cfg.BQ.Enabled) {
		return nil
	}

	h.compressActionLock.Lock()
	defer h.compressActionLock.Unlock()

	vectors := h.cache.All()

	var err error
	if cfg.PQ.Enabled {
		if h.isEmpty() {
			return errors.New("Compress command cannot be executed before inserting some data. Please, insert your data first.")
		}

		dims := int(h.dims)
		if cfg.PQ.Segments <= 0 {
			cfg.PQ.Segments = h.calculateOptimalSegments(dims)
			h.pqConfig.Segments = cfg.PQ.Segments
		}

		trainingSet := make([][]float32, 0, len(vectors))
		for id := range vectors {
			// Ask the cache for each vector explicitly instead of trusting the
			// dump: entries that are not currently cached (e.g. the cache is
			// still cold after a restart) would otherwise be left out.
			vec, getErr := h.cache.Get(context.Background(), uint64(id))
			if getErr != nil {
				var notFound storobj.ErrNotFound
				if !errors.As(getErr, &notFound) {
					return fmt.Errorf("unexpected error obtaining vectors for fitting: %w", getErr)
				}
				// Not found: treated as already deleted, skip it.
				continue
			}
			if vec != nil {
				trainingSet = append(trainingSet, vec)
			}
			// A nil vector is likewise treated as already deleted.
		}

		h.compressor, err = compressionhelpers.NewHNSWPQCompressor(
			cfg.PQ, h.distancerProvider, dims, 1e12, h.logger, trainingSet, h.store,
			h.allocChecker)
		if err != nil {
			return fmt.Errorf("Compressing vectors: %w", err)
		}
		h.commitLog.AddPQ(h.compressor.ExposeFields())
	} else {
		h.compressor, err = compressionhelpers.NewBQCompressor(
			h.distancerProvider, 1e12, h.logger, h.store, h.allocChecker)
		if err != nil {
			return err
		}
	}

	// Hand every vector present in the cache dump over to the compressor.
	preload := func(id uint64) {
		if vec := vectors[id]; vec != nil {
			h.compressor.Preload(id, vec)
		}
	}
	compressionhelpers.Concurrently(h.logger, uint64(len(vectors)), preload)

	h.compressed.Store(true)
	h.cache.Drop()
	return nil
}