gh-1852 sort memtable KV pairs on read

The memtable for Map is a binary tree so it's always sorted. However, since this is type 'Map', each "row key" holds a map. This map was unsorted in the past. In #1832 we introduced a change that made sure this map would always be sorted ON DISK, i.e. in the segments. It was very natural to also keep it sorted in the memtable, as we then did not have to do any sorting when flushing. However, the performance tests on imports that make heavy use of the inverted index showed a large performance degradation after #1832. In a test I did locally the import time went up by over 30%. This fix goes back to keeping the KV pairs unsorted and making each change an append-only operation. This means it now needs to be sorted in just two places (as opposed to on every single insertion): 1. On a read query. Those should be rare on a memtable, since memtables are mostly meant for writing. The added (minimal) overhead here is not a problem since it was also there before #1832. 2. When flushing. Flushing is an async operation and the small overhead of sorting each row's Map KVs is negligible. This new implementation has the same import speed as prior to #1832 while keeping all the runtime benefits of having the KV pairs sorted on disk. closes #1852
weaviate · Mar 10, 2022 · d3d4de5 · d3d4de5
1 parent 877688d
commit d3d4de5
Showing 1 changed file with 36 additions and 32 deletions.
diff --git a/adapters/repos/db/lsmkv/binary_search_tree_map.go b/adapters/repos/db/lsmkv/binary_search_tree_map.go
@@ -11,7 +11,10 @@
 
 package lsmkv
 
-import "bytes"
+import (
+	"bytes"
+	"sort"
+)
 
 type binarySearchTreeMap struct {
 	root *binarySearchNodeMap
@@ -54,7 +57,8 @@ type binarySearchNodeMap struct {
 
 func (n *binarySearchNodeMap) insert(key []byte, pair MapPair) {
 	if bytes.Equal(key, n.key) {
-		n.values = insertSorted(n.values, pair)
+		// n.values = insertSorted(n.values, pair)
+		n.values = append(n.values, pair)
 		return
 	}
 
@@ -85,7 +89,7 @@ func (n *binarySearchNodeMap) insert(key []byte, pair MapPair) {
 
 func (n *binarySearchNodeMap) get(key []byte) ([]MapPair, error) {
 	if bytes.Equal(n.key, key) {
-		return n.values, nil
+		return sortAndDedupValues(n.values), nil
 	}
 
 	if bytes.Compare(key, n.key) < 0 {
@@ -115,41 +119,41 @@ func (n *binarySearchNodeMap) flattenInOrder() []*binarySearchNodeMap {
 		right = n.right.flattenInOrder()
 	}
 
+	// the values are sorted on read for performance reasons, the assumption is
+	// that while a memtable is open writes are much more common, thus we write map
+	// KVs unsorted and only sort/dedup them on read.
+	n.values = sortAndDedupValues(n.values)
+
 	right = append([]*binarySearchNodeMap{n}, right...)
 	return append(left, right...)
 }
 
-// insertSorted will insert at the right position by key sorting. If the exact
-// key exists, it will replace the elem. It only uses a linear search, as the
-// assumption is that not too many keys will be held in the memtable per elem
-func insertSorted(list []MapPair, newElem MapPair) []MapPair {
-	bestPos := 0
-	for i := range list {
-		if bestPos == len(list) {
-			return append(list, newElem)
+// takes a list of MapPair and sorts it by key while keeping the original
+// insertion order among equal keys. Then removes redundancies (from updates or
+// deletes after previous inserts) using a simple deduplication process.
+func sortAndDedupValues(in []MapPair) []MapPair {
+	// use SliceStable so that we keep the insert order on duplicates. This is
+	// important because otherwise we can't dedup them correctly if we don't know
+	// in which order they came in.
+	sort.SliceStable(in, func(a, b int) bool {
+		return bytes.Compare(in[a].Key, in[b].Key) < 0
+	})
+
+	// now deduping is as simple as looking one key ahead - if it's the same key
+	// simply skip the current element. Meaning "out" will be a subset of
+	// (sorted) "in".
+	out := make([]MapPair, len(in))
+
+	outIndex := 0
+	for inIndex, pair := range in {
+		// look ahead
+		if inIndex+1 < len(in) && bytes.Equal(in[inIndex+1].Key, pair.Key) {
+			continue
 		}
 
-		cmp := bytes.Compare(newElem.Key, list[i].Key)
-
-		if cmp == 0 {
-			// entry exists already, replace
-			list[i] = newElem
-			return list
-		}
-
-		if cmp == -1 {
-			break
-		}
-
-		bestPos++
-	}
-
-	if len(list) == bestPos {
-		return append(list, newElem)
+		out[outIndex] = pair
+		outIndex++
 	}
 
-	list = append(list[:bestPos+1], list[bestPos:]...)
-	list[bestPos] = newElem
-
-	return list
+	return out[:outIndex]
 }