Skip to content

Commit

Permalink
brain/kvbrain: implement speaking
Browse files Browse the repository at this point in the history
For #41.
  • Loading branch information
zephyrtronium committed Mar 16, 2024
1 parent e9a9ce7 commit 18e3841
Show file tree
Hide file tree
Showing 6 changed files with 307 additions and 17 deletions.
7 changes: 3 additions & 4 deletions brain/kvbrain/forget.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package kvbrain
import (
"bytes"
"context"
"encoding/binary"
"fmt"
"slices"
"strings"
Expand Down Expand Up @@ -109,14 +108,14 @@ func (br *Brain) Forget(ctx context.Context, tag string, tuples []brain.Tuple) e
return p
})
err := br.knowledge.Update(func(txn *badger.Txn) error {
th := hashTag(tag)
opts := badger.DefaultIteratorOptions
opts.Prefix = binary.LittleEndian.AppendUint64(nil, th)
opts.Prefix = hashTag(nil, tag)
it := txn.NewIterator(badger.DefaultIteratorOptions)
defer it.Close()
var b []byte
for _, t := range tuples {
b = keystart(b[:0], tag, t.Prefix)
b = hashTag(b[:0], tag)
b = append(appendPrefix(b, t.Prefix), '\xff') // terminate the prefix
it.Seek(b)
for it.ValidForPrefix(b) {
v := it.Item()
Expand Down
5 changes: 3 additions & 2 deletions brain/kvbrain/kvbrain.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ func (br *Brain) Order() int {
return 250
}

// hashTag appends the 64-bit FNV-1a hash of tag to b and returns the
// extended slice. The hash forms the leading component of every
// knowledge key, so all keys for one tag share an 8-byte prefix.
func hashTag(b []byte, tag string) []byte {
	hasher := fnv.New64a()
	io.WriteString(hasher, tag)
	return hasher.Sum(b)
}
18 changes: 10 additions & 8 deletions brain/kvbrain/learn.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package kvbrain
import (
"bytes"
"context"
"encoding/binary"
"errors"
"fmt"

Expand All @@ -28,18 +27,19 @@ func (br *Brain) Learn(ctx context.Context, meta *brain.MessageMeta, tuples []br
var b []byte
tag := meta.Tag
for i, t := range tuples {
b = keystart(b[:0], tag, t.Prefix)
b = hashTag(b[:0], tag)
b = append(appendPrefix(b, t.Prefix), '\xff')
// Write message ID.
b = append(b, meta.ID[:]...)
keys[i] = bytes.Clone(b)
vals[i] = []byte(t.Suffix)
}

p, _ := br.past.Load(meta.Tag)
p, _ := br.past.Load(tag)
if p == nil {
// We might race with others also creating this past. Ensure we don't
// overwrite if that happens.
p, _ = br.past.LoadOrStore(meta.Tag, new(past))
p, _ = br.past.LoadOrStore(tag, new(past))
}
p.record(meta.ID, meta.User, meta.Time.UnixNano(), keys)

Expand All @@ -58,16 +58,18 @@ func (br *Brain) Learn(ctx context.Context, meta *brain.MessageMeta, tuples []br
return nil
}

// appendPrefix appends the prefix components for a knowledge key to b,
// without the sentinel that marks the end of the prefix. Components are
// written last-to-first, each followed by a \xff separator, stopping at
// the first empty component. To serve as a knowledge key, b should
// already contain the hashed tag, and the caller must append a final
// \xff terminator (and then the message ID for a complete key).
func appendPrefix(b []byte, prefix []string) []byte {
	for i := len(prefix); i > 0; i-- {
		w := prefix[i-1]
		if w == "" {
			// Empty components mark the unused portion of the context;
			// everything before them is excluded from the key.
			break
		}
		b = append(b, w...)
		b = append(b, '\xff')
	}
	return b
}
5 changes: 2 additions & 3 deletions brain/kvbrain/learn_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package kvbrain

import (
"context"
"encoding/binary"
"testing"
"time"

Expand All @@ -14,8 +13,8 @@ import (
)

func mkey(tag, toks string, id uuid.UUID) string {
b := make([]byte, 8, 8+len(toks)+len(id))
binary.LittleEndian.PutUint64(b, hashTag(tag))
b := make([]byte, 0, 8+len(toks)+len(id))
b = hashTag(b, tag)
b = append(b, toks...)
b = append(b, id[:]...)
return string(b)
Expand Down
136 changes: 136 additions & 0 deletions brain/kvbrain/speak.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
package kvbrain

import (
"context"
"fmt"
"math/rand/v2"

"github.com/dgraph-io/badger/v4"

"github.com/zephyrtronium/robot/brain"
)

// New chooses a prompt with which to begin a random message. When a
// message is generated with no prompt, the result from New is passed
// directly to Speak, so it is this speaker's responsibility to meet any
// requirements on length and matchable content. Only knowledge recorded
// under the given tag is used.
func (br *Brain) New(ctx context.Context, tag string) ([]string, error) {
	// Speaking from a nil prompt restricts selection to options that
	// begin a message, which is exactly what a fresh prompt needs.
	return br.Speak(ctx, tag, nil)
}

// Speak generates a full message from the given prompt. The prompt is
// guaranteed to have length equal to the value returned from Order,
// unless it is a prompt returned from New; shorter prompts are padded
// at the front with empty strings. Matching uses ReduceEntropy on every
// token, including the prompt's own tokens. Only knowledge recorded
// under the given tag contributes to the message.
func (br *Brain) Speak(ctx context.Context, tag string, prompt []string) ([]string, error) {
	msg := make([]string, 0, len(prompt))
	// Keep the non-empty prompt tokens verbatim for the output, but
	// swap them in the working prompt for their entropy-reduced forms,
	// which is what the knowledge keys are built from.
	for i, tok := range prompt {
		if tok == "" {
			continue
		}
		msg = append(msg, tok)
		prompt[i] = brain.ReduceEntropy(tok)
	}
	opts := badger.DefaultIteratorOptions
	// Selection walks keys only; the single value we settle on per step
	// is fetched separately, so prefetching values is wasted work.
	opts.PrefetchValues = false
	opts.Prefix = hashTag(nil, tag)
	var b []byte
	for {
		b = hashTag(b[:0], tag)
		tok, mem, rest, err := br.next(b, prompt, opts)
		if err != nil {
			return nil, err
		}
		if tok == "" {
			// No continuation found: the message is complete.
			return msg, nil
		}
		b, prompt = mem, rest
		msg = append(msg, tok)
		prompt = append(prompt, brain.ReduceEntropy(tok))
	}
}

// next finds a single token to continue a prompt.
// The returned values are, in order, the new term, b with possibly appended
// memory, the suffix of prompt which matched to produce the new term, and
// any error. If the returned term is the empty string, generation should end.
func (br *Brain) next(b []byte, prompt []string, opts badger.IteratorOptions) (string, []byte, []string, error) {
	// These definitions are outside the loop to ensure we don't bias toward
	// smaller contexts: the running maximum and its key survive retries with
	// shortened prompts, so earlier (longer-context) candidates still compete.
	var (
		key    []byte // copy of the best candidate key seen so far
		m      uint64 // maximum variate observed so far
		picked int    // times the maximum changed; a cheap "enough options" heuristic
	)
	b = appendPrefix(b, prompt)
	if len(prompt) == 0 {
		// If we have no prompt, then we want to make sure we select only
		// options that start a message.
		b = append(b, '\xff')
	}
	for {
		err := br.knowledge.View(func(txn *badger.Txn) error {
			it := txn.NewIterator(opts)
			defer it.Close()
			it.Seek(b)
			for it.ValidForPrefix(b) {
				// We generate a uniform variate per key, then choose the key
				// that gets the maximum variate. Taking the argmax of i.i.d.
				// uniform variates selects uniformly among all candidates
				// without needing to know their count in advance.
				u := rand.Uint64()
				if m <= u {
					item := it.Item()
					// TODO(zeph): for #43, check deleted uuids so we never
					// pick a message that has been deleted
					key = item.KeyCopy(key[:0])
					m = u
					picked++
				}
				it.Next()
			}
			return nil
		})
		if err != nil {
			return "", b, prompt, fmt.Errorf("couldn't read knowledge: %w", err)
		}
		if picked < 3 && len(prompt) > 1 {
			// We haven't seen enough options, and we have context we could
			// lose. Do so and try again from the beginning.
			// NOTE(review): picked counts maximum updates, not candidates
			// seen, so this underestimates the option count — presumably an
			// acceptable heuristic; confirm intent.
			// TODO(zeph): we could save the start of the prompt so we don't
			// reallocate, and we could construct the next key to use by
			// trimming off the end of the current one
			prompt = prompt[1:]
			// b[:8] keeps only the hashed tag (an FNV-64 hash is 8 bytes)
			// and rebuilds the key from the shortened prompt.
			b = appendPrefix(b[:8], prompt)
			continue
		}
		if key == nil {
			// We never saw any options. Since we always select the first, this
			// means there were no options. Don't look for nothing in the DB.
			return "", b, prompt, nil
		}
		// Second read: fetch only the value of the single chosen key. Values
		// were not prefetched during iteration, so this is the first time the
		// suffix itself is read.
		err = br.knowledge.View(func(txn *badger.Txn) error {
			item, err := txn.Get(key)
			if err != nil {
				return fmt.Errorf("couldn't get item for key %q: %w", key, err)
			}
			b, err = item.ValueCopy(b[:0])
			if err != nil {
				return fmt.Errorf("couldn't get value for key %q: %w", key, err)
			}
			return nil
		})
		if err != nil {
			return "", b, prompt, err
		}
		// b now holds the stored suffix; return it as the new term.
		return string(b), b, prompt, nil
	}
}
Loading

0 comments on commit 18e3841

Please sign in to comment.