Skip to content

Commit

Permalink
brain/kvbrain: implement speaking
Browse files Browse the repository at this point in the history
For #41.
  • Loading branch information
zephyrtronium committed Mar 16, 2024
1 parent e9a9ce7 commit 18e3841
Show file tree
Hide file tree
Showing 6 changed files with 307 additions and 17 deletions.
7 changes: 3 additions & 4 deletions brain/kvbrain/forget.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package kvbrain
import (
"bytes"
"context"
"encoding/binary"
"fmt"
"slices"
"strings"
Expand Down Expand Up @@ -109,14 +108,14 @@ func (br *Brain) Forget(ctx context.Context, tag string, tuples []brain.Tuple) e
return p
})
err := br.knowledge.Update(func(txn *badger.Txn) error {
th := hashTag(tag)
opts := badger.DefaultIteratorOptions
opts.Prefix = binary.LittleEndian.AppendUint64(nil, th)
opts.Prefix = hashTag(nil, tag)
it := txn.NewIterator(badger.DefaultIteratorOptions)
defer it.Close()
var b []byte
for _, t := range tuples {
b = keystart(b[:0], tag, t.Prefix)
b = hashTag(b[:0], tag)
b = append(appendPrefix(b, t.Prefix), '\xff') // terminate the prefix
it.Seek(b)
for it.ValidForPrefix(b) {
v := it.Item()
Expand Down
5 changes: 3 additions & 2 deletions brain/kvbrain/kvbrain.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,9 @@ func (br *Brain) Order() int {
return 250
}

// hashTag appends the 64-bit FNV-1a hash of tag to b and returns the
// extended slice. The hash forms the leading component of every
// knowledge key, so all keys for one tag share an 8-byte prefix.
func hashTag(b []byte, tag string) []byte {
	hasher := fnv.New64a()
	io.WriteString(hasher, tag)
	return hasher.Sum(b)
}
18 changes: 10 additions & 8 deletions brain/kvbrain/learn.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package kvbrain
import (
"bytes"
"context"
"encoding/binary"
"errors"
"fmt"

Expand All @@ -28,18 +27,19 @@ func (br *Brain) Learn(ctx context.Context, meta *brain.MessageMeta, tuples []br
var b []byte
tag := meta.Tag
for i, t := range tuples {
b = keystart(b[:0], tag, t.Prefix)
b = hashTag(b[:0], tag)
b = append(appendPrefix(b, t.Prefix), '\xff')
// Write message ID.
b = append(b, meta.ID[:]...)
keys[i] = bytes.Clone(b)
vals[i] = []byte(t.Suffix)
}

p, _ := br.past.Load(meta.Tag)
p, _ := br.past.Load(tag)
if p == nil {
// We might race with others also creating this past. Ensure we don't
// overwrite if that happens.
p, _ = br.past.LoadOrStore(meta.Tag, new(past))
p, _ = br.past.LoadOrStore(tag, new(past))
}
p.record(meta.ID, meta.User, meta.Time.UnixNano(), keys)

Expand All @@ -58,16 +58,18 @@ func (br *Brain) Learn(ctx context.Context, meta *brain.MessageMeta, tuples []br
return nil
}

// appendPrefix appends the prefix components for a knowledge key to b,
// without the sentinel that marks the end of the prefix. Components are
// written last-to-first, each followed by a \xff separator, stopping at
// the first empty component. To serve as a knowledge key, b should
// already contain the hashed tag, and the caller must append a final
// \xff terminator (and then the message ID for a complete key).
func appendPrefix(b []byte, prefix []string) []byte {
	for i := len(prefix); i > 0; i-- {
		w := prefix[i-1]
		if w == "" {
			// Empty components mark the unused portion of the context;
			// everything before them is excluded from the key.
			break
		}
		b = append(b, w...)
		b = append(b, '\xff')
	}
	return b
}
5 changes: 2 additions & 3 deletions brain/kvbrain/learn_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package kvbrain

import (
"context"
"encoding/binary"
"testing"
"time"

Expand All @@ -14,8 +13,8 @@ import (
)

func mkey(tag, toks string, id uuid.UUID) string {
b := make([]byte, 8, 8+len(toks)+len(id))
binary.LittleEndian.PutUint64(b, hashTag(tag))
b := make([]byte, 0, 8+len(toks)+len(id))
b = hashTag(b, tag)
b = append(b, toks...)
b = append(b, id[:]...)
return string(b)
Expand Down
136 changes: 136 additions & 0 deletions brain/kvbrain/speak.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
package kvbrain

import (
"context"
"fmt"
"math/rand/v2"

"github.com/dgraph-io/badger/v4"

"github.com/zephyrtronium/robot/brain"
)

// New chooses a prompt with which to begin a random message. When a
// message is generated with no prompt, the result from New is passed
// directly to Speak, so it is this speaker's responsibility to meet any
// requirements on length and matchable content. Only knowledge recorded
// under the given tag is used.
func (br *Brain) New(ctx context.Context, tag string) ([]string, error) {
	// Speaking from a nil prompt restricts selection to options that
	// begin a message, which is exactly what a fresh prompt needs.
	return br.Speak(ctx, tag, nil)
}

// Speak generates a full message from the given prompt. The prompt is
// guaranteed to have length equal to the value returned from Order,
// unless it is a prompt returned from New; shorter prompts are padded
// at the front with empty strings. Matching uses ReduceEntropy on every
// token, including the prompt's own tokens. Only knowledge recorded
// under the given tag contributes to the message.
func (br *Brain) Speak(ctx context.Context, tag string, prompt []string) ([]string, error) {
	msg := make([]string, 0, len(prompt))
	// Keep the non-empty prompt tokens verbatim for the output, but
	// swap them in the working prompt for their entropy-reduced forms,
	// which is what the knowledge keys are built from.
	for i, tok := range prompt {
		if tok == "" {
			continue
		}
		msg = append(msg, tok)
		prompt[i] = brain.ReduceEntropy(tok)
	}
	opts := badger.DefaultIteratorOptions
	// Selection walks keys only; the single value we settle on per step
	// is fetched separately, so prefetching values is wasted work.
	opts.PrefetchValues = false
	opts.Prefix = hashTag(nil, tag)
	var b []byte
	for {
		b = hashTag(b[:0], tag)
		tok, mem, rest, err := br.next(b, prompt, opts)
		if err != nil {
			return nil, err
		}
		if tok == "" {
			// No continuation found: the message is complete.
			return msg, nil
		}
		b, prompt = mem, rest
		msg = append(msg, tok)
		prompt = append(prompt, brain.ReduceEntropy(tok))
	}
}

// next finds a single token to continue a prompt.
// The returned values are, in order, the new term, b with possibly appended
// memory, the suffix of prompt which matched to produce the new term, and
// any error. If the returned term is the empty string, generation should end.
func (br *Brain) next(b []byte, prompt []string, opts badger.IteratorOptions) (string, []byte, []string, error) {
	// These definitions are outside the loop to ensure we don't bias toward
	// smaller contexts: the running maximum and its key survive retries with
	// shortened prompts, so earlier (longer-context) candidates still compete.
	var (
		key    []byte // copy of the best candidate key seen so far
		m      uint64 // maximum variate observed so far
		picked int    // times the maximum changed; a cheap "enough options" heuristic
	)
	b = appendPrefix(b, prompt)
	if len(prompt) == 0 {
		// If we have no prompt, then we want to make sure we select only
		// options that start a message.
		b = append(b, '\xff')
	}
	for {
		err := br.knowledge.View(func(txn *badger.Txn) error {
			it := txn.NewIterator(opts)
			defer it.Close()
			it.Seek(b)
			for it.ValidForPrefix(b) {
				// We generate a uniform variate per key, then choose the key
				// that gets the maximum variate. Taking the argmax of i.i.d.
				// uniform variates selects uniformly among all candidates
				// without needing to know their count in advance.
				u := rand.Uint64()
				if m <= u {
					item := it.Item()
					// TODO(zeph): for #43, check deleted uuids so we never
					// pick a message that has been deleted
					key = item.KeyCopy(key[:0])
					m = u
					picked++
				}
				it.Next()
			}
			return nil
		})
		if err != nil {
			return "", b, prompt, fmt.Errorf("couldn't read knowledge: %w", err)
		}
		if picked < 3 && len(prompt) > 1 {
			// We haven't seen enough options, and we have context we could
			// lose. Do so and try again from the beginning.
			// NOTE(review): picked counts maximum updates, not candidates
			// seen, so this underestimates the option count — presumably an
			// acceptable heuristic; confirm intent.
			// TODO(zeph): we could save the start of the prompt so we don't
			// reallocate, and we could construct the next key to use by
			// trimming off the end of the current one
			prompt = prompt[1:]
			// b[:8] keeps only the hashed tag (an FNV-64 hash is 8 bytes)
			// and rebuilds the key from the shortened prompt.
			b = appendPrefix(b[:8], prompt)
			continue
		}
		if key == nil {
			// We never saw any options. Since we always select the first, this
			// means there were no options. Don't look for nothing in the DB.
			return "", b, prompt, nil
		}
		// Second read: fetch only the value of the single chosen key. Values
		// were not prefetched during iteration, so this is the first time the
		// suffix itself is read.
		err = br.knowledge.View(func(txn *badger.Txn) error {
			item, err := txn.Get(key)
			if err != nil {
				return fmt.Errorf("couldn't get item for key %q: %w", key, err)
			}
			b, err = item.ValueCopy(b[:0])
			if err != nil {
				return fmt.Errorf("couldn't get value for key %q: %w", key, err)
			}
			return nil
		})
		if err != nil {
			return "", b, prompt, err
		}
		// b now holds the stored suffix; return it as the new term.
		return string(b), b, prompt, nil
	}
}
Loading

0 comments on commit 18e3841

Please sign in to comment.