Skip to content

Commit

Permalink
brain/kvbrain: start of badger/bbolt version of brain
Browse files Browse the repository at this point in the history
  • Loading branch information
zephyrtronium committed Mar 9, 2024
1 parent f0596f8 commit c6925f3
Show file tree
Hide file tree
Showing 5 changed files with 389 additions and 0 deletions.
85 changes: 85 additions & 0 deletions brain/kvbrain/kvbrain.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
package kvbrain

import (
"context"
"time"

"github.com/dgraph-io/badger/v4"
"github.com/google/uuid"

"github.com/zephyrtronium/robot/brain"
"github.com/zephyrtronium/robot/userhash"
)

/*
Message key structure:
Tag × Tuples × UUID
- Tag is a string of tagBytes bytes, truncated or padded with \x00 to fit.
- Tuple terms are separated by \xff sentinels. Terms are recorded in reverse order.
- The final tuple term is the empty string, so the tuple portion ends with \xff\xff.
- UUID is the raw uuid.
As with the SQL approach, we record every prefix with its suffix, including the
final empty prefix.
Operations:
- Find a start tuple: Search for a prefix of tag × \xff.
- Find a continuation:
+ With full context, just search for it, again in reverse order.
+ When we reduce context, record by how much and only search for that much.
+ In both cases, and with start tuple, check message UUID and tags we
select against the deletions db.
- Learn: Construct the key according to above. The suffix is the entire value.
Record a mapping of tag, UUID, timestamp, and userhash to keys.
- Forget tuples: thinking…
- ForgetMessage, ForgetDuring, ForgetUserSince: Look up the actual keys to
delete in the recording taken during learning.
*/

// Brain is a brain.Learner backed by a Badger key-value database.
type Brain struct {
// knowledge is the database that Learn writes tuple entries into.
knowledge *badger.DB
}

// Compile-time check that *Brain satisfies brain.Learner.
var _ brain.Learner = (*Brain)(nil)

// New creates a Brain that records what it learns in the given Badger
// database. The database is used as-is; New does not open or close it.
func New(knowledge *badger.DB) *Brain {
	br := new(Brain)
	br.knowledge = knowledge
	return br
}

// tagBytes is the number of bytes used to record tags in the KV database.
// Learn copies the tag into a zero-filled buffer of this size, so shorter
// tags are padded with \x00 and longer tags are truncated.
const tagBytes = 8 // TODO(zeph): we should just use a hash instead

// Order reports how many elements make up the prefix of a chain. Learning
// calls it once up front, and the result is always at least 1.
func (br *Brain) Order() int {
	// TODO(zeph): this can go away one day
	const order = 250
	return order
}

// Forget removes a set of recorded tuples. The tuples provided are as for
// Learn. If a tuple has been recorded multiple times, only the first
// should be deleted. If a tuple has not been recorded, it should be
// ignored.
//
// Forget is not implemented yet: the current body unconditionally panics
// with "not implemented" and never returns an error.
func (br *Brain) Forget(ctx context.Context, tag string, tuples []brain.Tuple) error {
panic("not implemented") // TODO: Implement
}

// ForgetMessage forgets everything learned from a single given message.
// If nothing has been learned from the message, it should be ignored.
//
// ForgetMessage is not implemented yet: the current body unconditionally
// panics with "not implemented" and never returns an error.
func (br *Brain) ForgetMessage(ctx context.Context, tag string, msg uuid.UUID) error {
panic("not implemented") // TODO: Implement
}

// ForgetDuring forgets all messages learned in the given time span.
//
// ForgetDuring is not implemented yet: the current body unconditionally
// panics with "not implemented" and never returns an error.
func (br *Brain) ForgetDuring(ctx context.Context, tag string, since, before time.Time) error {
panic("not implemented") // TODO: Implement
}

// ForgetUserSince forgets all messages learned from a user since a given
// time.
//
// ForgetUserSince is not implemented yet: the current body unconditionally
// panics with "not implemented" and never returns an error.
func (br *Brain) ForgetUserSince(ctx context.Context, user *userhash.Hash, since time.Time) error {
panic("not implemented") // TODO: Implement
}
73 changes: 73 additions & 0 deletions brain/kvbrain/learn.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package kvbrain

import (
"bytes"
"context"
"errors"
"fmt"
"slices"

"github.com/zephyrtronium/robot/brain"
)

// Learn records a set of tuples. Each tuple prefix has length equal to the
// result of Order. The tuples begin with empty strings in the prefix to
// denote the start of the message and end with one empty suffix to denote
// the end; all other tokens are non-empty. Each tuple's prefix has entropy
// reduction transformations applied.
//
// Every tuple becomes one database entry. Its key is the tag (truncated or
// zero-padded to tagBytes), followed by the prefix terms from the first
// non-empty one onward in reverse order, each terminated by \xff, followed by
// one more \xff and the raw message ID. Its value is the tuple's suffix.
// All entries are written through a single write batch.
//
// Learn returns an error if tuples is empty or if the batch cannot be filled
// or flushed.
func (br *Brain) Learn(ctx context.Context, meta *brain.MessageMeta, tuples []brain.Tuple) error {
	if len(tuples) == 0 {
		return errors.New("no tuples to learn")
	}
	// The tag portion of the key is the same for every tuple of the message,
	// so build it once instead of allocating a fresh buffer per tuple.
	var tag [tagBytes]byte
	copy(tag[:], meta.Tag)
	// Construct the keys and values we will use.
	// There are probably things we could do to control allocations since we're
	// using many overlapping tuples for keys, but it's tremendously easier to
	// just fill up a buffer for each.
	type entry struct {
		key []byte
		val []byte
	}
	entries := make([]entry, len(tuples))
	var b bytes.Buffer
	p := make([]string, 0, len(tuples[0].Prefix))
	for i, t := range tuples {
		b.Reset()
		// Write the tag.
		b.Write(tag[:])
		// Write prefixes, skipping the leading empty terms that mark the
		// start of the message.
		k := slices.IndexFunc(t.Prefix, func(s string) bool { return s != "" })
		if k < 0 {
			// First prefix of the message. We want to write only the separator.
			k = len(t.Prefix)
		}
		// Copy before reversing so the caller's prefix slice is not modified.
		p = append(p[:0], t.Prefix[k:]...)
		slices.Reverse(p)
		for _, s := range p {
			b.WriteString(s)
			b.WriteByte('\xff')
		}
		b.WriteByte('\xff')
		// Write message ID.
		b.Write(meta.ID[:])
		entries[i] = entry{
			// The buffer is reused on the next iteration, so the key must be
			// a copy of its contents.
			key: bytes.Clone(b.Bytes()),
			val: []byte(t.Suffix),
		}
	}
	// TODO(zeph): record mapping of metadata to key
	batch := br.knowledge.NewWriteBatch()
	defer batch.Cancel()
	for _, e := range entries {
		if err := batch.Set(e.key, e.val); err != nil {
			return fmt.Errorf("couldn't add learned knowledge to batch: %w", err)
		}
	}
	if err := batch.Flush(); err != nil {
		return fmt.Errorf("couldn't commit learned knowledge: %w", err)
	}
	return nil
}
132 changes: 132 additions & 0 deletions brain/kvbrain/learn_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
package kvbrain

import (
"context"
"testing"
"time"

"github.com/dgraph-io/badger/v4"
"github.com/google/uuid"

"github.com/zephyrtronium/robot/brain"
"github.com/zephyrtronium/robot/userhash"
)

// TestLearn checks that Learn writes exactly the expected key/value pairs
// into a fresh in-memory database for each case.
func TestLearn(t *testing.T) {
	// mkey builds an expected database key: the tag padded to tagBytes,
	// followed by the already-encoded tuple terms and the raw message ID.
	mkey := func(tag, toks string, id uuid.UUID) string {
		b := make([]byte, tagBytes, tagBytes+len(toks)+len(id))
		copy(b, tag)
		b = append(b, toks...)
		b = append(b, id[:]...)
		return string(b)
	}
	uu := uuid.UUID{':', ')', ':', ')', ':', ')', ':', ')', ':', ')', ':', ')', ':', ')', ':', ')'}
	h := userhash.Hash{2}
	cases := []struct {
		name string
		msg  brain.MessageMeta
		tups []brain.Tuple
		want map[string]string
	}{
		{
			name: "single",
			msg: brain.MessageMeta{
				ID:   uu,
				User: h,
				Tag:  "kessoku",
				Time: time.Unix(0, 0),
			},
			tups: []brain.Tuple{
				{
					Prefix: []string{""},
					Suffix: "bocchi",
				},
			},
			want: map[string]string{
				mkey("kessoku", "\xff", uu): "bocchi",
			},
		},
		{
			name: "full",
			msg: brain.MessageMeta{
				ID:   uu,
				User: h,
				Tag:  "kessoku",
				Time: time.Unix(0, 0),
			},
			tups: []brain.Tuple{
				{
					Prefix: []string{"", "", "", ""},
					Suffix: "bocchi",
				},
				{
					Prefix: []string{"", "", "", "bocchi"},
					Suffix: "ryou",
				},
				{
					Prefix: []string{"", "", "bocchi", "ryou"},
					Suffix: "nijika",
				},
				{
					Prefix: []string{"", "bocchi", "ryou", "nijika"},
					Suffix: "kita",
				},
				{
					Prefix: []string{"bocchi", "ryou", "nijika", "kita"},
					Suffix: "seika",
				},
				{
					Prefix: []string{"ryou", "nijika", "kita", "seika"},
					Suffix: "",
				},
			},
			want: map[string]string{
				mkey("kessoku", "\xff", uu):                                      "bocchi",
				mkey("kessoku", "bocchi\xff\xff", uu):                            "ryou",
				mkey("kessoku", "ryou\xffbocchi\xff\xff", uu):                    "nijika",
				mkey("kessoku", "nijika\xffryou\xffbocchi\xff\xff", uu):          "kita",
				mkey("kessoku", "kita\xffnijika\xffryou\xffbocchi\xff\xff", uu):  "seika",
				mkey("kessoku", "seika\xffkita\xffnijika\xffryou\xff\xff", uu):   "",
			},
		},
	}
	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			t.Parallel()
			ctx := context.Background()
			db, err := badger.Open(badger.DefaultOptions("").WithInMemory(true).WithLogger(nil))
			if err != nil {
				t.Fatal(err)
			}
			// Release the database when the subtest ends; otherwise every
			// subtest leaves an open DB behind.
			t.Cleanup(func() {
				if err := db.Close(); err != nil {
					t.Errorf("couldn't close database: %v", err)
				}
			})
			br := New(db)
			if err := br.Learn(ctx, &c.msg, c.tups); err != nil {
				t.Errorf("failed to learn: %v", err)
			}
			seen := 0
			err = db.View(func(txn *badger.Txn) error {
				opts := badger.IteratorOptions{}
				it := txn.NewIterator(opts)
				defer it.Close()
				for it.Rewind(); it.Valid(); it.Next() {
					item := it.Item()
					k := string(item.Key())
					seen++
					// Look the key up explicitly: a plain c.want[k] yields ""
					// for unknown keys, which would silently match a stored
					// empty suffix.
					want, ok := c.want[k]
					if !ok {
						t.Errorf("unexpected key %q", k)
						continue
					}
					v, err := item.ValueCopy(nil)
					if err != nil {
						t.Errorf("couldn't get value for key %q: %v", k, err)
						continue
					}
					if got := string(v); got != want {
						t.Errorf("wrong value for key %q: want %q, got %q", k, want, got)
					}
				}
				return nil
			})
			if err != nil {
				t.Errorf("view failed: %v", err)
			}
			if seen != len(c.want) {
				t.Errorf("saw wrong number of items: want %d, got %d", len(c.want), seen)
			}
		})
	}
}
13 changes: 13 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ go 1.22.0

require (
github.com/BurntSushi/toml v1.3.2
github.com/dgraph-io/badger/v4 v4.2.0
github.com/google/go-cmp v0.6.0
github.com/google/uuid v1.6.0
github.com/mattn/go-sqlite3 v1.14.22
Expand All @@ -18,8 +19,20 @@ require (
)

require (
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/dgraph-io/ristretto v0.1.1 // indirect
github.com/dustin/go-humanize v1.0.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/glog v1.0.0 // indirect
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/golang/snappy v0.0.3 // indirect
github.com/google/flatbuffers v1.12.1 // indirect
github.com/klauspost/compress v1.12.3 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/xrash/smetrics v0.0.0-20231213231151-1d8dd44e695e // indirect
go.opencensus.io v0.22.5 // indirect
golang.org/x/net v0.21.0 // indirect
golang.org/x/sys v0.17.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/protobuf v1.32.0 // indirect
Expand Down

0 comments on commit c6925f3

Please sign in to comment.