-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
brain/kvbrain: start of badger/bbolt version of brain
- Loading branch information
1 parent
f0596f8
commit c6925f3
Showing
5 changed files
with
389 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
package kvbrain | ||
|
||
import ( | ||
"context" | ||
"time" | ||
|
||
"github.com/dgraph-io/badger/v4" | ||
"github.com/google/uuid" | ||
|
||
"github.com/zephyrtronium/robot/brain" | ||
"github.com/zephyrtronium/robot/userhash" | ||
) | ||
|
||
/* | ||
Message key structure: | ||
Tag × Tuples × UUID | ||
- Tag is a fixed-width string of tagBytes bytes (currently 8), padded with \x00. | ||
- Tuple terms are separated by \xff sentinels. Terms are recorded in reverse order. | ||
- The final tuple term is the empty string, so the tuple portion ends with \xff\xff. | ||
- UUID is the raw uuid. | ||
As with the SQL approach, we record every prefix with its suffix, including the | ||
final empty prefix. | ||
Operations: | ||
- Find a start tuple: Search for a prefix of tag × \xff. | ||
- Find a continuation: | ||
+ With full context, just search for it, again in reverse order. | ||
+ When we reduce context, record by how much and only search for that much. | ||
+ In both cases, and with start tuple, check message UUID and tags we | ||
select against the deletions db. | ||
- Learn: Construct the key according to above. The suffix is the entire value. | ||
Record a mapping of tag, UUID, timestamp, and userhash to keys. | ||
- Forget tuples: thinking… | ||
- ForgetMessage, ForgetDuring, ForgetUserSince: Look up the actual keys to | ||
delete in the recording taken during learning. | ||
*/ | ||
|
||
// Brain is a brain.Learner that records learned tuples in a badger key-value | ||
// database, using the message key structure described above. | ||
type Brain struct { | ||
// knowledge is the badger database that Learn writes tuple entries to. | ||
knowledge *badger.DB | ||
} | ||
|
||
// Compile-time check that *Brain implements brain.Learner. | ||
var _ brain.Learner = (*Brain)(nil) | ||
|
||
func New(knowledge *badger.DB) *Brain { | ||
return &Brain{ | ||
knowledge: knowledge, | ||
} | ||
} | ||
|
||
// tagBytes is the number of bytes used to record tags in the KV database. | ||
// Learn copies the tag into a zeroed buffer of this size, so shorter tags are | ||
// padded with \x00 and longer tags are truncated to their first tagBytes bytes. | ||
const tagBytes = 8 // TODO(zeph): we should just use a hash instead | ||
|
||
// Order returns the number of elements in the prefix of a chain. It is | ||
// called once at the beginning of learning. The returned value must always | ||
// be at least 1. | ||
// | ||
// This implementation always returns the fixed value 250. | ||
func (br *Brain) Order() int { | ||
// TODO(zeph): this can go away one day | ||
return 250 | ||
} | ||
|
||
// Forget removes a set of recorded tuples. The tuples provided are as for | ||
// Learn. If a tuple has been recorded multiple times, only the first | ||
// should be deleted. If a tuple has not been recorded, it should be | ||
// ignored. | ||
// | ||
// NOTE: Forget is not implemented yet; calling it currently panics. | ||
func (br *Brain) Forget(ctx context.Context, tag string, tuples []brain.Tuple) error { | ||
panic("not implemented") // TODO: Implement | ||
} | ||
|
||
// ForgetMessage forgets everything learned from a single given message. | ||
// If nothing has been learned from the message, it should be ignored. | ||
// | ||
// NOTE: ForgetMessage is not implemented yet; calling it currently panics. | ||
func (br *Brain) ForgetMessage(ctx context.Context, tag string, msg uuid.UUID) error { | ||
panic("not implemented") // TODO: Implement | ||
} | ||
|
||
// ForgetDuring forgets all messages learned in the given time span. | ||
// | ||
// NOTE: ForgetDuring is not implemented yet; calling it currently panics. | ||
func (br *Brain) ForgetDuring(ctx context.Context, tag string, since, before time.Time) error { | ||
panic("not implemented") // TODO: Implement | ||
} | ||
|
||
// ForgetUserSince forgets all messages learned from a user since a given | ||
// time. | ||
// | ||
// NOTE: ForgetUserSince is not implemented yet; calling it currently panics. | ||
func (br *Brain) ForgetUserSince(ctx context.Context, user *userhash.Hash, since time.Time) error { | ||
panic("not implemented") // TODO: Implement | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
package kvbrain | ||
|
||
import ( | ||
"bytes" | ||
"context" | ||
"errors" | ||
"fmt" | ||
"slices" | ||
|
||
"github.com/zephyrtronium/robot/brain" | ||
) | ||
|
||
// Learn records a set of tuples. Each tuple prefix has length equal to the | ||
// result of Order. The tuples begin with empty strings in the prefix to | ||
// denote the start of the message and end with one empty suffix to denote | ||
// the end; all other tokens are non-empty. Each tuple's prefix has entropy | ||
// reduction transformations applied. | ||
func (br *Brain) Learn(ctx context.Context, meta *brain.MessageMeta, tuples []brain.Tuple) error { | ||
if len(tuples) == 0 { | ||
return errors.New("no tuples to learn") | ||
} | ||
// Construct the keys and values we will use. | ||
// There are probably things we could do to control allocations since we're | ||
// using many overlapping tuples for keys, but it's tremendously easier to | ||
// just fill up a buffer for each. | ||
type entry struct { | ||
key []byte | ||
val []byte | ||
} | ||
entries := make([]entry, len(tuples)) | ||
var b bytes.Buffer | ||
p := make([]string, 0, len(tuples[0].Prefix)) | ||
for i, t := range tuples { | ||
b.Reset() | ||
// Write the tag. | ||
u := make([]byte, tagBytes) | ||
copy(u, meta.Tag) | ||
b.Write(u) | ||
// Write prefixes. | ||
k := slices.IndexFunc(t.Prefix, func(s string) bool { return s != "" }) | ||
if k < 0 { | ||
// First prefix of the message. We want to write only the separator. | ||
k = len(t.Prefix) | ||
} | ||
p = append(p[:0], t.Prefix[k:]...) | ||
slices.Reverse(p) | ||
for _, s := range p { | ||
b.WriteString(s) | ||
b.WriteByte('\xff') | ||
} | ||
b.WriteByte('\xff') | ||
// Write message ID. | ||
b.Write(meta.ID[:]) | ||
entries[i] = entry{ | ||
key: bytes.Clone(b.Bytes()), | ||
val: []byte(t.Suffix), | ||
} | ||
} | ||
// TODO(zeph): record mapping of metadata to key | ||
batch := br.knowledge.NewWriteBatch() | ||
defer batch.Cancel() | ||
for _, e := range entries { | ||
err := batch.Set(e.key, e.val) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
err := batch.Flush() | ||
if err != nil { | ||
return fmt.Errorf("couldn't commit learned knowledge: %w", err) | ||
} | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
package kvbrain | ||
|
||
import ( | ||
"context" | ||
"testing" | ||
"time" | ||
|
||
"github.com/dgraph-io/badger/v4" | ||
"github.com/google/uuid" | ||
|
||
"github.com/zephyrtronium/robot/brain" | ||
"github.com/zephyrtronium/robot/userhash" | ||
) | ||
|
||
func TestLearn(t *testing.T) { | ||
mkey := func(tag, toks string, id uuid.UUID) string { | ||
b := make([]byte, tagBytes, tagBytes+len(toks)+len(id)) | ||
copy(b, tag) | ||
b = append(b, toks...) | ||
b = append(b, id[:]...) | ||
return string(b) | ||
} | ||
uu := uuid.UUID{':', ')', ':', ')', ':', ')', ':', ')', ':', ')', ':', ')', ':', ')', ':', ')'} | ||
h := userhash.Hash{2} | ||
cases := []struct { | ||
name string | ||
msg brain.MessageMeta | ||
tups []brain.Tuple | ||
want map[string]string | ||
}{ | ||
{ | ||
name: "single", | ||
msg: brain.MessageMeta{ | ||
ID: uu, | ||
User: h, | ||
Tag: "kessoku", | ||
Time: time.Unix(0, 0), | ||
}, | ||
tups: []brain.Tuple{ | ||
{ | ||
Prefix: []string{""}, | ||
Suffix: "bocchi", | ||
}, | ||
}, | ||
want: map[string]string{ | ||
mkey("kessoku", "\xff", uu): "bocchi", | ||
}, | ||
}, | ||
{ | ||
name: "full", | ||
msg: brain.MessageMeta{ | ||
ID: uu, | ||
User: h, | ||
Tag: "kessoku", | ||
Time: time.Unix(0, 0), | ||
}, | ||
tups: []brain.Tuple{ | ||
{ | ||
Prefix: []string{"", "", "", ""}, | ||
Suffix: "bocchi", | ||
}, | ||
{ | ||
Prefix: []string{"", "", "", "bocchi"}, | ||
Suffix: "ryou", | ||
}, | ||
{ | ||
Prefix: []string{"", "", "bocchi", "ryou"}, | ||
Suffix: "nijika", | ||
}, | ||
{ | ||
Prefix: []string{"", "bocchi", "ryou", "nijika"}, | ||
Suffix: "kita", | ||
}, | ||
{ | ||
Prefix: []string{"bocchi", "ryou", "nijika", "kita"}, | ||
Suffix: "seika", | ||
}, | ||
{ | ||
Prefix: []string{"ryou", "nijika", "kita", "seika"}, | ||
Suffix: "", | ||
}, | ||
}, | ||
want: map[string]string{ | ||
mkey("kessoku", "\xff", uu): "bocchi", | ||
mkey("kessoku", "bocchi\xff\xff", uu): "ryou", | ||
mkey("kessoku", "ryou\xffbocchi\xff\xff", uu): "nijika", | ||
mkey("kessoku", "nijika\xffryou\xffbocchi\xff\xff", uu): "kita", | ||
mkey("kessoku", "kita\xffnijika\xffryou\xffbocchi\xff\xff", uu): "seika", | ||
mkey("kessoku", "seika\xffkita\xffnijika\xffryou\xff\xff", uu): "", | ||
}, | ||
}, | ||
} | ||
for _, c := range cases { | ||
t.Run(c.name, func(t *testing.T) { | ||
t.Parallel() | ||
ctx := context.Background() | ||
db, err := badger.Open(badger.DefaultOptions("").WithInMemory(true).WithLogger(nil)) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
br := New(db) | ||
if err := br.Learn(ctx, &c.msg, c.tups); err != nil { | ||
t.Errorf("failed to learn: %v", err) | ||
} | ||
seen := 0 | ||
err = db.View(func(txn *badger.Txn) error { | ||
opts := badger.IteratorOptions{} | ||
it := txn.NewIterator(opts) | ||
defer it.Close() | ||
for it.Rewind(); it.Valid(); it.Next() { | ||
item := it.Item() | ||
k := string(item.Key()) | ||
v, err := item.ValueCopy(nil) | ||
if err != nil { | ||
t.Errorf("couldn't get value for key %q: %v", k, err) | ||
} | ||
if got := string(v); c.want[k] != got { | ||
t.Errorf("wrong value for key %q: want %q, got %q", k, c.want[k], got) | ||
} | ||
seen++ | ||
} | ||
return nil | ||
}) | ||
if err != nil { | ||
t.Errorf("view failed: %v", err) | ||
} | ||
if seen != len(c.want) { | ||
t.Errorf("saw wrong number of items: want %d, got %d", len(c.want), seen) | ||
} | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.