brain/sqlbrain: use reservoir sampling · zephyrtronium/robot@f0596f8

Commit

brain/sqlbrain: use reservoir sampling

goos: linux
goarch: amd64
pkg: github.com/zephyrtronium/robot/brain/sqlbrain
cpu: AMD Ryzen 9 7900X 12-Core Processor
                                   │    old.bench    │               new.bench                │
                                   │     sec/op      │     sec/op      vs base                │
Speak/2/similar-new-1000-4            730.4µ ±    1%    561.3µ ±   5%   -23.16% (p=0.002 n=6)
Speak/2/similar-new-10000-4           8.424m ±    2%    7.904m ±   2%    -6.18% (p=0.002 n=6)
Speak/2/similar-new-100000-4          121.3m ±    8%    159.9m ±   1%   +31.76% (p=0.002 n=6)
Speak/2/distinct-new-1000-4           2.061m ±    6%    1.490m ±   4%   -27.73% (p=0.002 n=6)
Speak/2/distinct-new-10000-4          32.44m ±   20%    19.92m ±   7%   -38.59% (p=0.002 n=6)
Speak/2/distinct-new-100000-4        1082.8m ±  151%    539.2m ± 102%         ~ (p=0.394 n=6)
Speak/2/distinct-prompted-1000-4      2.049m ±    7%    1.268m ±   2%   -38.12% (p=0.002 n=6)
Speak/2/distinct-prompted-10000-4     28.92m ±   22%    16.77m ±  12%   -42.01% (p=0.002 n=6)
Speak/2/distinct-prompted-100000-4    948.2m ±  236%    220.8m ±  27%   -76.72% (p=0.002 n=6)
Speak/4/similar-new-1000-4           1207.8µ ±    1%    907.5µ ±   1%   -24.86% (p=0.002 n=6)
Speak/4/similar-new-10000-4           14.25m ±    1%    14.17m ±   1%         ~ (p=0.065 n=6)
Speak/4/similar-new-100000-4          226.8m ±    2%    321.6m ±   3%   +41.80% (p=0.002 n=6)
Speak/4/distinct-new-1000-4           3.639m ±    8%    2.233m ±   4%   -38.63% (p=0.002 n=6)
Speak/4/distinct-new-10000-4          53.12m ±   26%    31.86m ±  12%   -40.02% (p=0.002 n=6)
Speak/4/distinct-new-100000-4          1.764 ±  117%     1.555 ±  53%         ~ (p=0.132 n=6)
Speak/4/distinct-prompted-1000-4      3.339m ±    6%    1.982m ±   3%   -40.62% (p=0.002 n=6)
Speak/4/distinct-prompted-10000-4     54.34m ± 1882%    26.93m ±   6%   -50.43% (p=0.002 n=6)
Speak/4/distinct-prompted-100000-4     1.470 ±  240%     1.490 ±  55%         ~ (p=0.485 n=6)
Speak/6/similar-new-1000-4            1.707m ±    3%    1.307m ±   1%   -23.47% (p=0.002 n=6)
Speak/6/similar-new-10000-4           21.67m ±    2%    22.54m ±   3%    +4.05% (p=0.002 n=6)
Speak/6/similar-new-100000-4          335.0m ±   19%   1182.2m ±   1%  +252.86% (p=0.002 n=6)
Speak/6/distinct-new-1000-4           4.191m ±    8%    2.396m ±   5%   -42.82% (p=0.002 n=6)
Speak/6/distinct-new-10000-4          60.07m ±   53%    31.89m ±   7%   -46.91% (p=0.002 n=6)
Speak/6/distinct-new-100000-4          3.438 ±   52%     1.273 ±  42%   -62.98% (p=0.002 n=6)
Speak/6/distinct-prompted-1000-4      3.968m ±    5%    2.048m ±   2%   -48.39% (p=0.002 n=6)
Speak/6/distinct-prompted-10000-4     68.95m ±   54%    28.30m ±   6%   -58.96% (p=0.002 n=6)
Speak/6/distinct-prompted-100000-4   2329.1m ±   83%    877.3m ±  65%   -62.33% (p=0.002 n=6)
geomean                               38.70m            26.72m          -30.96%

                                   │   old.bench    │                  new.bench                   │
                                   │      B/op      │       B/op         vs base                   │
Speak/2/similar-new-1000-4           4.214Ki ±   0%     163.106Ki ±  0%    +3770.70% (p=0.002 n=6)
Speak/2/similar-new-10000-4          4.245Ki ±   0%    1604.558Ki ±  0%   +37697.72% (p=0.002 n=6)
Speak/2/similar-new-100000-4         4.807Ki ±   5%   16136.353Ki ±  0%  +335609.57% (p=0.002 n=6)
Speak/2/distinct-new-1000-4          16.21Ki ±   6%       82.80Ki ±  2%     +410.65% (p=0.002 n=6)
Speak/2/distinct-new-10000-4         17.74Ki ±  15%      754.20Ki ±  3%    +4151.23% (p=0.002 n=6)
Speak/2/distinct-new-100000-4        17.14Ki ± 184%     7961.01Ki ± 20%   +46334.68% (p=0.002 n=6)
Speak/2/distinct-prompted-1000-4     16.22Ki ±   4%       35.76Ki ±  2%     +120.51% (p=0.002 n=6)
Speak/2/distinct-prompted-10000-4    16.04Ki ±  15%      279.89Ki ±  8%    +1645.18% (p=0.002 n=6)
Speak/2/distinct-prompted-100000-4   16.84Ki ± 147%     2769.11Ki ± 21%   +16343.77% (p=0.002 n=6)
Speak/4/similar-new-1000-4           7.419Ki ±   0%     259.888Ki ±  0%    +3403.03% (p=0.002 n=6)
Speak/4/similar-new-10000-4          7.495Ki ±   1%    2545.145Ki ±  0%   +33859.59% (p=0.002 n=6)
Speak/4/similar-new-100000-4         8.232Ki ±   5%   25515.364Ki ±  0%  +309837.51% (p=0.002 n=6)
Speak/4/distinct-new-1000-4          22.93Ki ±   7%      109.96Ki ±  2%     +379.53% (p=0.002 n=6)
Speak/4/distinct-new-10000-4         23.05Ki ±  38%      980.48Ki ±  7%    +4153.13% (p=0.002 n=6)
Speak/4/distinct-new-100000-4        20.91Ki ± 120%    11115.19Ki ±  0%   +53046.96% (p=0.002 n=6)
Speak/4/distinct-prompted-1000-4     21.99Ki ±   7%       62.57Ki ±  3%     +184.55% (p=0.002 n=6)
Speak/4/distinct-prompted-10000-4    23.81Ki ± 446%      524.83Ki ±  8%    +2103.86% (p=0.002 n=6)
Speak/4/distinct-prompted-100000-4   16.73Ki ± 223%     6422.23Ki ± 23%   +38295.38% (p=0.002 n=6)
Speak/6/similar-new-1000-4           11.38Ki ±   0%      357.43Ki ±  0%    +3040.62% (p=0.002 n=6)
Speak/6/similar-new-10000-4          11.76Ki ±   2%     3486.99Ki ±  0%   +29556.81% (p=0.002 n=6)
Speak/6/similar-new-100000-4         12.96Ki ±   5%    34893.04Ki ±  0%  +269187.95% (p=0.002 n=6)
Speak/6/distinct-new-1000-4          30.81Ki ±   8%      119.52Ki ±  1%     +287.99% (p=0.002 n=6)
Speak/6/distinct-new-10000-4         30.89Ki ±  32%     1072.85Ki ±  4%    +3373.10% (p=0.002 n=6)
Speak/6/distinct-new-100000-4        51.84Ki ±  58%    11157.33Ki ± 16%   +21421.08% (p=0.002 n=6)
Speak/6/distinct-prompted-1000-4     30.16Ki ±   4%       71.17Ki ±  1%     +135.99% (p=0.002 n=6)
Speak/6/distinct-prompted-10000-4    32.90Ki ±  51%      594.33Ki ±  5%    +1706.40% (p=0.002 n=6)
Speak/6/distinct-prompted-100000-4   31.12Ki ± 200%     6672.47Ki ± 22%   +21343.05% (p=0.002 n=6)
geomean                              15.76Ki              1.031Mi          +6598.86%

                                   │   old.bench   │                 new.bench                  │
                                   │   allocs/op   │    allocs/op     vs base                   │
Speak/2/similar-new-1000-4            128.0 ±   0%      8110.0 ±  0%    +6235.94% (p=0.002 n=6)
Speak/2/similar-new-10000-4           128.0 ±   0%     80110.0 ±  0%   +62485.94% (p=0.002 n=6)
Speak/2/similar-new-100000-4          143.0 ±   5%    800123.0 ±  0%  +559426.57% (p=0.002 n=6)
Speak/2/distinct-new-1000-4           459.0 ±   6%      3383.0 ±  1%     +637.04% (p=0.002 n=6)
Speak/2/distinct-new-10000-4          502.5 ±  15%     32016.0 ±  3%    +6271.34% (p=0.002 n=6)
Speak/2/distinct-new-100000-4         486.5 ± 176%    339462.0 ± 20%   +69676.36% (p=0.002 n=6)
Speak/2/distinct-prompted-1000-4      454.5 ±   4%      1377.5 ±  2%     +203.08% (p=0.002 n=6)
Speak/2/distinct-prompted-10000-4     449.0 ±  14%     11783.0 ±  8%    +2524.28% (p=0.002 n=6)
Speak/2/distinct-prompted-100000-4    468.0 ± 147%    117968.5 ± 21%   +25106.94% (p=0.002 n=6)
Speak/4/similar-new-1000-4            199.0 ±   0%     12173.0 ±  0%    +6017.09% (p=0.002 n=6)
Speak/4/similar-new-10000-4           201.0 ±   1%    120174.5 ±  0%   +59688.31% (p=0.002 n=6)
Speak/4/similar-new-100000-4          219.0 ±   4%   1200208.0 ±  0%  +547940.18% (p=0.002 n=6)
Speak/4/distinct-new-1000-4           599.0 ±   7%      4465.5 ±  2%     +645.49% (p=0.002 n=6)
Speak/4/distinct-new-10000-4          604.5 ±  38%     41601.5 ±  7%    +6781.97% (p=0.002 n=6)
Speak/4/distinct-new-100000-4         548.0 ± 118%    473945.0 ±  0%   +86386.31% (p=0.002 n=6)
Speak/4/distinct-prompted-1000-4      569.5 ±   7%      2446.5 ±  3%     +329.59% (p=0.002 n=6)
Speak/4/distinct-prompted-10000-4     616.0 ± 450%     22156.5 ±  8%    +3496.83% (p=0.002 n=6)
Speak/4/distinct-prompted-100000-4    430.0 ± 225%    273714.0 ± 23%   +63554.42% (p=0.002 n=6)
Speak/6/similar-new-1000-4            282.0 ±   0%     16248.0 ±  0%    +5661.70% (p=0.002 n=6)
Speak/6/similar-new-10000-4           292.0 ±   2%    160262.5 ±  0%   +54784.42% (p=0.002 n=6)
Speak/6/similar-new-100000-4          320.5 ±   5%   1600260.0 ±  0%  +499201.09% (p=0.002 n=6)
Speak/6/distinct-new-1000-4           760.5 ±   8%      4813.5 ±  1%     +532.94% (p=0.002 n=6)
Speak/6/distinct-new-10000-4          763.0 ±  32%     45473.5 ±  4%    +5859.83% (p=0.002 n=6)
Speak/6/distinct-new-100000-4        1.287k ±  57%    475.695k ± 16%   +36861.54% (p=0.002 n=6)
Speak/6/distinct-prompted-1000-4      738.0 ±   4%      2758.5 ±  1%     +273.78% (p=0.002 n=6)
Speak/6/distinct-prompted-10000-4     806.5 ±  51%     25067.0 ±  5%    +3008.12% (p=0.002 n=6)
Speak/6/distinct-prompted-100000-4    755.0 ± 202%    284334.0 ± 22%   +37560.13% (p=0.002 n=6)
geomean                               417.8             45.85k         +10873.39%

Loading branch information

zephyrtronium committed Mar 7, 2024

1 parent 352f33f commit f0596f8

brain/sqlbrain/speak.go

-Original file line number
+Diff line change
@@ -5,17 +5,54 @@ import (
     	"database/sql"
     	_ "embed"
     	"fmt"
+    	"math"
+    	"math/rand/v2"
     	"strconv"
     	"gitlab.com/zephyrtronium/sq"
     	"github.com/zephyrtronium/robot/brain"
     )
+    func gumbelscan(rows *sq.Rows) (string, error) {
+    	var s string
+    	w := rand.Float64()
+    	if rows.Next() {
+    		err := rows.Scan(&s)
+    		if err != nil {
+    			return "", fmt.Errorf("couldn't scan first string in sample: %w", err)
+    		}
+    	}
+    Loop:
+    	for rows.Next() {
+    		u := math.Log(rand.Float64())/math.Log(1-w) + 1
+    		if math.IsNaN(u) || u <= 0 {
+    			continue
+    		}
+    		for range uint64(u) {
+    			if !rows.Next() {
+    				break Loop
+    			}
+    		}
+    		err := rows.Scan(&s)
+    		if err != nil {
+    			return "", fmt.Errorf("couldn't scan string for sample: %w", err)
+    		}
+    		w *= rand.Float64()
+    	}
+    	if rows.Err() != nil {
+    		return "", fmt.Errorf("couldn't get sample: %w", rows.Err())
+    	}
+    	return s, nil
+    }
     // New creates a new prompt.
     func (br *Brain) New(ctx context.Context, tag string) ([]string, error) {
-    	var s string
-    	err := br.stmts.newTuple.QueryRow(ctx, tag).Scan(&s)
+    	rows, err := br.stmts.newTuple.Query(ctx, tag)
+    	if err != nil {
+    		return nil, fmt.Errorf("couldn't run query for new chain: %w", err)
+    	}
+    	s, err := gumbelscan(rows)
     	if err != nil {
     		return nil, fmt.Errorf("couldn't get new chain: %w", err)
     	}
@@ Expand All @@
     		p[i] = names[i]
     	}
     	for nn < 500 {
-    		var w string
-    		err := br.stmts.selectTuple.QueryRow(ctx, p...).Scan(&w)
+    		rows, err := br.stmts.selectTuple.Query(ctx, p...)
+    		if err != nil {
+    			return nil, fmt.Errorf("couldn't run query to continue chain with terms %v: %w", terms, err)
+    		}
+    		w, err := gumbelscan(rows)
     		if err != nil {
-    			return nil, fmt.Errorf("couldn't scan chain with terms %v: %w", terms, err)
+    			return nil, fmt.Errorf("couldn't continue chain with terms %v: %w", terms, err)
     		}
     		if w == "" {
     			break
     		}
+    		nn += len(w) + 1
     		prompt = append(prompt, w)
     		// Note that each p[i] is a named arg, and each name for prefix
     		// elements aliases an element of terms. So, just updating terms is
@@ Expand Down @@

brain/sqlbrain/templates/tuple.new.sql

            
                      Original file line number
                      Diff line number
                      Diff line change
                  
@@ -1,14 +1,6 @@
-    -- Select a start-of-message term with a given tag.
+    -- Select all start-of-message terms with a given tag.
     -- The template input requires $.NM1 to be (order-1).
-    WITH InitialSet AS (
-        SELECT
-            RANDOM() AS ordinal,
-            suffix
-        FROM MessageTuple
-        WHERE tag = ?
-            AND p{{$.NM1}} = ''
-    )
     SELECT suffix
-    FROM InitialSet
-    ORDER BY ordinal
-    LIMIT 1
+    FROM MessageTuple
+    WHERE tag = ?
+        AND p{{$.NM1}} = ''

brain/sqlbrain/templates/tuple.select.sql

-Original file line number
+Diff line change
@@ -26,13 +26,9 @@ WITH InitialSet AS (
             suffix
         FROM InitialSet
     ), Thresholded AS (
-        SELECT
-            RANDOM() AS ordinal,
-            suffix
+        SELECT suffix
         FROM Scored
         WHERE score >= {{$.MinScore}}
     )
     SELECT suffix
     FROM Thresholded
-    ORDER BY ordinal
-    LIMIT 1

0 comments on commit `f0596f8`

Please sign in to comment.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Commit

There are no files selected for viewing

0 comments on commit `f0596f8`

Commit

There are no files selected for viewing

0 comments on commit f0596f8

0 comments on commit `f0596f8`