/
randomdata.go
164 lines (136 loc) · 4.46 KB
/
randomdata.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
/*
Copyright 2017 GitHub Inc.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package vttest
import (
"fmt"
"math/rand"
"strings"
)
// FieldGenerator is a callback that generates the value of a random field
// when seeding the database with random data. `name` is the name of the column
// where the field belongs, `t` is its SQL type, and `rng` is the RNG currently
// being used, so as to ensure reproducible generation between runs.
// A FieldGenerator must return the raw SQL data for the field, ready to be
// placed into a SQL statement. The returned value will _NOT_ be escaped.
type FieldGenerator func(name, t string, rng *rand.Rand) (string, error)
// SeedConfig holds the settings that enable the initialization of the
// local cluster with random data. This struct must be set in Config
// before Setup() is called.
type SeedConfig struct {
// RngSeed is the seed used to initialize the random number
// generator that will be used to fill the database with
// random data. Multiple runs with the same seed will result
// in the same initial data.
RngSeed int
// MinSize is the minimum number of initial rows in each table shard.
MinSize int
// MaxSize is the maximum number of initial rows in each table shard.
MaxSize int
// NullProbability is the chance to initialize a field with a NULL value.
// Only applies to fields that can contain NULL values.
NullProbability float64
// RandomField is a callback to generate the value of a random field.
// SeedConfigDefaults does not set this field.
RandomField FieldGenerator
}
// SeedConfigDefaults builds a SeedConfig pre-filled with the default
// settings: a randomly drawn RNG seed, between 1000 and 10000 rows per
// table shard, and a 0.1 probability of NULL for nullable fields.
// The RandomField callback is left unset.
func SeedConfigDefaults() *SeedConfig {
	defaults := new(SeedConfig)
	defaults.RngSeed = rand.Int()
	defaults.MinSize = 1000
	defaults.MaxSize = 10000
	defaults.NullProbability = 0.1
	return defaults
}
// batchInsertSize is the step, in rows, that populateTable uses when
// walking over the generated rows to insert them.
const batchInsertSize = 1000
// batchInsert writes all of `rows` into `table` of database `dbname` with a
// single multi-row INSERT IGNORE statement. `fields` lists the column names,
// and every entry of `rows` holds the raw (already SQL-formatted) values for
// those columns, in the same order. Values are placed in the statement as-is.
func (db *LocalCluster) batchInsert(dbname, table string, fields []string, rows [][]string) error {
	// Render each row as a parenthesized, comma-separated tuple.
	tuples := make([]string, 0, len(rows))
	for _, values := range rows {
		tuples = append(tuples, "("+strings.Join(values, ",")+")")
	}

	stmt := fmt.Sprintf("INSERT IGNORE INTO %s (%s) VALUES %s",
		table,
		strings.Join(fields, ","),
		strings.Join(tuples, ","),
	)
	return db.Execute([]string{stmt}, dbname)
}
// randomField returns the raw SQL value for one field of a generated row.
// `name` and `t` are the column name and SQL type, which are forwarded to the
// Seed.RandomField callback together with `rng`. When allowNull is set, the
// literal "NULL" is returned instead with probability Seed.NullProbability.
// An error is returned when a value is needed but no RandomField callback
// has been configured.
func (db *LocalCluster) randomField(name, t string, allowNull bool, rng *rand.Rand) (string, error) {
	// The RNG is only consulted for nullable columns, so the random sequence
	// consumed for a given schema stays stable between runs.
	if allowNull && rng.Float64() < db.Seed.NullProbability {
		return "NULL", nil
	}

	// SeedConfigDefaults leaves RandomField unset; calling a nil func would
	// panic, so report the misconfiguration as an error instead.
	generate := db.Seed.RandomField
	if generate == nil {
		return "", fmt.Errorf("no RandomField generator configured to seed field %q", name)
	}
	return generate(name, t, rng)
}
// populateTable fills `table` of database `dbname` with random rows.
// The column layout is read with DESCRIBE; a row count in the range
// [Seed.MinSize, Seed.MaxSize) is drawn from `rng` (exactly MinSize when both
// bounds are equal); every field is produced by randomField; and the rows are
// written in batches of at most batchInsertSize rows per INSERT statement.
// It returns an error if the table cannot be described, if MaxSize is smaller
// than MinSize, if a field cannot be generated, or if an insert fails.
func (db *LocalCluster) populateTable(dbname, table string, rng *rand.Rand) error {
	fieldInfo, err := db.Query(fmt.Sprintf("DESCRIBE %s", table), dbname, 1024)
	if err != nil {
		return err
	}

	minRows, maxRows := db.Seed.MinSize, db.Seed.MaxSize
	if maxRows < minRows {
		return fmt.Errorf("invalid seed config: MaxSize (%d) is smaller than MinSize (%d)", maxRows, minRows)
	}
	numRows := minRows
	if maxRows > minRows {
		// rand.Intn panics on a non-positive argument, so only draw from the
		// RNG when the range is not empty.
		numRows += rng.Intn(maxRows - minRows)
	}
	if numRows <= 0 {
		return nil
	}

	// Decode the column metadata once instead of once per generated row.
	// DESCRIBE yields the column name, its type and its nullability
	// ("YES"/"NO") as the first three values of each row.
	type column struct {
		name      string
		sqlType   string
		allowNull bool
	}
	columns := make([]column, 0, len(fieldInfo.Rows))
	fieldNames := make([]string, 0, len(fieldInfo.Rows))
	for _, row := range fieldInfo.Rows {
		col := column{
			name:      row[0].ToString(),
			sqlType:   row[1].ToString(),
			allowNull: row[2].ToString() == "YES",
		}
		columns = append(columns, col)
		fieldNames = append(fieldNames, col.name)
	}

	rows := make([][]string, 0, numRows)
	for i := 0; i < numRows; i++ {
		fields := make([]string, 0, len(columns))
		for _, col := range columns {
			f, err := db.randomField(col.name, col.sqlType, col.allowNull, rng)
			if err != nil {
				return err
			}
			fields = append(fields, f)
		}
		rows = append(rows, fields)
	}

	// Insert one batch at a time; each call receives only its own slice of
	// rows so that no row is sent more than once.
	for start := 0; start < len(rows); start += batchInsertSize {
		end := start + batchInsertSize
		if end > len(rows) {
			end = len(rows)
		}
		if err := db.batchInsert(dbname, table, fieldNames, rows[start:end]); err != nil {
			return err
		}
	}
	return nil
}
// populateShard seeds every table of the database `dbname` with random data.
// The table list comes from SHOW TABLES, and tables are populated one after
// another with the shared `rng`; the first failure aborts the process and is
// returned to the caller.
func (db *LocalCluster) populateShard(dbname string, rng *rand.Rand) error {
	tables, err := db.Query("SHOW TABLES", dbname, 1024)
	if err != nil {
		return err
	}

	for _, entry := range tables.Rows {
		// The first value of each result row is the table name.
		tableName := entry[0].ToString()
		err := db.populateTable(dbname, tableName, rng)
		if err != nil {
			return err
		}
	}
	return nil
}
// populateWithRandomData seeds every shard of every keyspace in the topology
// with random data. A single RNG, initialized from Seed.RngSeed, is shared by
// all shards. Keyspaces with a non-empty ServedFrom are skipped. The first
// error encountered is returned.
func (db *LocalCluster) populateWithRandomData() error {
	source := rand.NewSource(int64(db.Seed.RngSeed))
	rng := rand.New(source)

	for _, keyspace := range db.Topology.Keyspaces {
		if keyspace.ServedFrom != "" {
			continue
		}
		for _, shardDB := range db.shardNames(keyspace) {
			err := db.populateShard(shardDB, rng)
			if err != nil {
				return err
			}
		}
	}
	return nil
}