Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: cache reserved bind variables in queries #7698

Merged
merged 5 commits into from
Mar 18, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions go/test/fuzzing/parser_fuzzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,13 @@ func FuzzIsDML(data []byte) int {
}

func FuzzNormalizer(data []byte) int {
stmt, err := sqlparser.Parse(string(data))
stmt, reservedVars, err := sqlparser.Parse2(string(data))
if err != nil {
return -1
}
prefix := "bv"
bv := make(map[string]*querypb.BindVariable)
sqlparser.Normalize(stmt, bv, prefix)
sqlparser.Normalize(stmt, reservedVars, bv, prefix)
return 1
}

Expand Down
4 changes: 2 additions & 2 deletions go/vt/sqlparser/ast_rewriting.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ type RewriteASTResult struct {
}

// PrepareAST will normalize the query
func PrepareAST(in Statement, bindVars map[string]*querypb.BindVariable, prefix string, parameterize bool, keyspace string) (*RewriteASTResult, error) {
func PrepareAST(in Statement, reservedVars BindVars, bindVars map[string]*querypb.BindVariable, prefix string, parameterize bool, keyspace string) (*RewriteASTResult, error) {
if parameterize {
err := Normalize(in, bindVars, prefix)
err := Normalize(in, reservedVars, bindVars, prefix)
if err != nil {
return nil, err
}
Expand Down
13 changes: 7 additions & 6 deletions go/vt/sqlparser/normalizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,18 @@ import (
querypb "vitess.io/vitess/go/vt/proto/query"
)

// BindVars is a set of reserved bind variables from a SQL statement
type BindVars map[string]struct{}

// Normalize changes the statement to use bind values, and
// updates the bind vars to those values. The supplied prefix
// is used to generate the bind var names. The function ensures
// that there are no collisions with existing bind vars.
// Within Select constructs, bind vars are deduped. This allows
// us to identify vindex equality. Otherwise, every value is
// treated as distinct.
func Normalize(stmt Statement, bindVars map[string]*querypb.BindVariable, prefix string) error {
nz := newNormalizer(stmt, bindVars, prefix)
func Normalize(stmt Statement, known BindVars, bindVars map[string]*querypb.BindVariable, prefix string) error {
nz := newNormalizer(known, bindVars, prefix)
_, err := Rewrite(stmt, nz.WalkStatement, nil)
if err != nil {
return err
Expand All @@ -49,11 +52,11 @@ type normalizer struct {
err error
}

func newNormalizer(stmt Statement, bindVars map[string]*querypb.BindVariable, prefix string) *normalizer {
func newNormalizer(reserved map[string]struct{}, bindVars map[string]*querypb.BindVariable, prefix string) *normalizer {
return &normalizer{
bindVars: bindVars,
prefix: prefix,
reserved: GetBindvars(stmt),
reserved: reserved,
counter: 1,
vals: make(map[string]string),
}
Expand Down Expand Up @@ -226,8 +229,6 @@ func (nz *normalizer) newName() string {
}

// GetBindvars returns a map of the bind vars referenced in the statement.
// TODO(sougou); This function gets called again from vtgate/planbuilder.
// Ideally, this should be done only once.
func GetBindvars(stmt Statement) map[string]struct{} {
bindvars := make(map[string]struct{})
_ = Walk(func(node SQLNode) (kontinue bool, err error) {
Expand Down
38 changes: 35 additions & 3 deletions go/vt/sqlparser/normalizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,9 +230,10 @@ func TestNormalize(t *testing.T) {
t.Error(err)
continue
}
known := GetBindvars(stmt)
bv := make(map[string]*querypb.BindVariable)
require.NoError(t,
Normalize(stmt, bv, prefix))
Normalize(stmt, known, bv, prefix))
outstmt := String(stmt)
if outstmt != tc.outstmt {
t.Errorf("Query:\n%s:\n%s, want\n%s", tc.in, outstmt, tc.outstmt)
Expand Down Expand Up @@ -269,12 +270,43 @@ BenchmarkNormalize-8 500000 3620 ns/op 1461 B/op
*/
func BenchmarkNormalize(b *testing.B) {
sql := "select 'abcd', 20, 30.0, eid from a where 1=eid and name='3'"
ast, err := Parse(sql)
ast, reservedVars, err := Parse2(sql)
if err != nil {
b.Fatal(err)
}
for i := 0; i < b.N; i++ {
require.NoError(b,
Normalize(ast, map[string]*querypb.BindVariable{}, ""))
Normalize(ast, reservedVars, map[string]*querypb.BindVariable{}, ""))
}
}

func BenchmarkNormalizeTraces(b *testing.B) {
for _, trace := range []string{"django_queries.txt", "lobsters.sql.gz"} {
b.Run(trace, func(b *testing.B) {
queries := loadQueries(b, trace)
if len(queries) > 10000 {
queries = queries[:10000]
}

parsed := make([]Statement, 0, len(queries))
reservedVars := make([]BindVars, 0, len(queries))
for _, q := range queries {
pp, kb, err := Parse2(q)
if err != nil {
b.Fatal(err)
}
parsed = append(parsed, pp)
reservedVars = append(reservedVars, kb)
}

b.ResetTimer()
b.ReportAllocs()

for i := 0; i < b.N; i++ {
for i, query := range parsed {
_ = Normalize(query, reservedVars[i], map[string]*querypb.BindVariable{}, "")
}
}
})
}
}
23 changes: 15 additions & 8 deletions go/vt/sqlparser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,27 +68,34 @@ func yyParsePooled(yylex yyLexer) int {
// a set of types, define the function as iTypeName.
// This will help avoid name collisions.

// Parse parses the SQL in full and returns a Statement, which
// is the AST representation of the query. If a DDL statement
// Parse2 parses the SQL in full and returns a Statement, which
// is the AST representation of the query, and a set of BindVars, which are all the
// bind variables that were found in the original SQL query. If a DDL statement
// is partially parsed but still contains a syntax error, the
// error is ignored and the DDL is returned anyway.
func Parse(sql string) (Statement, error) {
func Parse2(sql string) (Statement, BindVars, error) {
tokenizer := NewStringTokenizer(sql)
if yyParsePooled(tokenizer) != 0 {
if tokenizer.partialDDL != nil {
if typ, val := tokenizer.Scan(); typ != 0 {
return nil, fmt.Errorf("extra characters encountered after end of DDL: '%s'", string(val))
return nil, nil, fmt.Errorf("extra characters encountered after end of DDL: '%s'", string(val))
}
log.Warningf("ignoring error parsing DDL '%s': %v", sql, tokenizer.LastError)
tokenizer.ParseTree = tokenizer.partialDDL
return tokenizer.ParseTree, nil
return tokenizer.ParseTree, tokenizer.BindVars, nil
}
return nil, vterrors.New(vtrpcpb.Code_INVALID_ARGUMENT, tokenizer.LastError.Error())
return nil, nil, vterrors.New(vtrpcpb.Code_INVALID_ARGUMENT, tokenizer.LastError.Error())
}
if tokenizer.ParseTree == nil {
return nil, ErrEmpty
return nil, nil, ErrEmpty
}
return tokenizer.ParseTree, nil
return tokenizer.ParseTree, tokenizer.BindVars, nil
}

// Parse behaves like Parse2 but does not return a set of bind variables
func Parse(sql string) (Statement, error) {
stmt, _, err := Parse2(sql)
return stmt, err
}

// ParseStrictDDL is the same as Parse except it errors on
Expand Down
4 changes: 2 additions & 2 deletions go/vt/sqlparser/redact_query.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ func RedactSQLQuery(sql string) (string, error) {
bv := map[string]*querypb.BindVariable{}
sqlStripped, comments := SplitMarginComments(sql)

stmt, err := Parse(sqlStripped)
stmt, reservedVars, err := Parse2(sqlStripped)
if err != nil {
return "", err
}

prefix := "redacted"
err = Normalize(stmt, bv, prefix)
err = Normalize(stmt, reservedVars, bv, prefix)
if err != nil {
return "", err
}
Expand Down