Skip to content

Commit

Permalink
Merge pull request from GHSA-649x-hxfx-57j2
Browse files Browse the repository at this point in the history
* collations: Fix OOM and handle padding for multibyte

This fixes the OOM issue where a simple query can trigger a denial of
service attack. It also ensures we return the right result for these
queries by doing the correct padding.

Signed-off-by: Dirkjan Bussink <d.bussink@gmail.com>

* Address review comments

Signed-off-by: Dirkjan Bussink <d.bussink@gmail.com>

---------

Signed-off-by: Dirkjan Bussink <d.bussink@gmail.com>
  • Loading branch information
dbussink authored May 8, 2024
1 parent 4b60128 commit 2fd5ba1
Show file tree
Hide file tree
Showing 8 changed files with 68 additions and 12 deletions.
2 changes: 1 addition & 1 deletion go/mysql/collations/charset/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ func convertSlow(dst []byte, dstCharset Charset, src []byte, srcCharset Charset)

for len(src) > 0 {
cp, width := srcCharset.DecodeRune(src)
if cp == utf8.RuneError && width < 3 {
if cp == utf8.RuneError {
failed++
cp = '?'
}
Expand Down
2 changes: 1 addition & 1 deletion go/mysql/collations/charset/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func Validate(charset Charset, input []byte) bool {
}
for len(input) > 0 {
r, size := charset.DecodeRune(input)
if r == RuneError && size < 2 {
if r == RuneError {
return false
}
input = input[size:]
Expand Down
6 changes: 3 additions & 3 deletions go/mysql/collations/charset/unicode/utf16.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func (Charset_utf16be) EncodeRune(dst []byte, r rune) int {

func (Charset_utf16be) DecodeRune(b []byte) (rune, int) {
if len(b) < 2 {
return utf8.RuneError, 0
return utf8.RuneError, len(b)
}

r1 := uint16(b[1]) | uint16(b[0])<<8
Expand Down Expand Up @@ -129,7 +129,7 @@ func (Charset_utf16le) EncodeRune(dst []byte, r rune) int {

func (Charset_utf16le) DecodeRune(b []byte) (rune, int) {
if len(b) < 2 {
return utf8.RuneError, 0
return utf8.RuneError, len(b)
}

r1 := uint16(b[0]) | uint16(b[1])<<8
Expand Down Expand Up @@ -185,7 +185,7 @@ func (Charset_ucs2) EncodeRune(dst []byte, r rune) int {

// DecodeRune decodes one UCS-2 code unit from the front of p and returns the
// rune together with the number of bytes it occupies.
//
// A complete code unit is two bytes, combined with p[0] as the high byte and
// p[1] as the low byte. When fewer than two bytes are available the result is
// utf8.RuneError.
//
// NOTE(review): this span is a rendered diff hunk with its +/- markers
// stripped, so the two consecutive return statements in the short-input branch
// are the before/after pair of a single changed line, not sequential code.
func (Charset_ucs2) DecodeRune(p []byte) (rune, int) {
if len(p) < 2 {
// Before the change: a width of 0 was reported for truncated input.
return utf8.RuneError, 0
// After the change: the width is the number of leftover bytes, presumably so
// that callers which advance by the returned width always make progress on a
// truncated tail — confirm against the callers.
return utf8.RuneError, len(p)
}
return rune(p[0])<<8 | rune(p[1]), 2
}
Expand Down
2 changes: 1 addition & 1 deletion go/mysql/collations/charset/unicode/utf32.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ func (Charset_utf32) EncodeRune(dst []byte, r rune) int {

// DecodeRune decodes one UTF-32 code unit from the front of p and returns the
// rune together with the number of bytes it occupies.
//
// A complete code unit is four bytes, combined with p[0] as the most
// significant byte and p[3] as the least significant. When fewer than four
// bytes are available the result is utf8.RuneError.
//
// NOTE(review): this span is a rendered diff hunk with its +/- markers
// stripped, so the two consecutive return statements in the short-input branch
// are the before/after pair of a single changed line, not sequential code.
func (Charset_utf32) DecodeRune(p []byte) (rune, int) {
if len(p) < 4 {
// Before the change: a width of 0 was reported for truncated input.
return utf8.RuneError, 0
// After the change: the width is the number of leftover bytes, presumably so
// that callers which advance by the returned width always make progress on a
// truncated tail — confirm against the callers.
return utf8.RuneError, len(p)
}
return (rune(p[0]) << 24) | (rune(p[1]) << 16) | (rune(p[2]) << 8) | rune(p[3]), 4
}
Expand Down
3 changes: 2 additions & 1 deletion go/vt/vtgate/evalengine/compiler_asm.go
Original file line number Diff line number Diff line change
Expand Up @@ -4733,7 +4733,8 @@ func (asm *assembler) Fn_REGEXP_REPLACE_slow(merged collations.TypedCollation, f

func (asm *assembler) Introduce(offset int, t sqltypes.Type, col collations.TypedCollation) {
asm.emit(func(env *ExpressionEnv) int {
arg := evalToBinary(env.vm.stack[env.vm.sp-offset])
var arg *evalBytes
arg, env.vm.err = introducerCast(env.vm.stack[env.vm.sp-offset], col.Collation)
arg.tt = int16(t)
arg.col = col
env.vm.stack[env.vm.sp-offset] = arg
Expand Down
24 changes: 24 additions & 0 deletions go/vt/vtgate/evalengine/compiler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,30 @@ func TestCompilerSingle(t *testing.T) {
expression: `week('2024-12-31', 5)`,
result: `INT64(53)`,
},
{
expression: `convert(0xFF using utf16)`,
result: `VARCHAR("ÿ")`,
},
{
expression: `_utf16 0xFF`,
result: `VARCHAR("ÿ")`,
},
{
expression: `convert(0xFF using utf32)`,
result: `NULL`,
},
{
expression: `cast(_utf32 0xFF as binary)`,
result: `VARBINARY("\x00\x00\x00\xff")`,
},
{
expression: `cast(_utf32 0x00FF as binary)`,
result: `VARBINARY("\x00\x00\x00\xff")`,
},
{
expression: `cast(_utf32 0x0000FF as binary)`,
result: `VARBINARY("\x00\x00\x00\xff")`,
},
}

for _, tc := range testCases {
Expand Down
39 changes: 35 additions & 4 deletions go/vt/vtgate/evalengine/expr_collate.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package evalengine

import (
"vitess.io/vitess/go/mysql/collations"
"vitess.io/vitess/go/mysql/collations/charset"
"vitess.io/vitess/go/mysql/collations/colldata"
"vitess.io/vitess/go/sqltypes"
querypb "vitess.io/vitess/go/vt/proto/query"
Expand Down Expand Up @@ -217,15 +218,45 @@ func (ca *collationAggregation) result() collations.TypedCollation {

var _ Expr = (*IntroducerExpr)(nil)

// introducerCast reinterprets the value e as text in the collation col, which
// is what a character set introducer such as `_utf32 0xFF` requests.
//
// For the binary collation the value is simply converted with evalToBinary.
// For every other collation the raw bytes of e are wrapped in a text value
// tagged with col, coercible coercibility and the ASCII repertoire.
//
// When e already is an *evalBytes and the target character set has a minimum
// character width larger than one byte, the bytes are left-padded with zero
// bytes up to a whole number of code units: to a multiple of 2 for the UTF-16
// variants and UCS-2, and to a multiple of 4 for UTF-32. For example
// `_utf32 0xFF` becomes the bytes 00 00 00 FF.
//
// The returned error is currently always nil; it is part of the signature so
// callers can treat this function like the other cast helpers.
func introducerCast(e eval, col collations.ID) (*evalBytes, error) {
	if col == collations.CollationBinaryID {
		return evalToBinary(e), nil
	}

	var bytes []byte
	if b, ok := e.(*evalBytes); ok {
		bytes = b.bytes
		// We only need to pad here for encodings that have a minimum
		// character byte width larger than 1, which is all UTF-16
		// variations and UTF-32.
		width := 1
		switch colldata.Lookup(col).Charset().(type) {
		case charset.Charset_utf16, charset.Charset_utf16le, charset.Charset_ucs2:
			width = 2
		case charset.Charset_utf32:
			width = 4
		}
		if rem := len(bytes) % width; rem != 0 {
			// Build the padded value in a fresh slice so the bytes owned by
			// the input value are never modified.
			pad := width - rem
			padded := make([]byte, pad, pad+len(bytes))
			bytes = append(padded, bytes...)
		}
	} else {
		// The type assertion failed, so b is a nil *evalBytes here and must
		// not be used; take the raw bytes from the original value instead.
		bytes = e.ToRawBytes()
	}

	typedcol := collations.TypedCollation{
		Collation:    col,
		Coercibility: collations.CoerceCoercible,
		Repertoire:   collations.RepertoireASCII,
	}
	return newEvalText(bytes, typedcol), nil
}

// eval evaluates the inner expression and then applies the introducer's
// collation to the result. An error from the inner expression is returned
// unchanged, with a nil value.
//
// NOTE(review): this span is a rendered diff hunk with its +/- markers
// stripped. The binary-collation check and the evalToVarchar return appear to
// be the lines the commit removed, and the final introducerCast return the
// line it added; they are not meant to coexist in one function body.
func (expr *IntroducerExpr) eval(env *ExpressionEnv) (eval, error) {
e, err := expr.Inner.eval(env)
if err != nil {
return nil, err
}
// Before the change: the binary collation was special-cased here and every
// other collation went through evalToVarchar.
if expr.TypedCollation.Collation == collations.CollationBinaryID {
return evalToBinary(e), nil
}
return evalToVarchar(e, expr.TypedCollation.Collation, false)
// After the change: a single call to introducerCast, which is given the
// target collation and the evaluated value.
return introducerCast(e, expr.TypedCollation.Collation)
}

func (expr *IntroducerExpr) typeof(env *ExpressionEnv, fields []*querypb.Field) (sqltypes.Type, typeFlag) {
Expand Down
2 changes: 1 addition & 1 deletion go/vt/vtgate/evalengine/translate.go
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ func (ast *astCompiler) translateIntroducerExpr(introduced *sqlparser.Introducer
case collations.CollationBinaryID:
lit.inner = evalToBinary(lit.inner)
default:
lit.inner, err = evalToVarchar(lit.inner, collation, false)
lit.inner, err = introducerCast(lit.inner, collation)
if err != nil {
return nil, err
}
Expand Down

0 comments on commit 2fd5ba1

Please sign in to comment.