/
encoding_base.go
145 lines (129 loc) · 3.75 KB
/
encoding_base.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
// Copyright 2021 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.
package charset
import (
"bytes"
"fmt"
"reflect"
"strings"
"unsafe"
"github.com/wuhuizuo/tidb6/parser/mysql"
"github.com/wuhuizuo/tidb6/parser/terror"
"golang.org/x/text/encoding"
"golang.org/x/text/transform"
)
// ErrInvalidCharacterString is the error returned when a string is invalid in
// the specified charset. It is created from mysql.ErrInvalidCharacterString
// via terror.ClassParser.
var ErrInvalidCharacterString = terror.ClassParser.NewStd(mysql.ErrInvalidCharacterString)
// encodingBase defines some generic functions.
// Its methods dispatch through self (Foreach, Peek and Name), so the value
// stored in self decides how a byte sequence is actually walked.
type encodingBase struct {
// enc supplies the encoder/decoder that Foreach uses to convert each chunk.
enc encoding.Encoding
// self is the Encoding whose Foreach, Peek and Name methods are called by
// the generic methods of encodingBase.
self Encoding
}
// MbLen is the base implementation: it ignores its argument and always
// returns 0.
func (encodingBase) MbLen(string) int {
	return 0
}
// ToUpper returns src converted to upper case using strings.ToUpper.
func (encodingBase) ToUpper(src string) string {
	upper := strings.ToUpper(src)
	return upper
}
// ToLower returns src converted to lower case using strings.ToLower.
func (encodingBase) ToLower(src string) string {
	lower := strings.ToLower(src)
	return lower
}
// IsValid reports whether src can be walked with opFromUTF8 without any chunk
// being flagged as not ok. It returns true for input on which the callback is
// never invoked.
func (b encodingBase) IsValid(src []byte) bool {
	valid := true
	check := func(_, _ []byte, ok bool) bool {
		// Record the latest verdict; returning false stops the walk at the
		// first chunk that is not ok.
		valid = ok
		return valid
	}
	b.self.Foreach(src, opFromUTF8, check)
	return valid
}
// Transform walks src with b.self.Foreach under the given op and collects the
// selected bytes into dest, returning dest's contents and the first encoding
// error met (if any).
//
// dest may be nil, in which case a new buffer sized to len(src) is allocated;
// otherwise dest is reset and reused, so the returned slice aliases dest's
// storage.
//
// For a chunk that is not ok:
//   - an error is recorded via generateEncodingErr unless one was already
//     recorded or op has opSkipError set;
//   - opTruncateTrim stops the walk;
//   - opTruncateReplace writes '?' in place of the chunk and continues;
//   - otherwise the chunk is collected like a valid one.
//
// Collected bytes are the source chunk when op has opCollectFrom, else the
// converted chunk when op has opCollectTo, else nothing.
func (b encodingBase) Transform(dest *bytes.Buffer, src []byte, op Op) (result []byte, err error) {
	if dest == nil {
		dest = new(bytes.Buffer)
		dest.Grow(len(src))
	}
	dest.Reset()

	collect := func(from, to []byte, ok bool) bool {
		if !ok {
			// Only the first failure is reported, and only when errors are
			// not being skipped.
			if err == nil && op&opSkipError == 0 {
				err = generateEncodingErr(b.self.Name(), from)
			}
			switch {
			case op&opTruncateTrim != 0:
				return false
			case op&opTruncateReplace != 0:
				dest.WriteByte('?')
				return true
			}
		}
		switch {
		case op&opCollectFrom != 0:
			dest.Write(from)
		case op&opCollectTo != 0:
			dest.Write(to)
		}
		return true
	}
	b.self.Foreach(src, op, collect)

	return dest.Bytes(), err
}
// Foreach splits src into consecutive chunks and calls fn once per chunk.
//
// When op has opFromUTF8 set, chunks are delimited by EncodingUTF8Impl.Peek
// and converted with b.enc's encoder; otherwise they are delimited by
// b.self.Peek and converted with b.enc's decoder.
//
// fn receives the source chunk, the converted bytes, and ok. ok is false when
// the transformer returned an error, or when op has opToUTF8 set and the
// converted bytes begin with the replacement character. The walk stops as
// soon as fn returns false.
//
// The converted slice passed to fn points into a 4-byte scratch buffer that is
// overwritten on every iteration, so fn must copy it if it needs to keep it.
func (b encodingBase) Foreach(src []byte, op Op, fn func(from, to []byte, ok bool) bool) {
	var (
		tfm  transform.Transformer
		peek func([]byte) []byte
	)
	if op&opFromUTF8 == 0 {
		tfm = b.enc.NewDecoder()
		peek = b.self.Peek
	} else {
		tfm = b.enc.NewEncoder()
		peek = EncodingUTF8Impl.Peek
	}

	var scratch [4]byte
	for pos := 0; pos < len(src); {
		width := len(peek(src[pos:]))
		chunk := src[pos : pos+width]
		n, _, err := tfm.Transform(scratch[:], chunk, false)
		converted := scratch[:n]

		failed := err != nil
		if !failed && op&opToUTF8 != 0 {
			failed = beginWithReplacementChar(converted)
		}
		if !fn(chunk, converted, !failed) {
			return
		}
		pos += width
	}
}
// replacementBytes is the UTF-8 encoding (EF BF BD) of the replacement rune
// U+FFFD.
var replacementBytes = []byte{0xEF, 0xBF, 0xBD}

// beginWithReplacementChar reports whether dst starts with the bytes
// 0xEF 0xBF 0xBD, i.e. with an encoded replacement character.
func beginWithReplacementChar(dst []byte) bool {
	n := len(replacementBytes)
	if len(dst) < n {
		return false
	}
	return bytes.Equal(dst[:n], replacementBytes)
}
// generateEncodingErr builds an ErrInvalidCharacterString error for the
// charset called name. invalidBytes is rendered as upper-case hexadecimal
// ("%X") and passed as the second argument of the error.
func generateEncodingErr(name string, invalidBytes []byte) error {
	return ErrInvalidCharacterString.FastGenByArgs(name, fmt.Sprintf("%X", invalidBytes))
}
// HackSlice converts a string to a byte slice without copying.
// The returned slice shares the string's memory and has len and cap equal to
// len(s); writing through it would modify memory backing a string.
// Use at your own risk.
func HackSlice(s string) (b []byte) {
	strHdr := (*reflect.StringHeader)(unsafe.Pointer(&s))
	sliceHdr := (*reflect.SliceHeader)(unsafe.Pointer(&b))
	// Point the slice header at the string's data; cap is pinned to the
	// string length.
	sliceHdr.Data, sliceHdr.Len, sliceHdr.Cap = strHdr.Data, strHdr.Len, strHdr.Len
	return b
}
// HackString converts a byte slice to a string without copying.
// An empty or nil slice yields "". Otherwise the returned string shares b's
// memory, so later writes to b are visible through the string.
// Use it at your own risk.
func HackString(b []byte) (s string) {
	if len(b) > 0 {
		sliceHdr := (*reflect.SliceHeader)(unsafe.Pointer(&b))
		strHdr := (*reflect.StringHeader)(unsafe.Pointer(&s))
		strHdr.Data, strHdr.Len = sliceHdr.Data, sliceHdr.Len
	}
	return s
}