-
Notifications
You must be signed in to change notification settings - Fork 2k
/
sets.go
104 lines (90 loc) · 3.5 KB
/
sets.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
/*
© 2016 and later: Unicode, Inc. and others.
Copyright (C) 2004-2015, International Business Machines Corporation and others.
Copyright 2023 The Vitess Authors.
This file contains code derived from the Unicode Project's ICU library.
License & terms of use for the original code: http://www.unicode.org/copyright.html
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package icuregex
import (
"vitess.io/vitess/go/mysql/icuregex/internal/uprops"
"vitess.io/vitess/go/mysql/icuregex/internal/uset"
)
// staticPropertySets holds the predefined Unicode sets used by the regex
// engine. Slots are addressed with the urx* set constants and are filled in
// by init below; slots that init does not assign remain nil.
var staticPropertySets [13]*uset.UnicodeSet
// init fills staticPropertySets with frozen Unicode sets, one per urx* slot
// assigned below. The sets are built in a fixed order because the final
// "normal" set is derived from the control and Hangul sets created before it.
// Pattern parsing goes through a Must* helper, so presumably an invalid
// pattern panics during package initialization.
func init() {
	// Every pattern in this function is parsed with 0 as the second argument
	// (presumably "no option flags" - confirm against uprops).
	parse := func(pattern string) *uset.UnicodeSet {
		return uprops.MustNewUnicodeSetFomPattern(pattern, 0)
	}

	// Word characters: the union of four property classes plus U+200C and
	// U+200D (zero width non-joiner and zero width joiner).
	word := uset.New()
	for _, pattern := range []string{`\p{Alphabetic}`, `\p{M}`, `\p{Nd}`, `\p{Pc}`} {
		word.AddAll(parse(pattern))
	}
	word.AddRune(0x200c)
	word.AddRune(0x200d)
	staticPropertySets[urxIswordSet] = word.Freeze()

	staticPropertySets[urxIsspaceSet] = parse(`\p{Whitespace}`).Freeze()
	staticPropertySets[urxGcExtend] = parse(`\p{Grapheme_Extend}`).Freeze()

	// Grapheme-cluster control characters: Zl, Zp, Cc and Cf, minus anything
	// that is also Grapheme_Extend.
	control := uset.New()
	for _, pattern := range []string{`[:Zl:]`, `[:Zp:]`, `[:Cc:]`, `[:Cf:]`} {
		control.AddAll(parse(pattern))
	}
	control.RemoveAll(parse(`[:Grapheme_Extend:]`))
	control = control.Freeze()
	staticPropertySets[urxGcControl] = control

	// Hangul syllable types. L, V and T are kept in locals because the
	// "normal" set below subtracts them.
	hangulL := parse(`\p{Hangul_Syllable_Type=L}`).Freeze()
	staticPropertySets[urxGcL] = hangulL
	staticPropertySets[urxGcLv] = parse(`\p{Hangul_Syllable_Type=LV}`).Freeze()
	staticPropertySets[urxGcLvt] = parse(`\p{Hangul_Syllable_Type=LVT}`).Freeze()
	hangulV := parse(`\p{Hangul_Syllable_Type=V}`).Freeze()
	staticPropertySets[urxGcV] = hangulV
	hangulT := parse(`\p{Hangul_Syllable_Type=T}`).Freeze()
	staticPropertySets[urxGcT] = hangulT

	// "Normal" characters: start from the complement of a freshly created
	// set, then remove the 0xac00-0xd7a4 range (the precomposed Hangul
	// syllable area) and the control, L, V and T sets built above.
	normal := uset.New()
	normal.Complement()
	normal.RemoveRuneRange(0xac00, 0xd7a4)
	for _, excluded := range []*uset.UnicodeSet{control, hangulL, hangulV, hangulT} {
		normal.RemoveAll(excluded)
	}
	staticPropertySets[urxGcNormal] = normal.Freeze()
}
// staticSetUnescape is a frozen set built once at package initialization from
// the string "acefnrtuUx".
// NOTE(review): these look like the letters that introduce a backslash escape
// sequence in a pattern - confirm against the code that consults this set.
var staticSetUnescape = func() *uset.UnicodeSet {
	escapeLetters := uset.New()
	escapeLetters.AddString("acefnrtuUx")
	return escapeLetters.Freeze()
}()
// Identifiers for the rule character classes. The numbering starts at 128.
// NOTE(review): presumably these values come from the pattern-parser state
// tables and index staticRuleSet after subtracting 128 - confirm at the use
// site.
const (
	ruleSetDigitChar   = 128
	ruleSetASCIILetter = 129
	ruleSetRuleChar    = 130

	// ruleSetCount is the number of identifiers above (3), derived from the
	// first and last values so it cannot drift from them.
	ruleSetCount = ruleSetRuleChar - ruleSetDigitChar + 1
)
// staticRuleSet holds ruleSetCount frozen sets, built once at package
// initialization, in this order:
//
//	0: the ASCII digits '0'-'9'
//	1: the ASCII letters 'A'-'Z' and 'a'-'z'
//	2: the complement of the set built from "*?+[(){}^$|\\."
//
// NOTE(review): the positions appear to line up with ruleSetDigitChar,
// ruleSetASCIILetter and ruleSetRuleChar minus 128 - confirm at the use site.
var staticRuleSet = func() [ruleSetCount]*uset.UnicodeSet {
	digits := uset.New()
	digits.AddRuneRange('0', '9')

	letters := uset.New()
	letters.AddRuneRange('A', 'Z')
	letters.AddRuneRange('a', 'z')

	// Built from the listed punctuation and then inverted, so the final set
	// is everything outside that string.
	ruleChars := uset.New()
	ruleChars.AddString("*?+[(){}^$|\\.")
	ruleChars.Complement()

	return [ruleSetCount]*uset.UnicodeSet{
		digits.Freeze(),
		letters.Freeze(),
		ruleChars.Freeze(),
	}
}()