/
pattern.go
160 lines (146 loc) · 5.34 KB
/
pattern.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
// Copyright 2016 The Vanadium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package pattern handles parsing and matching SQL LIKE-style glob patterns.
package pattern
import (
"bytes"
"regexp"
"v.io/v23/verror"
)
// DefaultEscapeChar is the escape character assumed by Parse and Escape,
// which take no explicit escape character.
const DefaultEscapeChar = '\\'
// Pattern is a parsed LIKE-style glob pattern.
type Pattern struct {
	// regex is the regular expression equivalent to the original LIKE pattern.
	regex *regexp.Regexp
	// fixedPrefix is the fixed prefix that all pattern matches must start with.
	fixedPrefix string
	// noWildcards is true if the pattern contains no unescaped wildcards; in
	// this case, fixedPrefix is the entire unescaped expression.
	noWildcards bool
}
// Parse is shorthand for ParseWithEscapeChar with DefaultEscapeChar ('\') as
// the escape character. See ParseWithEscapeChar() for the pattern syntax.
func Parse(pattern string) (*Pattern, error) {
	parsed, err := ParseWithEscapeChar(pattern, DefaultEscapeChar)
	return parsed, err
}
// ParseWithEscapeChar parses a LIKE-style glob pattern.
// Supported wildcards are '_' (match any one character) and '%' (match zero or
// more characters); both also match newline characters. They can be escaped by
// escChar; escChar can also escape itself. '_' and '%' cannot be used as
// escChar; '\x00' escChar disables escaping.
//
// An error is returned if escChar is '%' or '_', if escChar is followed by
// anything other than '%', '_' or escChar, or if the pattern ends with an
// unpaired escChar.
func ParseWithEscapeChar(pattern string, escChar rune) (*Pattern, error) { //nolint:gocyclo
	if escChar == '%' || escChar == '_' {
		// NOTE(review): the Errorf-style helpers are called with format verbs
		// and arguments below, so a literal '%' is written as "%%" here.
		return nil, ErrorfIllegalEscapeChar(nil, "'%%' and '_' cannot be used as escape characters")
	}

	// The LIKE-style pattern is converted to a regex, converting:
	//   % to .*?
	//   _ to .
	// Everything else that would be incorrectly interpreted as a regex is
	// escaped.
	// The approach this function takes is to collect characters to be escaped
	// into toBeEscapedBuf. When a wildcard is encountered, first toBeEscapedBuf
	// is escaped and written to the regex buffer, next the wildcard is
	// translated to regex (either ".*?" or ".") and written to the regex buffer.
	// At the end, any remaining chars in toBeEscapedBuf are written.
	var buf bytes.Buffer            // buffer for return regex
	var toBeEscapedBuf bytes.Buffer // buffer to hold characters waiting to be escaped

	// Even though regexp.Regexp provides a LiteralPrefix() method, it doesn't
	// always return the longest fixed prefix, so we save it while parsing.
	var fixedPrefix string
	foundWildcard := false

	// '(?s)^<regex_str>$'. The s flag makes '.' match '\n' as well; without it
	// the wildcards would silently fail on strings that contain newlines.
	buf.WriteString("(?s)^")

	escapedMode := false
	for _, c := range pattern {
		if escapedMode {
			// The previous rune was escChar; only a wildcard or escChar itself
			// may follow it.
			switch c {
			case '%', '_', escChar:
				toBeEscapedBuf.WriteRune(c)
			default:
				return nil, ErrorfInvalidEscape(nil, "only '%%', '_', and the escape character are allowed to be escaped, found '%v'", string(c))
			}
			escapedMode = false
			continue
		}
		switch c {
		case '%', '_':
			// Write out any chars waiting to be escaped, then write ".*?" or ".".
			literal := toBeEscapedBuf.String()
			buf.WriteString(regexp.QuoteMeta(literal))
			if !foundWildcard {
				// First wildcard found, fixedPrefix is the pattern up to it.
				fixedPrefix = literal
				foundWildcard = true
			}
			toBeEscapedBuf.Reset()
			if c == '%' {
				buf.WriteString(".*?")
			} else {
				buf.WriteString(".")
			}
		case escChar:
			if escChar != '\x00' {
				escapedMode = true
			} else {
				// nul is never an escape char, treat same as default.
				toBeEscapedBuf.WriteRune(c)
			}
		default:
			toBeEscapedBuf.WriteRune(c)
		}
	}
	if escapedMode {
		// The pattern ended right after an escape character.
		return nil, ErrorfInvalidEscape(nil, "only '%%', '_', and the escape character are allowed to be escaped, found '%v'", "<end>")
	}

	// Write any remaining chars in toBeEscapedBuf.
	trailing := toBeEscapedBuf.String()
	buf.WriteString(regexp.QuoteMeta(trailing))
	if !foundWildcard {
		// No wildcard found, fixedPrefix is the entire pattern.
		fixedPrefix = trailing
	}
	buf.WriteString("$") // '(?s)^<regex_str>$'

	regex := buf.String()
	compRegex, err := regexp.Compile(regex)
	if err != nil {
		// TODO(ivanpi): Should never happen. Panic here?
		return nil, verror.ErrInternal.Errorf(nil, "Internal error: failed to compile pattern %q (regular expression %q): %v", pattern, regex, err)
	}
	return &Pattern{
		regex:       compRegex,
		fixedPrefix: fixedPrefix,
		noWildcards: !foundWildcard,
	}, nil
}
// MatchString reports whether the string s, taken in its entirety, matches
// the pattern.
func (p *Pattern) MatchString(s string) bool {
	matched := p.regex.MatchString(s)
	return matched
}
// FixedPrefix returns two values: the unescaped fixed prefix that every
// matching string must begin with, and a flag that is true when that prefix
// is the whole pattern (i.e. the pattern has no wildcards).
func (p *Pattern) FixedPrefix() (string, bool) {
	prefix, isWholePattern := p.fixedPrefix, p.noWildcards
	return prefix, isWholePattern
}
// Escape prepares a literal string for inclusion in a LIKE-style pattern,
// using DefaultEscapeChar ('\') as the escape character.
// See EscapeWithEscapeChar().
func Escape(s string) string {
	escaped := EscapeWithEscapeChar(s, DefaultEscapeChar)
	return escaped
}
// EscapeWithEscapeChar escapes a literal string for inclusion in a LIKE-style
// pattern. It inserts escChar before each '_', '%', and escChar in the string.
//
// It panics if escChar is '\x00' or one of the wildcard characters '%' and
// '_'.
func EscapeWithEscapeChar(s string, escChar rune) string {
	if escChar == '\x00' {
		// The backslash is doubled so the message shows the text '\x00'
		// instead of embedding a raw NUL byte.
		panic(verror.ErrBadArg.Errorf(nil, "'\\x00' disables escaping, cannot be used in EscapeWithEscapeChar"))
	}
	if escChar == '%' || escChar == '_' {
		// NOTE(review): assumes ErrorfIllegalEscapeChar treats its message as
		// a printf-style format (its Errorf-style siblings are called with
		// verbs and arguments); "%%" then renders as a literal '%'.
		panic(ErrorfIllegalEscapeChar(nil, "'%%' and '_' cannot be used as escape characters"))
	}
	var buf bytes.Buffer
	buf.Grow(len(s)) // the output is at least as long as the input
	for _, c := range s {
		switch c {
		case '%', '_', escChar:
			// Special character: prefix it with the escape character.
			buf.WriteRune(escChar)
		}
		buf.WriteRune(c)
	}
	return buf.String()
}