-
Notifications
You must be signed in to change notification settings - Fork 0
/
register.go
80 lines (66 loc) · 1.71 KB
/
register.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
package stopwords
import (
"strings"
)
// Register holds a collection of stop-words, kept both as a list and as a
// lookup map keyed by word.
type Register struct {
	// stopWordsIndex maps a registered stop-word to true so that membership
	// can be checked with a single map lookup.
	stopWordsIndex map[string]bool
	// stopWords lists the registered stop-words.
	stopWords []string
}
// IsStopWord reports whether s is a registered stop-word.
// The lookup uses s exactly as given; no trimming or case folding is
// performed by this method.
func (r *Register) IsStopWord(s string) bool {
	registered, found := r.stopWordsIndex[s]
	return found && registered
}
// Slice returns a copy of the list of registered stop-words.
// The returned slice does not share storage with the register, so callers
// may modify it freely.
func (r *Register) Slice() []string {
	var words []string
	words = append(words, r.stopWords...)
	return words
}
// Index returns a copy of the stop-word lookup map.
// The result is always a freshly allocated map, so callers may modify it
// without affecting the register.
func (r *Register) Index() map[string]bool {
	idx := make(map[string]bool, len(r.stopWordsIndex))
	for word := range r.stopWordsIndex {
		idx[word] = r.stopWordsIndex[word]
	}
	return idx
}
// create returns a new Register whose index and word list are both empty
// and non-nil, ready to have stop-words registered into it.
func create() *Register {
	reg := new(Register)
	reg.stopWordsIndex = make(map[string]bool)
	reg.stopWords = make([]string, 0)
	return reg
}
// Setup builds a Register of stop-words.
//
// When called without options it returns the register produced by
// registerDefaultStopWords. Otherwise each option is applied, in order, to a
// fresh config, and every entry of the config's words is then registered
// into a newly created Register.
func Setup(opts ...Option) *Register {
	// No custom configuration: fall back to the defaults.
	if len(opts) == 0 {
		return registerDefaultStopWords()
	}

	cfg := &config{
		words: make([][]string, 0),
	}
	// Apply custom configuration.
	for i := range opts {
		opts[i](cfg)
	}

	reg := create()
	for i := range cfg.words {
		registerStopWords(cfg.words[i], reg)
	}
	return reg
}
// registerStopWords adds the given words to reg.
//
// Each word is trimmed of surrounding whitespace and lower-cased first.
// Words that are empty after that, or already present in the index, are
// skipped, so every stop-word is stored at most once.
func registerStopWords(words []string, reg *Register) {
	for i := range words {
		word := strings.TrimSpace(words[i])
		word = strings.ToLower(word)
		if word != "" && !reg.stopWordsIndex[word] {
			reg.stopWordsIndex[word] = true
			reg.stopWords = append(reg.stopWords, word)
		}
	}
}
// registerDefaultStopWords returns a Register populated with the default
// stop-words. It builds an option from the package-level StopWords value via
// Text, passing "\n" as the second argument (presumably the separator between
// words; confirm against Text), and hands that option to Setup.
func registerDefaultStopWords() *Register {
	defaults := Text(StopWords, "\n")
	return Setup(defaults)
}