-
Notifications
You must be signed in to change notification settings - Fork 2
/
filter.go
112 lines (104 loc) · 3.73 KB
/
filter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
package word_filter
import (
"bytes"
"fmt"
"io/ioutil"
"os"
"os/signal"
"sort"
"syscall"
"time"
)
var dicFileTrie map[string]*Trie = make(map[string]*Trie) //map for dictionary file to trie
var dicFileTime map[string]int64 = make(map[string]int64) //map for dictionary file to it's built time
func LoadDicFiles(dicFiles []string) { //load serveral dictionary file to build tries
for _, dicFile := range dicFiles {
dicFileTrie[dicFile] = nil
dicFileTime[dicFile] = 0
}
go buildDicFileTrie() //asyn build dictionary file trie,when completed the old trie will be replaced
}
func buildDicFileTrie() { //when server start-up the trie will be built automatically
for {
c := make(chan os.Signal, 1)
signal.Notify(c, syscall.SIGHUP)
for dicFile, _ := range dicFileTrie {
stat, e := os.Stat(dicFile)
if e != nil || stat.ModTime().Unix() > dicFileTime[dicFile] { //maybe deleted file or updated file
data, e := ioutil.ReadFile(dicFile)
if e != nil || len(data) <= 0 { //file not exist or empty
dicFileTrie[dicFile] = nil //delete the old trie,maybe concurrency problem
}
dictionary := bytes.Split(data, []byte("\n"))
var tree Trie
tree.InitRootNode()
tree.BuildTrie(dictionary)
dicFileTrie[dicFile] = &tree //replace the old trie,maybe concurrency problem
dicFileTime[dicFile] = time.Now().Unix() //save the replace time
fmt.Printf("build completed!\n")
}
}
<-c //after we refresh the all dictionary trie we will block to the next SIGHUP signal and refresh the all dictionary trie again
}
}
//separator charactors,if we want to match "abcde" in "abc112312de" ,separator charactors can be set to "123"
type Seps []rune
func (s Seps) Len() int {
return len(s)
}
func (s Seps) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
func (s Seps) Less(i, j int) bool {
return s[i] < s[j]
}
func FindSepC(seps Seps, charactor rune) bool {
i := sort.Search(len(seps), func(i int) bool { return seps[i] >= charactor })
return i < len(seps) && seps[i] == charactor
}
// we use a dictionary file to filter text,all separator in text will be bypassed,and matched words will be replace by rep
func FilterText(dicFile string, text []rune, seps Seps, rep rune) {
T := dicFileTrie[dicFile] //save trie to temporary variable to eliminate concurrency problem
if T == nil { //no matched trie for dictionary file
return
}
sort.Sort(seps) //sort seps to speed up search process
walkNode := T.RootNode //walk through the trie from root
var nextNode *Node //point to walkNode's next node
for i, charactor := range text { //travel the text,i is used as an index to present charactor
for {
if FindSepC(seps, charactor) { //omit charactor in seps
break
}
nextNode = walkNode.BinGetChildNodeByVal(charactor)
if nextNode == nil { //not found next node whose val is charactor
if nil != walkNode.SuffixNode { //point to suffix node, redo the find operation,maybe its suffix node is root
walkNode = walkNode.SuffixNode
continue
} else { //only root node's suffix node is null
walkNode = T.RootNode //restart from root
break //break to handle next charactor
}
} else { // find node
if nextNode.EOW { //if a word end up with next node
depth := nextNode.Depth //get the word length
j := i //search back from i
for depth > 0 { //until to root
for j >= 0 && FindSepC(seps, text[j]) { //omit
j--
}
if j >= 0 {
text[j] = rep //replace with rep
j--
}
depth--
}
walkNode = T.RootNode //restart from root
} else { //not EOW
walkNode = nextNode //step to next node
}
break //break to handle next charactor
}
}
}
}