-
Notifications
You must be signed in to change notification settings - Fork 6
/
lexer.go
135 lines (117 loc) · 3.5 KB
/
lexer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
package spf
import (
"strings"
"unicode/utf8"
)
// lexer holds the scanning state for a single SPF record.
//
// input is the record text and length its size in bytes. pos is the byte
// offset of the next rune to read, prev is the value pos had before the most
// recent read (so that one read can be undone), and start marks where the
// ident currently being scanned begins.
type lexer struct {
	start, pos, prev, length int
	input                    string
}
// lex splits an SPF record into tokens, each carrying its mechanism,
// qualifier and value, in the order they appear in input. Scanning stops at
// the first token whose mechanism is tEOF; that token is not included in the
// result. A record that yields no tokens produces a nil slice. Interpreting
// the tokens is left to the parser.
func lex(input string) []*token {
	l := &lexer{length: len(input), input: input}

	var tokens []*token
	for tok := l.scan(); tok.mechanism != tEOF; tok = l.scan() {
		tokens = append(tokens, tok)
	}
	return tokens
}
// scan produces the next token from the input.
//
// It reads runes until it has consumed a whitespace rune or the last rune of
// the record, hands the span [l.start:l.pos) to scanIdent, skips any
// whitespace that follows and then marks the current position as the start of
// the next ident. When nothing is left to read it returns a token whose
// mechanism and qualifier are both tEOF.
func (l *lexer) scan() *token {
	for {
		r, done := l.next()
		if done {
			return &token{tEOF, tEOF, ""}
		}
		// Keep reading while we are still inside an ident.
		if !isWhitespace(r) && !l.eof() {
			continue
		}

		// The span may end with the whitespace rune just consumed;
		// scanIdent is responsible for trimming it.
		tok := l.scanIdent()
		l.scanWhitespaces()
		l.moveon()
		return tok
	}
}
// eof reports whether the read position has reached (or passed) the end of
// the record, i.e. there are no more runes to read.
func (l *lexer) eof() bool {
	return l.length <= l.pos
}
// next decodes the rune at the current position and advances past it.
//
// It returns the rune and false on success. Before advancing, the old
// position is saved in l.prev so that back can undo this one read. When the
// record is exhausted it returns (0, true) and leaves the lexer untouched.
func (l *lexer) next() (rune, bool) {
	if l.eof() {
		return 0, true
	}

	at := l.pos
	// TODO(zaccone): check for operation success/failure. An invalid
	// encoding is currently passed through as whatever rune/width
	// DecodeRuneInString reports for it.
	r, width := utf8.DecodeRuneInString(l.input[at:])
	l.prev, l.pos = at, at+width
	return r, false
}
// moveon marks the current read position as the beginning of the next ident
// by copying l.pos into l.start. It is called once an ident has been scanned.
func (l *lexer) moveon() {
	l.start = l.pos
}
// back rewinds the read position to l.prev, the position recorded before the
// most recent read. Only a single step can be undone this way.
func (l *lexer) back() {
	l.pos = l.prev
}
// scanWhitespaces skips over a run of whitespace, leaving the read position
// on the first rune for which isWhitespace is false, or at the end of the
// record if only whitespace remains.
func (l *lexer) scanWhitespaces() {
	for !l.eof() {
		if r, _ := l.next(); !isWhitespace(r) {
			// One rune too far: step back onto the non-whitespace rune.
			l.back()
			return
		}
	}
}
// scanIdent turns the span [l.start:l.pos) into a token.
//
// The span may begin with a single qualifier rune; if it does, the token's
// qualifier is taken from the qualifiers table and l.start is advanced past
// it. A qualifier rune anywhere else is treated as ordinary text, so inputs
// such as "x-all" or "+-all" are reported as errors rather than being read as
// "-all". Without a qualifier the token keeps the default qPlus.
//
// If the remainder contains a delimiter, the text before the first delimiter
// is resolved with tokenTypeFromString and the text after it, with
// surrounding space trimmed, becomes the value. An empty value, or a token
// rejected by checkTokenSyntax, is an error. Without a delimiter the whole
// remainder (trimmed) is resolved as the mechanism and the value stays empty.
//
// On error the returned token has mechanism tErr, qualifier qErr and an empty
// value.
func (l *lexer) scanIdent() *token {
	t := &token{tErr, qPlus, ""}
	cursor := l.start

	// An optional qualifier is only valid as the very first rune.
	if cursor < l.pos {
		if ch, size := utf8.DecodeRuneInString(l.input[cursor:]); isQualifier(ch) {
			t.qualifier = qualifiers[ch]
			cursor += size
			l.start = cursor
		}
	}

	for cursor < l.pos {
		ch, size := utf8.DecodeRuneInString(l.input[cursor:])
		cursor += size
		if !isDelimiter(ch) {
			continue
		}

		// Split at the first delimiter: name before it, value after it.
		t.mechanism = tokenTypeFromString(l.input[l.start : cursor-size])
		t.value = strings.TrimSpace(l.input[cursor:l.pos])
		if t.value == "" || !checkTokenSyntax(t, ch) {
			t.qualifier = qErr
			t.mechanism = tErr
		}
		break
	}

	// Either no delimiter was found, or the delimited form was rejected.
	// Try the text scanned so far as a bare mechanism; in the rejected case
	// that text still includes the delimiter.
	if t.mechanism.isErr() {
		t.mechanism = tokenTypeFromString(
			strings.TrimSpace(l.input[l.start:cursor]))
		if t.mechanism.isErr() {
			t.qualifier = qErr
			t.value = ""
		}
	}
	return t
}
// isWhitespace reports whether ch is a space, a tab or a newline.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n':
		return true
	}
	return false
}
// isDelimiter reports whether ch is ':' or '='.
func isDelimiter(ch rune) bool {
	switch ch {
	case ':', '=':
		return true
	}
	return false
}
// isQualifier reports whether ch is one of the SPF qualifier characters
// '+', '-', '~' or '?'.
func isQualifier(ch rune) bool {
	switch ch {
	case '+', '-', '~', '?':
		return true
	}
	return false
}
// isDigit reports whether ch is an ASCII decimal digit, '0' through '9'.
func isDigit(ch rune) bool {
	return '0' <= ch && ch <= '9'
}