-
Notifications
You must be signed in to change notification settings - Fork 4
/
Tokenizer.vb
197 lines (170 loc) · 6.63 KB
/
Tokenizer.vb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#Region "Microsoft.VisualBasic::a5a81cc10c3070cd9afc5fc125e18092, LINQ\LINQ\Language\Tokenizer.vb"
' Author:
'
' asuka (amethyst.asuka@gcmodeller.org)
' xie (genetics@smrucc.org)
' xieguigang (xie.guigang@live.com)
'
' Copyright (c) 2018 GPL3 Licensed
'
'
' GNU GENERAL PUBLIC LICENSE (GPL3)
'
'
' This program is free software: you can redistribute it and/or modify
' it under the terms of the GNU General Public License as published by
' the Free Software Foundation, either version 3 of the License, or
' (at your option) any later version.
'
' This program is distributed in the hope that it will be useful,
' but WITHOUT ANY WARRANTY; without even the implied warranty of
' MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
' GNU General Public License for more details.
'
' You should have received a copy of the GNU General Public License
' along with this program. If not, see <http://www.gnu.org/licenses/>.
' /********************************************************************************/
' Summaries:
' Class Tokenizer
'
' Constructor: (+1 Overloads) Sub New
' Function: createToken, GetTokens, walkChar
' Class Escaping
'
'
'
'
'
'
' /********************************************************************************/
#End Region
Imports Microsoft.VisualBasic.ComponentModel.Collection
Imports Microsoft.VisualBasic.Language
Imports Microsoft.VisualBasic.Text
Imports Microsoft.VisualBasic.Text.Parser
Namespace Language
Friend Class Tokenizer

    ''' <summary>
    ''' Characters of the token that is currently being collected.
    ''' </summary>
    Dim buffer As New CharBuffer
    ''' <summary>
    ''' Read cursor over the source text that was handed to the constructor.
    ''' </summary>
    Dim text As CharPtr
    ''' <summary>
    ''' Flags that tell whether the scanner is inside a string literal or a comment.
    ''' </summary>
    Dim escapes As New Escaping
    ''' <summary>
    ''' A comparison character (&lt;, &gt; or =) that has been read but not yet emitted,
    ''' because the next character may complete a two character operator
    ''' (&lt;=, &gt;= or &lt;&gt;). It is <c>Nothing</c> when no operator is pending.
    ''' </summary>
    Dim pendingOperator As String = Nothing

    ''' <summary>
    ''' Scanner state for the two "raw text" modes: string literal and comment.
    ''' </summary>
    Friend Class Escaping
        ''' <summary>True while the scanner is inside a quoted string.</summary>
        Public [string] As Boolean = False
        ''' <summary>The quote character that opened the current string and must close it.</summary>
        Public strWrapper As Char
        ''' <summary>True while the scanner is inside a # comment.</summary>
        Public comment As Boolean = False
    End Class

    ''' <summary>
    ''' Creates a tokenizer over the given source text.
    ''' </summary>
    ''' <param name="code">The text to split into tokens.</param>
    Sub New(code As String)
        text = code
    End Sub

    ''' <summary>
    ''' Scans the source text and yields its tokens in the order in which they
    ''' appear in the text.
    ''' </summary>
    ''' <returns>A lazily evaluated sequence of tokens.</returns>
    Public Iterator Function GetTokens() As IEnumerable(Of Token)
        Do While text
            ' NOTE(review): "++text" is assumed to hand out the current character and
            ' advance the cursor; that behavior is defined by CharPtr, outside this file.
            Dim c As Char = ++text

            ' A comparison character is waiting for its possible second half.
            ' This can never be the case inside a string or a comment, because
            ' the pending operator is only set outside of these two modes and
            ' is always resolved by the very next character.
            If Not pendingOperator Is Nothing Then
                Dim first As String = pendingOperator
                Dim combined As String = first & c

                pendingOperator = Nothing

                If combined Like operators Then
                    ' c completed a two character operator and is consumed by it
                    Yield New Token(Tokens.Operator, combined)
                    Continue Do
                End If

                ' c does not belong to the operator: emit the single character
                ' operator and then process c like any other character
                Yield New Token(Tokens.Operator, first)
            End If

            Dim token As Token = walkChar(c)

            If Not token Is Nothing Then
                ' a delimiter token ends the text collected in front of it, so
                ' this text has to be emitted first
                If buffer <> 0 Then
                    Yield createToken()
                End If

                Yield token
            End If
        Loop

        ' end of input: emit whatever is still waiting
        If Not pendingOperator Is Nothing Then
            Yield New Token(Tokens.Operator, pendingOperator)
            pendingOperator = Nothing
        End If
        If buffer > 0 Then
            Yield createToken()
        End If
    End Function

    ReadOnly keywords As Index(Of String) = {
        "imports",
        "select", "from", "in", "let", "as", "distinct", "group", "by", "order", "aggregate",
        "where", "take", "skip", "into",
        "descending", "ascending"
    }
    ReadOnly operators As Index(Of String) = {"+", "-", "*", "/", "\", "%", "=", "<>", ">", "<", ">=", "<=", "^", "&"}
    ReadOnly literal As Index(Of String) = {"true", "false"}
    ReadOnly logicals As Index(Of String) = {"not", "and", "or"}

    ''' <summary>
    ''' Processes one character of the input.
    ''' </summary>
    ''' <param name="c">The character to process.</param>
    ''' <returns>
    ''' A token that is complete after reading <paramref name="c"/>, or
    ''' <c>Nothing</c> if the character only changed the scanner state. When a
    ''' token is returned while <see cref="buffer"/> still holds text, that text
    ''' precedes the returned token and the caller must emit it first.
    ''' </returns>
    Private Function walkChar(c As Char) As Token
        If escapes.string Then
            ' inside a string everything up to the closing quote is content
            If c = escapes.strWrapper Then
                escapes.string = False
                Return New Token(Tokens.Literal, buffer.PopAllChars.CharString)
            Else
                buffer += c
                Return Nothing
            End If
        ElseIf escapes.comment Then
            ' a comment runs up to the end of the line; the line break itself
            ' is consumed and not reported as a terminator
            If c = ASCII.CR OrElse c = ASCII.LF Then
                escapes.comment = False
                Return New Token(Tokens.Comment, buffer.PopAllChars.CharString)
            Else
                buffer += c
                Return Nothing
            End If
        ElseIf c = "'"c OrElse c = """"c OrElse c = "`"c Then
            ' text in front of the quote is a token of its own and must not
            ' become a part of the string content
            Dim previous As Token = flushBuffer()

            escapes.string = True
            escapes.strWrapper = c

            Return previous
        ElseIf c = "#"c Then
            ' flush before the comment flag is set, otherwise createToken
            ' would report the preceding text as a comment
            Dim previous As Token = flushBuffer()

            escapes.comment = True

            Return previous
        ElseIf c = " "c OrElse c = ASCII.TAB Then
            Return flushBuffer()
        ElseIf c = ASCII.CR OrElse c = ASCII.LF Then
            Return New Token(Tokens.Terminator, vbCrLf)
        ElseIf c = "<"c OrElse c = ">"c OrElse c = "="c Then
            ' may be the first half of <=, >= or <>: keep it back until the
            ' next character has been seen
            Dim previous As Token = flushBuffer()

            pendingOperator = CStr(c)

            Return previous
        ElseIf c = "["c OrElse c = "("c Then
            Return New Token(Tokens.Open, c)
        ElseIf c = ")"c OrElse c = "]"c Then
            Return New Token(Tokens.Close, c)
        ElseIf c Like operators Then
            Return New Token(Tokens.Operator, c)
        ElseIf c = ","c Then
            Return New Token(Tokens.Comma, c)
        ElseIf c = "."c Then
            ' a dot after digits is kept in the buffer as a decimal point,
            ' any other dot is a member reference
            If Not buffer.isInteger Then
                Return New Token(Tokens.Reference, c)
            Else
                buffer += c
            End If
        Else
            buffer += c
        End If

        Return Nothing
    End Function

    ''' <summary>
    ''' Turns the buffered text into a token if there is any.
    ''' </summary>
    ''' <returns>The token of the buffered text, or <c>Nothing</c> if the buffer is empty.</returns>
    Private Function flushBuffer() As Token
        If buffer <> 0 Then
            Return createToken()
        Else
            Return Nothing
        End If
    End Function

    ''' <summary>
    ''' Takes all characters out of <see cref="buffer"/> and classifies them.
    ''' </summary>
    ''' <returns>
    ''' A comment token if the input ended inside a comment, otherwise a keyword,
    ''' operator, boolean, integer, number or symbol token; text that fits none
    ''' of these classes is returned as <c>Tokens.Invalid</c>.
    ''' </returns>
    Private Function createToken() As Token
        Dim raw As String = buffer.PopAllChars.CharString

        ' only reached when the input ends inside a comment, a comment that
        ' ends with a line break is already handled in walkChar
        If escapes.comment Then
            escapes.comment = False
            Return New Token(Tokens.Comment, raw)
        End If

        ' classification ignores the case, the token keeps the original text
        Dim textLower As String = raw.ToLower

        If textLower Like keywords Then
            Return New Token(Tokens.keyword, raw)
        ElseIf textLower Like operators Then
            Return New Token(Tokens.Operator, raw)
        ElseIf textLower Like literal Then
            Return New Token(Tokens.Boolean, raw)
        ElseIf textLower Like logicals Then
            Return New Token(Tokens.Operator, raw)
        ElseIf textLower.IsPattern("\d+") Then
            ' NOTE(review): IsPattern presumably tests the whole string - confirm
            Return New Token(Tokens.Integer, raw)
        ElseIf textLower.IsNumeric Then
            Return New Token(Tokens.Number, raw)
        ElseIf textLower.IsPattern("[a-z_][a-z_0-9]*") Then
            Return New Token(Tokens.Symbol, raw)
        Else
            Return New Token(Tokens.Invalid, raw)
        End If
    End Function
End Class
End Namespace