forked from mythmon/cs480_milestones
/
lexer.rb
116 lines (94 loc) · 2.19 KB
/
lexer.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
$LOAD_PATH << "./"
require "tokens.rb"
require "symboltable.rb"
require "strscan"
# Splits +input+ into a flat list of token objects.
#
# Lexemes are recognised in this priority order at each position:
# parentheses, booleans, reals, integers, quoted strings, bare strings.
# Every token produced is also offered to a SymbolTable (via +try_set+,
# keyed by the raw lexeme text) that is local to this call.
#
# @param input [String] the source text to lex
# @return [Array] tokens in source order (Token, BooleanToken, RealToken,
#   IntegerToken or StringToken instances)
# @raise [RuntimeError] if no rule matches at the current position; the
#   message includes the scanner state and the current line number
def tokenize(input)
  tokens = []
  s = StringScanner.new(input)
  st = SymbolTable.new
  line = 1

  # Registers the token under its lexeme, then appends it to the output.
  emit = lambda do |lexeme, token|
    st.try_set(lexeme, token)
    tokens << token
  end

  until s.eos?
    # Skip a whole run of whitespace at once, counting the newlines in it.
    # (Must be a double-quoted "\n": a single-quoted '\n' is a backslash
    # followed by the letter n and would never match a newline.)
    if (blank = s.scan(/\s+/))
      line += blank.count("\n")
      next
    end

    if (l = s.scan(/[()]/))
      # parentheses
      emit.call(l, Token.new(l == '(' ? :openparen : :closeparen))
    elsif (l = s.scan(/(?:true|false)\b/))
      # booleans -- the word boundary keeps lexemes such as "truex" or
      # "falsey" from being split into a boolean plus a trailing string
      emit.call(l, BooleanToken.new(:boolean, l))
    elsif (l = s.scan(/\-?\d+\.(\d+)?e(\d+)?/) ||
               s.scan(/\-?\d+e(\d+)?/) ||
               s.scan(/\-?\d+\.(\d+)?/))
      # reals: "1.5e3", "1e3", "1.5" (fraction and exponent digits are
      # optional; the exponent takes no sign)
      emit.call(l, RealToken.new(:real, l.to_f))
    elsif (l = s.scan(/\-?\d+/))
      # integers
      emit.call(l, IntegerToken.new(:int, l.to_i))
    elsif (l = s.scan(/"(.*?)"/) || s.scan(/'(.*?)'/))
      # quoted strings: the token value drops the surrounding quotes, but
      # the symbol-table key is the lexeme including them
      emit.call(l, StringToken.new(:string, s[1]))
    elsif (l = s.scan(/[^\s)]+/))
      # bare strings: everything up to the next whitespace or ')'
      emit.call(l, StringToken.new(:string, l))
    else
      # invalid
      raise "What? #{s.inspect} (line #{line})"
    end
  end

  tokens
end
#EOF vim: sw=2:ts=2