-
Notifications
You must be signed in to change notification settings - Fork 0
/
wheat.py
134 lines (104 loc) · 3.5 KB
/
wheat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import sys
# Token kinds and literal markers shared by the tokenizer below.

# Sentinels: unrecognised input and end of text.
INVALID, EOF = '!invalid!', 'EOF'

# Comment, string, call and macro markers.
LINE_COMMENT = '//'
STR = '"'
CALL = '$'
SINGLE_MACRO, DOUBLE_MACRO = '!', '!!'

# Single-character punctuation.
COLON, SEMICOLON, EQUAL = ':', ';', '='

# Paired delimiters (opening, closing).
BLOCK_OPEN, BLOCK_CLOSE = '(', ')'
# NOTE(review): the UNPARSED_* pair is not referenced by the code visible
# in this file.
UNPARSED_OPEN, UNPARSED_CLOSED = '#(', '#)'
BRACKET_OPEN, BRACKET_CLOSE = '[', ']'
DATA_OPEN, DATA_CLOSE = '{', '}'

# Generic name token and the keywords that are told apart from it.
NAME = 'name'
FUNC, RETURN, LET = 'func', 'return', 'let'

# NOTE(review): not referenced by the code visible in this file;
# presumably symbols reserved for later use -- confirm.
RESTRICTED = {'@', '#', '%', '^', '&', '*'}
def gi(arr, index, default=None):
    """Return ``arr[index]``, or ``default`` when ``index`` is at or past the end."""
    return arr[index] if index < len(arr) else default
def isWhitespace(text, index):
    """Tell whether ``text[index]`` is a space or any lower code point."""
    codePoint = ord(text[index])
    # 0x20 is the code point of ' '; everything at or below it counts.
    return codePoint <= 0x20
def isSymbol(text, index):
    """Tell whether ``text[index]`` falls in one of the ASCII punctuation ranges.

    The ranges are '!'..'/', ':'..'@', '['..'`' and '{'..'~' (inclusive).
    """
    codePoint = ord(text[index])
    punctuationRanges = (('!', '/'), (':', '@'), ('[', '`'), ('{', '~'))
    return any(ord(low) <= codePoint <= ord(high)
               for low, high in punctuationRanges)
def isNameChar(text, index):
    """Tell whether ``text[index]`` is neither whitespace nor a symbol."""
    if isWhitespace(text, index):
        return False
    return not isSymbol(text, index)
def getToken(text, index):
    """Scan a single token from ``text`` starting at ``index``.

    Leading whitespace is skipped first.  Recognised tokens are line
    comments (``//`` up to, but not including, the newline), double-quoted
    strings, the call and macro marks, ``:``, ``;``, ``=``, the three
    bracket pairs, and names/keywords.  A lone ``/`` and any other symbol
    character yield INVALID.

    A string that is never closed runs to the end of the text and is still
    reported as STR.

    Returns: (startIndex, endIndex, token) where
    ``text[startIndex:endIndex]`` is the matched text and ``token`` is one
    of the module's token constants.  EOF is returned when nothing but
    whitespace remains; both indices are then the position where scanning
    stopped.
    """
    length = len(text)

    # Check the bound before looking at a character so that empty text, or
    # a start index at the end, produces EOF instead of an IndexError.
    while index < length and isWhitespace(text, index):
        index += 1
    startIndex = index
    if index >= length:
        return startIndex, index, EOF

    char = text[index]

    if char == '/':
        if gi(text, index + 1) == '/':
            # The comment ends just before the newline, or at end of text.
            end = text.find('\n', index + 2)
            if end == -1:
                end = length
            return startIndex, end, LINE_COMMENT
        return startIndex, index + 1, INVALID

    if char == '$':
        return startIndex, index + 1, CALL

    if char == '!':
        if gi(text, index + 1) == '!':
            return startIndex, index + 2, DOUBLE_MACRO
        return startIndex, index + 1, SINGLE_MACRO

    if char == '"':
        index += 1
        while index < length and text[index] != '"':
            # A backslash escapes whatever follows it, so skip the pair.
            # Treating only \" as an escape would misread an escaped
            # backslash directly before the closing quote.
            index += 2 if text[index] == '\\' else 1
        # Step over the closing quote; clamp so an unterminated string
        # never reports an end index beyond the text.
        return startIndex, min(index + 1, length), STR

    if char == ':':
        return startIndex, index + 1, COLON
    if char == ';':
        return startIndex, index + 1, SEMICOLON
    if char == '=':
        return startIndex, index + 1, EQUAL
    if char == '(':
        return startIndex, index + 1, BLOCK_OPEN
    if char == ')':
        return startIndex, index + 1, BLOCK_CLOSE
    if char == '[':
        return startIndex, index + 1, BRACKET_OPEN
    if char == ']':
        return startIndex, index + 1, BRACKET_CLOSE
    if char == '{':
        return startIndex, index + 1, DATA_OPEN
    if char == '}':
        return startIndex, index + 1, DATA_CLOSE
    if isSymbol(text, index):
        return startIndex, index + 1, INVALID

    # Name or keyword: consume name characters, stopping at the end of the
    # text (the bound check prevents an IndexError when the text finishes
    # on a name character).
    while index < length and isNameChar(text, index):
        index += 1
    name = text[startIndex:index]
    if name == FUNC:
        token = FUNC
    elif name == LET:
        token = LET
    elif name == RETURN:
        token = RETURN
    else:
        token = NAME
    return startIndex, index, token
def parseFile(text, index):
    """Placeholder with no implementation yet; ignores its arguments and returns None."""
def parseFn(text, index):
    """Placeholder with no implementation yet; ignores its arguments and returns None."""
def parseWord(text, index):
    """Placeholder with no implementation yet; ignores its arguments and returns None."""
def printTokens(text, index=0):
    """Print each token of ``text`` from ``index`` on, one ``kind: lexeme`` line per token.

    Printing stops after the EOF token has been printed, or once the scan
    position moves beyond the length of the text.
    """
    position = index
    while not position > len(text):
        begin, end, kind = getToken(text, position)
        print("{}: {}".format(kind, text[begin:end]))
        if kind == 'EOF':
            break
        # Resume scanning where the previous token ended.
        position = end
if __name__ == '__main__':
    # The path of the file to tokenize is the first command-line argument;
    # exit with a usage message instead of an IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: {} FILE".format(sys.argv[0]))
    # The context manager closes the file handle even if reading fails.
    with open(sys.argv[1]) as sourceFile:
        sourceText = sourceFile.read()
    printTokens(sourceText)