-
Notifications
You must be signed in to change notification settings - Fork 0
/
grammar_parse.py
129 lines (103 loc) · 3.24 KB
/
grammar_parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import ast
import lexer
class ParseError(Exception):
    """Raised when the token stream does not match the expected grammar syntax."""
    pass
class TokenPeek:
    """One-token lookahead wrapper over an iterator of (token_id, value) pairs.

    Once the underlying iterator is exhausted, peek()/pop() yield the
    sentinel ('@@eof', None) forever, which is what the parser loops test
    against to stop.
    """

    # Sentinel token presented after the real stream runs out.
    _EOF = ('@@eof', None)

    def __init__(self, token_gen):
        self.token_iter = iter(token_gen)
        # Bug fix: the original called next() unguarded here, so an EMPTY
        # token stream raised StopIteration out of the constructor instead
        # of starting at the '@@eof' sentinel (which parse_grammar_rule
        # handles by returning an empty Root).
        try:
            self.top = next(self.token_iter)
        except StopIteration:
            self.top = self._EOF

    def peek(self):
        """Return the current token without consuming it."""
        return self.top

    def next(self):
        """Advance to the following token; park on the EOF sentinel at the end."""
        try:
            self.top = next(self.token_iter)
        except StopIteration:
            self.top = self._EOF

    def pop(self):
        """Return the current token and advance past it."""
        tmp = self.peek()
        self.next()
        return tmp

    def expect(self, tok_id):
        """Consume one token, requiring its id to be tok_id; return its value.

        Raises ParseError when the next token has a different id.
        """
        t, v = self.pop()
        if t != tok_id:
            raise ParseError("Expected %s; got %s." % (tok_id, t))
        return v
def parse_token_decl(tokens):
    """Parse a token declaration (`@token-id := "pattern"`) into a TokenDefinition."""
    name = tokens.expect('@token-id')
    tokens.expect('@$:=')
    pattern = tokens.expect('@token-match')
    return ast.TokenDefinition(name, pattern)
def parse_rule_decl(tokens):
    """Parse a rule declaration (`rule-id := <expr>`) into a RuleDefinition."""
    name = tokens.expect('@rule-id')
    tokens.expect('@$:=')
    body = parse_alternation_expr(tokens)
    return ast.RuleDefinition(name, body)
def parse_atom(tokens):
    """Parse an atom: an id reference, a literal, or a parenthesized sub-expression."""
    tok_type, tok_value = tokens.pop()

    # Parenthesized group: recurse into a full alternation expression.
    if tok_type == '@$(':
        inner = parse_alternation_expr(tokens)
        tokens.expect('@$)')
        return inner

    # Reference to a named token or rule.
    if tok_type in ('@token-id', '@rule-id'):
        return ast.RuleTokenId(tok_value)

    # Quoted literal token.
    if tok_type == '@token-literal':
        return ast.TokenLiteral(tok_value)

    raise ParseError("Expected @token-id, @rule-id, @token-literal or @$(; got %s." % (tok_type,))
def parse_postfix_expr(tokens):
    """Parse an atom with an optional '?' or '*' postfix operator."""
    inner = parse_atom(tokens)
    # Map the lookahead token id to the postfix marker; None means no postfix.
    suffix = {'@$?': '?', '@$*': '*'}.get(tokens.peek()[0])
    if suffix is not None:
        tokens.next()
    return ast.PostfixExpr(suffix, inner)
def parse_follow_expr(tokens):
    """Parse one or more postfix expressions chained by '=>' into a FollowExpr."""
    parts = [parse_postfix_expr(tokens)]
    while tokens.peek()[0] == '@$=>':
        tokens.next()  # consume the '=>'
        parts.append(parse_postfix_expr(tokens))
    return ast.FollowExpr(parts)
def parse_alternation_expr(tokens):
    """Parse one or more follow expressions joined by '|' into an AlternationExpr."""
    branches = [parse_follow_expr(tokens)]
    while tokens.peek()[0] == '@$|':
        tokens.next()  # consume the '|'
        branches.append(parse_follow_expr(tokens))
    return ast.AlternationExpr(branches)
def parse_grammar_rule(tokens):
    """Parse the whole grammar: a ';'-terminated list of token and rule declarations."""
    declarations = []
    while True:
        kind, _ = tokens.peek()
        if kind == '@@eof':
            break
        if kind == '@token-id':
            declarations.append(parse_token_decl(tokens))
        elif kind == '@rule-id':
            declarations.append(parse_rule_decl(tokens))
        else:
            raise ParseError("Expected @token-id or @rule-id; got %s." % (kind,))
        # Every declaration must be closed by a ';'.
        tokens.expect('@$;')
    return ast.Root(declarations)
def parse_grammar(token_iter):
    """Parse a full grammar from an iterable of (token_id, value) pairs; return an ast.Root."""
    peekable = TokenPeek(token_iter)
    return parse_grammar_rule(peekable)
def create_lexer():
    """Build the tokenizer for grammar files: named regex tokens plus punctuation literals."""
    # Punctuation that lexes as '@$<literal>' token ids via add_literal_tokens.
    punctuation = [':=', '(', ')', '?', '*', '=>', '|', ';']
    patterns = {
        "@@skip": r"(?:\s*(?://[^\n]*\n)?)*",
        "@token-id": r"(@@?[a-z]+(?:-[a-z]+)*)",
        "@rule-id": r"(#?[a-z]+(?:-[a-z]+)*)",
        "@token-literal": r"'((?:\\.|[^'\n])+)'",
        # Concatenation keeps the raw-string body free of quote escaping issues.
        "@token-match": '"' + r"((?:\\.|[^\"\n])+)" + '"',
    }
    lexer.add_literal_tokens(patterns, punctuation)
    return lexer.Tokenizer(patterns)
__all__ = ['parse_grammar', 'create_lexer', 'ParseError']