-
-
Notifications
You must be signed in to change notification settings - Fork 784
/
grammar.lark
346 lines (292 loc) · 10.3 KB
/
grammar.lark
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
// Vyper grammar for Lark
// Start symbol of the grammar.
// A module is any number of the top-level items listed below, in any order:
// definitions, declarations, imports, comments and blank lines.
// NOTE: a module may begin with a docstring
module: ( DOCSTRING
        | COMMENT
        | import
        | struct_def
        | interface_def
        | constant_def
        | variable_def
        | enum_def  // TODO deprecate at some point in favor of flag
        | flag_def
        | event_def
        | function_def
        | immutable_def
        | transient_def
        | exports_decl
        | _NEWLINE )*
// Import statements, modelled on the Python import forms:
//   import [.]*pkg.mod [as alias]
//   from ([.]*pkg.mod | [.]+) import (* | name [as alias])
//   from ([.]*pkg.mod | [.]+) import (name [as alias], ...)
_AS:      "as"
_FROM:    "from"
_IMPORT:  "import"
DOT:      "."
WILDCARD: "*"

_import_name: NAME
// NOTE: the path separator is the literal "." rather than DOT,
//       because here the dot is just a separator
_import_path: (_import_name ".")* _import_name
import_alias: _AS NAME
?import_list: _import_name [import_alias] ("," _import_name [import_alias] )* [","]
_import_from: _FROM (DOT* _import_path | DOT+)
import: _IMPORT DOT* _import_path [import_alias]
      | _import_from _IMPORT ( WILDCARD | _import_name [import_alias] )
      | _import_from _IMPORT "(" import_list ")"
// Constants: `NAME: constant(type) = expr`,
// or `NAME: public(constant(type)) = expr` to also get a getter
// NOTE: Temporary until decorators used
constant:             "constant" "(" type ")"
constant_private:     NAME ":" constant
constant_with_getter: NAME ":" "public" "(" constant ")"
constant_def:         (constant_private | constant_with_getter) "=" expr

// Immutables: `NAME: immutable(type)`
// NOTE: Temporary until decorators used
immutable:     "immutable" "(" type ")"
immutable_def: NAME ":" immutable

// Transients: `NAME: transient(type)`
transient:     "transient" "(" type ")"
transient_def: NAME ":" transient

// Plain variables: `NAME: type`. The `public(...)` wrapper may enclose
// a plain type, an immutable or a transient.
variable: NAME ":" type
// NOTE: Temporary until decorators used
variable_with_getter: NAME ":" "public" "(" (type | immutable | transient) ")"
variable_def: variable | variable_with_getter
// A decorator "wraps" a method, modifying its context.
// NOTE: One or more can be applied (some combos might conflict)
// Form: `@NAME` or `@NAME(arguments)`, terminated by a newline;
// the parenthesised argument list is optional and may be empty.
decorator:  "@" NAME [ "(" [arguments] ")" ] _NEWLINE
decorators: decorator+
// Functions/Methods take a list of zero or more typed parameters,
// and can return up to one parameter.
// NOTE: Parameters can have a default value,
//       which must be a constant or environment variable.
parameter: NAME ":" type ["=" expr]
// Comma-separated parameters. A single trailing comma is accepted, but runs
// of consecutive commas (e.g. `a: uint256,, b: uint256`) are rejected; this
// is the same trailing-comma form used by `arguments`.
parameters: parameter ("," parameter)* [","]

_FUNC_DECL:   "def"
_RETURN_TYPE: "->"
// Optional return annotation: `-> type`
returns: _RETURN_TYPE type
// Signature: `def NAME(parameters) [-> type]`; also reused by interface_function
function_sig: _FUNC_DECL NAME "(" [parameters] ")" [returns]
// Full definition: optional decorators, the signature, ":" and an indented body
function_def: [decorators] function_sig ":" body
// Events declare zero or more members; a member is either plain
// (`NAME: type`) or indexed (`NAME: indexed(type)`)
_EVENT_DECL: "event"
event_member:      NAME ":" type
indexed_event_arg: NAME ":" "indexed" "(" type ")"
// An event without members uses a pass statement in place of the member list
event_body: _NEWLINE _INDENT (((event_member | indexed_event_arg ) _NEWLINE)+ | _PASS _NEWLINE) _DEDENT
event_def: _EVENT_DECL NAME ":" ( event_body | _PASS )
// Enums: one or more bare member names, one per line
// TODO deprecate in favor of flag
_ENUM_DECL: "enum"
enum_member: NAME
enum_body:   _NEWLINE _INDENT (enum_member _NEWLINE)+ _DEDENT
enum_def:    _ENUM_DECL NAME ":" enum_body

// Flags: same shape as enums, introduced by the `flag` keyword
_FLAG_DECL: "flag"
flag_member: NAME
flag_body:   _NEWLINE _INDENT (flag_member _NEWLINE)+ _DEDENT
flag_def:    _FLAG_DECL NAME ":" flag_body
// Types
// Fixed-size array `T[expr]` and dynamic array `DynArray[T, expr]`;
// the element type T may itself be an array of either kind.
array_def:     (NAME | array_def | dyn_array_def) "[" expr "]"
dyn_array_def: "DynArray" "[" (NAME | array_def | dyn_array_def) "," expr "]"
// Parenthesised, comma-separated list of one or more types (trailing comma allowed)
tuple_def: "(" ( NAME | array_def | dyn_array_def | tuple_def ) ( "," ( NAME | array_def | dyn_array_def | tuple_def ) )* [","] ")"

// NOTE: Map takes a basic type and maps to another type (can be non-basic, including maps)
_MAP: "HashMap"
map_def: _MAP "[" ( NAME | array_def ) "," type "]"

// Dotted reference with at least one ".", e.g. `module.Type`
imported_type: NAME ("." NAME)+

// Any type expression
type: ( NAME | imported_type | array_def | tuple_def | map_def | dyn_array_def )
// Structs hold one or more `NAME: type` members (basic or custom types),
// one per line in an indented block
_STRUCT_DECL: "struct"
struct_member: NAME ":" type
struct_def:    _STRUCT_DECL NAME ":" _NEWLINE _INDENT (struct_member _NEWLINE)+ _DEDENT
// Interfaces list one or more function signatures, each followed by ":" and
// its mutability (a bare NAME)
_INTERFACE_DECL: "interface"
mutability: NAME
interface_function: function_sig ":" mutability
interface_def: _INTERFACE_DECL NAME ":" _NEWLINE _INDENT ( interface_function _NEWLINE)+ _DEDENT

// `implements: NAME`
// NOTE(review): implements_def is not referenced by any rule visible in this file
_IMPLEMENTS_DECL: "implements"
implements_def: _IMPLEMENTS_DECL ":" NAME

// `exports: <attribute>` or `exports: (<expr>, ...)`
exports_decl: "exports" ":" (attribute | tuple)
// Statements
// Block statements (if / for) end when their body is de-indented, so they
// need no terminating newline. Every other statement does not open a block
// and must be closed by a newline. Both kinds may carry a trailing comment.
_stmt: ( if_stmt | for_stmt ) [COMMENT]
     | ( declaration
       | assign
       | aug_assign
       | return_stmt
       | pass_stmt
       | break_stmt
       | continue_stmt
       | log_stmt
       | raise_stmt
       | assert_stmt
       | expr ) [COMMENT] _NEWLINE
// Local declaration with optional initial value: `NAME: type [= expr]`
declaration: variable ["=" expr]

// Assignment targets: a single atom_expr, or two or more comma-separated
// targets (optionally parenthesised) where `_` skips a position
skip_assign: "_"
multiple_assign: (atom_expr | skip_assign) ("," (atom_expr | skip_assign))+
assign: (atom_expr | multiple_assign | "(" multiple_assign ")" ) "=" expr

// Operators that may precede "=" in an augmented assignment
// NOTE: Keep these in sync with bin_op below
?aug_operator: "+"     -> add
             | "-"     -> sub
             | "*"     -> mul
             | "/"     -> div
             | "//"    -> floordiv
             | "%"     -> mod
             | "**"    -> pow
             | "<<"    -> shl
             | ">>"    -> shr
             | _BITAND -> bitand
             | _BITOR  -> bitor
             | _BITXOR -> bitxor
             | _AND    -> and
             | _OR     -> or
// NOTE: Post-process into a normal assign
aug_assign: atom_expr aug_operator "=" expr
// Keywords of the simple (single-line) statements
_PASS:     "pass"
_BREAK:    "break"
_CONTINUE: "continue"
_LOG:      "log"
_RETURN:   "return"
_RAISE:    "raise"
_ASSERT:   "assert"

pass_stmt:     _PASS
break_stmt:    _BREAK
continue_stmt: _CONTINUE
// `log Target(arguments)`, where Target is a NAME or an atom_expr
log_stmt:      _LOG (NAME | atom_expr) "(" [arguments] ")"
// `return` with zero or more comma-separated expressions
return_stmt:   _RETURN [expr ("," expr)*]

// `raise` and `assert` each come in three forms: without a reason,
// with a reason expression, and with the UNREACHABLE marker
_UNREACHABLE: "UNREACHABLE"
raise_stmt: _RAISE              -> raise
          | _RAISE expr         -> raise_with_reason
          | _RAISE _UNREACHABLE -> raise_unreachable
assert_stmt: _ASSERT expr                  -> assert
           | _ASSERT expr "," expr         -> assert_with_reason
           | _ASSERT expr "," _UNREACHABLE -> assert_unreachable
// An indented block holding one or more statements and/or comment-only or blank lines
body: _NEWLINE _INDENT ([COMMENT] _NEWLINE | _stmt)+ _DEDENT

// if / elif / else: every conditional branch is `expr ":" body`,
// and the else branch is a bare body
cond_exec:    expr ":" body
default_exec: body
if_stmt: "if" cond_exec ("elif" cond_exec)* ["else" ":" default_exec]

// for: `for NAME: type in expr: body`
loop_variable: NAME ":" type
loop_iterator: expr
for_stmt: "for" loop_variable "in" loop_iterator ":" body
// Call arguments: positional (`expr`) or keyword (`NAME = expr`),
// comma-separated, with an optional trailing comma
arg:   expr
kwarg: NAME "=" expr
?argument: (arg | kwarg)
arguments: argument ("," argument)* [","]

// Collection displays
// tuple: `(,)`, a single element followed by a comma, or two or more elements
tuple: "(" "," ")" | "(" expr ( ("," expr)+ [","] | "," ) ")"
// list: empty, or comma-separated expressions with an optional trailing comma
list: "[" "]" | "[" expr ("," expr)* [","] "]"
// dict: empty, or comma-separated `NAME: expr` pairs with an optional trailing comma
dict: "{" "}" | "{" (NAME ":" expr) ("," (NAME ":" expr))* [","] "}"
// Operators
// This section needs to match Python's operator precedence:
//   See https://docs.python.org/3/reference/expressions.html#operator-precedence
// NOTE: The recursive cycle here helps enforce operator precedence
//       Precedence goes up the lower down you go
?expr: assignment_expr

// "walrus" operator
?assignment_expr: ternary
                | NAME ":=" assignment_expr

// ternary operator
?ternary: bool_or
        | ternary "if" ternary "else" ternary

_AND: "and"
_OR:  "or"
_NOT: "not"

// Boolean Operations
?bool_or: bool_and
        | bool_or _OR bool_and -> or
?bool_and: bool_not
         | bool_and _AND bool_not -> and
?bool_not: comparator
         | _NOT bool_not -> not
// Operator tokens used by the binary operation rules
_POW:    "**"
_SHL:    "<<"
_SHR:    ">>"
_BITAND: "&"
_BITOR:  "|"
_BITXOR: "^"
// Comparisons
_EQ: "=="
_NE: "!="
_LE: "<="
_GE: ">="
_IN: "in"
// Left-recursive, so chained comparisons group to the left.
// Each alternative is aliased to a node name identifying the operator.
// `_NOT` and `_IN` are underscore-prefixed terminals and therefore filtered
// out of the tree, so `not in` needs its own alias (`not_in`); sharing `in`
// would make the two operators indistinguishable in the parse tree.
?comparator: bitwise_or
           | comparator "<" bitwise_or      -> lt
           | comparator ">" bitwise_or      -> gt
           | comparator _EQ bitwise_or      -> eq
           | comparator _NE bitwise_or      -> ne
           | comparator _LE bitwise_or      -> le
           | comparator _GE bitwise_or      -> ge
           | comparator _IN bitwise_or      -> in
           | comparator _NOT _IN bitwise_or -> not_in
// Binary Operations
// Each rule falls through to the next (tighter-binding) one and is
// left-recursive, except `power`, which recurses on its right operand.
// NOTE: Keep these in sync with aug_assign above
?bitwise_or: bitwise_xor
           | bitwise_or _BITOR bitwise_xor -> bitor
?bitwise_xor: bitwise_and
            | bitwise_xor _BITXOR bitwise_and -> bitxor
?bitwise_and: shift
            | bitwise_and _BITAND shift -> bitand
?shift: summation
      | shift _SHL summation -> shl
      | shift _SHR summation -> shr
?summation: product
          | summation "+" product -> add
          | summation "-" product -> sub
?product: unary
        | product "*" unary  -> mul
        | product "/" unary  -> div
        | product "//" unary -> floordiv
        | product "%" unary  -> mod
?unary: power
      | "+" power -> uadd
      | "-" power -> usub
      | "~" power -> invert
// TODO: add factor rule
?power: external_call
      | external_call _POW power -> pow
// An atom_expr optionally prefixed by `extcall` or `staticcall`
?external_call: ("extcall" | "staticcall")? atom_expr
// Postfix forms: indexing, attribute access and calls
subscript: (atom_expr | list) "[" expr "]"
attribute: atom_expr "." NAME
call:      atom_expr "(" [arguments] ")"
?atom_expr: NAME -> get_var
          | subscript
          | attribute
          | call
          | atom

// special rule to handle types as "arguments" (for `empty` builtin)
empty: "empty" "(" type ")"

// special rule to handle types as "arguments" (for `_abi_decode` builtin);
// both the `_abi_decode` and `abi_decode` spellings are accepted
abi_decode: ("_abi_decode" | "abi_decode") "(" arg "," type ( "," kwarg )* ")"

special_builtins: empty | abi_decode

// NOTE: Must end recursive cycle like this (with `atom` calling `expr`)
?atom: literal
     | special_builtins
     | tuple
     | list
     | dict
     | "(" expr ")"
// Tokens
// Adapted from Lark repo. https://github.com/lark-parser/lark/blob/master/examples/python3.lark
// Adapted from: https://docs.python.org/3/reference/grammar.html
// Adapted by: Erez Shinan
// Identifier: a letter or underscore followed by any number of word characters.
NAME: /[a-zA-Z_]\w*/
// Comment: from `#` up to, but not including, the line terminator.
COMMENT: /#[^\n\r]*/
// One or more line breaks (each with any tabs/spaces that follow it) and/or comments.
_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+
// Single- or double-quoted string with an optional `b` prefix (matched case-insensitively).
// The `(?!"")` / `(?!'')` lookaheads reject an opening quote that is followed by two more quotes.
STRING: /b?("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/i
// Triple-quoted string; the `s` flag lets `.` match newlines, so it may span lines.
DOCSTRING: /(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/is
// Numeric literals. The `.2` suffix on a terminal name is a Lark terminal priority.
// DEC_NUMBER: a lone `0`, or a non-zero digit followed by more digits.
DEC_NUMBER: /0|[1-9]\d*/i
// NOTE(review): the `*` quantifier means a bare prefix (`0x`, `0o`, `0b`) with no
// digits also matches these three terminals - confirm that this is intended.
HEX_NUMBER.2: /0x[\da-f]*/i
OCT_NUMBER.2: /0o[0-7]*/i
BIN_NUMBER.2 : /0b[0-1]*/i
// Float: digits with a decimal point (optional exponent), or integer digits with a mandatory exponent.
FLOAT_NUMBER.2: /((\d+\.\d*|\.\d+)(e[-+]?\d+)?|\d+(e[-+]?\d+))/i
// Any numeric literal; the leading underscore makes Lark inline this rule into its parent.
_number: DEC_NUMBER
| HEX_NUMBER
| BIN_NUMBER
| OCT_NUMBER
| FLOAT_NUMBER
// Boolean literals, also declared with priority 2.
BOOL.2: "True" | "False"
ELLIPSIS: "..."
// Literal values accepted by `atom`.
// TODO: Remove Docstring from here, and add to first part of body
?literal: ( _number | STRING | DOCSTRING | BOOL | ELLIPSIS)
// WS: runs of tabs, spaces and form feeds are skipped
%ignore /[\t \f]+/
// LINE_CONT: a backslash, optional trailing whitespace and a line break are skipped
%ignore /\\[\t \f]*\r?\n/
// Comments are skipped as well
%ignore COMMENT

// _INDENT / _DEDENT are declared here without a definition
// NOTE(review): presumably emitted by an indentation-aware post-lexer - confirm
%declare _INDENT _DEDENT