/
grammar.y
636 lines (574 loc) · 18.1 KB
/
grammar.y
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
// Yacc grammar file for the vanadium VDL language.
// http://goto/veyron:vdl
//
// Similar to Go, the formal grammar uses semicolons ';' as terminators, but
// idiomatic usage may omit most semicolons using the following rules:
// 1) During the tokenization phase, semicolons are always auto-inserted at
// the end of each line after certain tokens. This is implemented in
// the lexer via the autoSemi function.
// 2) Semicolons may be omitted before a closing ')' or '}'. This is
// implemented via the osemi rule below.
//
// To generate the grammar.go source file containing the parser, run
// grammar_gen.sh in this same directory, or run go generate on this package.
////////////////////////////////////////////////////////////////////////
// Declarations section.
%{
// This grammar.y.go file was auto-generated by yacc from grammar.y.
package parse
import (
"math/big"
"strings"
)
// intPos holds an arbitrary-precision integer literal together with its
// source position; it is the semantic value for tINTLIT tokens.
type intPos struct {
int *big.Int
pos Pos
}
// ratPos holds an arbitrary-precision rational literal together with its
// source position; it is the semantic value for tRATLIT tokens.
type ratPos struct {
rat *big.Rat
pos Pos
}
// typeListToStrList converts a slice of Type to a slice of StringPos. Each
// type must be a TypeNamed with an empty PackageName, otherwise errors are
// reported via the lexer, and ok=false is returned.
//
// This helper backs the field_spec rule, which deliberately parses variable
// names as a type_comma_list to avoid a reduce/reduce conflict, and therefore
// must validate after the fact that each "type" is really a plain identifier.
func typeListToStrList(yylex yyLexer, typeList []Type) (strList []StringPos, ok bool) {
	ok = true
	for _, t := range typeList {
		var tn *TypeNamed
		if tn, ok = t.(*TypeNamed); !ok {
			lexPosErrorf(yylex, t.Pos(), "%s invalid (expected one or more variable names)", t.String())
			return
		}
		if strings.ContainsRune(tn.Name, '.') {
			ok = false
			// Message matches the one above (no trailing period) so the two
			// parallel failure modes report consistently.
			lexPosErrorf(yylex, t.Pos(), "%s invalid (expected one or more variable names)", tn.Name)
			return
		}
		strList = append(strList, StringPos{tn.Name, tn.P})
	}
	return
}
%}
// This union is turned into the struct type yySymType. Most symbols include
// positional information; this is necessary since Go yacc doesn't support
// passing positional information, so we need to track it ourselves.
%union {
pos Pos
strpos StringPos
intpos intPos
ratpos ratPos
namepos NamePos
nameposes []NamePos
typeexpr Type
typeexprs []Type
fields []*Field
iface *Interface
constexpr ConstExpr
constexprs []ConstExpr
complit *ConstCompositeLit
kvlit KVLit
kvlits []KVLit
errordef ErrorDef
}
// Terminal tokens. We leave single-char tokens as-is using their ascii code as
// their id, to make the grammar more readable; multi-char tokens get their own
// id. The start* tokens are dummy tokens to kick off the parse.
%token startFileImports startFile startConfigImports startConfig
%token startExprs
%token <pos> ';' ':' ',' '.' '(' ')' '[' ']' '{' '}' '<' '>' '='
%token <pos> '!' '+' '-' '*' '/' '%' '|' '&' '^' '?'
// Multi-char operator tokens, e.g. tOROR is "||", tLSH is "<<".
%token <pos> tOROR tANDAND tLE tGE tNE tEQEQ tLSH tRSH
// Keyword tokens; each carries the position of the keyword.
%token <pos> tCONST tENUM tERROR tIMPORT tINTERFACE tMAP tPACKAGE
%token <pos> tSET tSTREAM tSTRUCT tTYPE tTYPEOBJECT tUNION
// Literal tokens; each carries both the parsed value and its position.
%token <strpos> tIDENT tSTRLIT
%token <intpos> tINTLIT
%token <ratpos> tRATLIT
// Labeled rules holding typed values.
%type <strpos> nameref dotnameref
%type <namepos> label_spec
%type <nameposes> label_spec_list
%type <typeexpr> type type_no_typeobject otype
%type <typeexprs> type_comma_list streamargs
%type <fields> field_spec_list field_spec named_arg_list inargs outargs
%type <iface> iface_item_list iface_item
%type <constexpr> expr unary_expr operand
%type <constexprs> tags expr_comma_list
%type <complit> comp_lit
%type <kvlit> kv_lit
%type <kvlits> kv_lit_list
%type <errordef> error_details error_detail_list error_detail
// There are 5 precedence levels for operators, all left-associative, just like
// Go. Lines are listed in order of increasing precedence.
%left tOROR
%left tANDAND
%left '<' '>' tLE tGE tNE tEQEQ
%left '+' '-' '|' '^'
%left '*' '/' '%' '&' tLSH tRSH
// notPackage/notConfig only exist to give the error-recovery alternatives of
// the package/config rules a precedence; they are never shifted as tokens.
%left notPackage notConfig
%start start
%%
////////////////////////////////////////////////////////////////////////
// Rules section.
// Note that vdl files and config files use an identical grammar, other than the
// initial package or config clause respectively. Error checking for config
// files that include error, type or interface definitions occurs afterwards, to
// improve error reporting.
// The start* dummy tokens select which of the five entry points the parse
// uses: whole vdl file, vdl imports only, whole config file, config imports
// only, or a standalone comma-separated expression list.
start:
startFileImports package imports gen_imports_eof
| startFile package imports defs
| startConfigImports config imports gen_imports_eof
| startConfig config imports defs
| startExprs expr_comma_list ';'
{ lexStoreExprs(yylex, $2) }
// Dummy rule to terminate the parse after the imports, regardless of whether
// there are any defs. Defs always start with either the tTYPE, tCONST or
// tERROR tokens, and the rule handles all cases - either there's no trailing
// text (the empty case, which would have resulted in EOF anyways), or there's
// one or more defs, where we need to force an EOF.
gen_imports_eof:
// Empty.
{ lexGenEOF(yylex) }
| tTYPE
{ lexGenEOF(yylex) }
| tCONST
{ lexGenEOF(yylex) }
| tERROR
{ lexGenEOF(yylex) }
// PACKAGE
package:
%prec notPackage
{ lexPosErrorf(yylex, Pos{}, "vdl file must start with package clause") }
| tPACKAGE tIDENT ';'
{ lexVDLFile(yylex).PackageDef = NamePos{Name:$2.String, Pos:$2.Pos} }
// CONFIG
config:
%prec notConfig
{ lexPosErrorf(yylex, Pos{}, "config file must start with config clause") }
| tIDENT '=' expr ';'
{
// We allow "config" as an identifier; it is not a keyword. So we check
// manually to make sure the syntax is correct.
if $1.String != "config" {
lexPosErrorf(yylex, $1.Pos, "config file must start with config clause")
return 1 // Any non-zero code indicates an error
}
file := lexVDLFile(yylex)
file.PackageDef = NamePos{Name:"config", Pos:$1.Pos}
file.ConstDefs = []*ConstDef{{Expr:$3}}
}
// IMPORTS
imports:
// Empty.
| imports import ';'
// An import clause is either a parenthesized (possibly empty) list of import
// specs, or a single bare import spec.
import:
tIMPORT '(' ')'
| tIMPORT '(' import_spec_list osemi ')'
| tIMPORT import_spec
import_spec_list:
import_spec
| import_spec_list ';' import_spec
// An import spec is a quoted package path, optionally preceded by a local
// name used to refer to the package within this file.
import_spec:
tSTRLIT
{
imps := &lexVDLFile(yylex).Imports
*imps = append(*imps, &Import{Path:$1.String, NamePos:NamePos{Pos:$1.Pos}})
}
| tIDENT tSTRLIT
{
imps := &lexVDLFile(yylex).Imports
*imps = append(*imps, &Import{Path:$2.String, NamePos:NamePos{Name:$1.String, Pos:$1.Pos}})
}
// DEFINITIONS
// Each kind of definition (type, const, error) allows either a parenthesized
// (possibly empty) list of specs, or a single bare spec, mirroring Go.
defs:
// Empty.
| defs type_def ';'
| defs const_def ';'
| defs error_def ';'
type_def:
tTYPE '(' ')'
| tTYPE '(' type_spec_list osemi ')'
| tTYPE type_spec
| tTYPE interface_spec
const_def:
tCONST '(' ')'
| tCONST '(' const_spec_list osemi ')'
| tCONST const_spec
error_def:
tERROR '(' ')'
| tERROR '(' error_spec_list osemi ')'
| tERROR error_spec
// TYPE DEFINITIONS
type_spec_list:
type_spec
| type_spec_list ';' type_spec
// A type spec names a new type; the definition is appended to the file's
// TypeDefs as a side effect.
type_spec:
tIDENT type
{
tds := &lexVDLFile(yylex).TypeDefs
*tds = append(*tds, &TypeDef{Type:$2, NamePos:NamePos{Name:$1.String, Pos:$1.Pos}})
}
// The type_no_typeobject rule is necessary to avoid a shift/reduce conflict
// between type conversions and typeobject const expressions. E.g.
// type(expr) // type conversion
// typeobject(type) // typeobject const expression
//
// We've chosen similar syntax to make it easier for the user to remember how to
// use the feature, but since "typeobject" is itself a type, there is a problem.
// We resolve the conflict by restricting the type conversion to the rule:
// type_no_typeobject '(' expr ')'
//
// Note that if we wanted to add general-purpose functions with the func(expr)
// syntax, we'll need to pull nameref out of type_no_typeobject, and parse both
// func(expr) and nameref(expr) into a generic structure. We can't use that
// same mechanism for typeobject, since the thing inside the parens is a value
// expression for type conversions, but a type expression for typeobject.
type_no_typeobject:
nameref
{ $$ = &TypeNamed{Name:$1.String, P:$1.Pos} }
| tERROR // Special-case to allow the "error" keyword as a named type.
{ $$ = &TypeNamed{Name:"error", P:$1} }
| '[' tINTLIT ']' type
{ $$ = &TypeArray{Len:int($2.int.Int64()), Elem:$4, P:$1} }
| '[' ']' type
{ $$ = &TypeList{Elem:$3, P:$1} }
| tENUM '{' label_spec_list osemi '}'
{ $$ = &TypeEnum{Labels:$3, P:$1} }
| tSET '[' type ']'
{ $$ = &TypeSet{Key:$3, P:$1} }
| tMAP '[' type ']' type
{ $$ = &TypeMap{Key:$3, Elem:$5, P:$1} }
| tSTRUCT '{' field_spec_list osemi '}'
{ $$ = &TypeStruct{Fields:$3, P:$1} }
| tSTRUCT '{' '}'
{ $$ = &TypeStruct{P:$1} }
| tUNION '{' field_spec_list osemi '}'
{ $$ = &TypeUnion{Fields:$3, P:$1} }
| tUNION '{' '}'
{ $$ = &TypeUnion{P:$1} }
| '?' type
{ $$ = &TypeOptional{Base:$2, P:$1} }
// The type rule expands to all the actual types, including typeobject.
type:
type_no_typeobject
{ $$ = $1}
| tTYPEOBJECT
{ $$ = &TypeNamed{Name:"typeobject", P:$1} }
// Enum labels are plain identifiers, separated by semicolons.
label_spec_list:
label_spec
{ $$ = []NamePos{$1} }
| label_spec_list ';' label_spec
{ $$ = append($1, $3) }
label_spec:
tIDENT
{ $$ = NamePos{Name:$1.String, Pos:$1.Pos} }
field_spec_list:
field_spec
{ $$ = $1 }
| field_spec_list ';' field_spec
{ $$ = append($1, $3...) }
// The field_spec rule is intended to capture the following patterns:
// var type
// var0, var1, var2 type
// where var* refers to a variable name, and type refers to a type. Each var
// is expressed as an identifier. An oddity here is that we use a type_list to
// capture the list of variables rather than using a list of IDENTS. This means
// the grammar accepts invalid constructions, and we must validate afterwards.
//
// We do this to avoid a LALR reduce/reduce conflict with function arguments.
// The problem is exhibited by the in-args of these two functions, where func1
// has three args respectively named A, B, C all of type t1, and func2 has three
// args with name and type t2, t3 and t4 respectively. The func1 style is
// captured by field_spec in named_arg_list, while the func2 style is captured
// by type_list in args.
// func1(A, B, C t1)
// func2(t2, t3, t4)
//
// If we used an ident_list to capture "A, B, C" in func1, but used a type_list
// to capture "t2, t3, t4" in func2, we'd have a reduce/reduce conflict since
// yacc cannot determine whether to reduce as an ident_list or as a type_list;
// we don't know until we've reached token t1 in func1, or token ')' in func2.
//
// The fix can be considered both beautiful and a huge hack. To avoid the
// conflict we force both forms to use type_list to capture both "A, B, C" and
// "t2, t3, t4". This avoids the conflict since we're now always reducing via
// type_list, but allows invalid constructions like "[]int, []int []int". So we
// validate in the action and throw errors.
//
// An alternate fix would have been to remove the IDENT case from the type rule,
// use ident_list to capture both cases, and manually "expand" the grammar to
// distinguish the cases appropriately. That would ensure we don't allow
// constructions like "int, int int" in the grammar itself, but would lead to a
// much more complicated grammar. As a bonus, with the type_list solution we
// can give better error messages.
// field_spec parses "var0, var1, ... type". The variable names are captured
// as a type_comma_list (see the long comment above) and validated here via
// typeListToStrList; on failure the errors have already been reported, and we
// add a hint about the most likely cause.
field_spec:
type_comma_list type
{
if names, ok := typeListToStrList(yylex, $1); ok {
for _, n := range names {
$$ = append($$, &Field{Type:$2, NamePos:NamePos{Name:n.String, Pos:n.Pos}})
}
} else {
// Fixed stray period after the question mark ("?." -> "?").
lexPosErrorf(yylex, $2.Pos(), "perhaps you forgot a comma before %q?", $2.String())
}
}
type_comma_list:
type
{ $$ = []Type{$1} }
| type_comma_list ',' type
{ $$ = append($1, $3) }
// INTERFACE DEFINITIONS
// An interface spec appends a new *Interface to the file as a side effect.
interface_spec:
tIDENT tINTERFACE '{' '}'
{
ifs := &lexVDLFile(yylex).Interfaces
*ifs = append(*ifs, &Interface{NamePos:NamePos{Name:$1.String, Pos:$1.Pos}})
}
| tIDENT tINTERFACE '{' iface_item_list osemi '}'
{
$4.Name, $4.Pos = $1.String, $1.Pos
ifs := &lexVDLFile(yylex).Interfaces
*ifs = append(*ifs, $4)
}
// Each iface_item is a partial *Interface holding either one method or one
// embed; the list rule merges them into a single accumulated *Interface.
iface_item_list:
iface_item
{ $$ = $1 }
| iface_item_list ';' iface_item
{
$1.Embeds = append($1.Embeds, $3.Embeds...)
$1.Methods = append($1.Methods, $3.Methods...)
$$ = $1
}
iface_item:
tIDENT inargs streamargs outargs tags
{ $$ = &Interface{Methods: []*Method{{InArgs:$2, InStream:$3[0], OutStream:$3[1], OutArgs:$4, Tags:$5, NamePos:NamePos{Name:$1.String, Pos:$1.Pos}}}} }
| nameref
{ $$ = &Interface{Embeds: []*NamePos{{Name:$1.String, Pos:$1.Pos}}} }
inargs:
'(' ')'
{ $$ = nil }
| '(' named_arg_list ocomma ')'
{ $$ = $2 }
| '(' type_comma_list ocomma ')'
// Just like Go, we allow a list of types without variable names. See the
// field_spec rule for a workaround to avoid a reduce/reduce conflict.
{
for _, t := range $2 {
$$ = append($$, &Field{Type:t, NamePos:NamePos{Pos:t.Pos()}})
}
}
// The named_arg_list rule is just like the field_spec_list, but uses comma ','
// as a delimiter rather than semicolon ';'.
named_arg_list:
field_spec
{ $$ = $1 }
| named_arg_list ',' field_spec
{ $$ = append($1, $3...) }
// The outargs use special syntax to denote the error associated with each
// method. For parsing we accept these forms:
// error
// (string | error)
// (a, b string, c bool | error)
//
// TODO(toddw): Improve parser syntax errors.
outargs:
tERROR
{ $$ = nil }
| '(' named_arg_list ocomma '|' tERROR ')'
{ $$ = $2 }
| '(' type_comma_list ocomma '|' tERROR ')'
// Just like Go, we allow a list of types without variable names. See the
// field_spec rule for a workaround to avoid a reduce/reduce conflict.
{
for _, t := range $2 {
$$ = append($$, &Field{Type:t, NamePos:NamePos{Pos:t.Pos()}})
}
}
// streamargs always produces a 2-element slice: [in-stream type, out-stream
// type], with nil entries for unspecified streams.
streamargs:
// Empty.
{ $$ = []Type{nil, nil} }
| tSTREAM '<' '>'
{ $$ = []Type{nil, nil} }
| tSTREAM '<' type '>'
{ $$ = []Type{$3, nil} }
| tSTREAM '<' type ',' type '>'
{ $$ = []Type{$3, $5} }
// Method tags are an optional brace-enclosed list of const expressions.
tags:
// Empty.
{ $$ = nil }
| '{' '}'
{ $$ = nil }
| '{' expr_comma_list ocomma '}'
{ $$ = $2 }
expr_comma_list:
expr
{ $$ = []ConstExpr{$1} }
| expr_comma_list ',' expr
{ $$ = append($1, $3) }
// CONST DEFINITIONS
const_spec_list:
const_spec
| const_spec_list ';' const_spec
// A const spec appends a named *ConstDef to the file as a side effect.
const_spec:
tIDENT '=' expr
{
cds := &lexVDLFile(yylex).ConstDefs
*cds = append(*cds, &ConstDef{Expr:$3, NamePos:NamePos{Name:$1.String, Pos:$1.Pos}})
}
// Binary operators; associativity and precedence come from the %left
// declarations above, so the rule itself can stay flat.
expr:
unary_expr
{ $$ = $1 }
| expr tOROR expr
{ $$ = &ConstBinaryOp{"||", $1, $3, $2} }
| expr tANDAND expr
{ $$ = &ConstBinaryOp{"&&", $1, $3, $2} }
| expr '<' expr
{ $$ = &ConstBinaryOp{"<", $1, $3, $2} }
| expr '>' expr
{ $$ = &ConstBinaryOp{">", $1, $3, $2} }
| expr tLE expr
{ $$ = &ConstBinaryOp{"<=", $1, $3, $2} }
| expr tGE expr
{ $$ = &ConstBinaryOp{">=", $1, $3, $2} }
| expr tNE expr
{ $$ = &ConstBinaryOp{"!=", $1, $3, $2} }
| expr tEQEQ expr
{ $$ = &ConstBinaryOp{"==", $1, $3, $2} }
| expr '+' expr
{ $$ = &ConstBinaryOp{"+", $1, $3, $2} }
| expr '-' expr
{ $$ = &ConstBinaryOp{"-", $1, $3, $2} }
| expr '*' expr
{ $$ = &ConstBinaryOp{"*", $1, $3, $2} }
| expr '/' expr
{ $$ = &ConstBinaryOp{"/", $1, $3, $2} }
| expr '%' expr
{ $$ = &ConstBinaryOp{"%", $1, $3, $2} }
| expr '|' expr
{ $$ = &ConstBinaryOp{"|", $1, $3, $2} }
| expr '&' expr
{ $$ = &ConstBinaryOp{"&", $1, $3, $2} }
| expr '^' expr
{ $$ = &ConstBinaryOp{"^", $1, $3, $2} }
| expr tLSH expr
{ $$ = &ConstBinaryOp{"<<", $1, $3, $2} }
| expr tRSH expr
{ $$ = &ConstBinaryOp{">>", $1, $3, $2} }
unary_expr:
operand
{ $$ = $1 }
| '!' unary_expr
{ $$ = &ConstUnaryOp{"!", $2, $1} }
| '+' unary_expr
{ $$ = &ConstUnaryOp{"+", $2, $1} }
| '-' unary_expr
{ $$ = &ConstUnaryOp{"-", $2, $1} }
| '^' unary_expr
{ $$ = &ConstUnaryOp{"^", $2, $1} }
// Type conversion is restricted to type_no_typeobject to avoid a conflict
// with typeobject(type); see the type_no_typeobject comment.
| type_no_typeobject '(' expr ')'
{ $$ = &ConstTypeConv{$1, $3, $1.Pos()} }
| tTYPEOBJECT '(' type ')'
{ $$ = &ConstTypeObject{$3, $1} }
// operand covers literals, named references, composite literals, indexing,
// and parenthesized sub-expressions. Selector/index on an unnamed composite
// literal is parsed but rejected with a targeted error message.
operand:
tSTRLIT
{ $$ = &ConstLit{$1.String, $1.Pos} }
| tINTLIT
{ $$ = &ConstLit{$1.int, $1.pos} }
| tRATLIT
{ $$ = &ConstLit{$1.rat, $1.pos} }
| nameref
{ $$ = &ConstNamed{$1.String, $1.Pos} }
| comp_lit
{ $$ = $1 }
| comp_lit '.' tIDENT
{ lexPosErrorf(yylex, $2, "cannot apply selector operator to unnamed constant")}
| comp_lit '[' expr ']'
{ lexPosErrorf(yylex, $2, "cannot apply index operator to unnamed constant")}
| nameref '[' expr ']'
{ $$ = &ConstIndexed{&ConstNamed{$1.String, $1.Pos}, $3, $1.Pos} }
| '(' expr ')'
{ $$ = $2 }
// A composite literal is an optional type followed by a braced, possibly
// keyed, list of values.
comp_lit:
otype '{' '}'
{ $$ = &ConstCompositeLit{$1, nil, $2} }
| otype '{' kv_lit_list ocomma '}'
{ $$ = &ConstCompositeLit{$1, $3, $2} }
kv_lit_list:
kv_lit
{ $$ = []KVLit{$1} }
| kv_lit_list ',' kv_lit
{ $$ = append($1, $3) }
// A kv_lit is either a bare value or a key:value pair.
kv_lit:
expr
{ $$ = KVLit{Value:$1} }
| expr ':' expr
{ $$ = KVLit{Key:$1, Value:$3} }
// ERROR DEFINITIONS
error_spec_list:
error_spec
| error_spec_list ';' error_spec
error_spec:
tIDENT inargs error_details
{
// Create *ErrorDef starting with a copy of error_details, filling in the
// name and params
ed := $3
ed.NamePos = NamePos{Name:$1.String, Pos:$1.Pos}
ed.Params = $2
eds := &lexVDLFile(yylex).ErrorDefs
*eds = append(*eds, &ed)
}
// error_details is an optional braced list of details (actions and/or
// language:format strings), accumulated into a single ErrorDef.
error_details:
// Empty.
{ $$ = ErrorDef{} }
| '{' '}'
{ $$ = ErrorDef{} }
| '{' error_detail_list ocomma '}'
{ $$ = $2 }
error_detail_list:
error_detail
{ $$ = $1 }
| error_detail_list ',' error_detail
{
// Merge each ErrorDef in-order to build the final ErrorDef.
$$ = $1
switch {
case len($3.Actions) > 0:
$$.Actions = append($$.Actions, $3.Actions...)
case len($3.Formats) > 0:
$$.Formats = append($$.Formats, $3.Formats...)
}
}
// A single detail is either an action identifier, or a "lang":"fmt" pair.
error_detail:
tIDENT
{ $$ = ErrorDef{Actions: []StringPos{$1}} }
| tSTRLIT ':' tSTRLIT
{ $$ = ErrorDef{Formats: []LangFmt{{Lang: $1, Fmt: $3}}} }
// MISC TOKENS
// nameref describes a named reference to another type, interface or const. We
// allow the following forms:
// foo
// foo.bar (and multi-dot variants)
// "pkg/path".foo
// "pkg/path".foo.bar (and multi-dot variants)
nameref:
dotnameref
{ $$ = $1 }
| tSTRLIT '.' dotnameref
// The quoted package path is re-quoted into the resulting string, so later
// stages can distinguish path-qualified references from plain dotted names.
{ $$ = StringPos{"\""+$1.String+"\"."+$3.String, $1.Pos} }
// dotnameref describes just the dotted portion of nameref.
dotnameref:
tIDENT
{ $$ = $1 }
| dotnameref '.' tIDENT
{ $$ = StringPos{$1.String+"."+$3.String, $1.Pos} }
// otype is an optional type (nil when absent), used by composite literals.
otype:
// Empty.
{ $$ = nil }
| type
{ $$ = $1 }
// osemi/ocomma allow an optional trailing separator before a closing bracket.
osemi:
// Empty.
| ';'
ocomma:
// Empty.
| ','