Skip to content

Commit

Permalink
update parser to handle string literals
Browse files Browse the repository at this point in the history
and fix soundness and completeness proofs.
string literals parsed to a new AST entry for Lit.
  • Loading branch information
xrchz committed Feb 1, 2014
1 parent 7747d8d commit f03b937
Show file tree
Hide file tree
Showing 9 changed files with 53 additions and 11 deletions.
3 changes: 2 additions & 1 deletion parsing/cmlPEGScript.sml
Expand Up @@ -203,6 +203,7 @@ val cmlPEG_def = zDefine`
(bindNT nCompOps));
(mkNT nEbase,
choicel [tok isInt (bindNT nEbase o mktokLf);
tok isString (bindNT nEbase o mktokLf);
seql [tokeq LparT; tokeq RparT] (bindNT nEbase);
peg_EbaseParen;
seql [tokeq LbrackT; try (pnt nElist1); tokeq RbrackT]
Expand Down Expand Up @@ -330,7 +331,7 @@ val cmlPEG_def = zDefine`
(mkNT nPbase,
pegf
(choicel [pnt nV; pnt nConstructorName; tok isInt mktokLf;
pnt nPtuple; tokeq UnderbarT;
tok isString mktokLf; pnt nPtuple; tokeq UnderbarT;
seql [tokeq LbrackT; try (pnt nPatternList);
tokeq RbrackT] I])
(bindNT nPbase));
Expand Down
25 changes: 18 additions & 7 deletions parsing/pegCompleteScript.sml
Expand Up @@ -306,7 +306,7 @@ val firstSet_nEbase = Store_thm(
"firstSet_nEbase",
``firstSet cmlG [NT (mkNT nEbase)] =
{LetT; LparT; LbrackT} ∪ firstSet cmlG [NT (mkNT nFQV)] ∪ {IntT i | T} ∪
firstSet cmlG [NT (mkNT nConstructorName)]``,
{StringT s | T} ∪ firstSet cmlG [NT (mkNT nConstructorName)]``,
simp[Once firstSet_NT, cmlG_FDOM, cmlG_applied] >>
dsimp[Once EXTENSION] >> gen_tac >> eq_tac >> rw[] >> simp[]);

Expand Down Expand Up @@ -571,7 +571,7 @@ val firstSet_nPtuple = Store_thm(
val firstSet_nPbase = Store_thm(
"firstSet_nPbase",
``firstSet cmlG (NN nPbase :: rest) =
{LparT; UnderbarT; LbrackT} ∪ {IntT i | T } ∪
{LparT; UnderbarT; LbrackT} ∪ {IntT i | T } ∪ {StringT s | T } ∪
firstSet cmlG [NN nConstructorName] ∪ firstSet cmlG [NN nV]``,
simp[SimpLHS, firstSetML_eqn] >>
simp[Once firstSetML_def, cmlG_applied, cmlG_FDOM] >>
Expand Down Expand Up @@ -628,6 +628,7 @@ val NOTIN_firstSet_nV = Store_thm(
RbrackT ∉ firstSet cmlG [NN nV] ∧
InT ∉ firstSet cmlG [NN nV] ∧
IntT i ∉ firstSet cmlG [NN nV] ∧
StringT s ∉ firstSet cmlG [NN nV] ∧
ThenT ∉ firstSet cmlG [NN nV] ∧
ElseT ∉ firstSet cmlG [NN nV] ∧
CaseT ∉ firstSet cmlG [NN nV] ∧
Expand All @@ -653,6 +654,7 @@ val NOTIN_firstSet_nFQV = Store_thm(
RbrackT ∉ firstSet cmlG [NN nFQV] ∧
InT ∉ firstSet cmlG [NN nFQV] ∧
IntT i ∉ firstSet cmlG [NN nFQV] ∧
StringT s ∉ firstSet cmlG [NN nFQV] ∧
AndT ∉ firstSet cmlG [NN nFQV] ∧
ThenT ∉ firstSet cmlG [NN nFQV] ∧
ElseT ∉ firstSet cmlG [NN nFQV] ∧
Expand Down Expand Up @@ -682,6 +684,7 @@ val NOTIN_firstSet_nConstructorName = Store_thm(
IfT ∉ firstSet cmlG [NN nConstructorName] ∧
InT ∉ firstSet cmlG [NN nConstructorName] ∧
IntT i ∉ firstSet cmlG [NN nConstructorName] ∧
StringT s ∉ firstSet cmlG [NN nConstructorName] ∧
LbrackT ∉ firstSet cmlG [NN nConstructorName] ∧
LetT ∉ firstSet cmlG [NN nConstructorName] ∧
LparT ∉ firstSet cmlG [NN nConstructorName] ∧
Expand Down Expand Up @@ -1164,14 +1167,14 @@ val stoppers_def = Define`
(stoppers nLetDec = nestoppers DELETE AndT) ∧
(stoppers nLetDecs = nestoppers DIFF {AndT; FunT; ValT; SemicolonT}) ∧
(stoppers nPapp =
UNIV DIFF ({LparT; UnderbarT; LbrackT} ∪ { IntT i | T } ∪
UNIV DIFF ({LparT; UnderbarT; LbrackT} ∪ { IntT i | T } ∪ { StringT s | T } ∪
firstSet cmlG [NN nV] ∪ firstSet cmlG [NN nConstructorName])) ∧
(stoppers nPattern =
UNIV DIFF ({LparT; UnderbarT; LbrackT; SymbolT "::"} ∪ { IntT i | T } ∪
UNIV DIFF ({LparT; UnderbarT; LbrackT; SymbolT "::"} ∪ { IntT i | T } ∪ { StringT s | T } ∪
firstSet cmlG [NN nV] ∪ firstSet cmlG [NN nConstructorName])) ∧
(stoppers nPatternList =
UNIV DIFF ({CommaT; LparT; UnderbarT; LbrackT; SymbolT "::"} ∪
{IntT i | T} ∪
{IntT i | T} ∪ { StringT s | T } ∪
firstSet cmlG [NN nV] ∪ firstSet cmlG [NN nConstructorName])) ∧
(stoppers nPE = nestoppers) ∧
(stoppers nPE' = BarT INSERT nestoppers) ∧
Expand Down Expand Up @@ -1883,6 +1886,7 @@ val completeness = store_thm(
match_mp_tac peg_respects_firstSets >> simp[] >>
metis_tac [firstSets_nV_nConstructorName, firstSet_nonempty_fringe])
>- simp[peg_respects_firstSets]
>- simp[peg_respects_firstSets,peg_eval_tok_NONE]
>- (simp[NT_rank_def] >>
erule mp_tac
(MATCH_MP fringe_length_not_nullable nullable_Ptuple) >>
Expand Down Expand Up @@ -1932,6 +1936,7 @@ val completeness = store_thm(
simp[NT_rank_def] >>
Cases_on `sfx` >> fs[peg_respects_firstSets, not_peg0_peg_eval_NIL_NONE])
>- (fs[MAP_EQ_CONS] >> simp[peg_respects_firstSets])
>- (fs[MAP_EQ_CONS] >> simp[peg_respects_firstSets])
>- (DISJ1_TAC >>
erule mp_tac (MATCH_MP fringe_length_not_nullable nullable_Ptuple) >>
simp[] >> Cases_on `pfx` >> fs[] >>
Expand Down Expand Up @@ -2199,6 +2204,7 @@ val completeness = store_thm(
fs[])
>- (print_tac "nEbase" >> note_tac "** Slow nEbase beginning" >> stdstart
>- (note_tac "Ebase:Eseq (not Int)" >> simp[peg_eval_tok_NONE])
>- (note_tac "Ebase:Eseq (not String)" >> simp[peg_eval_tok_NONE])
>- (note_tac "Ebase:Eseq (not ())" >> simp[peg_eval_tok_NONE] >>
erule mp_tac (MATCH_MP fringe_length_not_nullable nullable_Eseq) >>
simp[] >> asm_match `ptree_fringe pt = MAP TK f` >> Cases_on `f` >>
Expand Down Expand Up @@ -2243,11 +2249,12 @@ val completeness = store_thm(
CONV_TAC SWAP_VARS_CONV >> Q.REFINE_EXISTS_TAC `[pt2]` >>
asimp[peg_EbaseParenFn_def, peg_eval_tok_NONE])
>- (note_tac "Ebase:() (not Int)" >> simp[peg_eval_tok_NONE])
>- (note_tac "Ebase:() (not String)" >> simp[peg_eval_tok_NONE])
>- (note_tac "Ebase:FQV" >>
erule mp_tac (MATCH_MP fringe_length_not_nullable nullable_FQV) >>
simp[] >> Cases_on `pfx` >> fs[] >> DISJ2_TAC >>
asm_match `ptree_fringe fpt = TK h :: MAP TK t` >>
`¬isInt h ∧ h ≠ LparT ∧ h ≠ LetT ∧ h ≠ LbrackT`
`¬isInt h ∧ ¬isString h ∧ h ≠ LparT ∧ h ≠ LetT ∧ h ≠ LbrackT`
by (IMP_RES_THEN mp_tac firstSet_nonempty_fringe >>
Cases_on `h` >> simp[]) >>
simp[peg_eval_tok_NONE] >> DISJ2_TAC >>
Expand All @@ -2259,7 +2266,7 @@ val completeness = store_thm(
nullable_ConstructorName) >>
simp[] >> Cases_on `pfx` >> fs[] >> DISJ2_TAC >>
asm_match `ptree_fringe fpt = TK h :: MAP TK t` >>
`¬isInt h ∧ h ≠ LparT ∧ h ≠ LetT ∧ h ≠ LbrackT`
`¬isInt h ∧ ¬isString h ∧ h ≠ LparT ∧ h ≠ LetT ∧ h ≠ LbrackT`
by (IMP_RES_THEN mp_tac firstSet_nonempty_fringe >>
Cases_on `h` >> simp[]) >>
simp[peg_eval_tok_NONE] >> DISJ2_TAC >>
Expand All @@ -2274,7 +2281,9 @@ val completeness = store_thm(
`h ∈ firstSet cmlG [NN nConstructorName]`
by metis_tac [firstSet_nonempty_fringe] >>
fs[firstSet_nConstructorName])
>- (note_tac "Ebase:string (not Int)" >> simp[peg_eval_tok_NONE])
>- (note_tac "Ebase:let (not Int)" >> simp[peg_eval_tok_NONE])
>- (note_tac "Ebase:let (not String)" >> simp[peg_eval_tok_NONE])
>- (note_tac "Ebase:let (not LparT)" >> simp[peg_eval_tok_NONE])
>- (note_tac "Ebase:let-in-end" >> DISJ2_TAC >> conj_tac
>- simp[peg_eval_tok_NONE, peg_EbaseParen_def] >>
Expand All @@ -2283,12 +2292,14 @@ val completeness = store_thm(
first_assum (unify_firstconj kall_tac) >> asimp[] >>
normlist >> simp[])
>- (note_tac "Ebase:[] not int" >> simp[peg_eval_tok_NONE])
>- (note_tac "Ebase:[] not string" >> simp[peg_eval_tok_NONE])
>- (note_tac "Ebase:[] not LparT" >> simp[peg_eval_tok_NONE])
>- (note_tac "Ebase:[]" >> simp[peg_eval_tok_NONE] >>
DISJ2_TAC >> conj_tac
>- simp[peg_eval_tok_NONE, peg_EbaseParen_def] >>
simp[peg_respects_firstSets])
>- (note_tac "Ebase:[..] not int" >> simp[peg_eval_tok_NONE])
>- (note_tac "Ebase:[..] not string" >> simp[peg_eval_tok_NONE])
>- (note_tac "Ebase:[..] not LparT" >> simp[peg_eval_tok_NONE]) >>
note_tac "Ebase:[..]" >> DISJ2_TAC >>
conj_tac
Expand Down
3 changes: 3 additions & 0 deletions parsing/pegSoundScript.sml
Expand Up @@ -553,6 +553,8 @@ val peg_sound = store_thm(
dsimp[cmlG_applied, cmlG_FDOM])
>- (simp[cmlG_FDOM, cmlG_applied] >> asm_match `isInt h` >>
Cases_on `h` >> fs[])
>- (simp[cmlG_FDOM, cmlG_applied] >> asm_match `isString h` >>
Cases_on `h` >> fs[])
>- (first_x_assum (erule mp_tac) >> strip_tac >> rveq >>
dsimp[cmlG_applied, cmlG_FDOM])
>- simp[cmlG_FDOM, cmlG_applied]
Expand Down Expand Up @@ -989,6 +991,7 @@ val peg_sound = store_thm(
fs[peg_eval_nConstructor_wrongtok, peg_eval_nFQV_wrongtok] >>
rpt (qpat_assum `peg_eval G X NONE` (K ALL_TAC))
>- (asm_match `isInt h` >> Cases_on `h` >> fs[])
>- (asm_match `isString h` >> Cases_on `h` >> fs[])
>- (* () *) dsimp[]
>- ((* peg_EbaseParen 1 *)
IMP_RES_THEN match_mp_tac peg_EbaseParen_sound >> simp[])
Expand Down
1 change: 1 addition & 0 deletions semantics/ast.lem
Expand Up @@ -5,6 +5,7 @@ type lit =
| IntLit of integer
| Bool of bool
| Unit
| String of string

(* Built-in binary operations (including function application) *)

Expand Down
3 changes: 2 additions & 1 deletion semantics/astScript.sml
Expand Up @@ -15,7 +15,8 @@ val _ = Hol_datatype `
lit =
IntLit of int
| Bool of bool
| Unit`;
| Unit
| String of string`;


(* Built-in binary operations (including function application) *)
Expand Down
12 changes: 12 additions & 0 deletions semantics/cmlPtreeConversionScript.sml
Expand Up @@ -440,6 +440,12 @@ val ptree_Pattern_def = Define`
i <- destIntT t ;
SOME (Ast_Plit (IntLit i))
od ++
do
lf <- destLf vic;
t <- destTOK lf;
s <- destStringT t ;
SOME (Ast_Plit (String s))
od ++
if vic = Lf (TOK UnderbarT) then SOME (Ast_Pvar "_")
else NONE
| [lb; rb] =>
Expand Down Expand Up @@ -559,6 +565,12 @@ val ptree_Expr_def = Define`
i <- destIntT t ;
SOME (Ast_Lit (IntLit i))
od ++
do
lf <- destLf single;
t <- destTOK lf;
s <- destStringT t;
SOME (Ast_Lit (String s))
od ++
do
s <- ptree_FQV single;
SOME (Ast_Var s)
Expand Down
4 changes: 2 additions & 2 deletions semantics/gramScript.sml
Expand Up @@ -104,7 +104,7 @@ val cmlG_def = mk_grammar_def ginfo
s ∉ {"true"; "false"; "ref"; "nil"}}``) ;
Vlist1 ::= V Vlist1 | V;
Ebase ::= "(" Eseq ")" | Etuple | "(" ")" | FQV | ConstructorName | <IntT>
| "let" LetDecs "in" Eseq "end" | "[" "]" | "[" Elist1 "]";
| <StringT> | "let" LetDecs "in" Eseq "end" | "[" "]" | "[" Elist1 "]";
Eseq ::= E ";" Eseq | E;
Etuple ::= "(" Elist2 ")";
Elist2 ::= E "," Elist1;
Expand Down Expand Up @@ -141,7 +141,7 @@ val cmlG_def = mk_grammar_def ginfo
LetDecs ::= LetDec LetDecs | ";" LetDecs | ;

(* patterns *)
Pbase ::= V | ConstructorName | <IntT> | Ptuple | "_"
Pbase ::= V | ConstructorName | <IntT> | <StringT> | Ptuple | "_"
| "[" "]" | "[" PatternList "]";
Papp ::= ConstructorName Pbase | Pbase;
Pattern ::= Papp "::" Pattern | Papp ;
Expand Down
1 change: 1 addition & 0 deletions semantics/grammar.txt
Expand Up @@ -16,6 +16,7 @@ Literal ::= IntegerLiteral // IntLit i
| "true" // Bool T
| "false" // Bool F
| "()" // Unit
| StringLiteral // String s

(* See http://www.mlton.org/OperatorPrecedence *)
Ebase ::=
Expand Down
12 changes: 12 additions & 0 deletions semantics/tokenUtilsScript.sml
Expand Up @@ -15,6 +15,12 @@ val isInt_def = Define`
`;
val _ = export_rewrites ["isInt_def"]

val isString_def = Define`
isString (StringT _) = T ∧
isString _ = F
`;
val _ = export_rewrites ["isString_def"]

val isAlphaT_def = Define`
isAlphaT (AlphaT s) = T ∧
isAlphaT _ = F
Expand Down Expand Up @@ -80,4 +86,10 @@ val destIntT_def = Define`
`;
val _ = export_rewrites ["destIntT_def"]

val destStringT_def = Define`
(destStringT (StringT s) = SOME s) ∧
(destStringT _ = NONE)
`;
val _ = export_rewrites ["destStringT_def"]

val _ = export_theory()

0 comments on commit f03b937

Please sign in to comment.