From 8514585af386ac2d4e0f9f95ea2e20042185b3ad Mon Sep 17 00:00:00 2001 From: William Byrd Date: Thu, 14 Feb 2013 03:09:40 -0700 Subject: [PATCH] updated tests for regex parsing --- mK-empty-set-as-failure/mk-lexer.scm | 451 +++++++++++---------------- 1 file changed, 174 insertions(+), 277 deletions(-) diff --git a/mK-empty-set-as-failure/mk-lexer.scm b/mK-empty-set-as-failure/mk-lexer.scm index 9f588ff..e4dad21 100644 --- a/mK-empty-set-as-failure/mk-lexer.scm +++ b/mK-empty-set-as-failure/mk-lexer.scm @@ -24,7 +24,7 @@ (define (n-repso n pat out) (fresh () (conde - [(== 'z n) (== regex-BLANK out)] + [(== 'z n) (== EPSILON out)] [(fresh (n-1 res) (== `(s ,n-1) n) (n-repso n-1 pat res) @@ -40,7 +40,7 @@ ; Option: zero or one instances (define (optiono pat out) (fresh () - (alto regex-BLANK pat out))) + (alto EPSILON pat out))) ;;; letters: a-d (to make the branching factor tolerable) @@ -131,7 +131,7 @@ [(== white-space pat) (== '() tok)] [(== 'left-paren pat) (== '((PuncToken left-paren)) tok)] [(== 'right-paren pat) (== '((PuncToken right-paren)) tok)]) - (regex-matcho pat prefix #t) + (regex-matcho pat prefix) (appendo prefix suffix chars))) ;;; tests @@ -141,28 +141,27 @@ (fresh (n pat out) (n-repso n pat out) (== `(,n ,pat ,out) q))) - '((z _.0 #t) ; is this true, even if _.0 is #f? - ((s z) #f #f) + '((z _.0 #t) ((s z) #t #t) - (((s z) _.0 _.0) (=/= ((_.0 . #f)) ((_.0 . #t)))) - ((s (s z)) #f #f) + (((s z) _.0 _.0) (=/= ((_.0 . #t)))) ((s (s z)) #t #t) - (((s (s z)) _.0 (seq _.0 _.0)) (=/= ((_.0 . #f)) ((_.0 . #t)))) - ((s (s (s z))) #f #f) + (((s (s z)) _.0 (seq _.0 _.0)) (=/= ((_.0 . #t)))) ((s (s (s z))) #t #t) - (((s (s (s z))) _.0 (seq _.0 (seq _.0 _.0))) (=/= ((_.0 . #f)) ((_.0 . #t)))))) + (((s (s (s z))) _.0 (seq _.0 (seq _.0 _.0))) (=/= ((_.0 . #t)))) + ((s (s (s (s z)))) #t #t) + (((s (s (s (s z)))) _.0 (seq _.0 (seq _.0 (seq _.0 _.0)))) (=/= ((_.0 . #t)))) + ((s (s (s (s (s z))))) #t #t))) (check-expect "pluso-1" (run* (q) (fresh (pat out) (pluso pat out) (== `(,pat ,out) q))) - '((#f #f) + '((#t #t) ((_.0 (seq _.0 (rep _.0))) (sym _.0)) - (#t #t) ((rep _.0) (seq (rep _.0) (rep _.0))) - ((seq _.0 _.1) (seq (seq _.0 _.1) (rep (seq _.0 _.1)))) - ((alt _.0 _.1) (seq (alt _.0 _.1) (rep (alt _.0 _.1)))))) + (((seq _.0 _.1) (seq (seq _.0 _.1) (rep (seq _.0 _.1)))) (=/= ((_.0 . #t)) ((_.1 . #t)))) + (((alt _.0 _.1) (seq (alt _.0 _.1) (rep (alt _.0 _.1)))) (=/= ((_.0 . _.1)))))) (check-expect "optiono-1" (run* (q) @@ -170,38 +169,48 @@ (optiono pat out) (== `(,pat ,out) q))) '((#t #t) - (#f #t) - ((_.0 (alt #t _.0)) (=/= ((_.0 . #f)) ((_.0 . #t)))))) + ((_.0 (alt #t _.0)) (=/= ((_.0 . #t)))))) -; run 5 appears to diverge +;;; run 5 apparently diverged under the naive implementation (check-expect "alphas-1" - (run 4 (q) - (regex-matcho alphas q regex-BLANK)) + (run* (q) + (regex-matcho alphas q)) '((a) (b) (c) (d))) (check-expect "specials-1" (run 4 (q) - (regex-matcho specials q regex-BLANK)) + (regex-matcho specials q)) '((_) (?) (hash) (slash))) (check-expect "white-space-1" (run 2 (q) - (regex-matcho white-space q regex-BLANK)) + (regex-matcho white-space q)) '((space) (newline))) (check-expect "parens-1" (run 2 (q) - (regex-matcho parens q regex-BLANK)) + (regex-matcho parens q)) '((left-paren) (right-paren))) (check-expect "any-char-1" - (run 5 (q) - (regex-matcho any-char q regex-BLANK)) - '((a) (b) (c) (d) (_))) + (run* (q) + (regex-matcho any-char q)) + '((a) + (space) + (b) + (newline) + (c) + (left-paren) + (d) + (right-paren) + (_) + (?) + (hash) + (slash))) (check-expect "ch-1" (run 4 (q) - (regex-matcho ch q regex-BLANK)) + (regex-matcho ch q)) '((hash slash slash a) (hash slash slash b) (hash slash slash c) @@ -209,41 +218,50 @@ (check-expect "id-1" (run 20 (q) - (regex-matcho id q regex-BLANK)) + (regex-matcho id q)) '((a) (b) + (c) (a a) (a b) - (c) + (d) (b a) - (a a a) (a c) + (a a a) (b b) (a a b) - (d) + (_) (a d) (c a) - (b a a) - (a a a a) + (a b a) (b c) + (b a a) (a a c) - (a _) - (a b a) - (c b))) + (c b) + (a a a a))) (check-expect "rep-any-char-1" (run 10 (q) - (regex-matcho `(rep ,any-char) q regex-BLANK)) - '(() (a) (b) (a a) (c) (a b) (b a) (d) (a a a) (a c))) + (regex-matcho `(rep ,any-char) q)) + '(() (a) (space) (b) (newline) (a a) (a space) (a b) (c) (space a))) (check-expect "pluso-any-char-1" (run 10 (q) (fresh (pat) (pluso any-char pat) - (regex-matcho pat q regex-BLANK))) - '((a) (b) (a a) (a b) (c) (b a) (a a a) (a c) (b b) (a a b))) + (regex-matcho pat q))) + '((a) + (space) + (b) + (newline) + (a a) + (a space) + (a b) + (c) + (space a) + (a newline))) -(check-expect "appemdo-1" +(check-expect "appendo-1" (run* (q) (appendo '(a b c) '(d e) q)) '((a b c d e))) @@ -253,64 +271,22 @@ (fresh (pat chars prefix suffix tok) (emito pat chars prefix suffix tok) (== `(,pat ,chars ,prefix ,suffix ,tok) q))) - '((left-paren - (left-paren . _.0) - (left-paren) - _.0 - ((PuncToken left-paren))) - (right-paren - (right-paren . _.0) - (right-paren) - _.0 - ((PuncToken right-paren))) + '((left-paren (left-paren . _.0) (left-paren) _.0 ((PuncToken left-paren))) + (right-paren (right-paren . _.0) (right-paren) _.0 ((PuncToken right-paren))) ((alt space newline) (space . _.0) (space) _.0 ()) ((alt space newline) (newline . _.0) (newline) _.0 ()) - ((seq (alt (alt (alt a b) (alt c d)) - (alt (alt _ ?) (alt hash slash))) - (rep (alt (alt (alt a b) (alt c d)) - (alt (alt _ ?) (alt hash slash))))) - (a . _.0) - (a) - _.0 - ((SymbolToken (a)))) - ((seq (alt (alt (alt a b) (alt c d)) - (alt (alt _ ?) (alt hash slash))) - (rep (alt (alt (alt a b) (alt c d)) - (alt (alt _ ?) (alt hash slash))))) - (b . _.0) - (b) - _.0 - ((SymbolToken (b)))) - ((seq (alt (alt (alt a b) (alt c d)) - (alt (alt _ ?) (alt hash slash))) - (rep (alt (alt (alt a b) (alt c d)) - (alt (alt _ ?) (alt hash slash))))) - (a a . _.0) - (a a) - _.0 - ((SymbolToken (a a)))) - ((seq (seq hash slash) - (seq slash (alt (alt a b) (alt c d)))) - (hash slash slash a . _.0) ; !!!!!! WTF!? sweet - (hash slash slash a) - _.0 - ((CharToken (hash slash slash a)))) - ((seq (alt (alt (alt a b) (alt c d)) - (alt (alt _ ?) (alt hash slash))) - (rep (alt (alt (alt a b) (alt c d)) - (alt (alt _ ?) (alt hash slash))))) - (a b . _.0) - (a b) - _.0 - ((SymbolToken (a b)))) - ((seq (alt (alt (alt a b) (alt c d)) - (alt (alt _ ?) (alt hash slash))) - (rep (alt (alt (alt a b) (alt c d)) - (alt (alt _ ?) (alt hash slash))))) - (c . _.0) - (c) - _.0 - ((SymbolToken (c)))))) + ((seq (seq hash slash) (seq slash (alt (alt a b) (alt c d)))) + (hash slash slash a . _.0) (hash slash slash a) _.0 ((CharToken (hash slash slash a)))) + ((seq (seq hash slash) (seq slash (alt (alt a b) (alt c d)))) + (hash slash slash b . _.0) (hash slash slash b) _.0 ((CharToken (hash slash slash b)))) + ((seq (seq hash slash) (seq slash (alt (alt a b) (alt c d)))) + (hash slash slash c . _.0) (hash slash slash c) _.0 ((CharToken (hash slash slash c)))) + ((seq (seq hash slash) (seq slash (alt (alt a b) (alt c d)))) + (hash slash slash d . _.0) (hash slash slash d) _.0 ((CharToken (hash slash slash d)))) + ((seq (alt (alt (alt a b) (alt c d)) (alt (alt _ ?) (alt hash slash))) (rep (alt (alt (alt a b) (alt c d)) (alt (alt _ ?) (alt hash slash))))) + (a . _.0) (a) _.0 ((SymbolToken (a)))) + ((seq (alt (alt (alt a b) (alt c d)) (alt (alt _ ?) (alt hash slash))) (rep (alt (alt (alt a b) (alt c d)) (alt (alt _ ?) (alt hash slash))))) + (b . _.0) (b) _.0 ((SymbolToken (b)))))) (check-expect "maino-1" (run 50 (q) @@ -320,148 +296,118 @@ '((() ()) ((left-paren) ((PuncToken left-paren))) ((right-paren) ((PuncToken right-paren))) + ((left-paren left-paren) ((PuncToken left-paren) (PuncToken left-paren))) ((space) ()) - ((left-paren left-paren) - ((PuncToken left-paren) (PuncToken left-paren))) - ((left-paren right-paren) - ((PuncToken left-paren) (PuncToken right-paren))) - ((right-paren left-paren) - ((PuncToken right-paren) (PuncToken left-paren))) - ((right-paren right-paren) - ((PuncToken right-paren) (PuncToken right-paren))) - ((left-paren space) ((PuncToken left-paren))) - ((left-paren left-paren left-paren) - ((PuncToken left-paren) - (PuncToken left-paren) - (PuncToken left-paren))) + ((left-paren right-paren) ((PuncToken left-paren) (PuncToken right-paren))) + ((right-paren left-paren) ((PuncToken right-paren) (PuncToken left-paren))) + ((right-paren right-paren) ((PuncToken right-paren) (PuncToken right-paren))) ((newline) ()) - ((left-paren left-paren right-paren) - ((PuncToken left-paren) - (PuncToken left-paren) - (PuncToken right-paren))) + ((left-paren left-paren left-paren) ((PuncToken left-paren) (PuncToken left-paren) (PuncToken left-paren))) + ((left-paren space) ((PuncToken left-paren))) + ((left-paren left-paren right-paren) ((PuncToken left-paren) (PuncToken left-paren) (PuncToken right-paren))) ((space left-paren) ((PuncToken left-paren))) - ((left-paren right-paren left-paren) - ((PuncToken left-paren) - (PuncToken right-paren) - (PuncToken left-paren))) + ((left-paren right-paren left-paren) ((PuncToken left-paren) (PuncToken right-paren) (PuncToken left-paren))) ((space right-paren) ((PuncToken right-paren))) - ((left-paren right-paren right-paren) - ((PuncToken left-paren) - (PuncToken right-paren) - (PuncToken right-paren))) + ((left-paren right-paren right-paren) ((PuncToken left-paren) (PuncToken right-paren) (PuncToken right-paren))) + ((right-paren left-paren left-paren) ((PuncToken right-paren) (PuncToken left-paren) (PuncToken left-paren))) ((right-paren space) ((PuncToken right-paren))) - ((right-paren left-paren left-paren) - ((PuncToken right-paren) - (PuncToken left-paren) - (PuncToken left-paren))) - ((right-paren left-paren right-paren) - ((PuncToken right-paren) - (PuncToken left-paren) - (PuncToken right-paren))) - ((left-paren left-paren space) - ((PuncToken left-paren) (PuncToken left-paren))) - ((left-paren left-paren left-paren left-paren) - ((PuncToken left-paren) - (PuncToken left-paren) - (PuncToken left-paren) - (PuncToken left-paren))) ((left-paren newline) ((PuncToken left-paren))) - ((left-paren left-paren left-paren right-paren) - ((PuncToken left-paren) - (PuncToken left-paren) - (PuncToken left-paren) - (PuncToken right-paren))) - ((right-paren right-paren left-paren) - ((PuncToken right-paren) - (PuncToken right-paren) - (PuncToken left-paren))) - ((left-paren space left-paren) - ((PuncToken left-paren) (PuncToken left-paren))) + ((right-paren left-paren right-paren) ((PuncToken right-paren) (PuncToken left-paren) (PuncToken right-paren))) + ((left-paren left-paren left-paren left-paren) ((PuncToken left-paren) (PuncToken left-paren) (PuncToken left-paren) (PuncToken left-paren))) + ((left-paren left-paren space) ((PuncToken left-paren) (PuncToken left-paren))) + ((left-paren left-paren left-paren right-paren) ((PuncToken left-paren) (PuncToken left-paren) (PuncToken left-paren) (PuncToken right-paren))) + ((right-paren right-paren left-paren) ((PuncToken right-paren) (PuncToken right-paren) (PuncToken left-paren))) ((newline left-paren) ((PuncToken left-paren))) - ((right-paren right-paren right-paren) - ((PuncToken right-paren) - (PuncToken right-paren) - (PuncToken right-paren))) - ((left-paren left-paren right-paren left-paren) - ((PuncToken left-paren) - (PuncToken left-paren) - (PuncToken right-paren) - (PuncToken left-paren))) - ((left-paren space right-paren) - ((PuncToken left-paren) (PuncToken right-paren))) + ((left-paren space left-paren) ((PuncToken left-paren) (PuncToken left-paren))) + ((left-paren left-paren right-paren left-paren) ((PuncToken left-paren) (PuncToken left-paren) (PuncToken right-paren) (PuncToken left-paren))) + ((right-paren right-paren right-paren) ((PuncToken right-paren) (PuncToken right-paren) (PuncToken right-paren))) ((newline right-paren) ((PuncToken right-paren))) - ((left-paren left-paren right-paren right-paren) - ((PuncToken left-paren) - (PuncToken left-paren) - (PuncToken right-paren) - (PuncToken right-paren))) - ((space space) ()) - ((left-paren right-paren space) - ((PuncToken left-paren) (PuncToken right-paren))) - ((space left-paren left-paren) - ((PuncToken left-paren) (PuncToken left-paren))) - ((left-paren right-paren left-paren left-paren) - ((PuncToken left-paren) - (PuncToken right-paren) - (PuncToken left-paren) - (PuncToken left-paren))) - ((right-paren left-paren space) - ((PuncToken right-paren) (PuncToken left-paren))) - ((space left-paren right-paren) - ((PuncToken left-paren) (PuncToken right-paren))) - ((left-paren right-paren left-paren right-paren) - ((PuncToken left-paren) - (PuncToken right-paren) - (PuncToken left-paren) - (PuncToken right-paren))) - ((right-paren left-paren left-paren left-paren) - ((PuncToken right-paren) - (PuncToken left-paren) - (PuncToken left-paren) - (PuncToken left-paren))) - ((left-paren left-paren left-paren space) - ((PuncToken left-paren) - (PuncToken left-paren) - (PuncToken left-paren))) + ((left-paren space right-paren) ((PuncToken left-paren) (PuncToken right-paren))) + ((left-paren left-paren right-paren right-paren) ((PuncToken left-paren) (PuncToken left-paren) (PuncToken right-paren) (PuncToken right-paren))) ((right-paren newline) ((PuncToken right-paren))) - ((right-paren left-paren left-paren right-paren) - ((PuncToken right-paren) - (PuncToken left-paren) - (PuncToken left-paren) - (PuncToken right-paren))) - ((left-paren left-paren left-paren left-paren left-paren) - ((PuncToken left-paren) - (PuncToken left-paren) - (PuncToken left-paren) - (PuncToken left-paren) - (PuncToken left-paren))) - ((left-paren left-paren newline) - ((PuncToken left-paren) (PuncToken left-paren))) - ((left-paren left-paren left-paren left-paren right-paren) - ((PuncToken left-paren) - (PuncToken left-paren) - (PuncToken left-paren) - (PuncToken left-paren) - (PuncToken right-paren))) - ((space right-paren left-paren) - ((PuncToken right-paren) (PuncToken left-paren))) - ((left-paren right-paren right-paren left-paren) - ((PuncToken left-paren) - (PuncToken right-paren) - (PuncToken right-paren) - (PuncToken left-paren))) - ((right-paren space left-paren) - ((PuncToken right-paren) (PuncToken left-paren))) - ((right-paren left-paren right-paren left-paren) - ((PuncToken right-paren) - (PuncToken left-paren) - (PuncToken right-paren) - (PuncToken left-paren))) - ((left-paren left-paren space left-paren) - ((PuncToken left-paren) - (PuncToken left-paren) - (PuncToken left-paren))))) + ((space left-paren left-paren) ((PuncToken left-paren) (PuncToken left-paren))) + ((left-paren right-paren left-paren left-paren) ((PuncToken left-paren) (PuncToken right-paren) (PuncToken left-paren) (PuncToken left-paren))) + ((space space) ()) + ((left-paren right-paren space) ((PuncToken left-paren) (PuncToken right-paren))) + ((left-paren left-paren newline) ((PuncToken left-paren) (PuncToken left-paren))) + ((right-paren left-paren left-paren left-paren) ((PuncToken right-paren) (PuncToken left-paren) (PuncToken left-paren) (PuncToken left-paren))) + ((space left-paren right-paren) ((PuncToken left-paren) (PuncToken right-paren))) + ((left-paren right-paren left-paren right-paren) ((PuncToken left-paren) (PuncToken right-paren) (PuncToken left-paren) (PuncToken right-paren))) + ((right-paren left-paren space) ((PuncToken right-paren) (PuncToken left-paren))) + ((left-paren left-paren left-paren left-paren left-paren) ((PuncToken left-paren) (PuncToken left-paren) (PuncToken left-paren) (PuncToken left-paren) (PuncToken left-paren))) + ((right-paren left-paren left-paren right-paren) ((PuncToken right-paren) (PuncToken left-paren) (PuncToken left-paren) (PuncToken right-paren))) + ((left-paren left-paren left-paren space) ((PuncToken left-paren) (PuncToken left-paren) (PuncToken left-paren))) + ((left-paren left-paren left-paren left-paren right-paren) ((PuncToken left-paren) (PuncToken left-paren) (PuncToken left-paren) (PuncToken left-paren) (PuncToken right-paren))) + ((space right-paren left-paren) ((PuncToken right-paren) (PuncToken left-paren))) + ((left-paren right-paren right-paren left-paren) ((PuncToken left-paren) (PuncToken right-paren) (PuncToken right-paren) (PuncToken left-paren))) + ((right-paren space left-paren) ((PuncToken right-paren) (PuncToken left-paren))) + ((left-paren newline left-paren) ((PuncToken left-paren) (PuncToken left-paren))) + ((hash slash slash a) ((CharToken (hash slash slash a)))))) + +;;; Bug! The matching isn't always greedy. +(check-expect "maino-3" + (run 2 (q) + (maino '(left-paren a b right-paren) q)) + '(((PuncToken left-paren) + (SymbolToken (a b)) + (PuncToken right-paren)) + ((PuncToken left-paren) + (SymbolToken (a)) + (SymbolToken (b)) + (PuncToken right-paren)))) + +;;; Bug! The matching isn't always greedy. +(check-expect "maino-3b" + (run 2 (q) + (maino '(a a) q)) + '(((SymbolToken (a a))) + ((SymbolToken (a)) (SymbolToken (a))))) + +;;; This test takes a long time to run, but apparently less time than with the naive translation +(check-expect "maino-4" + (run 1 (q) + (maino '(left-paren a b c c a space c a space right-paren) q)) + '(((PuncToken left-paren) + (SymbolToken (a b)) + (SymbolToken (c)) + (SymbolToken (c)) + (SymbolToken (a)) + (SymbolToken (c)) + (SymbolToken (a)) + (PuncToken right-paren)))) + +(check-expect "maino-5" + (run 1 (q) + (maino q '((PuncToken left-paren)))) + '((left-paren))) + +(check-expect "maino-6" + (run 1 (q) + (maino q '((PuncToken left-paren) + (SymbolToken (a))))) + '((left-paren a))) +;;; under naive translation, was too slow to run +(check-expect "maino-9" + (run 1 (q) + (maino q '((PuncToken left-paren) + (SymbolToken (a)) + (SymbolToken (b))))) + '((left-paren a b))) + +;;; under naive translation, was too slow to run +(check-expect "maino-10" + (run 1 (q) + (maino q '((PuncToken left-paren) + (SymbolToken (a)) + (SymbolToken (b)) + (PuncToken right-paren)))) + '((left-paren a b right-paren))) + +#!eof + +;;; Even a run1 doesn't seem to return until the new implementation. +;;; Although this may not actually be a problem. (check-expect "maino-2" (run 9 (q) (fresh (chars tokens x y z* rest) @@ -492,52 +438,3 @@ (PuncToken right-paren))) ((a a left-paren space) ((SymbolToken (a a)) (PuncToken left-paren))))) - -(check-expect "maino-3" - (run 1 (q) - (maino '(left-paren a b right-paren) q)) - '(((PuncToken left-paren) - (SymbolToken (a)) - (SymbolToken (b)) - (PuncToken right-paren)))) - -;;; Bug! The matching isn't always greedy. -(check-expect "maino-3b" - (run 2 (q) - (maino '(a a) q)) - '(((SymbolToken (a)) (SymbolToken (a))) - ((SymbolToken (a a))))) - -;; this takes too long to run -;(check-expect "maino-4" -; (run 1 (q) -; (maino '(left-paren a b c c a space c a space right-paren) q)) -; '???) - -(check-expect "maino-5" - (run 1 (q) - (maino q '((PuncToken left-paren)))) - '((left-paren))) - -(check-expect "maino-6" - (run 1 (q) - (maino q '((PuncToken left-paren) - (SymbolToken (a))))) - '((left-paren a))) - -;;; too slow to run -;(check-expect "maino-9" -; (run 1 (q) -; (maino q '((PuncToken left-paren) -; (SymbolToken (a)) -; (SymbolToken (b))))) -; '???) - -;;; too slow -;(check-expect "maino-10" -; (run 1 (q) -; (maino q '((PuncToken left-paren) -; (SymbolToken (a)) -; (SymbolToken (b)) -; (PuncToken right-paren)))) -; '???)