Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Complete initial lexer.

  • Loading branch information...
commit 857b26befa050b400c609bdcf092297e8a55351b 1 parent 200d5b8
@umitanuki authored
View
4 src/bigpot/parser/gram.y
@@ -36,11 +36,11 @@ type Node struct {
* DOT_DOT is unused in the core SQL grammar, and so will always provoke
* parse errors. It is needed by PL/pgsql.
*/
-%token <str> IDENT FCONST SCONST Op
+%token <str> IDENT FCONST SCONST BCONST XCONST Op
%token <ival> ICONST PARAM
%token TYPECAST DOT_DOT COLON_EQUALS
-%token <keyword> SELECT
+%token <keyword> FROM SELECT
%%
statements: /* empty */
View
1  src/bigpot/parser/kwlist.go
@@ -24,6 +24,7 @@ type keyword struct {
* the set of keywords at compile time.
*/
var keywordList = []keyword{
+ {"from", FROM, ReservedKeyword},
{"select", SELECT, ReservedKeyword},
}
View
108 src/bigpot/parser/scan.l
@@ -18,6 +18,8 @@ type lexer struct {
lesspos int
lessbuf []rune
+
+ dolqstart string
}
func newLexer(source io.RuneReader) *lexer {
@@ -53,6 +55,10 @@ func (l *lexer) startLiteral() {
l.literalbuf = l.literalbuf[:0]
}
+func (l *lexer) addLiteralRune(lit rune) {
+ l.literalbuf = append(l.literalbuf, lit)
+}
+
func (l *lexer) addLiteral(lits []rune) {
l.literalbuf = append(l.literalbuf, lits...)
}
@@ -218,9 +224,6 @@ xbinside [^']*
xhstart [xX]{quote}
xhinside [^']*
-/* National character */
-xnstart [nN]{quote}
-
/* Quoted string that allows backslash escapes */
xestart [eE]{quote}
xeinside [^\\']+
@@ -230,7 +233,8 @@ xehexesc [\\]x[0-9A-Fa-f]{1,2}
xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
xeunicodebad [\\]([uU])
-/* Extended quote
+/*
+ * Extended quote
* xqdouble implements embedded quote, ''''
*/
xqstart {quote}
@@ -393,32 +397,58 @@ other .
l.Error("unterminated /* comment")
{xbstart}
+ /*
+ * Binary bit type.
+ * At some point we should simply pass the string
+ * forward to the parser and label it there.
+ * In the meantime, place a leading "b" on the string
+ * to mark it for the input routine as a binary string.
+ */
+ l.SET_YYLLOC()
l.BEGIN(xb)
+ l.startLiteral()
+ l.addLiteralRune('b')
<xb>{quotestop} |
<xb>{quotefail}
l.yyless(1)
l.BEGIN(INITIAL)
+ lval.str = string(l.literalbuf)
+ return BCONST
<xh>{xhinside} |
<xb>{xbinside}
+ l.addLiteral(l.buf)
<xh>{quotecontinue} |
<xb>{quotecontinue}
/* ignore */
- /*
- <xb><<EOF>>
- l.Error("unterminated bit string literal")
- */
+<xb>\0
+ l.Error("unterminated bit string literal")
{xhstart}
+ /*
+ * Hexadecimal bit type.
+ * At some point we should simply pass the string
+ * forward to the parser and label it there.
+ * In the meantime, place a leading "x" on the string
+ * to mark it for the input routine as a hex string.
+ */
+ l.SET_YYLLOC()
+ l.BEGIN(xh)
+ l.startLiteral()
+ l.addLiteralRune('x')
<xh>{quotestop} |
<xh>{quotefail}
+ l.yyless(1)
+ l.BEGIN(INITIAL)
+ lval.str = string(l.literalbuf)
+ return XCONST
-{xnstart}
- l.BEGIN(xh)
+<xh>\0
+ l.Error("unterminated hexadecimal string literal")
{xqstart}
l.SET_YYLLOC()
@@ -444,32 +474,88 @@ other .
{dolqdelim}
+ l.SET_YYLLOC()
+ l.dolqstart = l.getBuf()
l.BEGIN(xdolq)
+ l.startLiteral()
{dolqfailed}
+ l.SET_YYLLOC()
+ l.yyless(1)
+ return int(l.buf[0])
<xdolq>{dolqdelim}
- l.BEGIN(INITIAL)
+ if l.getBuf() == l.dolqstart {
+ l.dolqstart = ""
+ l.BEGIN(INITIAL)
+ lval.str = string(l.literalbuf)
+ return SCONST
+ } else {
+ /*
+ * When we fail to match $...$ to dolqstart, transfer
+ * the $... part to the output, but put back the final
+ * $ for rescanning. Consider $delim$...$junk$delim$
+ */
+ yyleng := len(l.buf)
+ l.addLiteral(l.buf[:yyleng-1])
+ l.yyless(yyleng - 1)
+ }
<xdolq>{dolqinside}
+ l.addLiteral(l.buf)
<xdolq>{dolqfailed}
+ l.addLiteral(l.buf)
<xdolq>.
+ /* This is only needed for $ inside the quoted text */
+ l.addLiteralRune(l.buf[0])
+
+<xdolq>\0
+ l.Error("unterminated dollar-quoted string")
{xdstart}
+ l.SET_YYLLOC()
l.BEGIN(xd)
+ l.startLiteral()
<xd>{xdstop}
l.BEGIN(INITIAL)
+ if len(l.literalbuf) == 0 {
+ l.Error("zero-length delimited identifier")
+ }
+ ident := string(l.literalbuf)
+ /* TODO: NAMEDATALEN */
+ /*
+ if (yyextra->literallen >= NAMEDATALEN)
+ truncate_identifier(ident, yyextra->literallen, true);
+ */
+ lval.str = ident
+ return IDENT
<xd>{xddouble}
+ l.addLiteralRune('"')
<xd>{xdinside}
+ l.addLiteral(l.buf)
+
+<xd>\0
+ l.Error("unterminated quoted identifier")
{typecast}
+ l.SET_YYLLOC()
return TYPECAST
+ /*
+ {dot_dot}
+ l.SET_YYLLOC()
+ return DOT_DOT
+
+ {colon_equals}
+ l.SET_YYLLOC()
+ return COLON_EQUALS
+ */
+
{self}
l.SET_YYLLOC()
return int(l.buf[0])
View
94 src/bigpot/parser/scan_test.go
@@ -13,20 +13,20 @@ func strLexer(input string) *lexer {
func (l *lexer) lexPrintExpect(expected int) {
lval := &yySymType{}
if token := l.Lex(lval); token != expected {
- fmt.Printf("not expected token: %d != %d", token, expected)
+ fmt.Printf("not expected token: %d != %d\n", token, expected)
return
}
switch expected {
default:
if expected > 0 && expected < 127 {
- fmt.Printf(" %c", expected)
+ fmt.Printf("%c\n", expected)
} else if expected > COLON_EQUALS { /* too hacky... */
- fmt.Printf(" %s", lval.keyword)
+ fmt.Printf("%s\n", lval.keyword)
} else {
- fmt.Printf(" %s", lval.str)
+ fmt.Printf("%s\n", lval.str)
}
case ICONST, PARAM:
- fmt.Printf(" %d", lval.ival)
+ fmt.Printf("%d\n", lval.ival)
}
}
@@ -37,17 +37,41 @@ func recoverPanic() {
}
func ExampleLex_1() {
- lexer := strLexer("select 1")
+ lexer := strLexer("select 1 -- comment_comment\n2")
lexer.lexPrintExpect(SELECT)
- // Output: select
+ lexer.lexPrintExpect(ICONST)
+ lexer.lexPrintExpect(ICONST)
+ // Output:
+ // select
+ // 1
+ // 2
+}
+
+func ExampleLex_consts() {
+ lexer := strLexer("b'0101' x'ff'")
+ lexer.lexPrintExpect(BCONST)
+ lexer.lexPrintExpect(XCONST)
+ // Output:
+ // b0101
+ // xff
}
-func ExampleLex_2() {
- lexer := strLexer("select 'foo' /* comment */ bar")
+func ExampleLex_sconsts() {
+ lexer := strLexer("select 'foo' /* comment /* c2 */ */ bar " +
+ "$$lex$$ $body$text$body$ $a$sentence$c$a$ ")
lexer.lexPrintExpect(SELECT)
lexer.lexPrintExpect(SCONST)
lexer.lexPrintExpect(IDENT)
- // Output: select foo bar
+ lexer.lexPrintExpect(SCONST)
+ lexer.lexPrintExpect(SCONST)
+ lexer.lexPrintExpect(SCONST)
+ // Output:
+ // select
+ // foo
+ // bar
+ // lex
+ // text
+ // sentence$c
}
func ExampleLex_numbers() {
@@ -59,7 +83,14 @@ func ExampleLex_numbers() {
lexer.lexPrintExpect(FCONST)
lexer.lexPrintExpect(FCONST)
lexer.lexPrintExpect(FCONST)
- // Output: 10 0.1 e 1.53e-1 1. 0001.999 9999999999999999999
+ // Output:
+ // 10
+ // 0.1
+ // e
+ // 1.53e-1
+ // 1.
+ // 0001.999
+ // 9999999999999999999
}
func ExampleLex_operators() {
@@ -71,14 +102,23 @@ func ExampleLex_operators() {
lexer.lexPrintExpect(ICONST)
lexer.lexPrintExpect(Op)
lexer.lexPrintExpect(Op)
- // Output: 1 % 2 - 10 <> <>
+ // Output:
+ // 1
+ // %
+ // 2
+ // -
+ // 10
+ // <>
+ // <>
}
func ExampleLex_params() {
lexer := strLexer("$1 $0")
lexer.lexPrintExpect(PARAM)
lexer.lexPrintExpect(PARAM)
- // Output: 1 0
+ // Output:
+ // 1
+ // 0
}
func ExampleLex_negative1() {
@@ -96,3 +136,31 @@ func ExampleLex_params_negative() {
lexer.lexPrintExpect(PARAM)
// Output: failed: value out of range for param
}
+
+func ExampleLex_xb_negative() {
+ defer recoverPanic()
+ lexer := strLexer("b'01010")
+ lexer.lexPrintExpect(BCONST)
+ // Output: failed: unterminated bit string literal
+}
+
+// ExampleLex_xh_negative lexes a hexadecimal string literal that has no
+// closing quote and expects the lexer to report it as unterminated.
+// (The name must start with "Example" for go test to run it.)
+func ExampleLex_xh_negative() {
+ defer recoverPanic()
+ lexer := strLexer("x'ffee")
+ lexer.lexPrintExpect(XCONST)
+ // Output: failed: unterminated hexadecimal string literal
+}
+
+// ExampleLex_dolq_negative lexes a dollar-quoted string whose closing
+// delimiter is missing and expects the lexer to report it as unterminated.
+func ExampleLex_dolq_negative() {
+ defer recoverPanic()
+ lexer := strLexer("$$abcd")
+ lexer.lexPrintExpect(SCONST)
+ // Output: failed: unterminated dollar-quoted string
+}
+
+func ExampleLex_xd_negative() {
+ defer recoverPanic()
+ lexer := strLexer("\"abcd")
+ lexer.lexPrintExpect(IDENT)
+ // Output: failed: unterminated quoted identifier
+}
Please sign in to comment.
Something went wrong with that request. Please try again.