Skip to content

Commit

Permalink
add lexer options parser and flex mode
Browse files Browse the repository at this point in the history
  • Loading branch information
zaach committed Jan 18, 2012
1 parent 5ac5eb2 commit d6f1d62
Show file tree
Hide file tree
Showing 6 changed files with 109 additions and 71 deletions.
7 changes: 5 additions & 2 deletions lib/jison/lexer.js
Expand Up @@ -124,7 +124,7 @@ function RegExpLexer (dict, input, tokens) {

this.moduleInclude = dict.moduleInclude;

this.longestMatch = dict.options && dict.options.longestMatch || false;
this.options = dict.options || {};
//if (dict.options && dict.options.longestMatch) {
//this.next = flexNext;
//}
Expand Down Expand Up @@ -220,7 +220,7 @@ RegExpLexer.prototype = {
if (tempMatch && (!match || tempMatch[0].length > match[0].length)) {
match = tempMatch;
index = i;
if (!this.longestMatch) break;
if (!this.options.flex) break;
}
}
if (match) {
Expand Down Expand Up @@ -293,6 +293,9 @@ RegExpLexer.prototype = {
p.push(k + ":" + (RegExpLexer.prototype[k].toString() || '""'));
out += p.join(",\n");
out += "})";
if (this.options) {
out += ";\nlexer.options = "+JSON.stringify(this.options);
}
out += ";\nlexer.performAction = "+String(this.performAction);
out += ";\nlexer.rules = [" + this.rules + "]";
out += ";\nlexer.conditions = " + JSON.stringify(this.conditions);
Expand Down
149 changes: 83 additions & 66 deletions lib/jison/util/lex-parser.js
@@ -1,6 +1,5 @@
/* Jison generated parser */
var jisonlex = (function(){

var parser = {trace: function trace() { },
yy: {},
symbols_: {"error":2,"lex":3,"definitions":4,"include":5,"%%":6,"rules":7,"epilogue":8,"EOF":9,"CODE":10,"action":11,"definition":12,"NAME":13,"regex":14,"START_INC":15,"names_inclusive":16,"START_EXC":17,"names_exclusive":18,"START_COND":19,"name":20,"rule":21,"start_conditions":22,"<":23,"name_list":24,">":25,"*":26,",":27,"ACTION":28,"regex_list":29,"|":30,"regex_concat":31,"regex_base":32,"(":33,")":34,"SPECIAL_GROUP":35,"+":36,"?":37,"/":38,"/!":39,"name_expansion":40,"range_regex":41,"any_group_regex":42,".":43,"^":44,"$":45,"string":46,"escape_char":47,"{":48,"}":49,"ANY_GROUP_REGEX":50,"ESCAPE_CHAR":51,"RANGE_REGEX":52,"STRING_LIT":53,"$accept":0,"$end":1},
Expand All @@ -15,6 +14,7 @@ case 1: this.$ = {rules: $$[$0-1]};
if ($$[$0-4][1]) this.$.startConditions = $$[$0-4][1];
if ($$[$0-3]) this.$.actionInclude = $$[$0-3];
if ($$[$0]) this.$.moduleInclude = $$[$0];
if (yy.options) this.$.options = yy.options;
return this.$;
break;
case 2: this.$ = null;
Expand Down Expand Up @@ -71,7 +71,7 @@ break;
case 25: this.$ = yytext;
break;
case 26: this.$ = $$[$0];
if (this.$.match(/[\w\d]$/) && !this.$.match(/\\(b|c[A-Z]|x[0-9A-F]{2}|u[a-fA-F0-9]{4}|[0-7]{1,3})$/))
if (!(yy.options && yy.options.flex) && this.$.match(/[\w\d]$/) && !this.$.match(/\\(b|c[A-Z]|x[0-9A-F]{2}|u[a-fA-F0-9]{4}|[0-7]{1,3})$/))
this.$ += "\\b";

break;
Expand Down Expand Up @@ -219,11 +219,9 @@ parse: function parse(input) {
}
return true;
}
};/* Jison generated lexer */
};
/* Jison generated lexer */
var lexer = (function(){



var lexer = ({EOF:1,
parseError:function parseError(str, hash) {
if (this.yy.parseError) {
Expand Down Expand Up @@ -284,6 +282,8 @@ next:function () {

var token,
match,
tempMatch,
index,
col,
lines;
if (!this._more) {
Expand All @@ -292,26 +292,30 @@ next:function () {
}
var rules = this._currentRules();
for (var i=0;i < rules.length; i++) {
match = this._input.match(this.rules[rules[i]]);
if (match) {
lines = match[0].match(/\n.*/g);
if (lines) this.yylineno += lines.length;
this.yylloc = {first_line: this.yylloc.last_line,
last_line: this.yylineno+1,
first_column: this.yylloc.last_column,
last_column: lines ? lines[lines.length-1].length-1 : this.yylloc.last_column + match[0].length}
this.yytext += match[0];
this.match += match[0];
this.matches = match;
this.yyleng = this.yytext.length;
this._more = false;
this._input = this._input.slice(match[0].length);
this.matched += match[0];
token = this.performAction.call(this, this.yy, this, rules[i],this.conditionStack[this.conditionStack.length-1]);
if (token) return token;
else return;
tempMatch = this._input.match(this.rules[rules[i]]);
if (tempMatch && (!match || tempMatch[0].length > match[0].length)) {
match = tempMatch;
index = i;
if (!this.options.flex) break;
}
}
if (match) {
lines = match[0].match(/\n.*/g);
if (lines) this.yylineno += lines.length;
this.yylloc = {first_line: this.yylloc.last_line,
last_line: this.yylineno+1,
first_column: this.yylloc.last_column,
last_column: lines ? lines[lines.length-1].length-1 : this.yylloc.last_column + match[0].length}
this.yytext += match[0];
this.match += match[0];
this.yyleng = this.yytext.length;
this._more = false;
this._input = this._input.slice(match[0].length);
this.matched += match[0];
token = this.performAction.call(this, this.yy, this, rules[index],this.conditionStack[this.conditionStack.length-1]);
if (token) return token;
else return;
}
if (this._input === "") {
return this.EOF;
} else {
Expand Down Expand Up @@ -342,98 +346,111 @@ topState:function () {
pushState:function begin(condition) {
this.begin(condition);
}});
lexer.options = {};
lexer.performAction = function anonymous(yy,yy_,$avoiding_name_collisions,YY_START) {

var YYSTATE=YY_START
switch($avoiding_name_collisions) {
case 0:return 19
case 0:yy.options[yy_.yytext] = true
break;
case 1:this.begin('INITIAL')
break;
case 2:/* empty */
break;
case 3:this.begin('INITIAL')
case 3:return 19
break;
case 4:this.begin('trail'); yy_.yytext = yy_.yytext.substr(1, yy_.yytext.length-2);return 28;
case 4:this.begin('INITIAL')
break;
case 5:this.begin('trail'); yy_.yytext = yy_.yytext.substr(2, yy_.yytext.length-4);return 28;
case 5:/* empty */
break;
case 6:this.begin('INITIAL'); return 28
case 6:this.begin('INITIAL')
break;
case 7:this.begin('INITIAL')
case 7:this.begin('trail'); yy_.yytext = yy_.yytext.substr(1, yy_.yytext.length-2);return 28;
break;
case 8:if (yy.ruleSection) this.begin('indented')
case 8:this.begin('trail'); yy_.yytext = yy_.yytext.substr(2, yy_.yytext.length-4);return 28;
break;
case 9:return 13
case 9:this.begin('INITIAL'); return 28
break;
case 10:yy_.yytext = yy_.yytext.replace(/\\"/g,'"');return 53;
case 10:this.begin('INITIAL')
break;
case 11:yy_.yytext = yy_.yytext.replace(/\\'/g,"'");return 53;
case 11:if (yy.ruleSection) this.begin('indented')
break;
case 12:return 30
case 12:return 13
break;
case 13:return 50
case 13:yy_.yytext = yy_.yytext.replace(/\\"/g,'"');return 53;
break;
case 14:return 35
case 14:yy_.yytext = yy_.yytext.replace(/\\'/g,"'");return 53;
break;
case 15:return 35
case 15:return 30
break;
case 16:return 35
case 16:return 50
break;
case 17:return 33
case 17:return 35
break;
case 18:return 34
case 18:return 35
break;
case 19:return 36
case 19:return 35
break;
case 20:return 26
case 20:return 33
break;
case 21:return 37
case 21:return 34
break;
case 22:return 44
case 22:return 36
break;
case 23:return 27
case 23:return 26
break;
case 24:return 45
case 24:return 37
break;
case 25:return 23
case 25:return 44
break;
case 26:return 25
case 26:return 27
break;
case 27:return 39
case 27:return 45
break;
case 28:return 38
case 28:return 23
break;
case 29:return 51
case 29:return 25
break;
case 30:yy_.yytext = yy_.yytext.replace(/^\\/g,''); return 51
case 30:return 39
break;
case 31:return 45
case 31:return 38
break;
case 32:return 43
case 32:return 51
break;
case 33:this.begin('start_condition');return 15
case 33:yy_.yytext = yy_.yytext.replace(/^\\/g,''); return 51
break;
case 34:this.begin('start_condition');return 17
case 34:return 45
break;
case 35:if (yy.ruleSection) this.begin('code'); yy.ruleSection = true; return 6
case 35:return 43
break;
case 36:return 52
case 36:yy.options = {}; this.begin('options');
break;
case 37:return 48
case 37:this.begin('start_condition');return 15
break;
case 38:return 49
case 38:this.begin('start_condition');return 17
break;
case 39:/* ignore bad characters */
case 39:if (yy.ruleSection) this.begin('code'); yy.ruleSection = true; return 6
break;
case 40:return 9
case 40:return 52
break;
case 41:return 10;
case 41:return 48
break;
case 42:return 49
break;
case 43:/* ignore bad characters */
break;
case 44:return 9
break;
case 45:return 10;
break;
}
};
lexer.rules = [/^[a-zA-Z_][a-zA-Z0-9_-]*/,/^\n+/,/^\s+/,/^.*\n+/,/^\{[^}]*\}/,/^%\{(.|\n)*?%\}/,/^.+/,/^\n+/,/^\s+/,/^[a-zA-Z_][a-zA-Z0-9_-]*/,/^"(\\\\|\\"|[^"])*"/,/^'(\\\\|\\'|[^'])*'/,/^\|/,/^\[(\\\]|[^\]])*\]/,/^\(\?:/,/^\(\?=/,/^\(\?!/,/^\(/,/^\)/,/^\+/,/^\*/,/^\?/,/^\^/,/^,/,/^<<EOF>>/,/^</,/^>/,/^\/!/,/^\//,/^\\([0-7]{1,3}|[rfntvsSbBwWdD\\*+()${}|[\]\/.^?]|c[A-Z]|x[0-9A-F]{2}|u[a-fA-F0-9]{4})/,/^\\./,/^\$/,/^\./,/^%s\b/,/^%x\b/,/^%%/,/^\{\d+(,\s?\d+|,)?\}/,/^\{/,/^\}/,/^./,/^$/,/^(.|\n)+/];
lexer.conditions = {"code":{"rules":[40,41],"inclusive":false},"start_condition":{"rules":[0,1,2,40],"inclusive":false},"indented":{"rules":[4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40],"inclusive":true},"trail":{"rules":[3,5,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40],"inclusive":true},"INITIAL":{"rules":[5,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40],"inclusive":true}};return lexer;})()
lexer.rules = [/^[a-zA-Z_][a-zA-Z0-9_-]*/,/^\n+/,/^\s+/,/^[a-zA-Z_][a-zA-Z0-9_-]*/,/^\n+/,/^\s+/,/^.*\n+/,/^\{[^}]*\}/,/^%\{(.|\n)*?%\}/,/^.+/,/^\n+/,/^\s+/,/^[a-zA-Z_][a-zA-Z0-9_-]*/,/^"(\\\\|\\"|[^"])*"/,/^'(\\\\|\\'|[^'])*'/,/^\|/,/^\[(\\\]|[^\]])*\]/,/^\(\?:/,/^\(\?=/,/^\(\?!/,/^\(/,/^\)/,/^\+/,/^\*/,/^\?/,/^\^/,/^,/,/^<<EOF>>/,/^</,/^>/,/^\/!/,/^\//,/^\\([0-7]{1,3}|[rfntvsSbBwWdD\\*+()${}|[\]\/.^?]|c[A-Z]|x[0-9A-F]{2}|u[a-fA-F0-9]{4})/,/^\\./,/^\$/,/^\./,/^%options\b/,/^%s\b/,/^%x\b/,/^%%/,/^\{\d+(,\s?\d+|,)?\}/,/^\{/,/^\}/,/^./,/^$/,/^(.|\n)+/];
lexer.conditions = {"code":{"rules":[44,45],"inclusive":false},"start_condition":{"rules":[3,4,5,44],"inclusive":false},"options":{"rules":[0,1,2,44],"inclusive":false},"indented":{"rules":[7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44],"inclusive":true},"trail":{"rules":[6,8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44],"inclusive":true},"INITIAL":{"rules":[8,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44],"inclusive":true}};


;
return lexer;})()
parser.lexer = lexer;
return parser;
})();
Expand Down
3 changes: 2 additions & 1 deletion src/jisonlex.jison
Expand Up @@ -13,6 +13,7 @@ lex
if ($1[1]) $$.startConditions = $1[1];
if ($2) $$.actionInclude = $2;
if ($5) $$.moduleInclude = $5;
if (yy.options) $$.options = yy.options;
return $$; }
;

Expand Down Expand Up @@ -111,7 +112,7 @@ action
regex
: regex_list
{ $$ = $1;
if ($$.match(/[\w\d]$/) && !$$.match(/\\(b|c[A-Z]|x[0-9A-F]{2}|u[a-fA-F0-9]{4}|[0-7]{1,3})$/))
if (!(yy.options && yy.options.flex) && $$.match(/[\w\d]$/) && !$$.match(/\\(b|c[A-Z]|x[0-9A-F]{2}|u[a-fA-F0-9]{4}|[0-7]{1,3})$/))
$$ += "\\b";
}
;
Expand Down
7 changes: 6 additions & 1 deletion src/jisonlex.jisonlex
Expand Up @@ -2,10 +2,14 @@
NAME [a-zA-Z_][a-zA-Z0-9_-]*

%s indented trail
%x code start_condition
%x code start_condition options

%%

<options>{NAME} yy.options[yytext] = true
<options>\n+ this.begin('INITIAL')
<options>\s+ /* empty */

<start_condition>{NAME} return 'START_COND'
<start_condition>\n+ this.begin('INITIAL')
<start_condition>\s+ /* empty */
Expand Down Expand Up @@ -41,6 +45,7 @@ NAME [a-zA-Z_][a-zA-Z0-9_-]*
"\\". yytext = yytext.replace(/^\\/g,''); return 'ESCAPE_CHAR'
"$" return '$'
"." return '.'
"%options" yy.options = {}; this.begin('options');
"%s" this.begin('start_condition');return 'START_INC'
"%x" this.begin('start_condition');return 'START_EXC'
"%%" if (yy.ruleSection) this.begin('code'); yy.ruleSection = true; return '%%'
Expand Down
12 changes: 12 additions & 0 deletions tests/grammar/lex_parse.js
Expand Up @@ -247,3 +247,15 @@ exports["test empty or regex"] = function () {

assert.deepEqual(lex.parse(lexgrammar), expected, "grammar should be parsed correctly");
}

// Checks that a `%options` line in a lexer grammar is parsed into an
// `options` map on the result, alongside the parsed rules.
exports["test options"] = function () {
    var source = '%options flex\n%%\n"foo" return 1;';

    // A single option name (`flex`) is expected to appear as a key set to true.
    var wanted = {
        rules: [["foo", "return 1;"]],
        options: {flex: true}
    };
    var parsed = lex.parse(source);

    assert.deepEqual(parsed, wanted, "grammar should be parsed correctly");
};
2 changes: 1 addition & 1 deletion tests/lexer/regexplexer.js
Expand Up @@ -640,7 +640,7 @@ exports["test longest match returns"] = function() {
[".", "return 'DOT';" ],
["cat", "return 'CAT';" ]
],
options: {longestMatch: true}
options: {flex: true}
};
var input = "cat!";

Expand Down

0 comments on commit d6f1d62

Please sign in to comment.