From d48466f5ef57d4dfc1ff6f0b78e12afd1f8983dd Mon Sep 17 00:00:00 2001 From: Ger Hobbelt Date: Sun, 6 Jan 2013 02:16:22 +0100 Subject: [PATCH] fixes error recovery logic in the parser run-time (tested & verified using the errorlab.js test file): previously 5 tests would fail, but after this fix, all pass. The loop which looks for a matching error rule has been abstracted out into the function locateNearestErrorRecoveryRule() because the first cause of the failed tests was the parseError() handler firing before the error recovery could kick in: this (and any user-defined) parseError handler needs a way to detect whether an error recovery rule is available (via the hash.recoverable boolean). The tests also uncovered an infinite loop in error recovery in the new code when the lexer hits EOF. This bug has been fixed. --- lib/jison.js | 77 +++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 58 insertions(+), 19 deletions(-) diff --git a/lib/jison.js b/lib/jison.js index ce3299763..784b5b7cd 100755 --- a/lib/jison.js +++ b/lib/jison.js @@ -1000,6 +1000,18 @@ lrGeneratorMixin.generateModule = function generateModule (opt) { + " last_column: n,\n" + " range: [start_number, end_number] (where the numbers are indexes into the input string, regular zero-based)\n" + " }\n" + + "\n" + + "\n" + + " the parseError function receives a 'hash' object with these members for lexer and parser errors: {\n" + + " text: (matched text)\n" + + " token: (the produced terminal token, if any)\n" + + " line: (yylineno)\n" + + " }\n" + + " while parser (grammar) errors will also provide these members, i.e. parser errors deliver a superset of attributes: {\n" + + " loc: (yyloc)\n" + + " expected: (string describing the set of expected tokens)\n" + + " recoverable: (boolean: TRUE when the parser has a error recovery rule available for this particular error)\n" + + " }\n" + "*/\n"; out += (moduleName.match(/\./) ? 
moduleName : "var "+moduleName)+" = (function(){"; out += "\nvar parser = "+this.generateModule_(); @@ -1152,7 +1164,11 @@ function traceParseError (err, hash) { } function parseError (str, hash) { - throw new Error(str); + if (hash.recoverable) { + this.trace(str); + } else { + throw new Error(str); + } } parser.parseError = lrGeneratorMixin.parseError = parseError; @@ -1198,7 +1214,7 @@ parser.parse = function parse (input) { function lex() { var token; - token = self.lexer.lex() || 1; // $end = 1 + token = self.lexer.lex() || EOF; // $end = 1 // if token isn't its numeric value, convert if (typeof token !== 'number') { token = self.symbols_[token] || token; @@ -1206,6 +1222,27 @@ parser.parse = function parse (input) { return token; } + // Return the rule stack depth where the nearest error rule can be found. + // Return FALSE when no error recovery rule was found. + function locateNearestErrorRecoveryRule(state) { + var stack_probe = stack.length - 1; + var depth = 0; + + // try to recover from error + for(;;) { + // check for error recovery rule in this state + if ((TERROR.toString()) in table[state]) { + return depth; + } + if (state === 0 || stack_probe < 2) { + return false; // No suitable error recovery rule available. 
+ } + stack_probe -= 2; // popStack(1): [symbol, action] + state = stack[stack_probe]; + ++depth; + } + } + var symbol, preErrorSymbol, state, action, a, r, yyval = {}, p, len, newState, expected; while (true) { // retreive state number from top of stack @@ -1225,18 +1262,24 @@ parser.parse = function parse (input) { _handle_error: // handle parse error if (typeof action === 'undefined' || !action.length || !action[0]) { + var error_rule_depth; var errStr = ''; if (!recovering) { + // first see if there's any chance at hitting an error recovery rule: + error_rule_depth = locateNearestErrorRecoveryRule(state); + // Report error expected = []; - for (p in table[state]) if (this.terminals_[p] && p > 2) { - expected.push("'"+this.terminals_[p]+"'"); + for (p in table[state]) { + if (this.terminals_[p] && p > TERROR) { + expected.push("'"+this.terminals_[p]+"'"); + } } if (this.lexer.showPosition) { errStr = 'Parse error on line '+(yylineno+1)+":\n"+this.lexer.showPosition()+"\nExpecting "+expected.join(', ') + ", got '" + (this.terminals_[symbol] || symbol)+ "'"; } else { errStr = 'Parse error on line '+(yylineno+1)+": Unexpected " + - (symbol == 1 /*EOF*/ ? "end of input" : + (symbol == EOF ? 
"end of input" : ("'"+(this.terminals_[symbol] || symbol)+"'")); } this.parseError(errStr, { @@ -1244,14 +1287,17 @@ _handle_error: token: this.terminals_[symbol] || symbol, line: this.lexer.yylineno, loc: yyloc, - expected: expected + expected: expected, + recoverable: (error_rule_depth !== false) }); + } else if (preErrorSymbol !== EOF) { + error_rule_depth = locateNearestErrorRecoveryRule(state); } // just recovered from another error if (recovering == 3) { - if (symbol == EOF) { - throw new Error(errStr || 'Parsing halted.'); + if (symbol === EOF || preErrorSymbol === EOF) { + throw new Error(errStr || 'Parsing halted while starting to recover from another error.'); } // discard current lookahead and grab another @@ -1263,19 +1309,12 @@ _handle_error: } // try to recover from error - while (1) { - // check for error recovery rule in this state - if ((TERROR.toString()) in table[state]) { - break; - } - if (state === 0) { - throw new Error(errStr || 'Parsing halted.'); - } - popStack(1); - state = stack[stack.length-1]; + if (error_rule_depth === false) { + throw new Error(errStr || 'Parsing halted. No suitable error recovery rule available.'); } + popStack(error_rule_depth); - preErrorSymbol = symbol == 2 ? null : symbol; // save the lookahead token + preErrorSymbol = (symbol == TERROR ? null : symbol); // save the lookahead token symbol = TERROR; // insert generic error symbol as new lookahead state = stack[stack.length-1]; action = table[state] && table[state][TERROR];