From bb0777f2ac7fdf4eb3590ca96124d923c0cb08ee Mon Sep 17 00:00:00 2001
From: Tim Taubert
Date: Tue, 17 Apr 2012 13:43:57 +0200
Subject: [PATCH] implemented lexer iterator

---
Reviewer note (this area is ignored when the patch is applied): line breaks
and blank context lines were reconstructed from a whitespace-collapsed copy
of this patch. Hunk line counts were checked against the @@ headers and the
diffstat below. Leading indentation was not preserved in that copy; 2-space
JavaScript indentation and a 4-space indented Markdown output block are
assumed here -- verify against the repository before applying.

 README.md | 22 +++++++------
 lex.js    | 93 +++++++++++++++++++++++++++++++------------------------
 2 files changed, 64 insertions(+), 51 deletions(-)

diff --git a/README.md b/README.md
index 48f4c8e..29e1079 100644
--- a/README.md
+++ b/README.md
@@ -18,17 +18,19 @@ var tokens = {
 };
 
 var lexer = new lex.Lexer(tokens);
-lexer.tokenize("3 + 5 * 6 - 7").forEach(function (lexeme) {
-  console.log("[" + lexeme.type + ", " + lexeme.value + "]");
-});
+var iter = lexer.tokenize("3 + 5 * 6 - 7");
+
+for (var lexeme; lexeme = iter.next();) {
+  console.log(lexeme);
+}
 ```
 
 This will output the following:
 
-    [number, 3]
-    [plus, +]
-    [number, 5]
-    [times, *]
-    [number, 6]
-    [minus, -]
-    [number, 7]
+    { token: 'number', value: 3 }
+    { token: 'plus', value: '+' }
+    { token: 'number', value: 5 }
+    { token: 'times', value: '*' }
+    { token: 'number', value: 6 }
+    { token: 'minus', value: '-' }
+    { token: 'number', value: 7 }
diff --git a/lex.js b/lex.js
index ebcd5de..f820453 100644
--- a/lex.js
+++ b/lex.js
@@ -1,58 +1,55 @@
-function token(def, callback) {
-  if (def instanceof RegExp)
-    return new Token(def, callback);
-
-  throw "invalid token definition";
+function LexerContext(input) {
+  this.input = input;
 }
 
-function Lexer(tokens) {
-  this.tokens = tokens;
+LexerContext.prototype = {
+  input: null,
 
-  for (var type in this.tokens) {
-    var def = this.tokens[type];
-    if (!(def instanceof Token)) {
-      this.tokens[type] = token(def);
-    }
+  ignore: function Context_ignore() {
+  },
+
+  skip: function Context_skip(num) {
+    this.input = this.input.slice(num);
   }
+};
+
+function LexerIterator(tokens, input) {
+  this.tokens = tokens;
+  this.context = new LexerContext(input);
 }
 
-Lexer.prototype = {
+LexerIterator.prototype = {
   tokens: null,
+  context: null,
 
-  tokenize: function Lexer_tokenize(input) {
-    var lexeme;
-    var lexemes = [];
+  next: function LexerIterator_next() {
+    var next;
+    var context = this.context;
 
-    var context = {
-      skip: function Context_skip(num) {
-        input = input.slice(num);
-      },
-
-      ignore: function Context_ignore() {
-        lexeme = null;
-      }
+    context.ignore = function Context_ignore() {
+      next = null;
     };
 
-    loop: while (input.length) {
+    loop: while (context.input.length) {
       // iterate through and match all tokens
-      for (var type in this.tokens) {
-        var token = this.tokens[type];
-        var value = token.match(input);
+      for (var name in this.tokens) {
+        var token = this.tokens[name];
+        var value = token.match(context.input);
 
         if (value) {
           // skip the lexeme we just found
           context.skip(value.length);
 
           // create the lexeme
-          lexeme = new Lexeme(type, value);
+          next = {token: name, value: value};
 
           // call the token callback if any
           if (token.callback) {
-            token.callback(context, lexeme);
+            token.callback(context, next);
           }
 
-          if (lexeme) {
-            lexemes.push(lexeme);
+          if (next) {
+            return next;
           }
 
           continue loop;
@@ -62,8 +59,25 @@ Lexer.prototype = {
       // nothing found
       context.skip(1);
     }
+  }
+};
+
+function Lexer(tokens) {
+  this.tokens = tokens;
+
+  for (var name in this.tokens) {
+    var def = this.tokens[name];
+    if (!(def instanceof Token)) {
+      this.tokens[name] = token(def);
+    }
+  }
+}
 
-    return lexemes;
+Lexer.prototype = {
+  tokens: null,
+
+  tokenize: function Lexer_tokenize(input) {
+    return new LexerIterator(this.tokens, input);
   }
 };
 
@@ -86,15 +100,12 @@ Token.prototype = {
   }
 };
 
-function Lexeme(type, value) {
-  this.type = type;
-  this.value = value;
-}
+function token(def, callback) {
+  if (def instanceof RegExp)
+    return new Token(def, callback);
 
-Lexeme.prototype = {
-  type: null,
-  value: null
-};
+  throw "invalid token definition";
+}
 
 exports.token = token;
 exports.Lexer = Lexer;