implemented lexer iterator
ttaubert committed Apr 17, 2012
1 parent 2542685 commit bb0777f
Showing 2 changed files with 64 additions and 51 deletions.
README.md (22 changes: 12 additions & 10 deletions)

````diff
@@ -18,17 +18,19 @@ var tokens = {
 };
 
 var lexer = new lex.Lexer(tokens);
-lexer.tokenize("3 + 5 * 6 - 7").forEach(function (lexeme) {
-  console.log("[" + lexeme.type + ", " + lexeme.value + "]");
-});
+var iter = lexer.tokenize("3 + 5 * 6 - 7");
+
+for (var lexeme; lexeme = iter.next();) {
+  console.log(lexeme);
+}
 ```
 
 This will output the following:
 
-[number, 3]
-[plus, +]
-[number, 5]
-[times, *]
-[number, 6]
-[minus, -]
-[number, 7]
+{ token: 'number', value: 3 }
+{ token: 'plus', value: '+' }
+{ token: 'number', value: 5 }
+{ token: 'times', value: '*' }
+{ token: 'number', value: 6 }
+{ token: 'minus', value: '-' }
+{ token: 'number', value: 7 }
````
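Callers that still want the full token list up front can simply drain the new iterator into an array. A minimal sketch of that pattern follows; the token definitions and the `require("./lex")` path are illustrative assumptions, since the README's actual `tokens` map lies outside this hunk:

```js
// Sketch only: the regexes below are assumed stand-ins for the README's
// token definitions, start-anchored so each match lines up with the
// context.skip() call in the lexer.
var lex = require("./lex");

var tokens = {
  number: /^\d+/,
  plus:   /^\+/,
  minus:  /^-/,
  times:  /^\*/
};

var lexer = new lex.Lexer(tokens);
var iter = lexer.tokenize("3 + 5 * 6 - 7");

// Drain the iterator to recover the old "array of lexemes" behavior.
var lexemes = [];
for (var lexeme; (lexeme = iter.next());) {
  lexemes.push(lexeme);
}

console.log(lexemes.length); // 7 lexemes for this input
```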
lex.js (93 changes: 52 additions & 41 deletions)

````diff
@@ -1,58 +1,55 @@
-function token(def, callback) {
-  if (def instanceof RegExp)
-    return new Token(def, callback);
-
-  throw "invalid token definition";
+function LexerContext(input) {
+  this.input = input;
 }
 
-function Lexer(tokens) {
-  this.tokens = tokens;
+LexerContext.prototype = {
+  input: null,
 
-  for (var type in this.tokens) {
-    var def = this.tokens[type];
-    if (!(def instanceof Token)) {
-      this.tokens[type] = token(def);
-    }
-  }
-}
-
-Lexer.prototype = {
+  ignore: function Context_ignore() {
+  },
+
+  skip: function Context_skip(num) {
+    this.input = this.input.slice(num);
+  }
+};
+
+function LexerIterator(tokens, input) {
+  this.tokens = tokens;
+  this.context = new LexerContext(input);
+}
+
+LexerIterator.prototype = {
   tokens: null,
+  context: null,
 
-  tokenize: function Lexer_tokenize(input) {
-    var lexeme;
-    var lexemes = [];
+  next: function LexerIterator_next() {
+    var next;
+    var context = this.context;
 
-    var context = {
-      skip: function Context_skip(num) {
-        input = input.slice(num);
-      },
-
-      ignore: function Context_ignore() {
-        lexeme = null;
-      }
-    };
+    context.ignore = function Context_ignore() {
+      next = null;
+    };
 
-    loop: while (input.length) {
+    loop: while (context.input.length) {
       // iterate through and match all tokens
-      for (var type in this.tokens) {
-        var token = this.tokens[type];
-        var value = token.match(input);
+      for (var name in this.tokens) {
+        var token = this.tokens[name];
+        var value = token.match(context.input);
 
         if (value) {
           // skip the lexeme we just found
           context.skip(value.length);
 
           // create the lexeme
-          lexeme = new Lexeme(type, value);
+          next = {token: name, value: value};
 
           // call the token callback if any
           if (token.callback) {
-            token.callback(context, lexeme);
+            token.callback(context, next);
           }
 
-          if (lexeme) {
-            lexemes.push(lexeme);
-          }
+          if (next) {
+            return next;
+          }
 
           continue loop;
@@ -62,8 +59,25 @@ Lexer.prototype = {
       // nothing found
       context.skip(1);
     }
-
-    return lexemes;
   }
 };
+
+function Lexer(tokens) {
+  this.tokens = tokens;
+
+  for (var name in this.tokens) {
+    var def = this.tokens[name];
+    if (!(def instanceof Token)) {
+      this.tokens[name] = token(def);
+    }
+  }
+}
+
+Lexer.prototype = {
+  tokens: null,
+
+  tokenize: function Lexer_tokenize(input) {
+    return new LexerIterator(this.tokens, input);
+  }
+};
 
@@ -86,15 +100,12 @@ Token.prototype = {
   }
 };
 
-function Lexeme(type, value) {
-  this.type = type;
-  this.value = value;
-}
+function token(def, callback) {
+  if (def instanceof RegExp)
+    return new Token(def, callback);
 
-Lexeme.prototype = {
-  type: null,
-  value: null
-};
+  throw "invalid token definition";
+}
 
 exports.token = token;
 exports.Lexer = Lexer;
````
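The `ignore()` hook survives the rewrite: a token callback still receives the context and the pending lexeme, and calling `context.ignore()` clears `next` so `next()` keeps scanning instead of returning that match. A minimal sketch of how that could be used, assuming an explicit whitespace token; the token name, regexes, and module path here are illustrative, not part of this commit:

```js
// Sketch only: assumes ./lex as the module path and start-anchored regexes.
var lex = require("./lex");

var tokens = {
  // Match whitespace explicitly and drop it with context.ignore(); without
  // the callback, the matched spaces would come back as ordinary lexemes.
  whitespace: lex.token(/^\s+/, function (context, lexeme) {
    context.ignore();
  }),
  number: /^\d+/
};

var lexer = new lex.Lexer(tokens);
var iter = lexer.tokenize("1 2 3");

// next() silently swallows the ignored whitespace matches.
for (var lexeme; (lexeme = iter.next());) {
  console.log(lexeme.token, lexeme.value); // number 1, number 2, number 3
}
```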
