Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 17fe62b
Showing
8 changed files
with
181 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import gen_lexer | ||
|
||
# Tags attached to each lexeme produced by the lexer.
RESERVED = 'RESERVED'
INT = 'INT'
ID = 'ID'

# Ordered (pattern, tag) rules. The lexer tries them top to bottom and takes
# the first one that matches at the current position, so:
#   * longer operators must come before their one-character prefixes
#     (':=' before ':', '<=' before '<', '>=' before '>');
#   * keywords end with \b so that identifiers which merely start with a
#     keyword ('done', 'order', 'iffy') fall through to the ID rule.
# A tag of None means the matched text is discarded (whitespace, comments).
token_regex_list = [
    (r'[ \n\t]+', None),
    (r'#[^\n]*', None),
    (r'\:=', RESERVED),
    (r'\(', RESERVED),
    (r'\)', RESERVED),
    (r';', RESERVED),
    (r':', RESERVED),
    (r'\+', RESERVED),
    (r'-', RESERVED),
    (r'\*', RESERVED),
    (r'/', RESERVED),
    (r'<=', RESERVED),
    (r'<', RESERVED),
    (r'>=', RESERVED),
    (r'>', RESERVED),
    (r'=', RESERVED),
    (r'!=', RESERVED),
    (r'and\b', RESERVED),
    (r'or\b', RESERVED),
    (r'not\b', RESERVED),
    (r'if\b', RESERVED),
    (r'then\b', RESERVED),
    (r'else\b', RESERVED),
    (r'while\b', RESERVED),
    (r'do\b', RESERVED),
    (r'end\b', RESERVED),
    (r'[0-9]+', INT),
    (r'[A-Za-z][A-Za-z0-9_]*', ID),
]
|
||
def chatlang_lexer(character_stream):
    """Tokenize chatlang source text.

    Runs the generic lexer from ``gen_lexer`` over ``character_stream``
    using this module's ``token_regex_list`` and returns whatever that
    lexer returns.
    """
    tokens = gen_lexer.lexer(character_stream, token_regex_list)
    return tokens
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
class Parser:
    """Common base class for the parser combinators in this module.

    It defines no behaviour of its own; the subclasses in this module are
    invoked as ``parser(tokens, pos)``.
    """
|
||
class Reserved(Parser):
    """Parser that accepts exactly one token with a given text and tag."""

    def __init__(self, name, tag):
        self.name = name
        self.tag = tag

    def __call__(self, tokens, pos):
        """Match the token at ``pos`` against the expected (name, tag).

        Returns ``(token, pos + 1)`` on a match and ``None`` otherwise.
        Raises ``Exception("Unexpected eof")`` when ``pos`` is at or past
        the end of ``tokens``.
        """
        # Guard clause: running off the end of the input is an error,
        # not an ordinary failed match.
        if pos >= len(tokens):
            raise Exception("Unexpected eof")

        token = tokens[pos]
        name, tag = token
        if name != self.name or tag != self.tag:
            return None
        return (token, pos + 1)
|
||
class Tag(Parser):
    """Parser that accepts any single token carrying a given tag."""

    def __init__(self, tag):
        self.tag = tag

    def __call__(self, tokens, pos):
        """Match the token at ``pos`` by tag only.

        Returns ``(token, pos + 1)`` when the token's tag equals
        ``self.tag`` and ``None`` otherwise. Raises
        ``Exception("Unexpected eof")`` when ``pos`` is at or past the end
        of ``tokens``.
        """
        if pos >= len(tokens):
            raise Exception("Unexpected eof")

        token = tokens[pos]
        _, tag = token
        # Compare by value: 'is' tests object identity, which only works
        # for equal strings when the interpreter happens to share them.
        if tag == self.tag:
            return (token, pos + 1)
        return None
|
||
class Sequence(Parser):
    """Parser that runs several parsers one after another."""

    def __init__(self, *parsers):
        self.parsers = parsers

    def __call__(self, tokens, pos):
        """Apply every sub-parser in order, each starting where the last ended.

        Returns ``(values, new_pos)`` where ``values`` is a tuple holding
        each sub-parser's value and ``new_pos`` is the position after the
        last one. Returns ``None`` as soon as any sub-parser fails.
        Exceptions raised by sub-parsers propagate unchanged.
        """
        cur_pos = pos
        values = []
        for parser in self.parsers:
            result = parser(tokens, cur_pos)
            if result is None:
                return None
            ast, cur_pos = result
            values.append(ast)
        # Report the advanced position; returning the starting 'pos' would
        # make a successful sequence appear to consume no tokens.
        return tuple(values), cur_pos
|
||
class Or(Parser):
    """Parser that tries alternatives in order and keeps the first success."""

    def __init__(self, *parsers):
        self.parsers = parsers

    def __call__(self, tokens, pos):
        """Return the first truthy result among the alternatives, else ``None``.

        Every alternative is tried from the same ``pos``; later
        alternatives are not evaluated once one has succeeded.
        """
        # Lazy generator: each alternative runs only if the previous failed.
        attempts = (alternative(tokens, pos) for alternative in self.parsers)
        return next((outcome for outcome in attempts if outcome), None)
|
||
class Optional(Parser):
    """Parser that makes another parser optional."""

    def __init__(self, parser):
        self.parser = parser

    def __call__(self, tokens, pos):
        """Return the wrapped parser's result, or ``(None, pos)`` if it fails.

        On failure no input is consumed: the returned position is the
        ``pos`` that was passed in.
        """
        outcome = self.parser(tokens, pos)
        return outcome if outcome else (None, pos)
|
||
class BinOp(Parser):
    """Parser for an infix binary operation: ``left op right``."""

    def __init__(self, op, left_op, right_op=None):
        """Build the parser.

        ``op`` parses the operator, ``left_op`` the left operand and
        ``right_op`` the right operand. When ``right_op`` is omitted the
        left-operand parser is reused for the right side.
        """
        if right_op is None:
            right_op = left_op
        # Order matters: the tokens appear as left, operator, right, which
        # is also the order __call__ unpacks the sequence result in.
        self.sequence = Sequence(left_op, op, right_op)

    def __call__(self, tokens, pos):
        """Return ``((op, left, right), new_pos)`` on success, else ``None``."""
        result = self.sequence(tokens, pos)
        if not result:
            return None
        (left, op, right), new_pos = result
        return (op, left, right), new_pos
|
||
class Conditional(Parser):
    """Parser for ``if ( exp ) : exp`` with an optional ``else : exp`` tail."""

    def __init__(self):
        # NOTE(review): Exp is not defined in the visible part of this
        # module; it must be provided before Conditional() is instantiated.
        exp = Exp()
        self.sequence = Sequence(
            Reserved("if", "RESERVED"),
            Reserved("(", "RESERVED"),
            exp,
            Reserved(")", "RESERVED"),
            Reserved(":", "RESERVED"),
            exp,
            Optional(Sequence(
                Reserved("else", "RESERVED"),
                Reserved(":", "RESERVED"),
                exp)))

    def __call__(self, tokens, pos):
        """Run the underlying sequence parser and return its result unchanged.

        Without this method a Conditional instance is not callable and so
        cannot be used as a parser or combined with the other combinators.
        """
        return self.sequence(tokens, pos)
|
||
if __name__ == "__main__":
    # Small smoke test for Reserved: one matching token, one non-matching.
    b = Reserved("if", "RESERVED")
    # Start at index 0: position 1 is past the end of a one-token list and
    # would raise "Unexpected eof" before anything was printed.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(b([("if", "RESERVED")], 0))
    print(b([("foo", "RESERVED")], 0))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import sys | ||
import re | ||
|
||
def lexer(character_stream, token_regex_list):
    """Split ``character_stream`` into a list of ``(text, tag)`` tokens.

    ``token_regex_list`` is an ordered sequence of ``(pattern, tag)`` pairs.
    At each position the patterns are tried in order and the first one that
    matches a non-empty string wins. The matched text is emitted as a token
    unless its tag is falsy (e.g. ``None``), in which case it is skipped.

    If no pattern matches, an error naming the offending character is
    written to stderr and the process exits with status 1.
    """
    tokens = []
    char_pos = 0
    # Compile once up front instead of once per character position.
    compiled_regexes = [(re.compile(p), tag) for p, tag in token_regex_list]
    stream_length = len(character_stream)
    while char_pos < stream_length:
        for regex, tag in compiled_regexes:
            match = regex.match(character_stream, char_pos)
            # Ignore zero-length matches: accepting one would leave
            # char_pos unchanged and loop forever.
            if match and match.end(0) > char_pos:
                if tag:
                    tokens.append((match.group(0), tag))
                char_pos = match.end(0)
                break
        else:
            # No rule matched at this position.
            sys.stderr.write('Error! Invalid character: %s ' % character_stream[char_pos])
            sys.exit(1)
    return tokens
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
n := 5; | ||
p := 1; | ||
while n > 0 do | ||
p := p * n; | ||
n := n - 1 | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
import sys | ||
from chatlang_lexer import * | ||
|
||
if __name__ == '__main__':
    # Usage: python <this script> <source-file>
    # Lexes the given file and prints one token per line.
    if len(sys.argv) < 2:
        sys.stderr.write('Usage: %s <source-file>\n' % sys.argv[0])
        sys.exit(1)
    filename = sys.argv[1]
    # 'with' closes the file even if read() raises; the handle is not
    # called 'file' so it does not shadow the Python 2 builtin.
    with open(filename) as source_file:
        characters = source_file.read()
    tokens = chatlang_lexer(characters)
    for token in tokens:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(token)