### P0 Grammar is extended with explicit allocation and deallocation

### What is extended/modified?



#### Grammar:
    selector ::= {"." ident | "[" expression "]"}["^"].
    
    type ::=
        ident |
        "array" "[" expression ".." expression "]" "of" type |
        "record" typedIds {";" typedIds} "end" |
        "^" type.

#### Procedure
    - selector(x)
    - factor()
    - term()
    - simpleExpression()
    - expression()
    - statement()
    - typ()

### What is new in extended P0 Grammar?

#### Statements:
* _Assignment statement_: (`x` variable identifier, `p` pointer variable identifier, `f` field identifier, `i`, `e` expressions):
        p^ := e;
        p[i]^ := e;
        p.f^ := e;
        x := p^;
        x := p[i]^;
        x := p.f^;

* _StdProc call_: (`p` pointer variable identifier):
        new(p);
        dispose(p);

#### Types:

* _Pointers_: (`T` type)
        ^T

#### Declarations

* _Type Declaration_ (`p` type identifier, `T` type):
        type p = ^T;
    
* _Variable Declaration_ (`x₁`, `x₂`, … variable identifiers, `T` type):
        var x₁, x₂, ... : ^T;

### The P0 Grammar

    selector ::= {"." ident | "[" expression "]"}["^"].
    factor ::= ident selector | integer | "(" expression ")" | "not" factor.
    term ::= factor {("*" | "div" | "mod" | "and") factor}.
    simpleExpression ::= ["+" | "-"] term {("+" | "-" | "or") term}.
    expression ::= simpleExpression
        {("=" | "<>" | "<" | "<=" | ">" | ">=") simpleExpression}.
    compoundStatement ::= "begin" statement {";" statement} "end".
    statement ::=
        ident selector ":=" expression |
        ident "(" [expression {"," expression}] ")" |
        compoundStatement |
        "if" expression "then" statement ["else" statement] |
        "while" expression "do" statement.
    type ::=
        ident |
        "array" "[" expression ".." expression "]" "of" type |
        "record" typedIds {";" typedIds} "end" |
        "^" type.
    typedIds ::= ident {"," ident} ":" type.
    declarations ::=
        {"const" ident "=" expression ";"}
        {"type" ident "=" type ";"}
        {"var" typedIds ";"}
        {"procedure" ident ["(" [["var"] typedIds {";" ["var"] typedIds}] ")"] ";"
            declarations compoundStatement ";"}.
    program ::= "program" ident ";" declarations compoundStatement.


In [1]:
import nbimporter
nbimporter.options["only_defs"] = False
import SC  #  used for SC.init, SC.sym, SC.val, SC.error
from SC import TIMES, DIV, MOD, AND, PLUS, MINUS, OR, EQ, NE, LT, GT, \
    LE, GE, PERIOD, COMMA, COLON, RPAREN, RBRAK, OF, THEN, DO, LPAREN, \
    LBRAK, NOT, BECOMES, NUMBER, IDENT, SEMICOLON, END, ELSE, IF, WHILE, \
    ARRAY, RECORD, CARET, CONST, TYPE, VAR, PROCEDURE, BEGIN, PROGRAM, EOF, \
    getSym, mark
import ST  #  used for ST.init
from ST import Var, Ref, Const, Type, Proc, StdProc, Int, Bool, Enum, \
    Record, Array, newDecl, find, openScope, topScope, closeScope, Pointer #Pointer type should be added

Importing Jupyter notebook from SC.ipynb
Importing Jupyter notebook from ST.ipynb


In [None]:
# Running counter used by typedIds() to number every identifier it declares
# (stored in the entry's `id` attribute); typ() resets it to 0 when it starts
# parsing a record type.
fid = 0

In [2]:
# First and follow sets of the P0 grammar. The parsing procedures below use
# them to detect syntax errors and to decide where to stop skipping symbols
# during error recovery.
FIRSTFACTOR = {IDENT, NUMBER, LPAREN, NOT}
FOLLOWFACTOR = {TIMES, DIV, MOD, AND, OR, PLUS, MINUS, EQ, NE, LT, LE, GT, GE,
                COMMA, SEMICOLON, THEN, ELSE, RPAREN, RBRAK, DO, PERIOD, END}
FIRSTEXPRESSION = {PLUS, MINUS, IDENT, NUMBER, LPAREN, NOT}
FIRSTSTATEMENT = {IDENT, IF, WHILE, BEGIN}
FOLLOWSTATEMENT = {SEMICOLON, END, ELSE}
FIRSTTYPE = {IDENT, RECORD, ARRAY, LPAREN, CARET} # CARET is included because a pointer type starts with "^". NOTE(review): typ() has no branch for LPAREN -- confirm whether LPAREN belongs in FIRSTTYPE
FOLLOWTYPE = {SEMICOLON}
FIRSTDECL = {CONST, TYPE, VAR, PROCEDURE}
FOLLOWDECL = {BEGIN}
FOLLOWPROCCALL = {SEMICOLON, END, ELSE}
# Symbols at which symbol skipping always stops during error recovery.
STRONGSYMS = {CONST, TYPE, VAR, PROCEDURE, WHILE, IF, BEGIN, EOF}

Procedure `selector(x)` parses

    selector ::= {"." ident | "[" expression "]"}["^"].

Assuming `x` is the entry for the identifier in front of the selector, generates code for the selector or reports error.

`selector(x)` is extended to handle the cases where `x` is of Pointer type with an array or record as base type, where `x` is an array of Pointers, and where the fields of a record are Pointers. If `x.tp` is a Pointer type, the returned item keeps `x.tp` unchanged and carries the finally selected type in `x.select`.

In [1]:
def selector(x):
    """Parse `selector ::= {"." ident | "[" expression "]"}["^"]` for item `x`.

    `x` is the item for the identifier in front of the selector. Field
    selections and index operations are handed to the code generator
    (`CG.genSelect`, `CG.genIndex`); syntax and type errors are reported
    through `mark`.

    Besides plain records and arrays, `x` may be of Pointer type. In that case
    fields and array bounds are looked up in the pointer's base type (or in
    `x.select` when the base type is not a record). A trailing "^" sets
    `x.flag = True` on the returned item.

    Returns the item for the selected location (possibly a new item returned
    by the code generator).
    """
    field = None  # name of the field most recently selected through a pointer to a record
    if type(x.tp) == Pointer and type(x.tp.base) == Array:
        x.dim = 1
    while SC.sym in {PERIOD, LBRAK}:
        if SC.sym == PERIOD:  #  x.f
            getSym()
            if SC.sym == IDENT:
                if type(x.tp) == Record:
                    for f in x.tp.fields:
                        if f.name == SC.val:
                            x = CG.genSelect(x, f); break
                    else: mark("not a field")
                    getSym()
                elif type(x.tp) == Pointer: # x.f^ ; x is a Pointer Var, for example: type p = ^record f : integer end; var x : p;
                    if type(x.tp.base) == Record:
                        for f in x.tp.base.fields:
                            if f.name == SC.val:
                                field = f.name
                                x = CG.genSelect(x, f); break
                        else: mark("not a field"); x.select = x.tp.base
                    else: # for example: type p = ^array [1..3] of record f : integer end; var x : p;
                        # Relies on x.select already holding the record type reached
                        # so far -- presumably set by CG.genIndex; verify against CG.
                        for f in x.select.fields:
                            if f.name == SC.val:
                                field = f.name
                                x = CG.genSelect(x, f); break
                        else: mark("not a field"); x.select = x.tp.base
                    getSym()
                else: mark("not a record")
            else: mark("identifier expected")
        elif SC.sym == LBRAK:  #  x[y]
            getSym(); y = expression()
            if type(x.tp) == Array:
                if y.tp == Int:
                    if type(y) == Const and \
                       (y.val < x.tp.lower or y.val >= x.tp.lower + x.tp.length):
                        mark('index out of bounds')
                    else:
                        x = CG.genIndex(x, y)
                else: mark('index not integer')
            elif type(x.tp) == Pointer: # x[y]^
                if y.tp == Int:
                    if type(x.tp.base) == Record: # for example: type p = ^record tt : array [1..3] of integer end; var x : p;
                        for fld in x.tp.base.fields:
                            if field == fld.name:
                                if type(y) == Const and \
                                   (y.val < fld.tp.lower or y.val >= fld.tp.lower + fld.tp.length):
                                    # break skips the loop's else clause so that no
                                    # index code is generated for a bad constant index,
                                    # as in the other two bounds checks of this procedure
                                    mark('index out of bounds'); break
                        else:
                            x = CG.genIndex(x, y)
                    else: # for example: type p = ^array [1..3] of integer; var x : p;
                        if type(y) == Const and \
                           (y.val < x.tp.base.lower or y.val >= x.tp.base.lower + x.tp.base.length):
                            mark('index out of bounds'); x.select = x.tp.base
                        else:
                            x = CG.genIndex(x, y)
                else: mark('index not integer')
            else: mark('not an array')
            if SC.sym == RBRAK: getSym()
            else: mark("] expected")
    if SC.sym == CARET:
        x.flag = True  # records that the item was dereferenced with "^"
        getSym()
    return x


Procedure `factor()` parses

    factor ::= ident selector | integer | "(" expression ")" | "not" factor.

and generates code for the factor if no error is reported. If the factor is a constant, a `Const` item is returned (and code may not need to be generated); if the factor is not a constant, the location of the result is returned as determined by the code generator. 

`factor()` is extended for pointer type variable.

In [4]:
def factor():
    """Parse `factor ::= ident selector | integer | "(" expression ")" | "not" factor`.

    Returns a `Const` item for constants, otherwise the item returned by the
    code generator. If no factor can be parsed, `Const(None, 0)` is returned.

    For `not`, the operand is accepted when its type is `Bool`, or -- for
    pointer items -- when the selected type (`x.select`) or, if no `select`
    attribute is present, the pointer's base type is `Bool`.
    """
    if SC.sym not in FIRSTFACTOR:
        mark("expression expected")
        while SC.sym not in FIRSTFACTOR | FOLLOWFACTOR | STRONGSYMS: getSym()
    if SC.sym == IDENT:
        x = find(SC.val)
        if type(x) in {Var, Ref}:
            x = CG.genVar(x); getSym()
        elif type(x) == Const: x = Const(x.tp, x.val); x = CG.genConst(x); getSym()
        else: mark('expression expected')
        x = selector(x)
    elif SC.sym == NUMBER:
        x = Const(Int, SC.val); x = CG.genConst(x); getSym()
    elif SC.sym == LPAREN:
        getSym(); x = expression()
        if SC.sym == RPAREN: getSym()
        else: mark(") expected")
    elif SC.sym == NOT:
        getSym(); x = factor()
        if x.tp != Bool: # x may be a pointer item whose base type or selected type is Bool
            # Determine the operand type explicitly rather than with a bare
            # `except:`, so that failures inside the code generator are not
            # swallowed and CG.genUnaryOp is never attempted twice.
            if hasattr(x, 'select'): operandTp = x.select
            elif type(x.tp) == Pointer: operandTp = x.tp.base
            else: operandTp = None
            if operandTp == Bool: x = CG.genUnaryOp(NOT, x)
            else: mark('not boolean')
        elif type(x) == Const: x.val = 1 - x.val # constant folding
        else: x = CG.genUnaryOp(NOT, x)
    else: x = Const(None, 0)
    return x

Procedure `term()` parses

    term ::= factor {("*" | "div" | "mod" | "and") factor}.

and generates code for the term if no error is reported. If the term is a constant, a `Const` item is returned (and code may not need to be generated); if the term is not a constant, the location of the result is returned as determined by the code generator. 

`term()` is extended to handle the cases where one or both of the operands `x`, `y` are Pointer type variables.

In [5]:
def term():
    """Parse `term ::= factor {("*" | "div" | "mod" | "and") factor}`.

    Returns a `Const` item when both operands are constants (constant
    folding), otherwise the item returned by the code generator. Operands may
    be pointer items; their type is then taken from the pointer's base type
    or from the selected type (`select`). Type mismatches are reported with
    `mark('bad type')`.
    """
    def valueType(item):
        # Type an operand contributes to the operation: its own type for
        # non-pointers; for pointers the base type when that is Int or Bool,
        # otherwise the selected type. A pointer item without a `select`
        # attribute yields None, which matches no operator and is therefore
        # reported as 'bad type' instead of raising AttributeError.
        tp = item.tp
        if type(tp) != Pointer: return tp
        if tp.base in (Int, Bool): return tp.base
        return getattr(item, 'select', None)

    x = factor()
    while SC.sym in {TIMES, DIV, MOD, AND}:
        op = SC.sym; getSym()
        if op == AND and type(x) != Const: x = CG.genUnaryOp(AND, x)
        y = factor() # x op y
        if x.tp == Int == y.tp and op in {TIMES, DIV, MOD}:
            if type(x) == Const == type(y): # constant folding
                if op == TIMES: x.val = x.val * y.val
                elif op == DIV: x.val = x.val // y.val
                elif op == MOD: x.val = x.val % y.val
            else: x = CG.genBinaryOp(op, x, y)
        elif x.tp == Bool == y.tp and op == AND:
            if type(x) == Const: # constant folding
                if x.val: x = y # if x is true, take y, else x
            else: x = CG.genBinaryOp(AND, x, y)
        else: # one or both operands are pointer items, or the types do not match
            xt, yt = valueType(x), valueType(y)
            if xt == Int == yt and op in {TIMES, DIV, MOD}:
                x = CG.genBinaryOp(op, x, y)
            elif xt == Bool == yt and op == AND:
                x = CG.genBinaryOp(AND, x, y)
            else:
                mark('bad type')
    return x

Procedure `simpleExpression()` parses

    simpleExpression ::= ["+" | "-"] term {("+" | "-" | "or") term}.

and generates code for the simple expression if no error is reported. If the simple expression is a constant, a `Const` item is returned (and code may not need to be generated); if the simple expression is not constant, the location of the result is returned as determined by the code generator. 

`simpleExpression()` is extended for pointer type variable.

In [6]:
def simpleExpression():
    """Parse `simpleExpression ::= ["+" | "-"] term {("+" | "-" | "or") term}`.

    Returns a `Const` item when the operands are constants (constant
    folding), otherwise the item returned by the code generator. Operands may
    be pointer items; their type is then taken from the pointer's base type
    or from the selected type (`select`). Type mismatches are reported with
    `mark`.
    """
    def valueType(item):
        # Type an operand contributes to the operation: its own type for
        # non-pointers; for pointers the base type when that is Int or Bool,
        # otherwise the selected type. A pointer item without a `select`
        # attribute yields None, which matches no operator and is therefore
        # reported as 'bad type' instead of raising AttributeError.
        tp = item.tp
        if type(tp) != Pointer: return tp
        if tp.base in (Int, Bool): return tp.base
        return getattr(item, 'select', None)

    if SC.sym == PLUS:
        getSym(); x = term()
    elif SC.sym == MINUS:
        getSym(); x = term()
        if x.tp != Int: # x may be a pointer item whose base type or selected type is Int
            # Determine the operand type explicitly rather than with a bare
            # `except:`, so that failures inside the code generator are not
            # swallowed and CG.genUnaryOp is never attempted twice.
            if hasattr(x, 'select'): operandTp = x.select
            elif type(x.tp) == Pointer: operandTp = x.tp.base
            else: operandTp = None
            if operandTp == Int: x = CG.genUnaryOp(MINUS, x)
            else: mark('not Int')
        elif type(x) == Const: x.val = - x.val # constant folding
        else: x = CG.genUnaryOp(MINUS, x)
    else: x = term()
    while SC.sym in {PLUS, MINUS, OR}:
        op = SC.sym; getSym()
        if op == OR and type(x) != Const: x = CG.genUnaryOp(OR, x)
        y = term() # x op y
        if x.tp == Int == y.tp and op in {PLUS, MINUS}:
            if type(x) == Const == type(y): # constant folding
                if op == PLUS: x.val = x.val + y.val
                elif op == MINUS: x.val = x.val - y.val
            else: x = CG.genBinaryOp(op, x, y)
        elif x.tp == Bool == y.tp and op == OR:
            if type(x) == Const: # constant folding
                if not x.val: x = y # if x is false, take y, else x
            else: x = CG.genBinaryOp(OR, x, y)
        else: # one or both operands are pointer items, or the types do not match
            xt, yt = valueType(x), valueType(y)
            if xt == Int == yt and op in {PLUS, MINUS}:
                x = CG.genBinaryOp(op, x, y)
            elif xt == Bool == yt and op == OR:
                # must be OR (not AND): it pairs with the CG.genUnaryOp(OR, x)
                # issued above and with the non-pointer branch
                x = CG.genBinaryOp(OR, x, y)
            else:
                mark('bad type')
    return x

Procedure `expression()` parses

    expression ::= simpleExpression
                 {("=" | "<>" | "<" | "<=" | ">" | ">=") simpleExpression}.

and generates code for the expression if no error is reported. The location of the result is returned as determined by the code generator.

`expression()` is extended for pointer type variable

In [7]:
def expression():
    """Parse

        expression ::= simpleExpression
                     {("=" | "<>" | "<" | "<=" | ">" | ">=") simpleExpression}

    Returns a `Const` item of type `Bool` when both operands of a relation are
    constants (constant folding), otherwise the item returned by
    `CG.genRelation`. Operands may be pointer items; their type is then taken
    from the pointer's base type or from the selected type (`select`).
    Operands of different types are reported with `mark('bad type')`.
    """
    def valueType(item):
        # Type an operand contributes to the relation: its own type for
        # non-pointers; for pointers the base type when that is Int or Bool,
        # otherwise the selected type. None means "unknown" (a pointer item
        # without a `select` attribute).
        tp = item.tp
        if type(tp) != Pointer: return tp
        if tp.base in (Int, Bool): return tp.base
        return getattr(item, 'select', None)

    x = simpleExpression()
    while SC.sym in {EQ, NE, LT, LE, GT, GE}:
        op = SC.sym; getSym(); y = simpleExpression() # x op y
        if x.tp == y.tp in (Int, Bool):
            if type(x) == Const == type(y): # constant folding
                if op == EQ: x.val = x.val == y.val
                elif op == NE: x.val = x.val != y.val
                elif op == LT: x.val = x.val < y.val
                elif op == LE: x.val = x.val <= y.val
                elif op == GT: x.val = x.val > y.val
                elif op == GE: x.val = x.val >= y.val
                x.tp = Bool
            else: x = CG.genRelation(op, x, y)
        elif type(x.tp) == Pointer or type(y.tp) == Pointer: # one or both operands are pointer items
            xt, yt = valueType(x), valueType(y)
            # an unknown operand type never compares equal, so it is reported
            # as 'bad type' instead of raising AttributeError
            if xt is not None and xt == yt:
                x = CG.genRelation(op, x, y)
            else:
                mark('bad type')
        else:
            mark('bad type')
    return x

Procedure `compoundStatement()` parses

    compoundStatement ::= "begin" statement {";" statement} "end"

and generates code for the compound statement if no error is reported. A result is returned as determined by the code generator.

In [8]:
def compoundStatement():
    """Parse `compoundStatement ::= "begin" statement {";" statement} "end"`.

    The statements are chained pairwise with `CG.genSeq` and the resulting
    item is returned. A missing `begin`, `;` or `end` is reported via `mark`
    and parsing continues.
    """
    if SC.sym != BEGIN:
        mark("'begin' expected")
    else:
        getSym()
    seq = statement()
    # keep going on ';' or when the next statement starts without a separator
    while SC.sym in FIRSTSTATEMENT or SC.sym == SEMICOLON:
        if SC.sym != SEMICOLON:
            mark("; missing")
        else:
            getSym()
        nxt = statement()
        seq = CG.genSeq(seq, nxt)
    if SC.sym != END:
        mark("'end' expected")
    else:
        getSym()
    return seq

`statement()` is modified for assignment of pointer type variable.

    statement ::=
        ident selector ":=" expression |
        ident "(" [expression {"," expression}] ")" |
        compoundStatement |
        "if" expression "then" statement ["else"statement] |
        "while" expression "do" statement.
        
Examples of assignment statement about the Pointer type variables:

    p^ := 4;
    p[3].ff^ := 6;
    x := p^;
    p^ := q^;
    p := q;   -> p and q are two Pointer Var
    ...
    
Two stdProc for explicit allocation and deallocation:

    new(p);      -> p is a Pointer Var
    dispose(p);  -> p is a Pointer Var

In [9]:
def statement():
    """Parse

        statement ::=
            ident selector ":=" expression |
            ident "(" [expression {"," expression}] ")" |
            compoundStatement |
            "if" expression "then" statement ["else" statement] |
            "while" expression "do" statement

    and return the item produced by the code generator, or None if no
    statement starts at the current symbol. Errors are reported via `mark`.

    For assignments, the `flag` attribute of an item (set to True by
    `selector` when a trailing "^" was parsed) distinguishes a pointer
    (`p := q`) from the dereferenced pointer (`p^ := e`, `x := p^`).
    The standard procedures `read`, `write`, `writeln`, `new` and `dispose`
    are dispatched to the matching `CG.gen...` procedure.
    """
    if SC.sym not in FIRSTSTATEMENT:
        mark("statement expected"); getSym()
        while SC.sym not in FIRSTSTATEMENT | FOLLOWSTATEMENT | STRONGSYMS : getSym()
    if SC.sym == IDENT:
        x = find(SC.val); getSym()
        if type(x) in {Var, Ref}:
            x = CG.genVar(x); x = selector(x)
            # Normalise x.flag: a non-pointer item that carries flag == True is an
            # error; a pointer item without a flag attribute gets flag = False.
            # Any exception raised in here (e.g. a missing attribute) is ignored.
            try:
                if type(x.tp) != Pointer:
                    if x.flag == True:
                        mark(x.name+"is not Pointer")
                else:
                    try:
                        if x.flag: # flag is True, this means that the variable associated with the Pointer is referred
                            pass
                    except:
                        x.flag = False # else set flag to false
            except:
                pass
            if SC.sym == BECOMES:
                getSym(); y = expression()
                # First pass: report mismatches between pointer and dereferenced
                # pointer on the two sides; code is generated in the second pass below.
                if type(x.tp) == Pointer and x.flag == False: # should be p1 := p2;
                    if type(y.tp) != Pointer:
                        mark('incompatible assignment')
                    else:
                        try:
                            if y.flag:
                                mark('incompatible assignment')
                        except:
                            pass
                elif type(x.tp) == Pointer and x.flag == True and type(y.tp) == Pointer: # should be p1^ := p2^;
                    try:
                        if not y.flag:
                            mark('incompatible assignment')
                        else:
                            pass
                    except:
                        mark('incompatible assignment')
                elif type(x.tp) != Pointer and type(y.tp) == Pointer: # x is not a Pointer Var, so y should dereference using CARET symbol
                    try:
                        if y.flag:
                            pass
                    except:
                        mark('^ expected')
                # handle assignment
                if x.tp == y.tp in {Bool, Int}: x = CG.genAssign(x, y)
                elif type(y.tp) == Pointer and type(x.tp) == Pointer and x.flag == False: # p := q;
                    x = CG.genAssign1(x, y)
                elif type(y.tp) == Pointer and type(x.tp) == Pointer and x.flag == True: # p^ := q^;
                    x = CG.genAssign(x, y)
                elif type(y.tp) == Pointer: # x := p^ -> p is a pointer typed variable
                    if x.tp == y.tp.base in {Bool, Int}:
                        x = CG.genAssign(x, y)
                    else:
                        try:
                            if x.tp == y.select in {Bool, Int}: # y is a Pointer Var but associated variable is array or record
                                x = CG.genAssign(x, y)          # y.select is the target type after the selector
                            else:
                                mark('incompatible assignment')
                        except:
                            mark('incompatible assignment')
                elif type(x.tp) == Pointer: # p^ := x; 
                    if x.tp.base == y.tp in {Bool, Int}:
                        x = CG.genAssign(x, y)
                    else:
                        try:
                            if x.select == y.tp in {Bool, Int}: # x.tp.base is Array or Record
                                x = CG.genAssign(x, y)
                            else:
                                mark('incompatible assignment')
                        except:
                            mark('incompatible assignment')
                else: mark('incompatible assignment')
            elif SC.sym == EQ:
                mark(':= expected'); getSym(); y = expression()
            else: mark(':= expected')
        elif type(x) in {Proc, StdProc}:
            fp, ap, i = x.par, [], 0   #  list of formals, list of actuals
            if SC.sym == LPAREN:
                getSym()
                if SC.sym in FIRSTEXPRESSION:
                    y = expression()
                    if x.name == 'write' and type(y.tp) == Pointer: # the printed value should be the value of the variable associated with the Pointer, not the Pointer itself
                        try:
                            if y.flag:
                                pass
                        except:
                            mark('^ expected')
                    if i < len(fp):
                        # NOTE(review): `type(fp[i].tp) == type(fp[i].tp)` compares a value
                        # with itself and is always true, so the type part of this check
                        # always passes for the first argument, whereas the loop below uses
                        # the strict `fp[i].tp == y.tp`. Confirm whether `type(y.tp)` was
                        # intended or the check is deliberately relaxed for pointer arguments.
                        if (type(fp[i]) == Var or type(y) == Var) and \
                           (fp[i].tp == y.tp or type(fp[i].tp) == type(fp[i].tp)):
                            if type(x) == Proc:
                                ap.append(CG.genActualPara(y, fp[i], i))
                        else: mark('illegal parameter mode')
                    else: mark('extra parameter')
                    i = i + 1
                    while SC.sym == COMMA:
                        getSym()
                        y = expression()
                        if x.name == 'write' and type(y.tp) == Pointer: # the printed value should be the value of the variable associated with the Pointer, not the Pointer itself
                            try:
                                if y.flag:
                                    pass
                            except:
                                mark('^ expected')
                        if i < len(fp):
                            if (type(fp[i]) == Var or type(y) == Var) and \
                               fp[i].tp == y.tp:
                                if type(x) == Proc:
                                    ap.append(CG.genActualPara(y, fp[i], i))
                            else: mark('illegal parameter mode')
                        else: mark('extra parameter')
                        i = i + 1
                if SC.sym == RPAREN: getSym()
                else: mark("')' expected")
            if i < len(fp): mark('too few parameters')
            elif type(x) == StdProc:
                # y is the last actual parameter parsed above
                if x.name == 'read': x = CG.genRead(y)
                elif x.name == 'write': x = CG.genWrite(y)
                elif x.name == 'writeln': x = CG.genWriteln()
                elif x.name == 'new': x = CG.genNew(y) ## new(p); StdProc for allocating object on the heap
                elif x.name == 'dispose': x = CG.genDispose(y) ## dispose(p); StdProc for deallocated object on the heap
            else: x = CG.genCall(x, ap)
        else: mark("variable or procedure expected")
    elif SC.sym == BEGIN: x = compoundStatement()
    elif SC.sym == IF:
        getSym(); x = expression();
        # code for the branches is generated only if the condition is Bool
        if x.tp == Bool: x = CG.genThen(x)
        else: mark('boolean expected')
        if SC.sym == THEN: getSym()
        else: mark("'then' expected")
        y = statement()
        if SC.sym == ELSE:
            if x.tp == Bool: y = CG.genElse(x, y)
            getSym(); z = statement()
            if x.tp == Bool: x = CG.genIfElse(x, y, z)
        else:
            if x.tp == Bool: x = CG.genIfThen(x, y)
    elif SC.sym == WHILE:
        getSym(); t = CG.genWhile(); x = expression()
        if x.tp == Bool: x = CG.genDo(x)
        else: mark('boolean expected')
        if SC.sym == DO: getSym()
        else: mark("'do' expected")
        y = statement()
        if x.tp == Bool: x = CG.genWhileDo(t, x, y)
    else: x = None
    return x

Procedure `typ` parses

    type ::= ident |
             "array" "[" expression ".." expression "]" "of" type |
             "record" typedIds {";" typedIds} "end" |
             "^" type

and returns a type descriptor if no error is reported. The array bounds are checked to be constants; the lower bound must be smaller than or equal to the upper bound. `"^" type` is added to implement the `Pointer` type used for explicit allocation and deallocation.

Pointer type can be declared as the following:

    type p = ^integer;

In [10]:
def typ():
    """Parse

        type ::= ident |
                 "array" "[" expression ".." expression "]" "of" type |
                 "record" typedIds {";" typedIds} "end" |
                 "^" type

    and return a `Type` entry wrapping the type descriptor, or `Type(None)`
    when no valid type could be parsed. Errors are reported via `mark`.
    """
    global fid

    def expect(sym, message):
        # consume `sym` if it is the current symbol, otherwise report `message`
        if SC.sym == sym: getSym()
        else: mark(message)

    if SC.sym not in FIRSTTYPE:
        mark("type expected")
        while SC.sym not in FIRSTTYPE | FOLLOWTYPE | STRONGSYMS: getSym()
    if SC.sym == IDENT:
        entry = find(SC.val); getSym()
        if type(entry) == Type:
            result = Type(entry.val)
        else:
            mark('not a type'); result = Type(None)
    elif SC.sym == ARRAY:
        getSym()
        expect(LBRAK, "'[' expected")
        lower = expression()
        expect(PERIOD, "'.' expected")
        expect(PERIOD, "'.' expected")
        upper = expression()
        expect(RBRAK, "']' expected")
        expect(OF, "'of' expected")
        elemTp = typ().val
        # bounds must be constants with 0 <= lower <= upper
        if type(lower) != Const or lower.val < 0:
            mark('bad lower bound'); result = Type(None)
        elif type(upper) != Const or upper.val < lower.val:
            mark('bad upper bound'); result = Type(None)
        else:
            length = upper.val - lower.val + 1
            result = Type(CG.genArray(Array(elemTp, lower.val, length)))
    elif SC.sym == RECORD:
        fid = 0  # restart the identifier numbering done by typedIds
        getSym(); openScope(); typedIds(Var)
        while SC.sym == SEMICOLON:
            getSym(); typedIds(Var)
        expect(END, "'end' expected")
        fields = topScope(); closeScope()
        result = Type(CG.genRec(Record(fields)))
    elif SC.sym == CARET:  # '^' introduces a pointer type
        getSym()
        baseTp = typ().val  # base type of the pointer
        if baseTp != None:
            result = Type(CG.genPtr(Pointer(baseTp)))
        else:
            result = Type(None)
    else:
        result = Type(None)
    return result

Procedure `typedIds(kind)` parses

    typedIds ::= ident {"," ident} ":" type.

and updates the top scope of symbol table; an error is reported if an identifier is already in the top scope. The parameter `kind` is assumed to be callable and applied to the type before an identifier and its type are entered in the symbol table.

In [11]:
def typedIds(kind):
    """Parse `ident {"," ident} ":" type` and declare the identifiers.

    `kind` is called with the parsed type once per identifier; the result
    gets the next value of the global counter `fid` as its `id` and is
    entered in the symbol table with `newDecl`. Nothing is declared when
    the colon is missing or the type could not be parsed.
    """
    global fid
    names = []
    if SC.sym == IDENT:
        names.append(SC.val)
        getSym()
    else:
        mark("identifier expected")
    while SC.sym == COMMA:
        getSym()
        if SC.sym != IDENT:
            mark('identifier expected')
            continue
        names.append(SC.val)
        getSym()
    if SC.sym != COLON:
        mark("':' expected")
        return
    getSym()
    tp = typ().val
    if tp is None:
        # invalid type: skip the declarations
        return
    for name in names:
        entry = kind(tp)
        entry.id = fid
        fid += 1
        newDecl(name, entry)

Procedure `declarations(allocVar)` parses

    declarations ::=
        {"const" ident "=" expression ";"}
        {"type" ident "=" type ";"}
        {"var" typedIds ";"}
        {"procedure" ident ["(" [["var"] typedIds {";" ["var"] typedIds}] ")"] ";"
            declarations compoundStatement ";"}

and updates the top scope of the symbol table; an error is reported if an identifier is already in the top scope. An error is also reported if the expression of a constant declaration is not constant. For each procedure, a new scope is opened for its formal parameters and local declarations, the formal parameters are added to the symbol table, and code is generated for the body. The size of the variable declarations is returned, as determined by calling parameter `allocVar`.

A pointer variable can be declared as follows:

    type p = ^integer;    -> Pointer type with base type, Int
    var x : p;            -> x is defined as Pointer variable

In [12]:
def declarations(allocVar):
    """Parse constant, type, variable and procedure declarations.

    Parses

        declarations ::=
            {"const" ident "=" expression ";"}
            {"type" ident "=" type ";"}
            {"var" typedIds ";"}
            {"procedure" ident ["(" [["var"] typedIds {";" ["var"] typedIds}] ")"] ";"
                declarations compoundStatement ";"}

    and enters the declared identifiers in the top scope of the symbol
    table. `allocVar` is called with the top scope and the index of the
    first entry added by the "var" sections; its result is returned.
    Syntax errors are reported through `mark`.
    """
    if SC.sym not in FIRSTDECL | FOLLOWDECL:
        mark("'begin' or declaration expected")
        # error recovery: skip to something that can start or follow declarations
        while SC.sym not in FIRSTDECL | FOLLOWDECL | STRONGSYMS: getSym()
    while SC.sym == CONST:
        getSym()
        if SC.sym == IDENT:
            ident = SC.val; getSym()
            if SC.sym == EQ: getSym()
            else: mark("= expected")
            x = expression()
            if type(x) is Const: newDecl(ident, x)
            else: mark('expression not constant')
        else: mark("constant name expected")
        if SC.sym == SEMICOLON: getSym()
        else: mark("; expected")
    while SC.sym == TYPE:
        getSym()
        if SC.sym == IDENT:
            ident = SC.val; getSym()
            if SC.sym == EQ: getSym()
            else: mark("= expected")
            x = typ()
            newDecl(ident, x)  # x is of type ST.Type
            if SC.sym == SEMICOLON: getSym()
            else: mark("; expected")
        else: mark("type name expected")
    # entries from index `start` on are the variables declared below
    start = len(topScope())
    while SC.sym == VAR:
        getSym(); typedIds(Var)
        if SC.sym == SEMICOLON: getSym()
        else: mark("; expected")
    varsize = allocVar(topScope(), start)
    while SC.sym == PROCEDURE:
        getSym()
        # Read the name while the scanner is still on the identifier, as the
        # const and type sections do, instead of after advancing past it.
        ident = SC.val
        if SC.sym == IDENT: getSym()
        else: mark("procedure name expected")
        newDecl(ident, Proc([]))  # entered without parameters
        sc = topScope()
        openScope()  # new scope for parameters and body
        if SC.sym == LPAREN:
            getSym()
            if SC.sym in {VAR, IDENT}:
                # "var" parameters are declared as Ref, all others as Var
                if SC.sym == VAR: getSym(); typedIds(Ref)
                else: typedIds(Var)
                while SC.sym == SEMICOLON:
                    getSym()
                    if SC.sym == VAR: getSym(); typedIds(Ref)
                    else: typedIds(Var)
            else: mark("formal parameters expected")
            fp = topScope()
            # the last entry of the enclosing scope is taken to be the
            # procedure declared above
            sc[-1].par = fp[:]  # procedure parameters updated
            if SC.sym == RPAREN: getSym()
            else: mark(") expected")
        else: fp = []
        parsize = CG.genProcStart(ident, fp)
        if SC.sym == SEMICOLON: getSym()
        else: mark("; expected")
        localsize = declarations(CG.genLocalVars)
        CG.genProcEntry(ident, parsize, localsize)
        x = compoundStatement(); CG.genProcExit(x, parsize, localsize)
        closeScope()  # scope for parameters and body closed
        if SC.sym == SEMICOLON: getSym()
        else: mark("; expected")
    return varsize

Procedure `program` parses

        program ::= "program" ident ";" declarations compoundStatement
 
and returns the generated code if no error is reported. The standard identifiers are entered initially in the symbol table.

In [13]:
def program():
    """Parse `"program" ident ";" declarations compoundStatement`.

    The standard identifiers are entered in the symbol table first. The
    result of `CG.genProgExit` for the program body is returned.
    """
    def expect(symbol, message):
        # consume `symbol` if it is the current symbol, otherwise report `message`
        if SC.sym == symbol:
            getSym()
        else:
            mark(message)

    # standard types and constants
    newDecl('boolean', Type(CG.genBool(Bool)))
    newDecl('integer', Type(CG.genInt(Int)))
    newDecl('true', Const(Bool, 1))
    newDecl('false', Const(Bool, 0))
    # standard procedures for input and output
    newDecl('read', StdProc([Ref(Int)]))
    newDecl('write', StdProc([Var(Int)]))
    newDecl('writeln', StdProc([]))
    # standard procedures for allocation and deallocation
    newDecl('new', StdProc([Var(Pointer(Int))]))
    newDecl('dispose', StdProc([Var(Pointer(Int))]))

    CG.genProgStart()
    expect(PROGRAM, "'program' expected")
    name = SC.val  # read before the identifier is consumed
    expect(IDENT, 'program name expected')
    expect(SEMICOLON, '; expected')
    declarations(CG.genGlobalVars)
    CG.genNewHeap()
    CG.genProgEntry(name)
    body = compoundStatement()
    return CG.genProgExit(body)

Procedure `compileString(src, dstfn, target)` compiles the source as given by string `src`; the generated code is printed on the screen and, if `dstfn` is provided, also written to a file by that name. If `target` is omitted, the `'wat'` target is used; the other targets are `'mips'` and `'ast'`.

In [14]:
def compileString(src, dstfn = None, target = 'wat'):
    """Compile the program in string `src` and print the generated code.

    `target` selects the code generator module bound to the global `CG`:
    'wat' (CGwat), 'mips' (CGmips) or 'ast' (CGast); any other value
    prints 'unknown target' and returns. If `dstfn` is given, the code is
    also written to that file. Nothing is output if `program()` returns
    None or the scanner has flagged an error. Exceptions raised during
    compilation are printed rather than propagated.
    """
    global CG
    if target not in ('wat', 'mips', 'ast'):
        print('unknown target')
        return
    if target == 'wat':
        import CGwat as CG
    elif target == 'mips':
        import CGmips as CG
    else:
        import CGast as CG
    try:
        SC.init(src)
        ST.init()
        code = program()
        if code is not None and not SC.error:
            if dstfn is not None:
                with open(dstfn, 'w') as out:
                    out.write(code)
            print(code)
    except Exception as msg:
        print(msg)

Procedure `compileFile(srcfn, target)` compiles the file named `srcfn`, which must have the extension `.p`, and writes the generated code to a file with the extension `.s`. If `target` is omitted, the `'wat'` target is used.

In [15]:
def compileFile(srcfn, target = 'wat'):
    """Compile the source file `srcfn`, which must end in '.p'.

    The file is read and passed to `compileString` together with the
    destination name, which is `srcfn` with '.p' replaced by '.s'. For
    any other extension a message is printed and nothing is compiled.
    """
    if not srcfn.endswith('.p'):
        print("'.p' file extension expected")
        return
    with open(srcfn, 'r') as source_file:
        src = source_file.read()
    dstfn = srcfn[:-len('.p')] + '.s'
    compileString(src, dstfn, target)

Sample usage (in code cell):

    cd /path/to/my/prog
    compileFile('myprog.p')