# LL Parser

Consider the following small LL(1) grammar:

```
1. S → F
2. S → ( S + F )
3. F → a
```

and parse the following input: **( a + a )**

First, we implement a lexical analyser:
- Define a `TOKEN` type that lists the tokens of our language

In [1]:
// Tokens of the language. INVALID marks a character outside the alphabet,
// END marks the end of the input.
type TOKEN = LPAR | RPAR | PLUS | MINUS | A | B | C | INVALID | END

// lexical analyser
// Reads one character from `input` and returns the pair (token, remaining input).
// An empty input yields (TOKEN.END, empty sequence); an unrecognised character
// yields TOKEN.INVALID.
let tokenize input =
    if Seq.isEmpty input then
        TOKEN.END, Seq.empty<char>
    else
        // Local names are lower-case on purpose: F# resolves an upper-case
        // identifier in a binding pattern against union cases first, so a local
        // called `C` would be read as the case TOKEN.C instead of a new variable.
        let ch = Seq.head input
        let rest = Seq.tail input
        let token =
            match ch with
            | '(' -> TOKEN.LPAR
            | ')' -> TOKEN.RPAR
            | '+' -> TOKEN.PLUS
            | '-' -> TOKEN.MINUS
            | 'a' -> TOKEN.A
            | 'b' -> TOKEN.B
            | 'c' -> TOKEN.C
            | _   -> TOKEN.INVALID
        token, rest

In [2]:
// A single call consumes one character only: it returns the first token and the rest of the input
tokenize "a+a"

Item1,Item2
A,"[ +, a ]"


In [3]:
// Generates the sequence of (token, remaining input) pairs for the input string.
// The pair carrying TOKEN.END is the last element produced.
let tokens input =
    // The state is the previously produced pair; unfolding stops once END has been emitted.
    let step (previous, rest) =
        if previous = TOKEN.END then
            None
        else
            let next = tokenize rest
            Some(next, next)
    // TOKEN.INVALID is only a seed for the first step and is never emitted itself.
    Seq.unfold step (TOKEN.INVALID, input)

In [4]:
// Enumerate every (token, remaining input) pair of the input, ending with the END token
tokens "(a+a)"

index,Item1,Item2
0,LPAR,"[ a, +, a, ) ]"
1,A,"[ +, a, ) ]"
2,PLUS,"[ a, ) ]"
3,A,[ ) ]
4,RPAR,[ ]
5,END,[ ]


In [5]:
// Print only the token of each pair; the remaining-input half is ignored
tokens "(a+a)"
|> Seq.iter (fun (tok, _) -> printf "%A " tok)

LPAR A PLUS A RPAR END 

Next, we construct a parsing table for this grammar, with one column per terminal and one row per nonterminal; each cell holds the number of the rule to apply:

| N\T | ( | ) | a | + | $ |
|-----|---|---|---|---|---|
| S   | 2 |   | 1 |   |   |
| F   |   |   | 3 |   |   |


Then, we define the types required to construct grammar rules, and define the grammar and the table above.

In [6]:
// Nonterminal symbols of the grammar
type RULE = S | F

// A grammar symbol as used in production right-hand sides and on the parser stack
type SYMBOL =
    // a token that has to match the current input token
    | Terminal of TOKEN
    // a nonterminal that is replaced by the right-hand side of a production
    | NonTerminal of RULE
    // placeholder symbol; `pop` returns it for an empty stack
    | Error

// Productions as (left-hand side, right-hand side) pairs.
// The entry at index i is rule number i + 1 of the grammar.
let grammar = [|
    (SYMBOL.NonTerminal RULE.S, [SYMBOL.NonTerminal RULE.F]);  // S → F
    (SYMBOL.NonTerminal RULE.S, [SYMBOL.Terminal TOKEN.LPAR;   // S → ( S + F )
                                 SYMBOL.NonTerminal RULE.S;
                                 SYMBOL.Terminal TOKEN.PLUS;
                                 SYMBOL.NonTerminal RULE.F;
                                 SYMBOL.Terminal TOKEN.RPAR]);
    (SYMBOL.NonTerminal RULE.F, [SYMBOL.Terminal TOKEN.A])     // F → a
|]

// Parse table as a list of (nonterminal, lookahead token) cells.
// The position of a cell in the list is the index of the production in `grammar`
// that the parser applies for that combination.
let table = [
     (RULE.S, TOKEN.A);    // Rule 1: parse table location [S, a]
     (RULE.S, TOKEN.LPAR); // Rule 2: parse table location [S, (]
     (RULE.F, TOKEN.A);    // Rule 3: parse table location [F, a]
];

In [7]:
// Stack helper functions
// The stack is a plain list whose head is the top of the stack.

// Returns a new stack with `sym` placed on top of `stk`.
let push sym stk =
    sym :: stk

// Returns the element on top of the stack without removing it.
// Delegates to List.head, so an empty stack is an error.
let top stk =
    stk |> List.head

// Splits the stack into (top symbol, remaining stack).
// An empty stack yields (SYMBOL.Error, empty list) instead of raising.
let pop stk =
    match stk with
    | [] -> SYMBOL.Error, List.empty<SYMBOL>
    | head :: rest -> head, rest

Write a syntactic analyser function `parser` that accepts:
- grammar description
- parse table
- string input

In [8]:
// syntactic analyser
// Table-driven LL(1) parser.
//   grammar - productions as (left-hand side, right-hand side) pairs
//   table   - list of (nonterminal, lookahead token) cells; the position of a cell
//             in the list is the index of the production in `grammar` to apply
//   input   - the characters to parse
// Prints a trace of every step and returns unit once the stack is empty.
// Raises (via failwith) on a terminal that does not match the input, on a
// (nonterminal, token) combination missing from the table, and on an Error symbol.
let parser (grammar: (SYMBOL * SYMBOL list)[]) table input =
    // Start symbol on top, end-of-input marker ($) at the bottom.
    let initial = [ NonTerminal RULE.S; Terminal TOKEN.END ]

    let rec analyse stack (token, remaining) =
        printfn "Token: %A, Stack: %A" token stack
        match stack with
        | [] -> ()
        | (Terminal term as sym) :: rest when term = token ->
            // Terminal on the stack matches the lookahead: drop it and advance the input.
            printfn "pop %A" sym
            analyse rest (tokenize remaining)
        | Terminal _ :: _ ->
            failwith (sprintf "bad term on input: %A" token)
        | NonTerminal nterm :: rest ->
            // Use the nonterminal and the lookahead to select a production from the table.
            printfn "svalue: %A, token: %A" nterm token
            let idx =
                match List.tryFindIndex (fun cell -> cell = (nterm, token)) table with
                | Some found -> found
                | None -> failwith (sprintf "No rule found for %A → %A" nterm token)
            let lhs, rhs = grammar.[idx]
            printfn "%d: %A → %A" (idx + 1) lhs rhs
            // Replace the nonterminal by the production's right-hand side; its first
            // symbol becomes the new top. The lookahead is not consumed.
            analyse (rhs @ rest) (token, remaining)
        | SYMBOL.Error :: _ -> failwith "error"

    analyse initial (tokenize input)

Test our parser with different inputs

In [9]:
// Well-formed input: a single parenthesised sum
parser grammar table "(a+a)"

Token: LPAR, Stack: [NonTerminal S; Terminal END]
svalue: S, token: LPAR
2: NonTerminal S → [Terminal LPAR; NonTerminal S; Terminal PLUS; NonTerminal F; Terminal RPAR]
Token: LPAR, Stack: [Terminal LPAR; NonTerminal S; Terminal PLUS; NonTerminal F; Terminal RPAR;
 Terminal END]
pop Terminal LPAR
Token: A, Stack: [NonTerminal S; Terminal PLUS; NonTerminal F; Terminal RPAR; Terminal END]
svalue: S, token: A
1: NonTerminal S → [NonTerminal F]
Token: A, Stack: [NonTerminal F; Terminal PLUS; NonTerminal F; Terminal RPAR; Terminal END]
svalue: F, token: A
3: NonTerminal F → [Terminal A]
Token: A, Stack: [Terminal A; Terminal PLUS; NonTerminal F; Terminal RPAR; Terminal END]
pop Terminal A
Token: PLUS, Stack: [Terminal PLUS; NonTerminal F; Terminal RPAR; Terminal END]
pop Terminal PLUS
Token: A, Stack: [NonTerminal F; Terminal RPAR; Terminal END]
svalue: F, token: A
3: NonTerminal F → [Terminal A]
Token: A, Stack: [Terminal A; Terminal RPAR; Terminal END]
pop Terminal A
Token: RPAR, Stack: [T

In [10]:
// Well-formed input: a parenthesised sum nested as the left operand (rule 2 applied twice)
parser grammar table "((a+a)+a)"

Token: LPAR, Stack: [NonTerminal S; Terminal END]
svalue: S, token: LPAR
2: NonTerminal S → [Terminal LPAR; NonTerminal S; Terminal PLUS; NonTerminal F; Terminal RPAR]
Token: LPAR, Stack: [Terminal LPAR; NonTerminal S; Terminal PLUS; NonTerminal F; Terminal RPAR;
 Terminal END]
pop Terminal LPAR
Token: LPAR, Stack: [NonTerminal S; Terminal PLUS; NonTerminal F; Terminal RPAR; Terminal END]
svalue: S, token: LPAR
2: NonTerminal S → [Terminal LPAR; NonTerminal S; Terminal PLUS; NonTerminal F; Terminal RPAR]
Token: LPAR, Stack: [Terminal LPAR; NonTerminal S; Terminal PLUS; NonTerminal F; Terminal RPAR;
 Terminal PLUS; NonTerminal F; Terminal RPAR; Terminal END]
pop Terminal LPAR
Token: A, Stack: [NonTerminal S; Terminal PLUS; NonTerminal F; Terminal RPAR; Terminal PLUS;
 NonTerminal F; Terminal RPAR; Terminal END]
svalue: S, token: A
1: NonTerminal S → [NonTerminal F]
Token: A, Stack: [NonTerminal F; Terminal PLUS; NonTerminal F; Terminal RPAR; Terminal PLUS;
 NonTerminal F; Terminal RPAR;

In [11]:
// Malformed input: the operand after '+' is missing, so the table has no entry for (F, RPAR)
parser grammar table "(a+)"

Token: LPAR, Stack: [NonTerminal S; Terminal END]
svalue: S, token: LPAR
2: NonTerminal S → [Terminal LPAR; NonTerminal S; Terminal PLUS; NonTerminal F; Terminal RPAR]
Token: LPAR, Stack: [Terminal LPAR; NonTerminal S; Terminal PLUS; NonTerminal F; Terminal RPAR;
 Terminal END]
pop Terminal LPAR
Token: A, Stack: [NonTerminal S; Terminal PLUS; NonTerminal F; Terminal RPAR; Terminal END]
svalue: S, token: A
1: NonTerminal S → [NonTerminal F]
Token: A, Stack: [NonTerminal F; Terminal PLUS; NonTerminal F; Terminal RPAR; Terminal END]
svalue: F, token: A
3: NonTerminal F → [Terminal A]
Token: A, Stack: [Terminal A; Terminal PLUS; NonTerminal F; Terminal RPAR; Terminal END]
pop Terminal A
Token: PLUS, Stack: [Terminal PLUS; NonTerminal F; Terminal RPAR; Terminal END]
pop Terminal PLUS
Token: RPAR, Stack: [NonTerminal F; Terminal RPAR; Terminal END]
svalue: F, token: RPAR


Unhandled exception: System.Exception: No rule found for F → RPAR
   at FSI_0011.analyse@9(Tuple`2[] grammar, FSharpList`1 table, FSharpList`1 stack, TOKEN tupledArg0, IEnumerable`1 tupledArg1)
   at FSI_0011.parser(Tuple`2[] grammar, FSharpList`1 table, IEnumerable`1 input)
   at <StartupCode$FSI_0014>.$FSI_0014.main@()