<a href="https://colab.research.google.com/github/xaemonn/compiler_design/blob/main/LEX_and_YACC_Compiler.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# LEX and YACC Compiler in Colab

Drawbacks:
* Regular interrupts (Ctrl+D, Ctrl+C) for shell won't work in Colab while inputting for program.
<br>Workaround: Store your inputs in a txt file and pass it to the program.

In [31]:
#@title Install *prerqeuisites* (run this cell first to work on LEX/YACC)
!sudo apt install flex bison

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
bison is already the newest version (2:3.8.2+dfsg-1build1).
flex is already the newest version (2.6.4-8build2).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


## Lex only

In [32]:
#@title Writing Lex program

%%writefile program.l

%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int lineno = 1;

#define MAX_IDS 1000
char *id_table[MAX_IDS];
int id_count = 0;

int get_token_num(char *id) {
    for (int i = 0; i < id_count; i++) {
        if (strcmp(id_table[i], id) == 0) {
            return i + 1;
        }
    }
    id_table[id_count] = strdup(id);
    id_count++;
    return id_count;
}
%}

DIGIT       [0-9]
LETTER      [a-zA-Z]
ID          {LETTER}({LETTER}|{DIGIT})*
NUMBER      {DIGIT}+
WS          [ \t]+

%%

"Integer"           { printf("TOKEN: %-15s RETURN: INTEGER\n", yytext); }
"Begin"             { printf("TOKEN: %-15s RETURN: BEGIN\n", yytext); }
"read"              { printf("TOKEN: %-15s RETURN: READ\n", yytext); }
"write"             { printf("TOKEN: %-15s RETURN: WRITE\n", yytext); }
"if"                { printf("TOKEN: %-15s RETURN: IF\n", yytext); }
"then"              { printf("TOKEN: %-15s RETURN: THEN\n", yytext); }
"else"              { printf("TOKEN: %-15s RETURN: ELSE\n", yytext); }
"endif"             { printf("TOKEN: %-15s RETURN: ENDIF\n", yytext); }
"while"             { printf("TOKEN: %-15s RETURN: WHILE\n", yytext); }
"do"                { printf("TOKEN: %-15s RETURN: DO\n", yytext); }
"endwhile"          { printf("TOKEN: %-15s RETURN: ENDWHILE\n", yytext); }
"end"               { printf("TOKEN: %-15s RETURN: END\n", yytext); }

":="                { printf("TOKEN: %-15s RETURN: ASSIGN\n", yytext); }
"<"                 { printf("TOKEN: %-15s RETURN: LT\n", yytext); }
">"                 { printf("TOKEN: %-15s RETURN: GT\n", yytext); }
"=="                { printf("TOKEN: %-15s RETURN: EQ\n", yytext); }
"*"                 { printf("TOKEN: %-15s RETURN: MUL\n", yytext); }
"+"                 { printf("TOKEN: %-15s RETURN: ADD\n", yytext); }
"-"                 { printf("TOKEN: %-15s RETURN: SUB\n", yytext); }
";"                 { printf("TOKEN: %-15s RETURN: SEMICOLON\n", yytext); }
","                 { printf("TOKEN: %-15s RETURN: COMMA\n", yytext); }

{ID} {
    int token_num = get_token_num(yytext);
    printf("TOKEN: %-15s RETURN: IDENTIFIER TOKEN_NUM: %d\n", yytext, token_num);
}

{NUMBER}            { printf("TOKEN: %-15s RETURN: NUMBER\n", yytext); }
{WS}                { /* ignore spaces */ }
\n                  { lineno++; }
.                   { printf("TOKEN: %-15s RETURN: UNKNOWN\n", yytext); }

%%

int main(int argc, char **argv) {
    yylex();
    return 0;
}

int yywrap() {
    return 1;
}

Overwriting program.l


if you want to use at txt as an input

In [33]:
%%writefile input.txt
Integer a, b
Begin
read n;
if a < 10
then
b :=1;
else;
endif
while a < 10
do
b := 5*a;
a := a+1;
endwhile;
write a;
write b;
end

Overwriting input.txt


In [34]:
#@title Shell Execution (you can rewrite the commands as per your need, eg. if you want to include a file as an input)
%%shell

flex program.l
gcc lex.yy.c -o lexer
./lexer < input.txt

TOKEN: Integer         RETURN: INTEGER
TOKEN: a               RETURN: IDENTIFIER TOKEN_NUM: 1
TOKEN: ,               RETURN: COMMA
TOKEN: b               RETURN: IDENTIFIER TOKEN_NUM: 2
TOKEN: Begin           RETURN: BEGIN
TOKEN: read            RETURN: READ
TOKEN: n               RETURN: IDENTIFIER TOKEN_NUM: 3
TOKEN: ;               RETURN: SEMICOLON
TOKEN: if              RETURN: IF
TOKEN: a               RETURN: IDENTIFIER TOKEN_NUM: 1
TOKEN: <               RETURN: LT
TOKEN: 10              RETURN: NUMBER
TOKEN: then            RETURN: THEN
TOKEN: b               RETURN: IDENTIFIER TOKEN_NUM: 2
TOKEN: :=              RETURN: ASSIGN
TOKEN: 1               RETURN: NUMBER
TOKEN: ;               RETURN: SEMICOLON
TOKEN: else            RETURN: ELSE
TOKEN: ;               RETURN: SEMICOLON
TOKEN: endif           RETURN: ENDIF
TOKEN: while           RETURN: WHILE
TOKEN: a               RETURN: IDENTIFIER TOKEN_NUM: 1
TOKEN: <               RETURN: LT
TOKEN: 10              RETURN: NUMBE



## Lex and Yacc combined

In [35]:
#@title Writing YACC program
%%writefile program.y

%{
    #include<stdio.h>
    #include<stdlib.h>
%}
%token DIGIT LETTER UND NL
%%
stmt: variable NL {printf("Valid Identifier\n");exit(0);}
variable: LETTER alphanumeric;
alphanumeric: LETTER alphanumeric
            | DIGIT alphanumeric
            | UND alphanumeric
            | LETTER
            | DIGIT
            | UND;
%%

int yyerror(){
    printf("Invalid Identifier\n");
    exit(0);
}

void main(){
    printf("Enter the variable name: ");
    yyparse();
}

Overwriting program.y


In [36]:
#@title Writing Lex program
%%writefile program.l

%{
    #include "y.tab.h"
%}
%%
[a-zA-Z] {return LETTER;}
[0-9] {return DIGIT;}
[_] {return UND;}
\n {return NL;}
. {return yytext[0];}
%%

Overwriting program.l


if you want to use at txt as an input

In [37]:
%%writefile program.txt

This is a sample file.

Overwriting program.txt


In [None]:
#@title Shell Execution (you can rewrite the commands as per your need, eg. if you want to include a file as an input)
%%shell

yacc -d program.y
lex program.l
cc y.tab.c lex.yy.c -ll
./a.out

[01m[Ky.tab.c:[m[K In function ‘[01m[Kyyparse[m[K’:
 1024 |       yychar = [01;35m[Kyylex[m[K ();
      |                [01;35m[K^~~~~[m[K
 1165 |       [01;35m[Kyyerror[m[K (YY_("syntax error"));
      |       [01;35m[K^~~~~~~[m[K
      |       [32m[Kyyerrok[m[K
Enter the variable name: 