In [15]:
import re

# Define token types
TOKEN_TYPES = [
    ('IF', r'if'),
    ('ELSE', r'else'),
    ('WHILE', r'while'),
    ('FOR', r'for'),
    ('ID', r'[a-zA-Z_][a-zA-Z0-9_]*'),
    ('INT', r'\d+'),
    ('FLOAT', r'\d+\.\d+'),
    ('STRING', r'\".*?\"'),
    ('PLUS', r'\+'),
    ('MINUS', r'-'),
    ('TIMES', r'\*'),
    ('DIVIDE', r'/'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('ASSIGN', r'='),
    ('SEMICOLON', r';'),
    ('NEWLINE', r'\n'),
    ('WS', r'\s+'),  
]

# Concatenate all token regexes into one string for lex to use
TOKEN_REGEX = '|'.join('(?P<%s>%s)' % pair for pair in TOKEN_TYPES)

# Tokenize input string
def tokenize(input_string):
    return [match.group(0) for match in re.finditer(TOKEN_REGEX, input_string)]

# Tokenize input file
def tokenize_file(file_path):
    with open(file_path, 'r') as file:
        input_string = file.read()
    return tokenize(input_string)

def parse(tokens):
    i = 0
    statements = []
    while i < len(tokens):
        if tokens[i] == 'if':
            parse_result, i = parse_if(tokens, i)
            statements.append(parse_result)
        elif tokens[i] in ['int', 'float', 'string']:
            parse_result, i = parse_variable_declaration(tokens, i)
            statements.append(parse_result)
        elif tokens[i] == 'while':
            parse_result, i = parse_while(tokens, i)
            statements.append(parse_result)
        elif tokens[i] in {'\n', ' '}:
            i += 1  
            continue
        else:
            print("No parse result. Unexpected token:", tokens[i])
            return None
    return statements

def parse_if(tokens, i):
    if_index = tokens.index('{', i)
    condition_tokens = tokens[i + 2:if_index - 1] 
    
    # Find the index of '}' token for the if block
    close_brace_index = if_index
    brace_count = 1
    while brace_count != 0:
        close_brace_index += 1
        if tokens[close_brace_index] == '{':
            brace_count += 1
        elif tokens[close_brace_index] == '}':
            brace_count -= 1
    
    if_block_tokens = tokens[if_index + 1:close_brace_index]  
    
    # Find the index of '{' token for the else block
    else_index = close_brace_index + 1
    while tokens[else_index] != '{':
        else_index += 1
    
    # Find the index of '}' token for the else block
    close_else_brace_index = else_index
    brace_count = 1
    while brace_count != 0:
        close_else_brace_index += 1
        if tokens[close_else_brace_index] == '{':
            brace_count += 1
        elif tokens[close_else_brace_index] == '}':
            brace_count -= 1
    
    else_block_tokens = tokens[else_index + 1:close_else_brace_index]  
    
    # Create parse result for if-else statement
    parse_result = ('if_else', parse_expression(condition_tokens), parse_block(if_block_tokens), parse_block(else_block_tokens))
    
    return parse_result, close_else_brace_index + 1  

def parse_variable_declaration(tokens, i):
    end_index = tokens.index(';', i)
    parse_result = ('variable_declaration', tokens[i:end_index + 1])
    return parse_result, end_index + 1  

def parse_while(tokens, i):
    open_brace_index = i + 1
    while tokens[open_brace_index] != '{':
        open_brace_index += 1

    close_brace_index = open_brace_index
    brace_count = 1
    while brace_count != 0:
        close_brace_index += 1
        if tokens[close_brace_index] == '{':
            brace_count += 1
        elif tokens[close_brace_index] == '}':
            brace_count -= 1

    condition_tokens = tokens[i + 2:open_brace_index - 1]  
    block_tokens = tokens[open_brace_index + 1:close_brace_index]  
    parse_result = ('while', parse_expression(condition_tokens), parse_block(block_tokens))
    return parse_result, close_brace_index + 1  

def parse_expression(expression_tokens):
    return ('expression', expression_tokens)

def parse_block(block_tokens):
    statements = []
    statement = []
    for token in block_tokens:
        if token == ';':
            statements.append(parse_statement(statement))
            statement = []
        else:
            statement.append(token)
    return ('block', statements)

def parse_statement(statement_tokens):
    return ('statement', statement_tokens)

# Test the parser with input files
file_paths = ["case1.txt", "case2.txt", "case3.txt"]  

for file_path in file_paths:
    tokens = tokenize_file(file_path)
    print("\nTokens for", file_path, ":", tokens)
    parse_tree = parse(tokens)
    print("\nParse tree for", file_path, ":", parse_tree)
    print("\n\n\n")



Tokens for case1.txt : ['int', ' ', 'x', ' ', '=', ' ', '5', ';', '\n', 'int', ' ', 'y', ' ', '=', ' ', '10', ';', '\n', 'int', ' ', 'result', ';', '\n', '\n', 'if', ' ', '(', 'x', ' ', ' ', 'y', ')', ' ', '{', '\n', '    ', 'result', ' ', '=', ' ', 'x', ';', '\n', '}', ' ', 'else', ' ', '{', '\n', '    ', 'result', ' ', '=', ' ', 'y', ';', '\n', '}', '\n']

Parse tree for case1.txt : [('variable_declaration', ['int', ' ', 'x', ' ', '=', ' ', '5', ';']), ('variable_declaration', ['int', ' ', 'y', ' ', '=', ' ', '10', ';']), ('variable_declaration', ['int', ' ', 'result', ';']), ('if_else', ('expression', ['(', 'x', ' ', ' ', 'y', ')']), ('block', [('statement', ['\n', '    ', 'result', ' ', '=', ' ', 'x'])]), ('block', [('statement', ['\n', '    ', 'result', ' ', '=', ' ', 'y'])]))]





Tokens for case2.txt : ['int', ' ', 'x', ' ', '=', ' ', '5', ';', '\n', 'int', ' ', 'y', ' ', '=', ' ', '6', ';']

Parse tree for case2.txt : [('variable_declaration', ['int', ' ', 'x', ' ', '=', ' ',

In [16]:
def perform_semantic_analysis(parse_tree):
    
    # Define a symbol table to store variable names and their types
    symbol_table = {}
    
    # Recursively traverse the parse tree and perform type checking
    def check_types(node):
        nonlocal symbol_table
        
        if node[0] == 'variable_declaration':
            # Extract variable name and type
            variable_type = node[1][0]
            variable_name = node[1][2]
            
            # Check if variable is already declared
            if variable_name in symbol_table:
                print(f"Error: Variable '{variable_name}' redeclared.")
                return False
            
            # Add variable to symbol table
            symbol_table[variable_name] = variable_type
        
        elif node[0] == 'expression':
            pass
        
        elif node[0] == 'if_else':
            # Recursively check types in if-else statements
            condition = node[1]
            if not check_types(condition):
                return False
            
            if_block = node[2]
            else_block = node[3]
            if not check_types(if_block):
                return False
            if not check_types(else_block):
                return False
        
        elif node[0] == 'while':
            # Recursively check types in while loops
            condition = node[1]
            if not check_types(condition):
                return False
            
            block = node[2]
            if not check_types(block):
                return False
        
        elif node[0] == 'block':
            # Recursively check types in blocks of statements
            for statement in node[1]:
                if not check_types(statement):
                    return False
        
        return True
    
    # Perform semantic analysis starting from the root of the parse tree
    return check_types(parse_tree)


# Test the parser with input files
file_paths = ["case1.txt", "case2.txt", "case3.txt"]  

for file_path in file_paths:
    tokens = tokenize_file(file_path)
    parse_tree = parse(tokens)
    print("\nParse tree for", file_path, ":", parse_tree)
    print("\n")

  
    if parse_tree is not None:
        if perform_semantic_analysis(parse_tree):
            print("\n\nSemantic analysis passed.\n\n")
        else:
            print("\n\nSemantic analysis failed.")



Parse tree for case1.txt : [('variable_declaration', ['int', ' ', 'x', ' ', '=', ' ', '5', ';']), ('variable_declaration', ['int', ' ', 'y', ' ', '=', ' ', '10', ';']), ('variable_declaration', ['int', ' ', 'result', ';']), ('if_else', ('expression', ['(', 'x', ' ', ' ', 'y', ')']), ('block', [('statement', ['\n', '    ', 'result', ' ', '=', ' ', 'x'])]), ('block', [('statement', ['\n', '    ', 'result', ' ', '=', ' ', 'y'])]))]




Semantic analysis passed.



Parse tree for case2.txt : [('variable_declaration', ['int', ' ', 'x', ' ', '=', ' ', '5', ';']), ('variable_declaration', ['int', ' ', 'y', ' ', '=', ' ', '6', ';'])]




Semantic analysis passed.



Parse tree for case3.txt : [('variable_declaration', ['int', ' ', 'x', ' ', '=', ' ', '5', ';']), ('variable_declaration', ['int', ' ', 'result', ' ', '=', ' ', '0', ';']), ('while', ('expression', ['(', 'x', ' ', ' ', '0', ')']), ('block', [('statement', ['\n', '    ', 'result', ' ', '=', ' ', 'result', ' ', '+', ' ', 'x']), ('st

In [17]:
def generate_code(parse_tree):
    code = ""
    for statement in parse_tree:
        code += generate_statement_code(statement) + "\n"
    return code

def generate_statement_code(statement):
    if statement[0] == 'variable_declaration':
        return generate_variable_declaration_code(statement[1])
    elif statement[0] == 'if_else':
        return generate_if_else_code(statement[1])
    elif statement[0] == 'while':
        return generate_while_code(statement[1])
    else:
        return ""  

def generate_variable_declaration_code(declaration_tokens):
    declaration = " ".join(declaration_tokens)
    return declaration

def generate_if_else_code(if_else_tokens):
    condition = generate_expression_code(if_else_tokens[0])  
    if_block_code = generate_block_code(if_else_tokens[1][0]) if if_else_tokens[1] else ""
    else_block_code = generate_block_code(if_else_tokens[2][0]) if if_else_tokens[2] else ""
    code = f"if ({condition}) {{\n{if_block_code}\n}} else {{\n{else_block_code}\n}}"
    return code

def generate_while_code(while_tokens):
    condition = generate_expression_code(while_tokens[0]) 
    block_code = generate_block_code(while_tokens[1])
    code = f"while ({condition}) {{\n{block_code}\n}}"
    return code


def generate_block_code(block_tokens):
    code = ""
    for statement_tokens in block_tokens:
        if statement_tokens[0] == 'statement':
            code += generate_statement_code(statement_tokens[1]) + ";\n"
    return code

def generate_expression_code(expression_tokens):
    # Join tokens while handling nested expressions
    expression = ""
    for token in expression_tokens:
        if isinstance(token, list):  
            expression += generate_expression_code(token)
        else:
            expression += token + " "
    return expression.strip()  

# Test code generation
test_cases = [
    ("Test Case 1", [
        ('variable_declaration', ['int', 'x', '=', '5', ';']),
        ('variable_declaration', ['int', 'y', '=', '10', ';']),
        ('variable_declaration', ['int', 'result', ';']),
        ('if_else', [('expression', ['(', 'x', '>', 'y', ')']), ('block', [[('statement', ['result', '=', 'x'])]]), ('block', [[('statement', ['result', '=', 'y'])]])])
    ]),
    ("Test Case 2", [
        ('variable_declaration', ['int', 'x', '=', '5', ';']),
        ('variable_declaration', ['int', 'y', '=', '6', ';'])
    ]),
    ("Test Case 3", [
        ('variable_declaration', ['int', 'x', '=', '5', ';']),
        ('variable_declaration', ['int', 'result', '=', '0', ';']),
        ('while', [('expression', ['(', 'x', '>', '0', ')']), ('block', [[('statement', ['result', '=', 'result', '+', 'x'])], [('statement', ['x', '=', 'x', '-', '1'])]])])
    ])
]


for name, parse_tree in test_cases:
    print(f"Generated Code for {name}:\n")
    generated_code = generate_code(parse_tree)
    
    print(generated_code)
   




Generated Code for Test Case 1:

int x = 5 ;
int y = 10 ;
int result ;
if (expression ( x > y )) {

} else {

}

Generated Code for Test Case 2:

int x = 5 ;
int y = 6 ;

Generated Code for Test Case 3:

int x = 5 ;
int result = 0 ;
while (expression ( x > 0 )) {

}



In [18]:
def generate_assembly(intermediate_code):
    assembly_code = []
    for operation in intermediate_code:
        if operation[0] == 'assignment':
            assembly_instruction = f"MOV {operation[1]}, {operation[2]}"
        elif operation[0] == 'addition':
            assembly_instruction = f"ADD {operation[1]}, {operation[2]}, {operation[3]}"
        elif operation[0] == 'if_else':
            condition_code = generate_expression_assembly(operation[1][0])
            if_block_code = generate_block_assembly(operation[1][1])
            else_block_code = generate_block_assembly(operation[1][2])
            assembly_instruction = (
                f"IF {condition_code} GOTO {if_block_code} ELSE GOTO {else_block_code}"
            )
        elif operation[0] == 'while':
            condition_code = generate_expression_assembly(operation[1][0])
            block_code = generate_block_assembly(operation[1][1])
            assembly_instruction = (
                f"WHILE {condition_code} DO GOTO {block_code}"
            )
        else:
            assembly_instruction = ""
        assembly_code.append(assembly_instruction)
    return assembly_code

def generate_expression_assembly(expression_tokens):
    if expression_tokens[1] == '>':
        return f"{expression_tokens[0]} > {expression_tokens[2]}"
    elif expression_tokens[1] == '<':
        return f"{expression_tokens[0]} < {expression_tokens[2]}"
    elif expression_tokens[1] == '==':
        return f"{expression_tokens[0]} == {expression_tokens[2]}"
    else:
        return ""  # Placeholder for other types of expressions

def generate_block_assembly(block_tokens):
    assembly_code = []
    for statement_tokens in block_tokens:
        if statement_tokens[0] == 'assignment':
            assembly_instruction = f"MOV {statement_tokens[1]}, {statement_tokens[2]}"
        elif statement_tokens[0] == 'addition':
            assembly_instruction = f"ADD {statement_tokens[1]}, {statement_tokens[2]}, {statement_tokens[3]}"
        else:
            assembly_instruction = ""
        assembly_code.append(assembly_instruction)
    return assembly_code

#Test case 1: Identifiers
intermediate_code = [
    ('assignment', 'x', '5'),
    ('assignment', 'y', '10'),
    ('assignment', 'result', ''),
]

assembly_code = generate_assembly(intermediate_code)
print("Assembly code for test case 1:")
for instruction in assembly_code:
    print(instruction)
print()

# Test case 2: while loop
intermediate_code_while = [
    ('assignment', 'x', '5'),
    ('assignment', 'result', '0'),
    ('while', [('expression', ['(', 'x', '>', '0', ')']), ('block', [[('assignment', 'result', 'result', '+', 'x')], [('assignment', 'x', 'x', '-', '1')]])])
]

assembly_code_while = generate_assembly(intermediate_code_while)
print("Assembly code for test case 2 (while loop):")
for instruction in assembly_code_while:
    print(instruction)
print()

# Test case 3: if-else statement with an equality condition
intermediate_code_if_else = [
    ('assignment', 'x', '5'),
    ('assignment', 'y', '10'),
    ('assignment', 'result', ''),
    ('if_else', [('expression', ['(', 'x', '==', 'y', ')']), ('block', [[('assignment', 'result', 'equal')]]), ('block', [[('assignment', 'result', 'not_equal')]])])
]

assembly_code_if_else = generate_assembly(intermediate_code_if_else)
print("Assembly code for test case 3 (if-else statement with equality condition):")
for instruction in assembly_code_if_else:
    print(instruction)


Assembly code for test case 1:
MOV x, 5
MOV y, 10
MOV result, 

Assembly code for test case 2 (while loop):
MOV x, 5
MOV result, 0
WHILE  DO GOTO ['', '']

Assembly code for test case 3 (if-else statement with equality condition):
MOV x, 5
MOV y, 10
MOV result, 
IF  GOTO ['', ''] ELSE GOTO ['', '']
