In [6]:
from typing import NamedTuple
import re

class Token(NamedTuple):
    """A single lexical token: its class, its value and the line it was found on."""

    # Token class label, e.g. 'Keyword', 'ID', 'DT', 'AM', 'Integer', 'Float',
    # 'String', 'Special Character', or a raw spec name such as 'EOL'.
    class_part: str
    # The matched lexeme. NOTE: tokenize() stores an int/float here for numeric
    # tokens, so the value is not always a str despite the annotation.
    value_part: str
    # 1-based line number on which the token was found.
    line_Number: int

def tokenize(code):
    """Lazily split *code* into ``Token`` tuples.

    The source text is scanned with one combined regular expression built
    from ``token_specification``; alternatives are tried in list order, so
    earlier entries win when several could match at the same position.

    Post-processing applied to the raw matches:

    * ``NUMBER`` becomes ``'Float'`` (value converted with ``float``) when the
      lexeme contains a ``.``, otherwise ``'Integer'`` (converted with ``int``).
    * ``ID`` is reclassified as ``'Keyword'``, ``'AM'`` (access modifier) or
      ``'DT'`` (data type) when the lexeme is in the corresponding set.
    * ``PUNCT`` is reported as ``'Special Character'``.
    * Line endings and runs of spaces/tabs produce no token.

    Args:
        code: The program text to tokenize.

    Yields:
        ``Token(class_part, value_part, line_Number)`` for every lexeme, with
        ``line_Number`` being the 1-based line on which the lexeme starts.

    Raises:
        RuntimeError: If a character matches none of the token patterns.
    """
    keywords = {'if', 'else', 'do', 'for', 'print', 'break', 'return',
                'continue', 'new', 'static', 'void'}
    access_modifiers = {'public', 'private', 'protected'}
    data_types = {'int', 'float', 'string'}

    # Accept Windows (\r\n), old Mac (\r) and Unix (\n) line endings; a bare
    # '\r' would otherwise fall through to MISMATCH and abort the scan.
    newline = r'\r\n|\r|\n'

    token_specification = [
        ('NUMBER',    r'\d+(\.\d*)?'),                   # Integer or decimal number
        ('ASSIGN_OP', r'='),                             # Assignment operator
        ('EOL',       r';'),                             # Statement terminator
        ('ID',        r'[A-Za-z]+'),                     # Identifiers
        ('PM',        r'[+\-]'),                         # Plus Minus
        ('MDM',       r'[*\/%]'),                        # Multiply Divide Modulus
        ('PUNCT',     r'[-!$^&()|{}\[\]:\<>?,.\/]'),     # Special Characters
        ('String',    r'("(?:[^"\\]|\\"|\\)*")'),        # Double-quoted string literal
        ('NEWLINE',   newline),                          # Line endings
        ('SKIP',      r'[ \t]+'),                        # Skip over spaces and tabs
        ('MISMATCH',  r'.'),                             # Any other character
    ]
    tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)

    line_num = 1
    for mo in re.finditer(tok_regex, code):
        kind = mo.lastgroup
        value = mo.group()

        # Matches that never produce a token are handled first.
        if kind == 'NEWLINE':
            line_num += 1
            continue
        if kind == 'SKIP':
            continue
        if kind == 'MISMATCH':
            raise RuntimeError(f'{value!r} unexpected on line {line_num}')

        if kind == 'NUMBER':
            if '.' in value:
                value = float(value)
                kind = 'Float'
            else:
                value = int(value)
                kind = 'Integer'
        elif kind == 'ID':
            if value in keywords:
                kind = 'Keyword'
            elif value in access_modifiers:
                kind = 'AM'
            elif value in data_types:
                kind = 'DT'
        elif kind == 'PUNCT':
            kind = 'Special Character'

        yield Token(kind, value, line_num)

        # A string literal may span several physical lines because its
        # pattern consumes line breaks; account for them so that the tokens
        # that follow are reported on the correct line.
        if kind == 'String':
            line_num += len(re.findall(newline, value))

# Inline sample program; not used by the run below, which tokenizes the
# contents of Source_Code.txt instead.
statements = '''
   PUBLIC
   IF quantity THEN
        total = total + price * quantity;
        price= quantity * 0.05
        int a;
        int y;
        y=3 + 4
        "Hello WOrld"
        {}
    ENDIF;
'''

# Read the whole source file up front; the context manager guarantees the
# file handle is closed even if reading fails.
with open('Source_Code.txt', 'r') as f:
    program = f.read()

# Print every token on its own line.
for token in tokenize(program):
    print(token)


Token(class_part='Keyword', value_part='static', line_Number=1)
Token(class_part='Keyword', value_part='void', line_Number=1)
Token(class_part='ID', value_part='Main', line_Number=1)
Token(class_part='Special Character', value_part='(', line_Number=1)
Token(class_part='Special Character', value_part=')', line_Number=1)
Token(class_part='Special Character', value_part='{', line_Number=2)
Token(class_part='DT', value_part='int', line_Number=3)
Token(class_part='ID', value_part='a', line_Number=3)
Token(class_part='EOL', value_part=';', line_Number=3)
Token(class_part='DT', value_part='float', line_Number=4)
Token(class_part='ID', value_part='b', line_Number=4)
Token(class_part='EOL', value_part=';', line_Number=4)
Token(class_part='DT', value_part='float', line_Number=5)
Token(class_part='ID', value_part='d', line_Number=5)
Token(class_part='EOL', value_part=';', line_Number=5)
Token(class_part='DT', value_part='string', line_Number=6)
Token(class_part='ID', value_part='word', line_Numbe