In [1]:
def clean_input_file(input_path="in.txt", output_path="intermediate.txt"):
    """Strip comments and line numbers from an assembly source file.

    Every input line is trimmed, then:
      * blank lines and lines starting with '.' are dropped;
      * everything from the first '.' onwards is discarded as an inline comment;
      * a leading all-digit token (a line number) is removed.
    The remaining tokens are written single-space separated, one statement
    per line.

    Args:
        input_path: Path of the raw source file to read.
        output_path: Path of the cleaned file to write (overwritten).

    Note:
        The '.' test is purely textual, so a '.' inside an operand (for
        example inside a C'...' literal) also truncates the line.
    """
    with open(input_path, 'r') as infile, open(output_path, 'w') as outfile:
        for line in infile:
            line = line.strip()
            if not line or line.startswith('.'):  # skip empty or comment lines
                continue
            line = line.split('.', 1)[0]  # remove inline comments
            # Remove line numbers by skipping the first word if it's numeric
            tokens = line.split()
            if tokens and tokens[0].isdigit():
                tokens = tokens[1:]
            cleaned_line = " ".join(tokens)
            if cleaned_line:
                outfile.write(cleaned_line + '\n')

    # Report from inside the function so the message appears only once the
    # file has really been written, and names the file that was produced.
    print(f"✅ Step 1 complete: {output_path} generated.")


✅ Step 1 complete: intermediate.txt generated.


In [2]:
# Step 1: run the cleaner with its default paths (in.txt -> intermediate.txt).
clean_input_file()

In [3]:
# SIC/XE operation table: mnemonic -> (opcode byte, instruction format).
# The second element is the instruction length in bytes for formats 1-3.
optable = {
    'ADD': (0x18, 3), 'ADDF': (0x58, 3), 'ADDR': (0x90, 2),
    'AND': (0x40, 3), 'CLEAR': (0xB4, 2), 'COMP': (0x28, 3),
    'COMPF': (0x88, 3), 'COMPR': (0xA0, 2), 'DIV': (0x24, 3),
    'DIVF': (0x64, 3), 'DIVR': (0x9C, 2), 'FIX': (0xC4, 1),
    'FLOAT': (0xC0, 1), 'HIO': (0xF4, 1), 'J': (0x3C, 3),
    'JEQ': (0x30, 3), 'JGT': (0x34, 3), 'JLT': (0x38, 3),
    'JSUB': (0x48, 3), 'LDA': (0x00, 3), 'LDB': (0x68, 3),
    'LDCH': (0x50, 3), 'LDF': (0x70, 3), 'LDL': (0x08, 3),
    'LDS': (0x6C, 3), 'LDT': (0x74, 3), 'LDX': (0x04, 3),
    'LPS': (0xD0, 3), 'MUL': (0x20, 3), 'MULF': (0x60, 3),
    'MULR': (0x98, 2), 'NORM': (0xC8, 1), 'OR': (0x44, 3),
    'RD': (0xD8, 3), 'RMO': (0xAC, 2), 'RSUB': (0x4C, 3),
    'SHIFTL': (0xA4, 2), 'SHIFTR': (0xA8, 2), 'SIO': (0xF0, 1),
    'SSK': (0xEC, 3),  # was missing: without it SSK is treated as unknown
    'STA': (0x0C, 3), 'STB': (0x78, 3), 'STCH': (0x54, 3),
    'STF': (0x80, 3), 'STI': (0xD4, 3), 'STL': (0x14, 3),
    'STS': (0x7C, 3), 'STSW': (0xE8, 3), 'STT': (0x84, 3),
    'STX': (0x10, 3), 'SUB': (0x1C, 3), 'SUBF': (0x5C, 3),
    'SUBR': (0x94, 2), 'SVC': (0xB0, 2), 'TD': (0xE0, 3),
    'TIO': (0xF8, 1), 'TIX': (0x2C, 3), 'TIXR': (0xB8, 2),
    'WD': (0xDC, 3),
}


# Register mnemonic -> register number, as packed into format-2 operands.
registers = dict(
    A=0, X=1, L=2, B=3, S=4, T=5, F=6,
    PC=8, SW=9,
)

In [4]:
def generate_location_counters(intermediate_path="intermediate.txt",
                               output_path="out_pass1.txt"):
    """Assign a location-counter (LOCCTR) value to every source statement.

    Reads the cleaned source, records the LOCCTR in effect *before* each
    statement, and writes one ``"ADDR label opcode operand"`` line per
    statement (address as 4+ upper-case hex digits) to ``output_path``.

    Statement parsing is by whitespace token count:
      * 3 tokens -> label, opcode, operand
      * 2 tokens -> label, opcode if the second token is WORD/RESW/RESB/BYTE/
        START/END or starts with '+'; otherwise opcode, operand
      * 1 token  -> opcode
    Lines with any other token count are recorded with empty fields.

    Sizes: WORD = 3, RESW = 3 * n, RESB = n, BYTE = characters in C'..' or
    hex digits // 2 in X'..', '+'-prefixed opcode = 4, otherwise the size
    stored in the module-level ``optable``; unknown opcodes advance by 0.
    Processing stops after the END statement has been recorded.

    Args:
        intermediate_path: Cleaned source file to read.
        output_path: Listing file to write (overwritten).

    Returns:
        list of ``(address, fields)`` tuples in source order, where
        ``address`` is an int and ``fields`` is a list of tokens
        (``[label, opcode, operand]`` for all but a leading START line, which
        keeps its raw tokens).  An empty input file yields ``[]``.

    Raises:
        ValueError: if a START address or a RESW/RESB operand is not numeric.
    """
    locctr = 0
    loc_list = []

    with open(intermediate_path) as f:
        lines = f.readlines()

    # --- handle START ---------------------------------------
    # Guard against an empty file: indexing lines[0] used to raise IndexError.
    first = lines[0].strip().split() if lines else []
    if len(first) >= 3 and first[1].upper() == "START":
        locctr = int(first[2], 16)
        loc_list.append((locctr, first))   # record first line
        lines = lines[1:]

    # --- main pass ------------------------------------------
    for raw in lines:
        line = raw.strip()
        if not line:
            continue

        parts = line.split()
        label = opcode = operand = ""

        if len(parts) == 3:
            label, opcode, operand = parts
        elif len(parts) == 2:
            if parts[1].upper() in ("WORD", "RESW", "RESB", "BYTE", "START", "END") or parts[1].startswith("+"):
                label, opcode = parts
            else:
                opcode, operand = parts
        elif len(parts) == 1:
            opcode = parts[0]

        # ---- record current LOCCTR before incrementing -----
        loc_list.append((locctr, [label, opcode, operand]))

        # ---- LOCCTR update logic ---------------------------
        op_uc = opcode.upper()

        if op_uc == "WORD":
            inc = 3
        elif op_uc == "RESW":
            inc = 3 * int(operand)
        elif op_uc == "RESB":
            inc = int(operand)
        elif op_uc == "BYTE":
            if operand.startswith("C'") and operand.endswith("'"):
                inc = len(operand[2:-1])          # one byte per character
            elif operand.startswith("X'") and operand.endswith("'"):
                inc = len(operand[2:-1]) // 2     # two hex digits per byte
            else:
                inc = 0
        elif op_uc == "END":
            break                                 # END is recorded, then we stop
        elif opcode.startswith("+"):  # format-4 instruction
            inc = 4
        elif op_uc in optable:  # machine opcodes from OPTAB
            size_flag = optable[op_uc][1]
            # '3X' is accepted for op tables that carry the packed-register
            # format handled in pass 2; it occupies three bytes.
            inc = 3 if size_flag == "3X" else int(size_flag)
        else:  # unknown instruction or directive without storage
            inc = 0

        locctr += inc  # advance location counter

    # --- write to output file -------------------------------
    with open(output_path, "w") as f:
        for addr, fields in loc_list:
            # Pad so that short token lists still unpack into three columns.
            lbl, opc, opr = (fields + ["", "", ""])[:3]
            f.write(f"{addr:04X} {lbl} {opc} {opr}\n")

    print(f"✅ Location counters written to {output_path}")
    return loc_list


In [5]:
def generate_symbol_table(intermediate_path="intermediate.txt",
                          output_path="symbTable.txt"):
    """Build the symbol table (label -> address) for the cleaned source.

    Walks the statements with the same token-count parsing and the same
    LOCCTR size rules as ``generate_location_counters`` and records the
    LOCCTR of every statement that carries a label.  A leading
    ``name START addr`` line sets the initial LOCCTR and is then skipped, so
    the program name itself is not entered.  When a label appears more than
    once, the first address is kept.  Scanning stops at END.

    The table is written to ``output_path`` as ``"LABEL ADDR"`` lines with
    the address as 4+ upper-case hex digits.

    Args:
        intermediate_path: Cleaned source file to read.
        output_path: Symbol-table file to write (overwritten).

    Returns:
        dict mapping label (str) to address (int), in definition order.
        An empty input file yields ``{}``.

    Raises:
        ValueError: if a START address or a RESW/RESB operand is not numeric.
    """

    symbol_table = {}

    # --- read source (already cleaned to `intermediate.txt`) ---
    with open(intermediate_path, "r") as f:
        lines = f.readlines()

    # --- initialise LOCCTR ------------------------------------
    locctr = 0
    # Guard against an empty file: indexing lines[0] used to raise IndexError.
    first_fields = lines[0].strip().split() if lines else []
    if len(first_fields) >= 3 and first_fields[1].upper() == "START":
        locctr = int(first_fields[2], 16)            # starting address
        lines = lines[1:]                            # skip START line

    # --- main scan -------------------------------------------
    for raw in lines:
        line = raw.strip()
        if not line:
            continue

        parts = line.split()
        label, opcode, operand = "", "", ""

        if len(parts) == 3:
            label, opcode, operand = parts
        elif len(parts) == 2:
            if parts[1].upper() in (
                "WORD", "RESW", "RESB", "BYTE",
                "START", "END"
            ) or parts[1].startswith("+"):
                label, opcode = parts
            else:
                opcode, operand = parts
        elif len(parts) == 1:
            opcode = parts[0]

        # ---- symbol table entry -----------------------------
        # First definition wins; later duplicates are ignored.
        if label and label not in symbol_table:
            symbol_table[label] = locctr

        # ---- LOCCTR increment rules -------------------------
        op_uc = opcode.upper()

        if op_uc == "WORD":
            locctr += 3
        elif op_uc == "RESW":
            locctr += 3 * int(operand)
        elif op_uc == "RESB":
            locctr += int(operand)
        elif op_uc == "BYTE":
            if operand.startswith("C'") and operand.endswith("'"):
                locctr += len(operand[2:-1])           # one byte per character
            elif operand.startswith("X'") and operand.endswith("'"):
                locctr += len(operand[2:-1]) // 2      # two hex digits per byte
        elif op_uc == "END":
            break
        elif opcode.startswith("+"):
            locctr += 4                                # format-4
        elif op_uc in optable:
            size_flag = optable[op_uc][1]              # 1,2,3,4 or '3X'
            if size_flag == "3X":
                locctr += 3
            else:
                locctr += int(size_flag)

    # --- write symbol table ----------------------------------
    with open(output_path, "w") as f:
        for lbl, addr in symbol_table.items():
            f.write(f"{lbl} {addr:04X}\n")

    print(f"✅ Symbol table written to {output_path}")
    return symbol_table


In [6]:
# Pass 1: both calls read intermediate.txt. The first writes the address
# listing to its default out_pass1.txt; the second writes symbTable.txt.
loc_list = generate_location_counters()
symtab = generate_symbol_table(intermediate_path="intermediate.txt", output_path="symbTable.txt")


✅ Location counters written to out_pass1.txt
✅ Symbol table written to symbTable.txt


In [7]:
def generate_object_code(intermediate,
                         op_table,
                         symbol_table_file,
                         registers,
                         output_path,
                         base_register=None):
    """Pass 2: translate the address listing into object code.

    Each listing line is split on whitespace and interpreted by token count:
    4 -> loc, label, opcode, operand; 3 -> loc, opcode, operand;
    2 -> loc, opcode; 1 -> opcode only; anything else is skipped.

    Encoding rules:
      * START/END/RESW/RESB/BASE/NOBASE emit no object code.  ``BASE sym``
        sets the base register to the symbol's address (left unchanged if
        the symbol is unknown) and ``NOBASE`` clears it.
      * RSUB is emitted as ``4F0000`` (``4F100000`` for ``+RSUB``).
      * Format 1: opcode byte.  Format 2: opcode byte plus two register
        nibbles (missing or unknown registers encode as 0).
      * Format ``"3X"``: opcode byte plus four packed register nibbles.
      * Format 3: ``#``/``@`` prefixes select immediate/indirect, a ``,X``
        suffix selects indexing.  A numeric immediate is stored directly in
        the displacement field; symbols are encoded PC-relative when the
        displacement fits in -2048..2047, else base-relative (0..4095) when
        a base is known, else with displacement 0 and neither flag set.
      * Format 4 (``+`` prefix): 20-bit address field holding the target.
      * WORD -> 6 hex digits; BYTE C'..' -> ASCII codes; BYTE X'..' -> the
        hex digits as written.
    Unknown symbols resolve to address 0.

    Args:
        intermediate: Path of the pass-1 listing (``loc label opcode operand``).
        op_table: Mapping mnemonic -> (opcode byte, format).
        symbol_table_file: Path of a ``LABEL HEXADDR`` per-line file.
        registers: Mapping register name -> register number.
        output_path: Path of the tab-separated listing to write.
        base_register: Initial base-register value (int) or None; may be
            changed by BASE/NOBASE directives in the listing.

    Returns:
        list of ``(loc, label, opcode, operand, obj)`` string tuples in
        listing order; ``obj`` is ``""`` when nothing is emitted.

    Raises:
        ValueError: for a ``"3X"`` instruction without exactly four valid
            registers, or for non-numeric addresses / WORD operands.
    """
    object_code_lines = []

    # --- load symbol table as int addresses ------------------
    symbol_table = {}
    with open(symbol_table_file, "r") as f:
        for line in f:
            entry = line.split()
            if len(entry) < 2:            # blank / truncated line: nothing to load
                continue
            symbol_table[entry[0]] = int(entry[1], 16)

    # --- scan intermediate listing ---------------------------
    with open(intermediate, "r") as src:
        for raw in src:
            parts = raw.strip().split()

            # ------- parsing of columns by token count -------
            loc = label = opcode = operand = ""
            if len(parts) == 4:           # loc  lbl  opc  opr
                loc, label, opcode, operand = parts
            elif len(parts) == 3:         # loc  opc  opr
                loc, opcode, operand = parts
            elif len(parts) == 2:         # loc  opc
                loc, opcode = parts
            elif len(parts) == 1:
                opcode = parts[0]
            else:                         # blank / malformed
                continue

            obj = ""
            current_loc = int(loc, 16) if loc else 0
            op_upper = opcode.upper()

            # ----- directives that never emit object code ----
            if op_upper in {"START", "END", "RESW",
                            "RESB", "BASE", "NOBASE"}:
                # Track the base register so base-relative addressing works
                # for programs that declare it with BASE.
                if op_upper == "BASE":
                    base_register = symbol_table.get(operand, base_register)
                elif op_upper == "NOBASE":
                    base_register = None
                object_code_lines.append((loc, label, opcode, operand, obj))
                continue

            # ----- get clean opcode & detect format-4 --------
            op_clean   = opcode.lstrip("+").upper()
            is_format4 = opcode.startswith("+")
            fmt        = 4 if is_format4 else op_table.get(op_clean, (None, 0))[1]

            if op_clean == "RSUB":
                # n=i=1, no operand; format 4 must carry the e bit.
                obj = "4F100000" if fmt == 4 else "4F0000"

            elif op_clean in op_table:
                code = op_table[op_clean][0]

                # -------- format-1 ---------------------------
                if fmt == 1:
                    obj = f"{code:02X}"

                # -------- format-2 ---------------------------
                elif fmt == 2:
                    regs = operand.split(",") if operand else []
                    r1   = registers.get(regs[0], 0) if len(regs) > 0 else 0
                    r2   = registers.get(regs[1], 0) if len(regs) > 1 else 0
                    obj  = f"{code:02X}{r1:X}{r2:X}"

                # -------- format-3X (packed-register) --------
                elif fmt == "3X":
                    regs = operand.split(",")
                    if len(regs) != 4:
                        raise ValueError(f"{opcode} needs exactly 4 registers")
                    r_vals = [registers.get(r.strip(), None) for r in regs]
                    if None in r_vals:
                        raise ValueError(f"Bad register in {operand}")
                    byte1 = (r_vals[0] << 4) | r_vals[1]
                    byte2 = (r_vals[2] << 4) | r_vals[3]
                    obj   = f"{code:02X}{byte1:02X}{byte2:02X}"

                # -------- formats-3 & 4 ----------------------
                elif fmt in (3, 4):
                    e = 1 if fmt == 4 else 0
                    ni = 3                     # simple addressing by default
                    x = b = p = 0
                    is_constant = False        # numeric immediate operand?

                    # --- target address / constant -----------
                    if operand.startswith("#"):
                        ni = 1
                        op_val = operand[1:]
                        if op_val.isdigit():
                            addr = int(op_val)
                            is_constant = True
                        else:
                            addr = symbol_table.get(op_val, 0)
                    elif operand.startswith("@"):
                        ni = 2
                        op_val = operand[1:]
                        addr   = symbol_table.get(op_val, 0)
                    else:
                        op_val = operand
                        if op_val.endswith(",X"):
                            x  = 1
                            op_val = op_val[:-2]
                        addr = symbol_table.get(op_val, 0)

                    # --- displacement ------------------------
                    if fmt == 4 or is_constant:
                        # Absolute address (format 4) or literal value: stored
                        # as-is, never relative to PC or base.  A format-3
                        # constant wider than 12 bits is truncated below.
                        disp = addr
                    else:
                        disp = addr - (current_loc + 3)
                        if -2048 <= disp <= 2047:
                            p = 1
                        elif base_register is not None:
                            disp = addr - base_register
                            if 0 <= disp <= 4095:
                                b = 1
                            else:
                                disp = 0         # out of range
                        else:
                            disp = 0             # out of range

                    # n,i go into the low two bits of the opcode byte;
                    # x,b,p,e form the next nibble.
                    flags      = (ni << 4) | (x << 3) | (b << 2) | (p << 1) | e
                    first_byte = (code & 0xFC) | (flags >> 4)

                    if fmt == 4:
                        obj = f"{first_byte:02X}{flags & 0xF:X}{disp:05X}"
                    else:
                        obj = f"{first_byte:02X}{flags & 0xF:X}{disp & 0xFFF:03X}"

            elif op_clean == "WORD":
                obj = f"{int(operand):06X}"

            elif op_clean == "BYTE":
                if operand.startswith("C'") and operand.endswith("'"):
                    chars = operand[2:-1]
                    obj   = "".join(f"{ord(c):02X}" for c in chars)
                elif operand.startswith("X'") and operand.endswith("'"):
                    obj = operand[2:-1].upper()

            # append line (object code may be empty)
            object_code_lines.append((loc, label, opcode, operand, obj))

    # --- write listing ---------------------------------------
    with open(output_path, "w") as out:
        for loc, lbl, opc, opr, obj in object_code_lines:
            out.write(f"{loc}\t{lbl}\t{opc}\t{opr}\t{obj}\n")

    return object_code_lines



In [8]:
# Pass 2: encode the pass-1 listing into object code and write out_pass2.txt.
# base_register is not passed, so it starts at its default of None.
object_code = generate_object_code(
    intermediate="out_pass1.txt",
    op_table=optable,
    symbol_table_file="symbTable.txt",  # a file path; the function loads the table itself
    registers=registers,
    output_path="out_pass2.txt"
)


In [9]:
def generate_htme(object_code_lines, output_file="HTME.txt"):
    """Write the object program (H, T, M and E records) to ``output_file``.

    Records use '^' as the field separator:
      * H: program name (6 chars, from the START label, blank if none),
        start address and program length, both as 6 hex digits.
      * T: start address, length in bytes (2 hex digits), object code.
        A record is closed before it would exceed 30 bytes and at every
        RESW/RESB; lines without object code do not break a record.
      * M: one ``M^addr+1^05`` record per '+'-prefixed instruction that has
        object code, except when its operand is ``#`` followed by an integer.
      * E: the program start address.

    The start address is the location of the START line; without one, the
    numerically lowest location is used.  The program length runs from the
    start address to the highest ``location + object-code bytes`` seen.

    Args:
        object_code_lines: Iterable of ``(loc, label, opcode, operand, obj)``
            string tuples; ``loc`` is hexadecimal and may be empty.
        output_file: Path of the file to write (overwritten).

    Raises:
        ValueError: if no START line is found and no line has a location.
    """
    # Initialize variables
    start_address = None
    program_name = "      "  # Default if no label

    # Get start address and program name
    for loc, label, opcode, operand, obj in object_code_lines:
        if opcode.upper() == "START":
            start_address = loc
            if label:
                program_name = label[:6].upper().ljust(6)
            break

    if not start_address:
        locs = [loc for loc, *_ in object_code_lines if loc]
        if not locs:
            raise ValueError("No location info")
        # Compare numerically: a plain string min mis-orders hex strings of
        # different widths (e.g. "10000" sorts before "FFFD").
        start_address = min(locs, key=lambda text: int(text, 16))

    # Program length: include the bytes of the statement at the highest
    # address so the length is still right when no END line follows it.
    end_address = max(
        int(loc, 16) + len(obj) // 2
        for loc, _, _, _, obj in object_code_lines
        if loc
    )
    program_length = end_address - int(start_address, 16)

    # Header Record
    header_record = f"H^{program_name}^{start_address.zfill(6).upper()}^{program_length:06X}"

    # T Record setup
    text_records = []
    current_record = ""
    record_start_address = ""
    record_length = 0  # in bytes

    def flush_record():
        """Emit the pending text record, if any, and reset the accumulator."""
        nonlocal current_record, record_start_address, record_length
        if current_record:
            text_records.append(
                f"T^{record_start_address.zfill(6).upper()}^{record_length:02X}^{current_record}"
            )
            current_record = ""
            record_start_address = ""
            record_length = 0

    for loc, _, opcode, operand, obj in object_code_lines:
        opcode_upper = opcode.upper()
        if opcode_upper in ("RESW", "RESB"):
            # Reserved storage leaves a gap in memory, so the record ends here.
            flush_record()
            continue

        if not obj:
            # Skip line but continue same record
            continue

        obj_length = len(obj) // 2
        if current_record == "":
            # Start new record
            record_start_address = loc
            current_record = obj
            record_length = obj_length
        elif record_length + obj_length > 30:
            flush_record()
            record_start_address = loc
            current_record = obj
            record_length = obj_length
        else:
            current_record += obj
            record_length += obj_length

    flush_record()  # Flush remaining

    # Modification Records
    modification_records = []
    for loc, _, opcode, operand, obj in object_code_lines:
        if opcode.startswith("+") and obj:
            if operand.startswith("#"):
                try:
                    int(operand[1:])  # constant → skip
                    continue
                except ValueError:
                    pass  # it's a label
            # The address field starts one byte into the instruction and is
            # five half-bytes long.
            modification_loc = int(loc, 16) + 1
            modification_records.append(f"M^{modification_loc:06X}^05")

    # End Record
    end_record = f"E^{start_address.zfill(6).upper()}"

    # Write all records
    with open(output_file, "w") as file:
        file.write(header_record + "\n")
        for record in text_records:
            file.write(record + "\n")
        for record in modification_records:
            file.write(record + "\n")
        file.write(end_record + "\n")


In [10]:
def load_object_code_lines(filename):
    """Read a tab-separated pass-2 listing back into a list of tuples.

    Each line is split on tab characters after removing the trailing
    newline.  Lines with five fields are taken as
    ``(loc, label, opcode, operand, obj)``; lines with four fields get an
    empty ``obj``.  Any other field count is reported on stdout and skipped.

    Args:
        filename: Path of the listing file to read.

    Returns:
        list of 5-tuples of strings, in file order.
    """
    rows = []
    with open(filename, "r") as handle:
        for raw in handle:
            fields = raw.rstrip('\n').split('\t')
            field_count = len(fields)

            if field_count not in (4, 5):
                print(f"Skipping malformed line: {raw.strip()}")
                continue

            if field_count == 4:
                fields.append("")  # no object code column on this line

            rows.append(tuple(fields))
    return rows


In [11]:
# Re-read the tab-separated pass-2 listing (out_pass2.txt) from disk.
object_code_lines = load_object_code_lines("out_pass2.txt")

# Write the H/T/M/E object program to the default output file, HTME.txt.
generate_htme(object_code_lines)
