In [7]:
# Encoding table for the supported mnemonics.
# Each entry maps a mnemonic to the fixed bit fields (as binary strings) that the
# assemble_* helpers concatenate into the final word:
#   'opcode' - 7 bits, always present
#   'funct3' - 3 bits, present where the helper for that mnemonic reads it
#   'funct7' - 7 bits, present for register-register and shift-immediate entries
INSTRUCTION_SET = {
    # Upper-immediate and jump instructions
    'lui':      {'opcode': '0110111'},
    'auipc':    {'opcode': '0010111'},
    'jal':      {'opcode': '1101111'},
    'jalr':     {'opcode': '1100111', 'funct3': '000'},
    # Conditional branches (share one opcode, distinguished by funct3)
    'beq':      {'opcode': '1100011', 'funct3': '000'},
    'bne':      {'opcode': '1100011', 'funct3': '001'},
    'blt':      {'opcode': '1100011', 'funct3': '100'},
    'bge':      {'opcode': '1100011', 'funct3': '101'},
    'bltu':     {'opcode': '1100011', 'funct3': '110'},
    'bgeu':     {'opcode': '1100011', 'funct3': '111'},
    # Word load / store
    'lw':       {'opcode': '0000011', 'funct3': '010'},
    'sw':       {'opcode': '0100011', 'funct3': '010'},
    # Register-immediate arithmetic / logic
    'addi':     {'opcode': '0010011', 'funct3': '000'},
    'slti':     {'opcode': '0010011', 'funct3': '010'},
    'sltiu':    {'opcode': '0010011', 'funct3': '011'},
    'xori':     {'opcode': '0010011', 'funct3': '100'},
    'ori':      {'opcode': '0010011', 'funct3': '110'},
    'andi':     {'opcode': '0010011', 'funct3': '111'},
    # Shift-by-immediate: srli and srai differ only in funct7
    'slli':     {'opcode': '0010011', 'funct3': '001', 'funct7': '0000000'},
    'srli':     {'opcode': '0010011', 'funct3': '101', 'funct7': '0000000'},
    'srai':     {'opcode': '0010011', 'funct3': '101', 'funct7': '0100000'},
    # Register-register arithmetic / logic
    'add':      {'opcode': '0110011', 'funct3': '000', 'funct7': '0000000'},
    'sub':      {'opcode': '0110011', 'funct3': '000', 'funct7': '0100000'},
    'sll':      {'opcode': '0110011', 'funct3': '001', 'funct7': '0000000'},
    'slt':      {'opcode': '0110011', 'funct3': '010', 'funct7': '0000000'},
    'sltu':     {'opcode': '0110011', 'funct3': '011', 'funct7': '0000000'},
    'xor':      {'opcode': '0110011', 'funct3': '100', 'funct7': '0000000'},
    'srl':      {'opcode': '0110011', 'funct3': '101', 'funct7': '0000000'},
    'sra':      {'opcode': '0110011', 'funct3': '101', 'funct7': '0100000'},
    'or':       {'opcode': '0110011', 'funct3': '110', 'funct7': '0000000'},
    'and':      {'opcode': '0110011', 'funct3': '111', 'funct7': '0000000'},
    # NOTE(review): lujr / luw / setur / setui look like project-specific
    # extensions that operate on the y*/z* "user" registers defined below;
    # their encodings should be confirmed against the hardware decoder.
    'lujr':     {'opcode': '1000011', 'funct3': '011'},
    'luw':      {'opcode': '1000111', 'funct3': '011'},
    'setur':    {'opcode': '0001011'},
    'setui':    {'opcode': '0001111'}
}

# General-purpose register names 'x0'..'x31' mapped to their 5-bit binary
# index, in ascending order.
REGISTER_MAP = {f'x{index}': format(index, '05b') for index in range(32)}

# User write-register map: names 'y0'..'y63' mapped to their 8-bit binary
# index. These are the destination (urd) operands of setur / setui.
USER_REGISTER_MAP = {f'y{index}': format(index, '08b') for index in range(64)}

# User read-register map: names 'z0'..'z7' mapped to their 3-bit binary
# index. These are the source (urs) operands of luw.
USER_READ_REGISTER_MAP = {f'z{index}': format(index, '03b') for index in range(8)}

def twos_complement(val, bits):
    """Return ``val`` as a ``bits``-character two's-complement binary string.

    Args:
        val: Integer to encode. Negative values are wrapped into the
            ``bits``-wide two's-complement range. Non-negative values up to
            ``2**bits - 1`` are emitted as a raw bit pattern, so callers may
            pass either the signed or the unsigned spelling of a field.
        bits: Width of the field in bits (must be at least 1).

    Returns:
        A string of exactly ``bits`` characters, each '0' or '1'.

    Raises:
        ValueError: If ``bits`` is not positive or ``val`` does not fit in
            ``bits`` bits. Without this check an oversized value would yield
            a string that is too long (or carries a '-' sign) and silently
            corrupt the assembled instruction word.
    """
    if bits < 1:
        raise ValueError(f"Field width must be positive, got {bits}")
    lowest = -(1 << (bits - 1))
    highest = (1 << bits) - 1
    if not lowest <= val <= highest:
        raise ValueError(
            f"Value {val} does not fit in {bits} bits "
            f"(allowed range {lowest}..{highest})"
        )
    if val < 0:
        val += 1 << bits
    return format(val, f'0{bits}b')

def assemble_r_type(instr, rd, rs1, rs2):
    """Encode a register-register instruction as a 32-character bit string.

    Args:
        instr: Mnemonic; its INSTRUCTION_SET entry must provide 'opcode',
            'funct3' and 'funct7'.
        rd: Destination register name (key of REGISTER_MAP).
        rs1: First source register name.
        rs2: Second source register name.

    Returns:
        The fields concatenated as funct7 | rs2 | rs1 | funct3 | rd | opcode.

    Raises:
        KeyError: If the mnemonic, one of its fields, or a register name is
            unknown.
    """
    encoding = INSTRUCTION_SET[instr]
    opcode_bits = encoding['opcode']
    funct3_bits = encoding['funct3']
    funct7_bits = encoding['funct7']
    rd_bits, rs1_bits, rs2_bits = (REGISTER_MAP[name] for name in (rd, rs1, rs2))
    return ''.join(
        (funct7_bits, rs2_bits, rs1_bits, funct3_bits, rd_bits, opcode_bits)
    )

def assemble_i_type(instr, rd, rs1, imm):
    """Encode an I-type instruction as a 32-character bit string.

    Two layouts are produced, selected by whether the mnemonic's
    INSTRUCTION_SET entry carries a 'funct7' field:

    * Shift-immediate entries (slli / srli / srai in this table):
      funct7 | shamt[4:0] | rs1 | funct3 | rd | opcode. Using funct7 here is
      what distinguishes srai from srli; they share opcode and funct3.
    * All other entries: imm[11:0] | rs1 | funct3 | rd | opcode, with the
      immediate in two's complement so negative values encode correctly.

    Args:
        instr: Mnemonic (key of INSTRUCTION_SET).
        rd: Destination register name (key of REGISTER_MAP).
        rs1: Source / base register name.
        imm: Immediate or shift amount, as an int or decimal string.

    Returns:
        The 32-character binary encoding.

    Raises:
        KeyError: If the mnemonic or a register name is unknown.
        ValueError: If ``imm`` is not a decimal integer, a shift amount is
            outside 0..31, or an immediate is outside -2048..4095.
    """
    fields = INSTRUCTION_SET[instr]
    opcode = fields['opcode']
    funct3 = fields['funct3']
    rd_bin = REGISTER_MAP[rd]
    rs1_bin = REGISTER_MAP[rs1]
    value = int(imm)

    if 'funct7' in fields:
        # Shift-immediate form: the upper seven bits select logical vs
        # arithmetic shift, the lower five hold the shift amount.
        if not 0 <= value <= 31:
            raise ValueError(
                f"Shift amount {value} for '{instr}' is outside 0..31"
            )
        imm_bin = fields['funct7'] + format(value, '05b')
    else:
        # Accept the signed range, plus 2048..4095 as a raw 12-bit pattern
        # for compatibility with inputs that were accepted before.
        if not -2048 <= value <= 4095:
            raise ValueError(
                f"Immediate {value} for '{instr}' does not fit in 12 bits"
            )
        imm_bin = twos_complement(value, 12)

    return imm_bin + rs1_bin + funct3 + rd_bin + opcode

def assemble_s_type(instr, rs1, rs2, imm):
    """Encode a store instruction as a 32-character bit string.

    The 12-bit offset is split around the register fields:
    imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode. The offset is encoded
    in two's complement so negative offsets (e.g. ``sw x9 -4(x10)``) produce
    a valid word instead of a string containing a '-' sign.

    Args:
        instr: Mnemonic; its INSTRUCTION_SET entry must provide 'opcode' and
            'funct3'.
        rs1: Base address register name (key of REGISTER_MAP).
        rs2: Register name whose value is stored.
        imm: Byte offset, as an int or decimal string.

    Returns:
        The 32-character binary encoding.

    Raises:
        KeyError: If the mnemonic or a register name is unknown.
        ValueError: If ``imm`` is not a decimal integer or lies outside
            -2048..4095 (2048..4095 is accepted as a raw 12-bit pattern for
            compatibility with inputs that were accepted before).
    """
    opcode = INSTRUCTION_SET[instr]['opcode']
    funct3 = INSTRUCTION_SET[instr]['funct3']
    rs1_bin = REGISTER_MAP[rs1]
    rs2_bin = REGISTER_MAP[rs2]
    offset = int(imm)
    if not -2048 <= offset <= 4095:
        raise ValueError(
            f"Offset {offset} for '{instr}' does not fit in 12 bits"
        )
    imm_bin = twos_complement(offset, 12)
    upper_bits = imm_bin[:7]   # imm[11:5]
    lower_bits = imm_bin[7:]   # imm[4:0]
    return upper_bits + rs2_bin + rs1_bin + funct3 + lower_bits + opcode

def assemble_b_type(instr, rs1, rs2, imm):
    """Encode a conditional branch as a 32-character bit string.

    The offset is rendered as a 13-bit two's-complement string and shuffled
    into imm[12] | imm[10:5] | rs2 | rs1 | funct3 | imm[4:1] | imm[11] | opcode.
    The lowest offset bit (imm[0]) is not emitted.

    Args:
        instr: Mnemonic; its INSTRUCTION_SET entry must provide 'opcode' and
            'funct3'.
        rs1: First compared register name (key of REGISTER_MAP).
        rs2: Second compared register name.
        imm: Branch offset, as an int or decimal string.

    Returns:
        The binary encoding as a string.
    """
    encoding = INSTRUCTION_SET[instr]
    opcode_bits = encoding['opcode']
    funct3_bits = encoding['funct3']
    rs1_bits = REGISTER_MAP[rs1]
    rs2_bits = REGISTER_MAP[rs2]
    # String index k corresponds to offset bit (12 - k).
    offset_bits = twos_complement(int(imm), 13)
    bit_12 = offset_bits[0]
    bit_11 = offset_bits[1]
    bits_10_5 = offset_bits[2:8]
    bits_4_1 = offset_bits[8:12]
    return (
        f"{bit_12}{bits_10_5}{rs2_bits}{rs1_bits}"
        f"{funct3_bits}{bits_4_1}{bit_11}{opcode_bits}"
    )

def assemble_lui(instr, rd, imm):
    """Encode an upper-immediate instruction as imm[19:0] | rd | opcode.

    Args:
        instr: Mnemonic whose INSTRUCTION_SET entry provides 'opcode'.
        rd: Destination register name (key of REGISTER_MAP).
        imm: 20-bit immediate, as an int or decimal string; passed through
            twos_complement with a width of 20.

    Returns:
        The binary encoding as a string.
    """
    opcode_bits = INSTRUCTION_SET[instr]['opcode']
    rd_bits = REGISTER_MAP[rd]
    upper_bits = twos_complement(int(imm), 20)
    return ''.join((upper_bits, rd_bits, opcode_bits))

def assemble_auipc(instr, rd, imm):
    """Encode an add-upper-immediate-to-pc instruction as imm[19:0] | rd | opcode.

    Args:
        instr: Mnemonic whose INSTRUCTION_SET entry provides 'opcode'.
        rd: Destination register name (key of REGISTER_MAP).
        imm: 20-bit immediate, as an int or decimal string; passed through
            twos_complement with a width of 20.

    Returns:
        The binary encoding as a string.
    """
    opcode_bits = INSTRUCTION_SET[instr]['opcode']
    rd_bits = REGISTER_MAP[rd]
    upper_bits = twos_complement(int(imm), 20)
    return f"{upper_bits}{rd_bits}{opcode_bits}"

def assemble_jal(instr, rd, imm):
    """Encode a jump-and-link instruction as a 32-character bit string.

    The offset is rendered as a 21-bit two's-complement string and shuffled
    into imm[20] | imm[10:1] | imm[11] | imm[19:12] | rd | opcode. The lowest
    offset bit (imm[0]) is not emitted.

    Args:
        instr: Mnemonic whose INSTRUCTION_SET entry provides 'opcode'.
        rd: Link register name (key of REGISTER_MAP).
        imm: Jump offset, as an int or decimal string.

    Returns:
        The binary encoding as a string.
    """
    opcode_bits = INSTRUCTION_SET[instr]['opcode']
    rd_bits = REGISTER_MAP[rd]
    # String index k corresponds to offset bit (20 - k).
    offset_bits = twos_complement(int(imm), 21)
    bit_20 = offset_bits[0]
    bits_19_12 = offset_bits[1:9]
    bit_11 = offset_bits[9]
    bits_10_1 = offset_bits[10:20]
    return ''.join((bit_20, bits_10_1, bit_11, bits_19_12, rd_bits, opcode_bits))

def assemble_jalr(instr, rd, rs1, imm):
    """Encode a jump-and-link-register instruction.

    Layout: imm[11:0] | rs1 | funct3 | rd | opcode, with the immediate in
    12-bit two's complement.

    Args:
        instr: Mnemonic; its INSTRUCTION_SET entry must provide 'opcode' and
            'funct3'.
        rd: Link register name (key of REGISTER_MAP).
        rs1: Base register name.
        imm: Offset, as an int or decimal string.

    Returns:
        The binary encoding as a string.
    """
    encoding = INSTRUCTION_SET[instr]
    opcode_bits = encoding['opcode']
    funct3_bits = encoding['funct3']
    rd_bits = REGISTER_MAP[rd]
    rs1_bits = REGISTER_MAP[rs1]
    offset_bits = twos_complement(int(imm), 12)
    return f"{offset_bits}{rs1_bits}{funct3_bits}{rd_bits}{opcode_bits}"

def assemble_lujr(instr,rd,rs1):
    """Encode the custom 'lujr' form: twelve zero bits | rs1 | funct3 | rd | opcode.

    Args:
        instr: Mnemonic; its INSTRUCTION_SET entry must provide 'opcode' and
            'funct3'.
        rd: Destination register name (key of REGISTER_MAP).
        rs1: Source register name (key of REGISTER_MAP).

    Returns:
        The 32-character binary encoding.
    """
    encoding = INSTRUCTION_SET[instr]
    opcode_bits = encoding['opcode']
    funct3_bits = encoding['funct3']
    rd_bits = REGISTER_MAP[rd]
    rs1_bits = REGISTER_MAP[rs1]
    zero_field = '0' * 12  # upper twelve bits are always zero for this form
    return ''.join((zero_field, rs1_bits, funct3_bits, rd_bits, opcode_bits))
    
def assemble_luw(instr,rd,urs):
    """Encode the custom 'luw' form.

    Layout: one zero bit | urs (3 bits) | thirteen zero bits | funct3 | rd |
    opcode.

    Args:
        instr: Mnemonic; its INSTRUCTION_SET entry must provide 'opcode' and
            'funct3'.
        rd: Destination register name (key of REGISTER_MAP).
        urs: User read-register name (key of USER_READ_REGISTER_MAP).

    Returns:
        The 32-character binary encoding.
    """
    encoding = INSTRUCTION_SET[instr]
    opcode_bits = encoding['opcode']
    funct3_bits = encoding['funct3']
    rd_bits = REGISTER_MAP[rd]
    urs_bits = USER_READ_REGISTER_MAP[urs]
    padding = '0' * 13
    return f"0{urs_bits}{padding}{funct3_bits}{rd_bits}{opcode_bits}"

def assemble_setur(instr,urd,rs1,imm):
    """Encode the custom 'setur' form.

    Layout: imm[11:8] | urd (8 bits) | rs1 | imm[7:0] | opcode.

    The immediate is formatted as a plain zero-padded 12-digit binary number,
    not two's complement, so a negative value yields a string containing a
    '-' sign rather than a valid encoding.
    NOTE(review): confirm whether this immediate is meant to be unsigned.

    Args:
        instr: Mnemonic whose INSTRUCTION_SET entry provides 'opcode'.
        urd: User write-register name (key of USER_REGISTER_MAP).
        rs1: Source register name (key of REGISTER_MAP).
        imm: Immediate, as an int or decimal string.

    Returns:
        The binary encoding as a string.
    """
    opcode_bits = INSTRUCTION_SET[instr]['opcode']
    urd_bits = USER_REGISTER_MAP[urd]
    rs1_bits = REGISTER_MAP[rs1]
    imm_bits = format(int(imm), '012b')
    high_nibble = imm_bits[:4]
    low_byte = imm_bits[4:12]
    return ''.join((high_nibble, urd_bits, rs1_bits, low_byte, opcode_bits))

def assemble_setui(instr,urd,imm):
    """Encode the custom 'setui' form.

    Layout: imm[15:13] | urd (8 bits) | imm[12:0] | opcode.

    These fields total 31 characters (3 + 8 + 13 + 7), so the returned string
    is one character short of a full word; bin_to_hex parses it as an integer
    and zero-pads, which makes the missing top bit an implicit 0.
    NOTE(review): confirm that a zero in bit 31 is the intended encoding.

    The immediate is formatted as a plain zero-padded 16-digit binary number,
    not two's complement, so negative values are not encoded correctly.

    Args:
        instr: Mnemonic whose INSTRUCTION_SET entry provides 'opcode'.
        urd: User write-register name (key of USER_REGISTER_MAP).
        imm: Immediate, as an int or decimal string.

    Returns:
        The binary encoding as a string.
    """
    opcode_bits = INSTRUCTION_SET[instr]['opcode']
    urd_bits = USER_REGISTER_MAP[urd]
    imm_bits = format(int(imm), '016b')
    top_bits = imm_bits[:3]
    remaining_bits = imm_bits[3:16]
    return f"{top_bits}{urd_bits}{remaining_bits}{opcode_bits}"

def bin_to_hex(binary):
    """Convert a binary digit string to lowercase hex, zero-padded to 8 digits.

    Args:
        binary: String of '0'/'1' characters, parsed as a base-2 integer.

    Returns:
        The value as lowercase hexadecimal, left-padded with zeros to at
        least eight characters.
    """
    word = int(binary, 2)
    return format(word, '08x')

def _split_memory_operand(operand):
    """Split an ``offset(base)`` operand into its ``(offset, base)`` strings."""
    offset, base = operand.split('(')
    return offset, base.strip(')')


def assemble_instruction(instruction):
    """Assemble one line of assembly text into an 8-digit hex word.

    Operands may be separated by whitespace, commas, or both, so
    ``add x1 x2 x3`` and ``add x1, x2, x3`` are equivalent. Tokens beyond the
    operands an instruction needs are ignored.

    Supported operand shapes:
        add/sub/sll/slt/sltu/xor/srl/sra/or/and   rd rs1 rs2
        addi/slti/sltiu/xori/ori/andi             rd rs1 imm
        slli/srli/srai                            rd rs1 shamt
        lw                                        rd offset(rs1)
        sw                                        rs2 offset(rs1)
        beq/bne/blt/bge/bltu/bgeu                 rs1 rs2 offset
        lui/auipc/jal                             rd imm
        jalr                                      rd rs1 imm
        lujr                                      rd rs1
        luw                                       rd urs
        setur                                     urd rs1 imm
        setui                                     urd imm

    Args:
        instruction: A single assembly statement.

    Returns:
        The machine word as a hex string produced by bin_to_hex.

    Raises:
        ValueError: If the line is empty, the mnemonic is unsupported, too
            few operands are given, or an immediate is not a valid integer.
        KeyError: If a register name is unknown.
    """
    # Treat commas as plain separators so conventional "rd, rs1, rs2" syntax
    # assembles the same as the whitespace-only form.
    parts = instruction.replace(',', ' ').split()
    if not parts:
        raise ValueError("Empty instruction")
    instr = parts[0]
    operands = parts[1:]

    def take(count):
        # Fail with a readable message instead of an IndexError when the
        # line is missing operands.
        if len(operands) < count:
            raise ValueError(
                f"Instruction '{instr}' expects {count} operands, "
                f"got {len(operands)}: {instruction!r}"
            )
        return operands[:count]

    if instr in ('add', 'sub', 'sll', 'slt', 'sltu', 'xor', 'srl', 'sra', 'or', 'and'):
        rd, rs1, rs2 = take(3)
        machine_code_bin = assemble_r_type(instr, rd, rs1, rs2)
    elif instr == 'lw':
        rd, memory_operand = take(2)
        offset, rs1 = _split_memory_operand(memory_operand)
        machine_code_bin = assemble_i_type(instr, rd, rs1, offset)
    elif instr in ('addi', 'slti', 'sltiu', 'xori', 'ori', 'andi',
                   'slli', 'srli', 'srai'):
        rd, rs1, imm = take(3)
        machine_code_bin = assemble_i_type(instr, rd, rs1, imm)
    elif instr == 'sw':
        rs2, memory_operand = take(2)
        offset, rs1 = _split_memory_operand(memory_operand)
        machine_code_bin = assemble_s_type(instr, rs1, rs2, offset)
    elif instr in ('beq', 'bne', 'blt', 'bge', 'bltu', 'bgeu'):
        rs1, rs2, offset = take(3)
        machine_code_bin = assemble_b_type(instr, rs1, rs2, offset)
    elif instr == 'lui':
        rd, imm = take(2)
        machine_code_bin = assemble_lui(instr, rd, imm)
    elif instr == 'auipc':
        rd, imm = take(2)
        machine_code_bin = assemble_auipc(instr, rd, imm)
    elif instr == 'jal':
        rd, imm = take(2)
        machine_code_bin = assemble_jal(instr, rd, imm)
    elif instr == 'jalr':
        rd, rs1, imm = take(3)
        machine_code_bin = assemble_jalr(instr, rd, rs1, imm)
    elif instr == 'lujr':
        rd, rs1 = take(2)
        machine_code_bin = assemble_lujr(instr, rd, rs1)
    elif instr == 'luw':
        rd, urs = take(2)
        machine_code_bin = assemble_luw(instr, rd, urs)
    elif instr == 'setur':
        urd, rs1, imm = take(3)
        machine_code_bin = assemble_setur(instr, urd, rs1, imm)
    elif instr == 'setui':
        urd, imm = take(2)
        machine_code_bin = assemble_setui(instr, urd, imm)
    else:
        raise ValueError(f"Unsupported instruction: {instr}")

    return bin_to_hex(machine_code_bin)

# Example usage
#instructions = [
#    "add x1 x2 x3",
#    "sub x4 x5 x6",
#    "lw x7 0(x8)",
#    "sw x9 4(x10)",
#    "beq x11 x12 -16",
#    "slli x11 x12 3",
#    "lui x2 786432",
#    "jal x0 20",
#    "auipc x1 20",
#    "lujr x1 x2",
#    "luw x1 z1",
#    "setur y1 x1 20",
#    "setui y1 20"
#]

#for instr in instructions:
#    machine_code_hex = assemble_instruction(instr)
#    print(f"{instr} -> {machine_code_hex}")

def read_assembly_file(filename):
    """Read an assembly source file and return its non-blank lines.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of lines with surrounding whitespace removed; lines that are
        empty after stripping are dropped.
    """
    statements = []
    with open(filename, 'r') as source:
        for raw_line in source:
            stripped = raw_line.strip()
            if stripped:
                statements.append(stripped)
    return statements

def write_machine_code_hexfile(filename, machine_codes):
    """Write the machine words to ``filename`` as raw bytes.

    Each hex string is converted with bytes.fromhex and written back to back,
    so the bytes of every word appear in the same order as its hex digits
    (most significant byte first). Despite the function's name, the output
    is binary, not text.

    Args:
        filename: Path of the file to create or overwrite.
        machine_codes: Iterable of hex strings, one per instruction word.
    """
    with open(filename, 'wb') as binary_out:
        binary_out.writelines(bytes.fromhex(word) for word in machine_codes)

def write_machine_code_file(filename, machine_codes):
    """Write the machine words to ``filename`` as a text memory image.

    Words are written as hex text with no separator, and a newline follows
    every fourth word. After the program words the file receives a fixed
    trailer: one ``ffffffff`` word plus newline, fifteen ``00000000`` words
    (newline after every fourth), and a final ``ffffffff`` word plus newline.

    Args:
        filename: Path of the file to create or overwrite.
        machine_codes: Iterable of 8-digit hex strings, one per word.
    """
    with open(filename, 'w') as text_out:
        for position, word in enumerate(machine_codes):
            ends_row = position % 4 == 3
            text_out.write((word + '\n') if ends_row else word)
        text_out.write("ffffffff\n")
        for position in range(15):
            ends_row = position % 4 == 3
            text_out.write("00000000\n" if ends_row else "00000000")
        text_out.write("ffffffff\n")
            
def main(input_filename, output_filename, output_hex_filename):
    """Assemble a source file and write both output images.

    Reads the non-blank lines of ``input_filename``, assembles each one, then
    writes the text memory image to ``output_filename`` and the raw byte
    image to ``output_hex_filename``. A confirmation naming the text image is
    printed when done.

    Args:
        input_filename: Path of the assembly source file.
        output_filename: Path of the text memory image to write.
        output_hex_filename: Path of the raw byte image to write.
    """
    source_lines = read_assembly_file(input_filename)
    encoded_words = list(map(assemble_instruction, source_lines))
    write_machine_code_file(output_filename, encoded_words)
    write_machine_code_hexfile(output_hex_filename, encoded_words)
    print(f"Machine code written to {output_filename}")
    
#main('assembly_dds.s', 'machine_code_dds.mem', 'machine_code_dds.hex')
# main('assembly_ttl.s', 'machine_code_ttl.mem', 'machine_code_ttl.hex')
#main('ttl_threshold.s', 'ttl_threshold.mem', 'ttl_threshold.hex')


Machine code written to machine_code_ttl.mem
