<a href="https://colab.research.google.com/github/vbonato/cnnTestBench/blob/main/cnnTestBench.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Notebook setup cell: mount Google Drive at /content/drive.
# google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [23]:
import ipywidgets as widgets
from IPython.display import display, clear_output, Markdown
import os
import math

# --- 1. Core Code Generation Functions (HLS INFERENCE - SRC Folder) ---

def generate_vitis_tcl_script(num_layers, params):
    """
    Generates the Tcl automation script that drives a complete Vitis HLS run.

    The script opens a fresh project, registers the generated C++ sources and
    testbench by absolute path, creates the C-simulation build directory up
    front, copies every data file the testbench reads into it, and then runs
    C-simulation, synthesis, co-simulation and IP export.

    Args:
        num_layers: Number of convolution layers. Selects the source file
            names (conv{N}.cpp / conv{N}.h / conv_tb{N}.cpp) and determines
            which per-layer weight/bias files are staged for simulation.
        params: Network parameter dictionary. Not read by this function; kept
            so the signature matches the other generators.

    Returns:
        The complete Tcl script as one newline-joined string.
    """
    top_function_name = "cnn"
    proj_name = f"cnn_{num_layers}_layer_vitis_project"
    solution_name = "solution1"
    cpp_src = f"conv{num_layers}.cpp"
    cpp_hdr = f"conv{num_layers}.h"
    tb_src = f"conv_tb{num_layers}.cpp"
    target_part = "xczu7ev-ffvc1156-2-e"

    # The generated testbench opens W{i}.bin / B{i}.bin for every layer, so
    # each of them has to be staged next to the simulation binary - copying
    # only the layer-1 files leaves deeper networks without their weights.
    data_files = ["test.dat"]
    for layer in range(1, num_layers + 1):
        data_files.extend([f"W{layer}.bin", f"B{layer}.bin"])
    data_files.extend(["W_fc.bin", "B_fc.bin"])

    tcl_script = [
        '# --- Vitis HLS Automation Script (Final, Robust Version) ---',
        '',
        '# 1. Capture the original directory where this script is running',
        'set original_dir [pwd]',
        'puts "INFO: Original script directory is: $original_dir"',
        '',
        '# 2. Create a clean project',
        f'open_project -reset "{proj_name}"',
        f'set_top {top_function_name}',
        '',
        '# 3. Add C++ source files using absolute paths',
        f'add_files "$original_dir/{cpp_src}"',
        f'add_files "$original_dir/{cpp_hdr}"',
        f'add_files -tb "$original_dir/{tb_src}"',
        '',
        '# 4. Configure the solution',
        f'open_solution -flow_target vitis "{solution_name}"',
        # Triple braces: an escaped pair yields the literal Tcl braces around
        # the substituted part name.
        f'set_part {{{target_part}}}',
        'create_clock -period 10ns -name default',
        '',
        '# 5. Define the simulation directory and ENSURE IT EXISTS',
        f'set sim_dir "{proj_name}/{solution_name}/csim/build"',
        'file mkdir $sim_dir',
        '',
        '# 6. Copy data files from the original directory to the simulation directory',
    ]

    # One copy command per data file.
    tcl_script.extend(
        f'file copy -force "$original_dir/{df}" $sim_dir' for df in data_files
    )

    tcl_script.extend([
        'puts "INFO: Successfully prepared simulation directory: $sim_dir"',
        '',
        '# 7. Run the full flow (C-sim, Synthesis, Co-sim)',
        'csim_design',
        'csynth_design',
        'cosim_design -trace_level all',
        '',
        '# 8. Export the final IP core',
        'export_design -format ip_catalog',
        '',
        'puts "INFO: Vitis HLS script finished successfully!"',
        'exit'
    ])
    return '\n'.join(tcl_script)


def generate_c_style_loader_function():
    """Return the C source of `load_and_quantize_weights` as a string.

    The emitted function opens a binary file, reads `num_elements` floats
    into a temporary heap buffer, multiplies each by `scale_factor`, casts the
    product to `type_t` into the destination buffer, and returns 1 on success
    or 0 on any failure (open, allocation, short read).
    """
    # Raw literal: the C text below is reproduced verbatim, so the "\n"
    # sequences stay as two-character escapes for the C compiler.
    return r'''// --- WEIGHT LOADING & QUANTIZATION FUNCTION ---
// Loads 32-bit floats from a binary file and converts them to type_t
int load_and_quantize_weights(const char* file_path, type_t* dest_buffer, size_t num_elements, float scale_factor) {
    FILE* fp = fopen(file_path, "rb");
    if (!fp) {
        printf("ERROR: Could not open file for reading: %s\n", file_path);
        return 0; // Failure
    }

    // Create a temporary buffer to hold the float data from the file
    float* temp_buffer = (float*) malloc(num_elements * sizeof(float));
    if (!temp_buffer) {
        printf("ERROR: Could not allocate memory for temporary float buffer.\n");
        fclose(fp);
        return 0; // Failure
    }

    // Read the entire block of floats
    size_t elements_read = fread(temp_buffer, sizeof(float), num_elements, fp);
    fclose(fp);

    if (elements_read != num_elements) {
        printf("ERROR: Expected to read %zu elements from %s, but got %zu.\n", num_elements, file_path, elements_read);
        free(temp_buffer);
        return 0; // Failure
    }

    // Quantize: convert floats to type_t using the scale factor
    for (size_t i = 0; i < num_elements; ++i) {
        dest_buffer[i] = (type_t)(temp_buffer[i] * scale_factor);
    }

    free(temp_buffer);
    printf("Successfully loaded and quantized %zu elements from %s\n", num_elements, file_path);
    return 1; // Success
}
'''

def generate_hls_data_loader_function(params):
    """Build the C++ source of `load_test_data` for the HLS testbench.

    The emitted function reads a whole .dat file into a string and scans it
    with a std::regex for records shaped like ``{{f1,f2,...},label}``. The
    regex has one capture group per feature, so the group count is fixed at
    generation time from ``params['C1']``.

    Args:
        params: Parameter dictionary; only ``'C1'`` is read here, and it is
            used as the number of features per record.

    Returns:
        The C++ snippet (includes plus function) as one string ending in a
        newline.
    """
    feature_count = params['C1']

    # One numeric capture group, repeated per feature and comma separated.
    number_group = r"([-+]?\d+\.?\d*e?[-+]?\d*)"
    record_regex = (
        r"\{\{" + ",".join([number_group] * feature_count) + r"\},(\d+)\}"
    )

    # Raw template keeps the C++ text verbatim (including its "\n" escapes);
    # the two placeholders are filled in below.
    template = r'''#include <vector>
#include <string>
#include <regex>
#include <fstream>

// --- DATA LOADER FOR HLS TESTBENCH ---
// Loads float features and integer labels from a .dat file.
int load_test_data(const std::string& file_path, std::vector<float>& features, std::vector<int>& labels) {
    std::ifstream file(file_path);
    if (!file.is_open()) {
        printf("ERROR: Could not open test data file: %s\n", file_path.c_str());
        return 0;
    }
    std::string content((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
    file.close();

    std::regex pattern(R"(__RECORD_REGEX__)");
    auto matches_begin = std::sregex_iterator(content.begin(), content.end(), pattern);
    auto matches_end = std::sregex_iterator();

    for (std::sregex_iterator i = matches_begin; i != matches_end; ++i) {
        std::smatch match = *i;
        for (int j = 1; j <= __FEATURE_COUNT__; ++j) {
            features.push_back(std::stof(match[j].str()));
        }
        labels.push_back(std::stoi(match[__FEATURE_COUNT__ + 1].str()));
    }
    printf("Loaded %zu samples from %s\n", labels.size(), file_path.c_str());
    return labels.size();
}
'''
    return (
        template
        .replace('__FEATURE_COUNT__', str(feature_count))
        .replace('__RECORD_REGEX__', record_regex)
    )

def generate_makefile_code(num_layers):
    """
    Generates the Makefile used to build the HLS model and its testbench
    natively with clang++.

    The emitted Makefile compiles conv{N}.cpp and conv_tb{N}.cpp to object
    files, links them into ../bin/conv{N}, creates ../bin on demand through
    the `check_dirs` target, and offers a `clean` target.

    Args:
        num_layers: Number of convolution layers; only used to derive the
            source, header, object and target file names.

    Returns:
        The Makefile content as one string. Recipe lines start with a hard
        tab, as make requires.
    """
    TARGET = f'conv{num_layers}'
    TB_OBJ = f'conv_tb{num_layers}.o'
    TB_SRC = f'conv_tb{num_layers}.cpp'
    TB_HDR = f'conv_tb{num_layers}.h'
    CNN_HDR = f'conv{num_layers}.h'
    CNN_SRC = f'conv{num_layers}.cpp'

    # The generated testbench relies on C++11 (raw string literals, `auto`,
    # <regex>), so the standard is requested explicitly instead of depending
    # on the compiler's default dialect. This matches the trainer Makefile.
    compile_cmd = 'clang++ -std=c++11 -c'

    makefile_lines = [
        f'{TARGET}: check_dirs {TARGET}.o {TB_OBJ}',
        f'\tclang++ {TARGET}.o {TB_OBJ} -o ../bin/{TARGET} -lm\n',

        f'{TB_OBJ}: {TB_SRC} {TB_HDR} {CNN_HDR}',
        f'\t{compile_cmd} {TB_SRC} -o {TB_OBJ}\n',

        f'{TARGET}.o: {CNN_SRC} {CNN_HDR}',
        f'\t{compile_cmd} {CNN_SRC} -o {TARGET}.o\n',

        'check_dirs:',
        '\t@mkdir -p ../bin\n',

        # check_dirs never produces a file of that name, so it is declared
        # phony alongside clean.
        '.PHONY: check_dirs clean',
        'clean:',
        f'\trm -f {TARGET}.o {TB_OBJ}',
        f'\trm -f ../bin/{TARGET}',
        f'\trm -f ../bin/output.bin\n'
    ]
    return '\n'.join(makefile_lines)

def generate_testbench_header_code(num_layers):
    """Return the content of the testbench header (conv_tbX.h).

    The header is an include-guarded file that defines the single constant
    RANDROOF.

    Args:
        num_layers: Not used; the emitted text is the same for every value.

    Returns:
        The header text, without a trailing newline.
    """
    header_lines = (
        "#ifndef CONV_TB_H",
        "#define CONV_TB_H",
        "",
        "// ** GLOBAL TESTBENCH CONSTANTS **",
        "const int RANDROOF = 256; // Max value for randomized input/weights/bias",
        "",
        "#endif",
    )
    return "\n".join(header_lines)

def generate_testbench_code(num_layers, params):
    """
    Builds the C++ testbench source (conv_tbX.cpp) as a single string.

    The emitted program embeds the weight-loading and test-data-loading
    helpers, then defines a main() that allocates the input/weight/bias
    buffers, loads and quantizes the .bin weight files, reads test.dat, runs
    cnn() once per sample on a zero-filled input buffer, and prints the
    classification accuracy. The emitted main() returns EXIT_SUCCESS only
    when the accuracy is above 70%.

    Args:
        num_layers: Number of convolution layers; controls the per-layer
            buffers, weight files and cnn() arguments.
        params: Parameter dictionary. Reads 'N_CLASSES', 'GLOBAL_DIM' and
            'C1' (used as the number of features per sample) and is passed
            on to generate_hls_data_loader_function.

    Returns:
        The testbench source code, with lines joined by newlines.
    """
    class_count = params['N_CLASSES']
    two_dim = params['GLOBAL_DIM'] == 2
    sample_width = params['C1']  # features stored per sample in the .dat file

    # Helper functions that get pasted in front of main().
    weight_loader_src = generate_c_style_loader_function()
    sample_loader_src = generate_hls_data_loader_function(params)

    layers = range(1, num_layers + 1)

    def kernel_elems(idx):
        # C expression for the element count of layer idx's weight tensor.
        extra = f" * R{idx}" if two_dim else ""
        return f"M{idx} * C{idx} * R{idx}{extra}"

    # C expressions for the FC input length and the per-channel input extent.
    fc_inputs = f"M{num_layers} * E{num_layers}" + (f" * E{num_layers}" if two_dim else "")
    input_plane = "H1 * H1" if two_dim else "H1"
    layer_args = ''.join(f', W{idx}, B{idx}' for idx in layers)

    # --- Includes, helpers and buffer allocation ---
    src = [
        '#include <cstdio>',
        '#include <cstdlib>',
        '#include <iostream>',
        '#include <cmath>',
        '#include <cstring>',  # memset
        f'#include "conv_tb{num_layers}.h"',
        f'#include "conv{num_layers}.h"',
        '',
        weight_loader_src,
        sample_loader_src,
        'int main(void) {',
        f'    const size_t INPUT_BUFFER_SIZE = C1 * {input_plane};',
        f'    const size_t FEATURES_PER_SAMPLE = {sample_width};',
        '    type_t *I1 = (type_t *) malloc(INPUT_BUFFER_SIZE * sizeof(type_t));',
    ]
    for idx in layers:
        src += [
            f'    type_t *W{idx} = (type_t *) malloc({kernel_elems(idx)} * sizeof(type_t));',
            f'    type_t *B{idx} = (type_t *) malloc(M{idx} * sizeof(type_t));',
        ]
    src += [
        f'    type_t *W_fc = (type_t *) malloc({fc_inputs} * N_CLASSES * sizeof(type_t));',
        '    type_t *B_fc = (type_t *) malloc(N_CLASSES * sizeof(type_t));',
        '    float *O_final = (float *) calloc(N_CLASSES, sizeof(float));',
        '',
        # --- Weight loading ---
        '    // --- Load and Quantize Trained Weights ---',
        '    const float SCALE_FACTOR = 256.0f; // Represents 8 fractional bits',
        '    int all_weights_loaded = 1;',
    ]
    for idx in layers:
        src += [
            f'    all_weights_loaded &= load_and_quantize_weights("W{idx}.bin", W{idx}, {kernel_elems(idx)}, SCALE_FACTOR);',
            f'    all_weights_loaded &= load_and_quantize_weights("B{idx}.bin", B{idx}, M{idx}, SCALE_FACTOR);',
        ]
    src += [
        f'    all_weights_loaded &= load_and_quantize_weights("W_fc.bin", W_fc, {fc_inputs} * N_CLASSES, SCALE_FACTOR);',
        '    all_weights_loaded &= load_and_quantize_weights("B_fc.bin", B_fc, N_CLASSES, SCALE_FACTOR);',
        '    if (!all_weights_loaded) { return EXIT_FAILURE; }',
        '',
        # --- Test data loading ---
        '    // --- Load Real Test Data ---',
        '    std::vector<float> test_features;',
        '    std::vector<int> test_labels;',
        '    int num_samples = load_test_data("test.dat", test_features, test_labels);',
        '    if (num_samples == 0) {',
        '        printf("FATAL: No samples loaded from test.dat. Exiting.\\n");',
        '        return EXIT_FAILURE;',
        '    }',
        '',
        # --- Per-sample inference loop ---
        '    // --- Run Inference on All Test Samples ---',
        '    int correct_predictions = 0;',
        '    printf("Running inference on %d test samples...\\n", num_samples);',
        '    for (int i = 0; i < num_samples; ++i) {',
        "        // 1. Get a pointer to the start of the current sample's features in the vector",
        '        float* current_sample_features = &test_features[i * FEATURES_PER_SAMPLE];',
        '',
        '        // 2. Zero-out the entire input buffer to handle padding',
        '        memset(I1, 0, INPUT_BUFFER_SIZE * sizeof(type_t));',
        '',
        '        // 3. Copy and quantize the available features into the start of the buffer',
        '        for (size_t j = 0; j < FEATURES_PER_SAMPLE; ++j) {',
        '            I1[j] = (type_t)(current_sample_features[j] * SCALE_FACTOR);',
        '        }',
        '',
        '        // 4. Perform CNN inference',
        f'        cnn(I1{layer_args}, W_fc, B_fc, O_final);',
        '',
        '        // 5. Find the predicted class',
        '        float max_val = -1.0f;',
        '        int predicted_class = -1;',
        f'        for (int k = 0; k < {class_count}; k++) {{',
        '            if (O_final[k] > max_val) {',
        '                max_val = O_final[k];',
        '                predicted_class = k;',
        '            }',
        '        }',
        '',
        '        // 6. Compare with the true label',
        '        int true_label = test_labels[i];',
        '        if (predicted_class == true_label) {',
        '            correct_predictions++;',
        '        }',
        '    }',
        '',
        # --- Accuracy report ---
        '    // --- Report Final Accuracy ---',
        '    float accuracy = (float)correct_predictions / num_samples * 100.0f;',
        '    printf("\\n--- HLS Model Verification Result ---\\n");',
        '    printf("Correctly Classified: %d / %d\\n", correct_predictions, num_samples);',
        '    printf("Accuracy: %.2f%%\\n", accuracy);',
        '    printf("-------------------------------------\\n");',
        '',
        # --- Cleanup ---
        '    // Free allocated memory',
        '    if(I1) free(I1);',
    ]
    for idx in layers:
        src += [
            f'    if(W{idx}) free(W{idx});',
            f'    if(B{idx}) free(B{idx});',
        ]
    src += [
        '    if(W_fc) free(W_fc);',
        '    if(B_fc) free(B_fc);',
        '    if(O_final) free(O_final);',
        '',
        '    // Return success code (0) if accuracy is over a threshold, else failure (1)',
        '    return (accuracy > 70.0f) ? EXIT_SUCCESS : EXIT_FAILURE;',
        '}',
    ]
    return '\n'.join(src)

def generate_conv_code(num_layers, params):
    """
    Generates the HLS C++ implementation file (convX.cpp).

    The emitted source contains, in order: relu(), softmax(), one 1D and one
    2D convolution function per layer, the fully connected layer, and the
    top-level cnn() wrapper that chains convolution + ReLU for every layer
    and finishes with the FC layer and softmax.

    Args:
        num_layers: Number of convolution layers to emit and chain.
        params: Parameter dictionary; only 'GLOBAL_DIM' is read. A value of
            2 selects the 2D buffers and convolution calls; any other value
            is treated as 1D, matching how the buffer sizes are derived.

    Returns:
        The C++ source code as one string.
    """
    GLOBAL_DIM = params['GLOBAL_DIM']

    code = f'#include "conv{num_layers}.h"\n#include <math.h>\n\n'
    code += "#include <stdio.h>\n\n"

    # ReLU function
    code += "type_t relu(type_t x) {\n"
    code += " \treturn (x > 0) ? x : 0;\n"
    code += "}\n\n"

    # Softmax function (subtracts the max score before exponentiating)
    code += "void softmax(type_t input[N_CLASSES], float output[N_CLASSES]) {\n"
    code += " \tfloat sum = 0.0f;\n"
    code += " \tfloat max_val = (float)input[0];\n"
    code += " \tfor (int k = 1; k < N_CLASSES; k++) {\n"
    code += " \t\tif ((float)input[k] > max_val) max_val = (float)input[k];\n"
    code += " \t}\n"
    code += " \tfor (int k = 0; k < N_CLASSES; k++) {\n"
    code += " \t\toutput[k] = expf((float)input[k] - max_val);\n"
    code += " \t\tsum += output[k];\n"
    code += " \t}\n"
    code += " \tfor (int k = 0; k < N_CLASSES; k++) {\n"
    code += " \t\toutput[k] /= sum;\n"
    code += " \t}\n"
    code += "}\n\n"

    # --- GENERATE SPECIALIZED 1D CONV FUNCTIONS ---
    # Both the 1D and 2D variants are always emitted because the generated
    # header declares prototypes for both.
    for i in range(1, num_layers + 1):
        code += f"// ** Specialized 1D Convolution Layer {i} **\n"
        code += f"void conv_1d_{i}(type_t I{i}[C{i} * H{i}], type_t W{i}[M{i} * C{i} * R{i}], type_t B{i}[M{i}], type_t O{i}[M{i} * E{i}]) {{\n"
        code += f" \tfor(int m = 0; m < M{i}; m++) {{\n"
        code += f" \t\tfor(int x = 0; x < E{i}; x++) {{\n"

        code += f" \t\t\tO{i}[x + m * E{i}] = B{i}[m];\n"
        code += f" \t\t\t#pragma HLS PIPELINE II=1\n"

        code += f" \t\t\tfor(int c = 0; c < C{i}; c++) {{\n"
        code += f" \t\t\t\tfor(int l = 0; l < R{i}; l++) {{\n"

        # Out-of-range taps read as zero (implicit zero padding).
        code += f" \t\t\t\t\tint h2 = x * S{i} - PAD{i} + l;\n"
        code += f" \t\t\t\t\ttype_t val = (h2 < 0 || h2 >= H{i}) ? 0 : I{i}[h2 + c * H{i}];\n"
        code += f" \t\t\t\t\tO{i}[x + m * E{i}] += val * W{i}[l + c * R{i} + m * C{i} * R{i}];\n"

        code += f" \t\t\t\t}}\n"
        code += f" \t\t\t}}\n"
        code += f" \t\t}}\n"
        code += f" \t}}\n"
        code += f"}}\n\n"


    # --- GENERATE SPECIALIZED 2D CONV FUNCTIONS ---
    for i in range(1, num_layers + 1):
        code += f"// ** Specialized 2D Convolution Layer {i} **\n"
        code += f"void conv_2d_{i}(type_t I{i}[C{i} * H{i} * H{i}], type_t W{i}[M{i} * C{i} * R{i} * R{i}], type_t B{i}[M{i}], type_t O{i}[M{i} * E{i} * E{i}]) {{\n"
        code += f" \tfor(int m = 0; m < M{i}; m++) {{\n"
        code += f" \t\tfor(int y = 0; y < E{i}; y++) {{\n"
        code += f" \t\t\tfor(int x = 0; x < E{i}; x++) {{\n"

        code += f" \t\t\t\tO{i}[x + (y + (m * E{i})) * E{i}] = B{i}[m];\n"
        code += f" \t\t\t\t#pragma HLS PIPELINE II=1\n"

        code += f" \t\t\t\tfor(int c = 0; c < C{i}; c++) {{\n"
        code += f" \t\t\t\t\tfor(int k = 0; k < R{i}; k++) {{\n"
        code += f" \t\t\t\t\t\tfor(int l = 0; l < R{i}; l++) {{\n"

        code += f" \t\t\t\t\t\t\tint h1 = y * S{i} - PAD{i} + k;\n"
        code += f" \t\t\t\t\t\t\tint h2 = x * S{i} - PAD{i} + l;\n"
        code += f" \t\t\t\t\t\t\ttype_t val = (h1 < 0 || h1 >= H{i} || h2 < 0 || h2 >= H{i}) ? 0 : I{i}[h2 + (h1 + (c * H{i})) * H{i}];\n"
        code += f" \t\t\t\t\t\t\tO{i}[x + (y + (m * E{i})) * E{i}] += val * W{i}[l + (k + (c + (m * C{i})) * R{i}) * R{i}];\n"

        code += f" \t\t\t\t\t\t}}\n"
        code += f" \t\t\t\t\t}}\n"
        code += f" \t\t\t\t}}\n"
        code += f" \t\t\t}}\n"
        code += f" \t\t}}\n"
        code += f" \t}}\n"
        code += f"}}\n\n"


    # Final Dedicated FC layer
    input_size_n = f"M{num_layers} * E{num_layers}"
    if GLOBAL_DIM == 2:
        input_size_n += f" * E{num_layers}"

    code += f"// ** Fully Connected Layer **\n"
    code += f"void fc_layer(type_t input[{input_size_n}], type_t W_fc[{input_size_n} * N_CLASSES], type_t B_fc[N_CLASSES], type_t output[N_CLASSES]) {{\n"
    code += f" \tfor (int k = 0; k < N_CLASSES; k++) {{\n"
    code += f" \t\toutput[k] = B_fc[k];\n"
    code += f" \t\tfor (int i = 0; i < {input_size_n}; i++) {{\n"
    code += f" \t\t\toutput[k] += input[i] * W_fc[i + k * {input_size_n}];\n"
    code += f" \t\t}}\n"
    code += f" \t}}\n"
    code += f"}}\n\n"

    # --- WRAPPER CNN FUNCTION (HLS Dataflow) ---
    code += "// ** Wrapper CNN Function (HLS Inference) **\n"
    code += "void cnn(type_t *input"
    for i in range(1, num_layers + 1):
        code += f", type_t *W{i}, type_t *B{i}"
    code += ", type_t *W_fc, type_t *B_fc, float *output) {\n"
    code += "#pragma HLS DATAFLOW\n"

    # Intermediate buffers (sizes adjusted based on GLOBAL_DIM)
    for i in range(1, num_layers + 1):
        E_size = f" * E{i}" if GLOBAL_DIM == 2 else ""
        code += f" \tstatic type_t O{i}[M{i} * E{i}{E_size}];\n"

    code += f" \tstatic type_t O_raw[N_CLASSES];\n\n"

    # The dimensionality is known at generation time, so the matching call is
    # emitted directly. A C preprocessor guard such as `#if GLOBAL_DIM == 2`
    # cannot be used: the generated header declares GLOBAL_DIM as a
    # `const size_t`, not a macro, so the preprocessor would evaluate the
    # identifier as 0 and compile neither convolution call.
    conv_prefix = "conv_2d" if GLOBAL_DIM == 2 else "conv_1d"

    for i in range(1, num_layers + 1):
        O_size = f"M{i} * E{i}"
        if GLOBAL_DIM == 2:
            O_size += f" * E{i}"

        # Layer 1 consumes the network input; later layers consume the
        # previous layer's output buffer.
        I_name = "input" if i == 1 else f"O{i-1}"

        code += f"\n \t// --- Layer {i} ({GLOBAL_DIM}D) ---\n"
        code += f" \t{conv_prefix}_{i}({I_name}, W{i}, B{i}, O{i});\n"

        # Apply ReLU
        code += f" \t// Apply ReLU\n"
        code += f" \tfor (int j = 0; j < {O_size}; j++) O{i}[j] = relu(O{i}[j]);\n"


    # Final FC and Softmax layers
    code += "\n \t// Final Layer: Fully Connected (FC) Classification Head\n"
    code += f" \tfc_layer(O{num_layers}, W_fc, B_fc, O_raw);\n"

    code += "\n \t// Softmax Layer: Convert raw scores to probability distribution\n"
    code += f" \tsoftmax(O_raw, output);\n"

    code += "}\n"
    return code

def generate_convh_code(num_layers, params):
    """Build the HLS header (convX.h) with constants and prototypes.

    The header typedefs `type_t` as int, defines the global, input and
    per-layer constants as `const size_t` values taken from `params`, and
    declares the prototypes of every convolution function, the FC layer,
    softmax and the top-level cnn().

    Args:
        num_layers: Number of convolution layers to describe.
        params: Parameter dictionary. Reads 'GLOBAL_DIM', 'N_CLASSES', 'C1',
            'H1' and, per layer i, 'M{i}', 'R{i}', 'S{i}', 'E{i}', 'F{i}',
            'PAD{i}' (plus 'C{i}' and 'H{i}' for i > 1).

    Returns:
        The header text, ending with `#endif` and no trailing newline.
    """
    dim = params['GLOBAL_DIM']
    layer_ids = range(1, num_layers + 1)

    chunks = [
        "#ifndef CONV_H\n#define CONV_H\n\n",
        "#include <cstddef>\n#include <stdio.h>\n\n",
        "typedef int type_t;\n\n",
        # Global parameters
        "// ** GLOBAL PARAMETERS **\n",
        f"const size_t N_CLASSES = {params['N_CLASSES']};\n",
        f"const size_t GLOBAL_DIM = {dim}; // All layers are {dim}D\n",
        "const size_t BIAS_ON = 1;\n\n",
        # Input parameters (H1 is the 1D length or one side of a square image)
        "// ** INPUT PARAMETERS **\n",
        f"const size_t C1 = {params['C1']}; // Input channels\n",
        f"const size_t H1 = {params['H1']}; // Input size (seq length or side length H) \n\n",
    ]

    # Per-layer constants. Layer 1's C/H are the input parameters emitted
    # above, so only deeper layers define their own C{i}/H{i}.
    first_layer_keys = ("M", "R", "S", "E", "F", "PAD")
    deeper_layer_keys = ("C", "H") + first_layer_keys
    for idx in layer_ids:
        chunks.append(f"// ** CONV LAYER {idx} **\n")
        for prefix in (first_layer_keys if idx == 1 else deeper_layer_keys):
            key = f"{prefix}{idx}"
            chunks.append(f"const size_t {key} = {params[key]};\n")
        chunks.append("\n")

    # Convolution prototypes: a 1D and a 2D variant for every layer.
    for idx in layer_ids:
        chunks.append(
            f"void conv_1d_{idx}(type_t I{idx}[C{idx} * H{idx}], type_t W{idx}[M{idx} * C{idx} * R{idx}], "
            f"type_t B{idx}[M{idx}], type_t O{idx}[M{idx} * E{idx}]);\n"
        )
        chunks.append(
            f"void conv_2d_{idx}(type_t I{idx}[C{idx} * H{idx} * H{idx}], type_t W{idx}[M{idx} * C{idx} * R{idx} * R{idx}], "
            f"type_t B{idx}[M{idx}], type_t O{idx}[M{idx} * E{idx} * E{idx}]);\n"
        )

    # FC and softmax prototypes; the FC input length depends on GLOBAL_DIM.
    fc_inputs = f"M{num_layers} * E{num_layers}" + (f" * E{num_layers}" if dim == 2 else "")
    chunks.append(
        f"void fc_layer(type_t input[{fc_inputs}], type_t W_fc[{fc_inputs} * N_CLASSES], "
        "type_t B_fc[N_CLASSES], type_t output[N_CLASSES]);\n"
    )
    chunks.append("void softmax(type_t input[N_CLASSES], float output[N_CLASSES]);\n")

    # Top-level cnn() prototype.
    layer_args = "".join(f", type_t *W{idx}, type_t *B{idx}" for idx in layer_ids)
    chunks.append(f"\nvoid cnn(type_t *input{layer_args}, type_t *W_fc, type_t *B_fc, float *output);\n")

    chunks.append("\n#endif")
    return "".join(chunks)



# --- 2. BACKPROPAGATION TRAINING CODE (X86 CPU - BACKPROP Folder) ---

def generate_vector_norm_function():
    """Return the C++ source of `calculate_l2_norm` as a string.

    The emitted function sums the squares of `size` values of type `dtype_t`
    and returns the square root of that sum.
    """
    return (
        '// --- DIAGNOSTIC HELPER FUNCTION ---\n'
        '#include <numeric> // For std::inner_product\n'
        '\n'
        '// Calculates the L2 Norm (magnitude) of a vector of floats.\n'
        'dtype_t calculate_l2_norm(const dtype_t* data, size_t size) {\n'
        '    dtype_t sum_sq = 0.0f;\n'
        '    for (size_t i = 0; i < size; ++i) {\n'
        '        sum_sq += data[i] * data[i];\n'
        '    }\n'
        '    return sqrt(sum_sq);\n'
        '}\n'
    )

def generate_normalization_function():
    """Return the C++ source of `normalize_features` as a string.

    The emitted function computes the mean and standard deviation over all
    values of the vector and rewrites each value as (x - mean) / std_dev. It
    returns early for an empty vector and skips the rewrite, printing a
    warning, when the standard deviation is not above 1e-6.
    """
    # Verbatim C++ text; the literal ends with the closing brace plus newline.
    return '''// --- DATA NORMALIZATION FUNCTION ---
#include <numeric> // For std::accumulate

// Normalizes a vector of features to have a mean of 0 and stddev of 1.
void normalize_features(std::vector<dtype_t>& features) {
    if (features.empty()) return;

    // 1. Calculate the mean
    double sum = std::accumulate(features.begin(), features.end(), 0.0);
    double mean = sum / features.size();

    // 2. Calculate the standard deviation
    double sq_sum = 0.0;
    for(const auto& val : features) {
        sq_sum += (val - mean) * (val - mean);
    }
    double std_dev = std::sqrt(sq_sum / features.size());

    // 3. Apply normalization: (x - mean) / std_dev
    // Avoid division by zero if std_dev is very small
    if (std_dev > 1e-6) {
        for(auto& val : features) {
            val = (val - mean) / std_dev;
        }
        std::cout << "Features normalized with Mean=" << mean << ", StdDev=" << std_dev << std::endl;
    } else {
        std::cout << "Warning: Feature standard deviation is near zero. Skipping normalization." << std::endl;
    }
}
'''

def generate_calibration_code(num_layers, params):
    """
    Generates a C++ statement block that finds the largest absolute value
    across all weight and bias arrays and prints it.

    The emitted block declares `max_abs_value`, defines a `find_max` lambda
    that scans one array, calls it for every layer's W{i}/B{i} and for
    W_fc/B_fc, and prints the result on a line tagged `CALIBRATION_INFO:`.

    Args:
        num_layers: Number of convolution layers whose arrays are scanned.
        params: Parameter dictionary; only 'GLOBAL_DIM' is read, to decide
            whether the weight size expression has a second R{i} factor.

    Returns:
        The C++ snippet as one newline-joined string, starting with a blank
        line.
    """
    code = [
        '\n    // --- CALIBRATION: Find max absolute weight value ---',
        '    float max_abs_value = 0.0f;',
        '    // Helper lambda to check an array',
        '    auto find_max = [&](const dtype_t* arr, size_t size) {',
        '        for (size_t i = 0; i < size; ++i) {',
        '            if (fabs(arr[i]) > max_abs_value) {',
        '                max_abs_value = fabs(arr[i]);',
        '            }',
        '        }',
        '    };'
    ]
    # Check all weight and bias arrays
    for i in range(1, num_layers + 1):
        W_size_str = f"M{i} * C{i} * R{i}" + (f" * R{i}" if params['GLOBAL_DIM'] == 2 else "")
        code.append(f'    find_max(W{i}, {W_size_str});')
        code.append(f'    find_max(B{i}, M{i});')

    code.append('    find_max(W_fc, FC_WEIGHT_SIZE);')
    code.append('    find_max(B_fc, N_CLASSES);')
    # Qualified with std:: like the other generated snippets, so the line
    # compiles whether or not the enclosing file has a using-directive.
    code.append('    std::cout << "CALIBRATION_INFO: Maximum absolute weight value is: " << max_abs_value << std::endl;')
    return '\n'.join(code)

def generate_save_weights_function():
    """Return the C++ source of `save_weights` as a string.

    The emitted function opens the given path in binary mode, writes
    `num_elements * sizeof(dtype_t)` raw bytes from `data`, and prints a
    confirmation; if the file cannot be opened it prints an error to
    std::cerr and returns without writing.
    """
    return (
        '// --- WEIGHT SAVING FUNCTION ---\n'
        '#include <fstream>\n'
        '\n'
        'void save_weights(const std::string& file_path, const dtype_t* data, size_t num_elements) {\n'
        '    std::ofstream out_file(file_path, std::ios::binary);\n'
        '    if (!out_file.is_open()) {\n'
        '        std::cerr << "ERROR: Could not open file for writing: " << file_path << std::endl;\n'
        '        return;\n'
        '    }\n'
        '    // Write the raw bytes of the array to the file\n'
        '    out_file.write(reinterpret_cast<const char*>(data), num_elements * sizeof(dtype_t));\n'
        '    out_file.close();\n'
        '    std::cout << "Saved " << num_elements << " elements to " << file_path << std::endl;\n'
        '}\n'
    )

def generate_backprop_makefile_code():
    """Return the Makefile text for building the x86 trainer.

    The Makefile defines TARGET and SRC, an `all` rule that compiles the
    sources with clang++ into ../bin/$(TARGET), and a `clean` rule that
    removes that binary. Recipe lines begin with a hard tab, which make
    requires.
    """
    variables = (
        'TARGET = cnn_trainer\n'
        'SRC = backprop_main.cpp backprop.cpp\n'
    )
    phony_decl = '\n.PHONY: all $(TARGET) clean\n'
    build_rule = (
        'all:\n'
        '\tclang++ -O3 -Wall -std=c++11 $(SRC) -o ../bin/$(TARGET) -lm\n'
        '\n'
    )
    clean_rule = (
        'clean:\n'
        '\trm -f ../bin/$(TARGET)\n'
    )
    return variables + phony_decl + build_rule + clean_rule

def generate_backprop_header_code(num_layers, params):
    """Build the training header (backprop.h) as a string.

    The header typedefs `dtype_t` as float, defines the network constants as
    `const size_t` values taken from `params`, derives the per-layer feature
    map sizes and FC sizes, and declares the prototypes of cnn_fwd, cnn_bwd,
    adam_update, cross_entropy_softmax_bwd and relu_bwd.

    Args:
        num_layers: Number of convolution layers.
        params: Parameter dictionary. Reads 'GLOBAL_DIM', 'N_CLASSES' and,
            per layer i, 'C{i}', 'H{i}', 'M{i}', 'E{i}', 'R{i}', 'S{i}',
            'PAD{i}'.

    Returns:
        The header text, ending with `#endif` and no trailing newline.
    """
    dim = params['GLOBAL_DIM']
    two_dim = dim == 2
    layer_ids = range(1, num_layers + 1)

    # Continuation lines of the prototypes are indented under the first
    # argument of "void cnn_fwd(" / "void cnn_bwd(".
    hang = " " * 13
    arg_break = ",\n" + hang

    def kernel_dims(idx):
        # C expression for the number of weights of layer idx.
        return f"M{idx} * C{idx} * R{idx}" + (f" * R{idx}" if two_dim else "")

    out = [
        "#ifndef BACKPROP_H\n#define BACKPROP_H\n\n",
        "#include <cstddef>\n#include <cmath>\n#include <iostream>\n\n",
        "typedef float dtype_t;\n\n",
        f"const size_t N_CLASSES = {params['N_CLASSES']};\n",
        f"const size_t GLOBAL_DIM = {dim};\n\n",
    ]

    # Per-layer constants, one blank line after each layer's group.
    for idx in layer_ids:
        for prefix in ("C", "H", "M", "E", "R", "S", "PAD"):
            key = f"{prefix}{idx}"
            out.append(f"const size_t {key} = {params[key]};\n")
        out.append("\n")

    # Derived sizes: feature maps per layer, then the FC layer.
    for idx in layer_ids:
        second_extent = f" * E{idx}" if two_dim else ""
        out.append(f"const size_t L{idx}_FMAP_SIZE = M{idx} * E{idx}{second_extent};\n")
    out.append(f"const size_t FC_INPUT_SIZE = L{num_layers}_FMAP_SIZE;\n")
    out.append("const size_t FC_WEIGHT_SIZE = FC_INPUT_SIZE * N_CLASSES;\n\n")

    # Forward pass prototype.
    out.append("void cnn_fwd(const dtype_t I[C1 * H1 * (GLOBAL_DIM == 2 ? H1 : 1)]")
    for idx in layer_ids:
        out.append(
            f"{arg_break}dtype_t W{idx}[{kernel_dims(idx)}], dtype_t B{idx}[M{idx}], "
            f"dtype_t O{idx}[L{idx}_FMAP_SIZE]"
        )
    out.append(f"{arg_break}dtype_t W_fc[FC_WEIGHT_SIZE], dtype_t B_fc[N_CLASSES], dtype_t O_RAW[N_CLASSES]);\n\n")

    # Backward pass prototype.
    out.append("void cnn_bwd(const dtype_t I[C1 * H1 * (GLOBAL_DIM == 2 ? H1 : 1)], const size_t LABEL[1],\n")
    out.append(hang + "const dtype_t O_RAW[N_CLASSES], dtype_t dO_raw[N_CLASSES]")
    for idx in layer_ids:
        dims = kernel_dims(idx)
        out.append(
            f"{arg_break}const dtype_t W{idx}[{dims}], const dtype_t O{idx}[L{idx}_FMAP_SIZE]"
            f", dtype_t dW{idx}[{dims}], dtype_t dB{idx}[M{idx}]"
        )
    out.append(f"{arg_break}const dtype_t W_fc[FC_WEIGHT_SIZE], dtype_t dW_fc[FC_WEIGHT_SIZE], dtype_t dB_fc[N_CLASSES]")
    # Intermediate gradient buffers are passed in by the caller.
    out.append(f"{arg_break}dtype_t dI_fc[FC_INPUT_SIZE], dtype_t dI1[C1*H1*(GLOBAL_DIM==2?H1:1)]")
    for idx in range(num_layers, 1, -1):
        out.append(f", dtype_t dI{idx}[L{idx - 1}_FMAP_SIZE]")
    out.append(");\n\n")

    # Optimizer and gradient helper prototypes.
    out.append("void adam_update(dtype_t* W, const dtype_t* dW, dtype_t* m, dtype_t* v, size_t size, dtype_t lr, dtype_t beta1, dtype_t beta2, dtype_t epsilon, size_t t);\n")
    out.append("void cross_entropy_softmax_bwd(size_t num_classes, const dtype_t* scores, const size_t* labels, dtype_t* dO_raw);\n")
    out.append("void relu_bwd(size_t size, const dtype_t* O_store, const dtype_t* dO, dtype_t* dI);\n")
    out.append("\n#endif")
    return "".join(out)

def generate_load_data_function(N_CLASSES, INPUT_W):
    """Return the C++ source lines of the ``load_data`` helper.

    The emitted C++ function reads a whole text file, scans it with a regex
    for records shaped like ``{{f1,f2,...,fN},label}`` and appends the
    features, the raw integer label and a one-hot label to the output vectors.

    Args:
        N_CLASSES: Not used while generating the text; the emitted C++
            function receives the class count as a runtime argument.
        INPUT_W: Number of feature values per record. It fixes how many float
            capture groups the emitted regex contains, so that the emitted
            loop over ``match[1..INPUT_W]`` and the label lookup at
            ``match[INPUT_W + 1]`` line up with the pattern.

    Returns:
        list[str]: The C++ source, one list item per line (some items carry a
        trailing newline to produce a blank line once joined).
    """
    # One capture group per feature. Previously the pattern always had three
    # groups, which only matched the emitted indexing when INPUT_W == 3.
    float_group = r'([-+]?\d+\.?\d*e?[-+]?\d*)'
    feature_groups = ','.join([float_group] * int(INPUT_W))
    # Python raw strings keep the backslashes intact; R"(...)" does the same
    # on the C++ side.
    pattern_line = r'    std::regex pattern(R"(\{\{' + feature_groups + r'\},(\d+)\})");'

    load_data_code = [
        '// --- DATA LOADING FUNCTION ---',
        '#include <fstream>',
        '#include <sstream>',
        '#include <vector>',
        '#include <regex>',
        '#include <cmath>',
        '#include <algorithm>\n',

        'long load_data(const std::string& file_path, std::vector<dtype_t>& X_out, std::vector<dtype_t>& Y_out_one_hot, std::vector<size_t>& Y_out_raw, int N_CLASSES, int INPUT_W) {',
        '    std::ifstream file(file_path);',
        '    if (!file.is_open()) {',
        '        // Return 0 on error, main() will handle the fatal error gracefully.',
        '        std::cerr << "ERROR: Could not open file: " << file_path << std::endl; return 0;',
        '    }',
        '    std::stringstream buffer;',
        '    buffer << file.rdbuf();',
        '    std::string content = buffer.str(); // Buffer contents for regex search',

        '    // Regex to match the pattern: {{x, y, z}, label}',
        pattern_line,

        '    auto matches_begin = std::sregex_iterator(content.begin(), content.end(), pattern);',
        '    auto matches_end = std::sregex_iterator();',
        '    long num_samples = 0;',

        '    for (std::sregex_iterator i = matches_begin; i != matches_end; ++i) {',
        '        std::smatch match = *i;',
        '        // 1. Add Features (X). Start from match[1] (first float capture group).',
        '        for(int j = 1; j <= INPUT_W; ++j) {',
        '            X_out.push_back(std::stof(match[j].str()));',
        '        }',
        '        // 2. Add Label (Y_out_raw and Y_out_one_hot)',
        '        // The label (int) is captured by match[INPUT_W + 1].',
        '        int label = std::stoi(match[INPUT_W + 1].str());',
        '        Y_out_raw.push_back(label); // Store raw label',
        '        std::vector<dtype_t> one_hot(N_CLASSES, 0.0f);',
        '        if (label >= 0 && label < N_CLASSES) {',
        '            one_hot[label] = 1.0f;',
        '        } else {',
        '            std::cerr << "WARNING: Invalid Label " << label << " in file " << file_path << ". Skipping sample." << std::endl;',
        '            for(int j = 0; j < INPUT_W; ++j) { X_out.pop_back(); }',
        '            Y_out_raw.pop_back(); // Rollback raw label too',
        '            continue;',
        '        }',
        '        Y_out_one_hot.insert(Y_out_one_hot.end(), one_hot.begin(), one_hot.end());',
        '        num_samples++;',
        '    }',
        '    std::cout << "Successfully loaded " << num_samples << " samples from " << file_path << std::endl;',
        '    return num_samples;',
        '}\n'
    ]
    return load_data_code

def generate_backprop_cpp_code(num_layers, params):
    """Build the text of backprop.cpp.

    The result consists of a fixed set of C++ helpers (softmax/cross-entropy
    gradient, ReLU, fully connected layer, 1D convolution) followed by two
    generated wrappers, ``cnn_fwd`` and ``cnn_bwd``, that chain ``num_layers``
    convolution + ReLU stages and a final fully connected layer.

    Args:
        num_layers: Number of convolution layers to chain in the wrappers.
        params: Parameter dictionary; only ``params['GLOBAL_DIM']`` is read
            here, to decide whether weight array extents get a second
            kernel dimension.

    Returns:
        str: The C++ source, lines joined with newlines.
    """
    layer_ids = range(1, num_layers + 1)

    def kernel_extent(idx):
        # Extent of a layer's weight array; 2D mode appends a second R factor.
        second_dim = f" * R{idx}" if params['GLOBAL_DIM'] == 2 else ""
        return f"M{idx} * C{idx} * R{idx}{second_dim}"

    def layer_input(idx, first_input):
        # Layer 1 reads the network input (length H1); every later layer
        # reads the previous layer's output (length E of that layer).
        if idx == 1:
            return first_input, "H1"
        return f"O{idx-1}", f"E{idx-1}"

    # --- Fixed helpers and layer kernels, emitted verbatim ---
    fixed_source = (
        '// --- UTILITY FUNCTIONS ---',
        'void cross_entropy_softmax_bwd(size_t num_classes, const dtype_t* scores, const size_t* labels, dtype_t* dO_raw) {',
        '    dtype_t max_score = scores[0];',
        '    for (size_t k = 1; k < num_classes; ++k) { if (scores[k] > max_score) max_score = scores[k]; }',
        '    dtype_t exp_sum = 0.0f;',
        '    for (size_t k = 0; k < num_classes; ++k) { exp_sum += expf(scores[k] - max_score); }',
        '    for (size_t k = 0; k < num_classes; ++k) {',
        '        dO_raw[k] = expf(scores[k] - max_score) / exp_sum;',
        '        if (k == labels[0]) { dO_raw[k] -= 1.0f; }',
        '    }',
        '}',
        'void relu_fwd(size_t size, dtype_t* x) { for (size_t i = 0; i < size; ++i) { x[i] = (x[i] > 0.0f) ? x[i] : 0.0f; }}',
        'void relu_bwd(size_t size, const dtype_t* O_store, const dtype_t* dO, dtype_t* dI) { for (size_t i = 0; i < size; ++i) { dI[i] = (O_store[i] > 0.0f) ? dO[i] : 0.0f; }}',
        '\n// --- LAYER IMPLEMENTATIONS ---',
        'void fc_layer_fwd(const dtype_t input[FC_INPUT_SIZE], const dtype_t W_fc[FC_WEIGHT_SIZE], const dtype_t B_fc[N_CLASSES], dtype_t output[N_CLASSES]) {',
        '    for (size_t k = 0; k < N_CLASSES; k++) {',
        '        output[k] = B_fc[k];',
        '        for (size_t i = 0; i < FC_INPUT_SIZE; i++) { output[k] += input[i] * W_fc[i + k * FC_INPUT_SIZE]; }',
        '    }',
        '}',
        'void fc_layer_bwd(const dtype_t input[FC_INPUT_SIZE], const dtype_t W_fc[FC_WEIGHT_SIZE], const dtype_t dO_raw[N_CLASSES], dtype_t dI[FC_INPUT_SIZE], dtype_t dW_fc[FC_WEIGHT_SIZE], dtype_t dB_fc[N_CLASSES]) {',
        '    for (size_t k = 0; k < N_CLASSES; ++k) { dB_fc[k] += dO_raw[k]; }',
        '    for (size_t k = 0; k < N_CLASSES; ++k) { for (size_t i = 0; i < FC_INPUT_SIZE; ++i) { dW_fc[i + k * FC_INPUT_SIZE] += input[i] * dO_raw[k]; }}',
        '    for (size_t i = 0; i < FC_INPUT_SIZE; ++i) { dI[i] = 0.0f; }',
        '    for (size_t k = 0; k < N_CLASSES; ++k) { for (size_t i = 0; i < FC_INPUT_SIZE; ++i) { dI[i] += W_fc[i + k * FC_INPUT_SIZE] * dO_raw[k]; }}',
        '}',
        'void conv_1d_fwd(size_t H, size_t C, size_t R, size_t M, size_t S, size_t PAD, const dtype_t* I, const dtype_t* W, const dtype_t* B, dtype_t* O) {',
        '    size_t E = (H - R + 2 * PAD) / S + 1;',
        '    for(size_t m = 0; m < M; m++) { for(size_t x = 0; x < E; x++) {',
        '        O[x + m * E] = B[m];',
        '        for(size_t c = 0; c < C; c++) { for(size_t l = 0; l < R; l++) {',
        '            long h2 = (long)x * S - PAD + l;',
        '            if (h2 >= 0 && h2 < H) { O[x + m * E] += I[h2 + c * H] * W[l + c * R + m * C * R]; }',
        '        }}',
        '    }}',
        '}',
        'void conv_1d_bwd(size_t H, size_t C, size_t R, size_t M, size_t S, size_t PAD, const dtype_t* I, const dtype_t* dO, const dtype_t* W, dtype_t* dI, dtype_t* dW, dtype_t* dB) {',
        '    size_t E = (H - R + 2 * PAD) / S + 1;',
        '    for (size_t m = 0; m < M; ++m) { for (size_t c = 0; c < C; ++c) { for (size_t l = 0; l < R; ++l) {',
        '        dtype_t grad_w = 0.0f;',
        '        for (size_t x = 0; x < E; ++x) { long h2 = (long)x * S - PAD + l; if (h2 >= 0 && h2 < H) { grad_w += I[h2 + c * H] * dO[x + m * E]; } }',
        '        dW[l + c * R + m * C * R] += grad_w;',
        '    }}}',
        '    for (size_t m = 0; m < M; ++m) { dtype_t grad_b = 0.0f; for (size_t x = 0; x < E; ++x) { grad_b += dO[x + m * E]; } dB[m] += grad_b; }',
        '    for (size_t i = 0; i < C * H; ++i) { dI[i] = 0.0f; }',
        '    for (size_t m = 0; m < M; ++m) { for (size_t c = 0; c < C; ++c) { for (size_t l = 0; l < R; ++l) {',
        '        for (size_t x = 0; x < E; ++x) { long h2 = (long)x * S - PAD + l; if (h2 >= 0 && h2 < H) { dI[h2 + c * H] += W[l + c * R + m * C * R] * dO[x + m * E]; } }',
        '    }}}',
        '}',
    )
    lines = ['#include "backprop.h"\n', *fixed_source]

    # --- Forward wrapper: conv + ReLU per layer, then the FC layer ---
    fwd_header = (
        "void cnn_fwd(const dtype_t I[C1 * H1 * (GLOBAL_DIM == 2 ? H1 : 1)]"
        + "".join(
            f",\n             dtype_t W{k}[{kernel_extent(k)}], dtype_t B{k}[M{k}], dtype_t O{k}[L{k}_FMAP_SIZE]"
            for k in layer_ids
        )
        + ",\n             dtype_t W_fc[FC_WEIGHT_SIZE], dtype_t B_fc[N_CLASSES], dtype_t O_RAW[N_CLASSES]) {"
    )
    lines += ['\n\n// ** WRAPPER FWD (For x86 Training) **', fwd_header]
    for k in layer_ids:
        source_buf, source_len = layer_input(k, "I")
        lines += [
            f'    conv_1d_fwd({source_len}, C{k}, R{k}, M{k}, S{k}, PAD{k}, {source_buf}, W{k}, B{k}, O{k});',
            f'    relu_fwd(L{k}_FMAP_SIZE, O{k});',
        ]
    lines += [f'    fc_layer_fwd(O{num_layers}, W_fc, B_fc, O_RAW);', '}']

    # --- Backward wrapper: loss gradient, FC layer, then layers in reverse ---
    bwd_header = (
        "void cnn_bwd(const dtype_t I[C1 * H1 * (GLOBAL_DIM == 2 ? H1 : 1)], const size_t LABEL[1],\n             const dtype_t O_RAW[N_CLASSES], dtype_t dO_raw[N_CLASSES]"
        + "".join(
            f",\n             const dtype_t W{k}[{kernel_extent(k)}], const dtype_t O{k}[L{k}_FMAP_SIZE], dtype_t dW{k}[{kernel_extent(k)}], dtype_t dB{k}[M{k}]"
            for k in layer_ids
        )
        + ",\n             const dtype_t W_fc[FC_WEIGHT_SIZE], dtype_t dW_fc[FC_WEIGHT_SIZE], dtype_t dB_fc[N_CLASSES]"
        + ",\n             dtype_t dI_fc[FC_INPUT_SIZE], dtype_t dI1[C1*H1*(GLOBAL_DIM==2?H1:1)]"
        + "".join(f", dtype_t dI{k}[L{k-1}_FMAP_SIZE]" for k in range(num_layers, 1, -1))
        + ") {"
    )
    lines += [
        '\n\n// ** WRAPPER BWD (Optimized for Performance) **',
        bwd_header,
        '    cross_entropy_softmax_bwd(N_CLASSES, O_RAW, LABEL, dO_raw);',
        f'    fc_layer_bwd(O{num_layers}, W_fc, dO_raw, dI_fc, dW_fc, dB_fc);',
    ]
    for k in reversed(layer_ids):
        source_buf, source_len = layer_input(k, "(const dtype_t*)I")
        # The last layer receives the FC layer's input gradient; every other
        # layer receives the input gradient written by the layer after it.
        upstream_grad = "dI_fc" if k == num_layers else f"dI{k+1}"
        input_grad = f"dI{k}"
        lines += [
            f'    dtype_t d_relu_out_{k}[L{k}_FMAP_SIZE];',
            f'    relu_bwd(L{k}_FMAP_SIZE, O{k}, {upstream_grad}, d_relu_out_{k});',
            f'    conv_1d_bwd({source_len}, C{k}, R{k}, M{k}, S{k}, PAD{k}, {source_buf}, d_relu_out_{k}, W{k}, {input_grad}, dW{k}, dB{k});',
        ]
    lines.append('}')
    return '\n'.join(lines)

def generate_adam_update_function():
    """Return the C++ source text of the ``adam_update`` routine.

    The emitted function updates the moment buffers ``m`` and ``v`` in place,
    applies bias correction using the 1-based timestep ``t`` and subtracts the
    scaled step from ``W``. The returned string ends with a newline.
    """
    banner = (
        '// --- ADAM OPTIMIZER FUNCTION ---\n'
        '// Implements the Adam weight update rule.\n'
    )
    signature = (
        'void adam_update(dtype_t* W, const dtype_t* dW, dtype_t* m, dtype_t* v, size_t size,\n'
        '                 dtype_t lr, dtype_t beta1, dtype_t beta2, dtype_t epsilon, size_t t) {\n'
    )
    # Powers of the decay rates are computed once, outside the element loop.
    bias_powers = (
        '    // t is the 1-based timestep for bias correction\n'
        '    dtype_t beta1_t = powf(beta1, t);\n'
        '    dtype_t beta2_t = powf(beta2, t);\n'
        '\n'
    )
    element_loop = (
        '    for (size_t i = 0; i < size; ++i) {\n'
        '        // Update biased first and second moment estimates\n'
        '        m[i] = beta1 * m[i] + (1.0f - beta1) * dW[i];\n'
        '        v[i] = beta2 * v[i] + (1.0f - beta2) * (dW[i] * dW[i]);\n'
        '\n'
        '        // Compute bias-corrected moment estimates\n'
        '        dtype_t m_hat = m[i] / (1.0f - beta1_t);\n'
        '        dtype_t v_hat = v[i] / (1.0f - beta2_t);\n'
        '\n'
        '        // Update weights\n'
        '        W[i] -= lr * m_hat / (sqrtf(v_hat) + epsilon);\n'
        '    }\n'
    )
    closing = '}\n'
    return banner + signature + bias_powers + element_loop + closing

def generate_backprop_main_code(num_layers, params):
    """Generate the C++ training driver (``main``) and its helper functions.

    The emitted program loads ``train.dat`` / ``test.dat``, normalizes the
    features, declares weights, gradients, Adam state and reusable
    intermediate gradient buffers as fixed-size arrays, initializes the
    weights, runs mini-batch training with Adam, evaluates on the test set
    after every epoch, appends the calibration code and finally saves all
    weights and biases to ``.bin`` files.

    Args:
        num_layers: Number of convolution layers; controls how many
            ``W{i}``/``B{i}``/``O{i}`` arrays and call arguments are emitted.
        params: Parameter dictionary. This function reads ``N_CLASSES``,
            ``C1``, ``GLOBAL_DIM`` and, per layer, ``C{i}`` and ``R{i}``.

    Returns:
        str: The C++ source, lines joined with newlines.
    """
    N_CLASSES = params['N_CLASSES']
    # INPUT_W is the number of floats per sample copied into padded_input.
    INPUT_W = params['C1']
    padded_input_size = f"C1 * H1" + (" * H1" if params['GLOBAL_DIM'] == 2 else "")

    # Helper function sources that are placed ahead of main().
    load_data_code = generate_load_data_function(N_CLASSES, INPUT_W)
    save_weights_code = generate_save_weights_function()
    vector_norm_code = generate_vector_norm_function()
    normalization_code = generate_normalization_function()
    adam_update_code = generate_adam_update_function() # Generate Adam code

    code = [
        '#include "backprop.h"', '#include <stdlib.h>', '#include <time.h>', '#include <stdio.h>',
        '#include <string.h>', '#include <iostream>', '#include <vector>', '#include <algorithm>',
        '#include <random>', '#include <numeric>', 'using namespace std;\n',
    ]
    # load_data_code is a list of lines; the other helpers are single strings.
    code.extend(load_data_code)
    code.append(save_weights_code)
    code.append(vector_norm_code)
    code.append(normalization_code)
    code.append(adam_update_code) # Add Adam function to the C++ source

    # main(): hyperparameters, data loading and raw pointers into the loaded vectors.
    code.extend([
        'int main() {',
        '    srand(time(0));',
        '    const dtype_t LEARNING_RATE = 0.001f;',
        '    const dtype_t ADAM_BETA1 = 0.9f;',
        '    const dtype_t ADAM_BETA2 = 0.999f;',
        '    const dtype_t ADAM_EPSILON = 1e-8f;',
        '    const size_t NUM_EPOCHS = 3;',
        '    const size_t BATCH_SIZE = 32;',
        '    const std::string TRAIN_DATA_FILE = "train.dat";',
        '    const std::string TEST_DATA_FILE = "test.dat";',
        '    std::vector<dtype_t> X_train_vec, Y_train_vec_one_hot, X_test_vec, Y_test_vec_one_hot;',
        '    std::vector<size_t> Y_train_vec_raw, Y_test_vec_raw;',
        f'    long N_TRAIN_SAMPLES = load_data(TRAIN_DATA_FILE, X_train_vec, Y_train_vec_one_hot, Y_train_vec_raw, {N_CLASSES}, {INPUT_W});',
        f'    long N_TEST_SAMPLES = load_data(TEST_DATA_FILE, X_test_vec, Y_test_vec_one_hot, Y_test_vec_raw, {N_CLASSES}, {INPUT_W});',
        '    if (N_TRAIN_SAMPLES == 0) { cerr << "FATAL ERROR" << endl; return 1; }',
        '    normalize_features(X_train_vec);',
        '    normalize_features(X_test_vec);',
        '    dtype_t* in_data_train = X_train_vec.data();',
        '    size_t* out_data_train_raw = Y_train_vec_raw.data();',
        '    dtype_t* in_data_test = X_test_vec.data();',
        '    size_t* out_data_test_raw = Y_test_vec_raw.data();',
    ])

    # Declare weights, biases, gradients, and Adam state arrays
    for i in range(1, num_layers + 1):
        W_size = f" * R{i}" if params['GLOBAL_DIM'] == 2 else ""
        size_str = f"M{i} * C{i} * R{i}{W_size}"
        code.append(f'    dtype_t W{i}[{size_str}]; dtype_t B{i}[M{i}];')
        code.append(f'    dtype_t dW{i}[{size_str}]; dtype_t dB{i}[M{i}];')
        code.append(f'    dtype_t m_W{i}[{size_str}]; dtype_t v_W{i}[{size_str}];') # Adam state
        code.append(f'    dtype_t m_B{i}[M{i}]; dtype_t v_B{i}[M{i}];') # Adam state
        code.append(f'    dtype_t O{i}[L{i}_FMAP_SIZE];')

    code.append('    dtype_t W_fc[FC_WEIGHT_SIZE]; dtype_t B_fc[N_CLASSES];')
    code.append('    dtype_t dW_fc[FC_WEIGHT_SIZE]; dtype_t dB_fc[N_CLASSES];')
    code.append('    dtype_t m_W_fc[FC_WEIGHT_SIZE]; dtype_t v_W_fc[FC_WEIGHT_SIZE];') # Adam state
    code.append('    dtype_t m_B_fc[N_CLASSES]; dtype_t v_B_fc[N_CLASSES];') # Adam state
    code.append('    dtype_t O_RAW[N_CLASSES]; dtype_t dO_raw[N_CLASSES];\n')
    # Buffer that each sample is copied into (zero-filled first) before the forward pass.
    code.append(f'    dtype_t padded_input[{padded_input_size}];\n')

    code.append('    // Reusable intermediate gradient buffers for the backward pass')
    code.append(f'    dtype_t dI_fc[FC_INPUT_SIZE];')
    code.append(f'    dtype_t dI1[C1*H1*(GLOBAL_DIM==2?H1:1)];')
    for i in range(num_layers, 1, -1):
        code.append(f'    dtype_t dI{i}[L{i-1}_FMAP_SIZE];')

    # Weight initialization: uniform in [-bound, bound] with bound = sqrt(6 / fan_in);
    # biases start at zero. fan_in is evaluated here in Python and baked into the C++ text.
    code.append('\n    cout << "Initializing weights using He initialization..." << endl;')
    for i in range(1, num_layers + 1):
        fan_in = params[f'C{i}'] * params[f'R{i}'] * (params[f'R{i}'] if params['GLOBAL_DIM'] == 2 else 1)
        w_bound = f"sqrt(6.0f / {fan_in})"
        W_size_str = f"M{i}*C{i}*R{i}" + (f" * R{i}" if params['GLOBAL_DIM'] == 2 else "")
        code.append(f'    float w_bound_{i} = {w_bound};')
        code.append(f'    for (size_t j = 0; j < {W_size_str}; ++j) {{ W{i}[j] = ((float)rand()/(float)RAND_MAX * 2.0f - 1.0f) * w_bound_{i}; }}')
        code.append(f'    for (size_t j = 0; j < M{i}; ++j) B{i}[j] = 0.0f;')
    # The FC bound uses the C++ constant FC_INPUT_SIZE rather than a Python-side number.
    fc_fan_in = f"FC_INPUT_SIZE"
    fc_w_bound = f"sqrt(6.0f / {fc_fan_in})"
    code.append(f'    float w_bound_fc = {fc_w_bound};')
    code.append('    for (size_t i = 0; i < FC_WEIGHT_SIZE; ++i) { W_fc[i] = ((float)rand()/(float)RAND_MAX * 2.0f - 1.0f) * w_bound_fc; }')
    code.append('    for (size_t i = 0; i < N_CLASSES; ++i) B_fc[i] = 0.0f;\n')

    # Initialize Adam state arrays to zero
    code.append('    cout << "Initializing Adam optimizer state..." << endl;')
    for i in range(1, num_layers + 1):
        code.append(f'    memset(m_W{i}, 0, sizeof(m_W{i})); memset(v_W{i}, 0, sizeof(v_W{i}));')
        code.append(f'    memset(m_B{i}, 0, sizeof(m_B{i})); memset(v_B{i}, 0, sizeof(v_B{i}));')
    code.append('    memset(m_W_fc, 0, sizeof(m_W_fc)); memset(v_W_fc, 0, sizeof(v_W_fc));')
    code.append('    memset(m_B_fc, 0, sizeof(m_B_fc)); memset(v_B_fc, 0, sizeof(v_B_fc));\n')

    # Training loop: shuffle sample indices each epoch, then iterate over mini-batches.
    code.append('    cout << "Starting training with " << N_TRAIN_SAMPLES << " samples for " << NUM_EPOCHS << " epochs..." << endl;')
    code.append('    std::vector<size_t> indices(N_TRAIN_SAMPLES); std::iota(indices.begin(), indices.end(), 0);')
    code.append('    std::mt19937 g(rand());')
    code.append('    size_t t = 0; // Timestep for Adam\n')
    code.append('    for (size_t epoch = 0; epoch < NUM_EPOCHS; ++epoch) {')
    code.append('        std::shuffle(indices.begin(), indices.end(), g);')
    code.append('        long total_batches = (N_TRAIN_SAMPLES + BATCH_SIZE - 1) / BATCH_SIZE; int batch_count = 0;')
    code.append('        for (long i = 0; i < N_TRAIN_SAMPLES; i += BATCH_SIZE) {')
    code.append('            size_t current_batch_size = (i + BATCH_SIZE <= N_TRAIN_SAMPLES) ? BATCH_SIZE : (N_TRAIN_SAMPLES - i);')
    # Gradient buffers are zeroed once per batch, before the per-sample loop.
    for layer_i in range(1, num_layers + 1):
        W_size = f" * R{layer_i}" if params['GLOBAL_DIM'] == 2 else ""
        code.append(f'            memset(dW{layer_i}, 0, sizeof(dtype_t)*(M{layer_i}*C{layer_i}*R{layer_i}{W_size}));')
        code.append(f'            memset(dB{layer_i}, 0, sizeof(dB{layer_i}));')
    code.append('            memset(dW_fc, 0, sizeof(dW_fc)); memset(dB_fc, 0, sizeof(dB_fc));\n')
    code.append('            for (size_t sample_idx = 0; sample_idx < current_batch_size; ++sample_idx) {')
    code.append('                size_t shuffled_index = indices[i + sample_idx];')
    code.append(f'                dtype_t* current_sample_features = in_data_train + shuffled_index * {INPUT_W};')
    code.append(f'                size_t* current_sample_LABEL = out_data_train_raw + shuffled_index;')
    code.append(f'                memset(padded_input, 0, sizeof(padded_input));')
    code.append(f'                memcpy(padded_input, current_sample_features, {INPUT_W} * sizeof(dtype_t));')

    # Forward call: argument order must match the cnn_fwd signature.
    fwd_call = '                cnn_fwd(padded_input'
    for layer_i in range(1, num_layers + 1): fwd_call += f', W{layer_i}, B{layer_i}, O{layer_i}'
    fwd_call += f', W_fc, B_fc, O_RAW);'
    code.append(fwd_call)

    # Backward call: argument order must match the cnn_bwd signature, including
    # the trailing intermediate gradient buffers (dI_fc, dI1, dI{n}..dI2).
    bwd_call = '                cnn_bwd(padded_input, current_sample_LABEL, O_RAW, dO_raw'
    for layer_i in range(1, num_layers + 1): bwd_call += f', W{layer_i}, O{layer_i}, dW{layer_i}, dB{layer_i}'
    bwd_call += f', W_fc, dW_fc, dB_fc, dI_fc, dI1'
    for i in range(num_layers, 1, -1): bwd_call += f', dI{i}'
    bwd_call += ');'
    code.append(bwd_call)
    code.append('            }')

    # Adam update step (one timestep per mini-batch, FC layer first, then conv layers last-to-first)
    code.append('            t++; // Increment Adam timestep')
    code.append('            adam_update(W_fc, dW_fc, m_W_fc, v_W_fc, FC_WEIGHT_SIZE, LEARNING_RATE, ADAM_BETA1, ADAM_BETA2, ADAM_EPSILON, t);')
    code.append('            adam_update(B_fc, dB_fc, m_B_fc, v_B_fc, N_CLASSES, LEARNING_RATE, ADAM_BETA1, ADAM_BETA2, ADAM_EPSILON, t);')
    for i in range(num_layers, 0, -1):
        W_size = f" * R{i}" if params['GLOBAL_DIM'] == 2 else ""
        code.append(f'            adam_update(W{i}, dW{i}, m_W{i}, v_W{i}, (M{i}*C{i}*R{i}{W_size}), LEARNING_RATE, ADAM_BETA1, ADAM_BETA2, ADAM_EPSILON, t);')
        code.append(f'            adam_update(B{i}, dB{i}, m_B{i}, v_B{i}, M{i}, LEARNING_RATE, ADAM_BETA1, ADAM_BETA2, ADAM_EPSILON, t);')

    # Progress line every 50 batches; '\\r' emits a literal \r escape in the C++ string.
    code.append('            batch_count++; if (batch_count % 50 == 0) { printf("      Epoch %zu, Batch %d / %ld...\\r", epoch + 1, batch_count, total_batches); fflush(stdout); }')
    code.append('        }')

    # Evaluation block: emitted inside the epoch loop, so it runs after every epoch.
    # Prediction is the argmax over O_RAW; the reported gradient norm is taken from
    # dW_fc as left by the last batch.
    code.append('\n        if (N_TEST_SAMPLES > 0) {')
    code.append('            int correct_predictions = 0;')
    code.append('            for (long j = 0; j < N_TEST_SAMPLES; ++j) {')
    code.append(f'                dtype_t* current_test_features = in_data_test + j * {INPUT_W};')
    code.append('                size_t true_label = out_data_test_raw[j];')
    code.append(f'                memset(padded_input, 0, sizeof(padded_input));')
    code.append(f'                memcpy(padded_input, current_test_features, {INPUT_W} * sizeof(dtype_t));')
    fwd_eval_call = '                cnn_fwd(padded_input'
    for i in range(1, num_layers + 1): fwd_eval_call += f', W{i}, B{i}, O{i}'
    fwd_eval_call += f', W_fc, B_fc, O_RAW);'
    code.append(fwd_eval_call)
    code.append('                int predicted_class = 0; dtype_t max_score = O_RAW[0];')
    code.append('                for (size_t k = 1; k < N_CLASSES; ++k) { if (O_RAW[k] > max_score) { max_score = O_RAW[k]; predicted_class = k; } }')
    code.append('                if (predicted_class == true_label) { correct_predictions++; }')
    code.append('            }')
    code.append('            float test_accuracy = (float)correct_predictions / N_TEST_SAMPLES * 100.0f;')
    code.append('            dtype_t grad_norm = calculate_l2_norm(dW_fc, FC_WEIGHT_SIZE);')
    code.append('            dtype_t weight_norm = calculate_l2_norm(W_fc, FC_WEIGHT_SIZE);')
    code.append('            printf("\\nEpoch %zu | Test Accuracy: %.2f%% | Grad Norm (dW_fc): %e | Weight Norm (W_fc): %.4f\\n", epoch + 1, test_accuracy, grad_norm, weight_norm);')
    code.append('        }')

    # Close the epoch loop.
    code.append('    }')
    code.append('\n    cout << "\\nTraining finished." << endl;')
    # Calibration code is inserted after training and before the weights are saved.
    calibration_code = generate_calibration_code(num_layers, params)
    code.append(calibration_code)
    code.append('    cout << "Saving final model weights..." << endl;')
    for i in range(1, num_layers + 1):
        W_size_str = f"M{i} * C{i} * R{i}" + (f" * R{i}" if params['GLOBAL_DIM'] == 2 else "")
        code.append(f'    save_weights("W{i}.bin", W{i}, {W_size_str});')
        code.append(f'    save_weights("B{i}.bin", B{i}, M{i});')
    code.append('    save_weights("W_fc.bin", W_fc, FC_WEIGHT_SIZE);')
    code.append('    save_weights("B_fc.bin", B_fc, N_CLASSES);')
    code.append('\n    return 0;\n}')
    return '\n'.join(code)



# --- 3. Parameter Calculation and GUI Setup ---

def calculate_output_size(H_in, R, S, P):
    """Return the layer output size E = ceil(H_in / S).

    The output size depends only on the input size and the stride; the kernel
    size ``R`` and padding ``P`` are accepted but do not enter the result.
    """
    stride_ratio = H_in / S
    return math.ceil(stride_ratio)

def generate_parameter_widgets(num_layers):
    """Build the parameter form for a network with ``num_layers`` conv layers.

    Returns:
        tuple: ``(params_vbox, layer_widgets, c1_widget, h1_widget,
        n_classes_widget, global_dim_widget)`` where ``params_vbox`` is the
        container holding the whole form and ``layer_widgets`` maps each
        1-based layer index to its ``(M, R, S)`` input widgets.
    """

    def int_field(description, value, **extra):
        # Integer input with the label width left to the description text.
        # NOTE(review): min=/max= are forwarded to IntText exactly as before;
        # confirm against the ipywidgets docs whether IntText enforces them
        # (BoundedIntText is the variant documented with min/max).
        return widgets.IntText(value=value, description=description, min=1,
                               style={'description_width': 'initial'}, **extra)

    def heading(markup):
        return widgets.HTML(value=markup)

    # Global settings
    n_classes_widget = int_field('N_CLASSES:', 6)
    global_dim_widget = widgets.Dropdown(
        options=[(1, 1), (2, 2)],
        value=1,
        description='GLOBAL DIMENSION:',
        style={'description_width': 'initial'},
    )
    # Bias is shown for information only; the field is disabled.
    bias_on_widget = int_field('Bias ON (1=Yes):', 1, max=1, disabled=True)

    # Layer 1 input feature map
    c1_widget = int_field('Input C1 (Channels):', 3)
    h1_widget = int_field('Input H1 (Size: Seq Length):', 100)

    form_rows = [
        heading("<h3>Global Configuration:</h3>"),
        global_dim_widget,
        widgets.HBox([n_classes_widget, bias_on_widget]),
        heading("<h3>Input Feature Map (Layer 1 Input):</h3>"),
        c1_widget,
        h1_widget,
        heading(f"<h3>Convolution Layers (1 to {num_layers}):</h3>"),
    ]

    layer_widgets = {}
    for layer in range(1, num_layers + 1):
        # Defaults: first layer M=16 with stride 2, later layers M=32 with
        # stride 1; the kernel size defaults to 3 for every layer.
        is_first = layer == 1
        out_channels = 16 if is_first else 32
        kernel = 3
        stride = 2 if is_first else 1

        triple = (
            int_field(f'L{layer} M (Ch Out):', out_channels),
            int_field(f'L{layer} R (Kernel):', kernel),
            int_field(f'L{layer} S (Stride):', stride),
        )
        layer_widgets[layer] = triple

        form_rows.append(widgets.VBox([
            heading(f"<h4>Conv Layer {layer}</h4>"),
            widgets.HBox(list(triple)),
        ]))

    params_vbox = widgets.VBox(form_rows)
    return params_vbox, layer_widgets, c1_widget, h1_widget, n_classes_widget, global_dim_widget

def collect_and_calculate_params(num_layers, layer_widgets, c1_widget, h1_widget, n_classes_widget, global_dim_widget):
    """Collect the widget values and derive the per-layer geometry.

    All user-supplied values are validated before anything is derived from
    them, so invalid input (for example a stride of 0) is reported as a
    ``ValueError`` with a descriptive message instead of failing later inside
    the size arithmetic.

    Args:
        num_layers: Number of convolution layers to process.
        layer_widgets: Mapping from 1-based layer index to a tuple of
            ``(M, R, S)`` widgets; only their ``.value`` is read.
        c1_widget: Widget holding the input channel count (C1).
        h1_widget: Widget holding the input size (H1).
        n_classes_widget: Widget holding the number of classes.
        global_dim_widget: Widget holding the global dimension (1 or 2).

    Returns:
        dict: ``N_CLASSES``, ``GLOBAL_DIM``, ``C1``, ``H1`` and, for every
        layer ``i``, ``M{i}``, ``R{i}``, ``S{i}``, ``C{i}``, ``H{i}``,
        ``E{i}`` (output size), ``F{i}`` (padded size) and ``PAD{i}``.

    Raises:
        ValueError: If N_CLASSES, C1, H1 or any layer's M, R or S is below 1,
            if a kernel is larger than its input (for inputs larger than 1),
            or if a 2D kernel size is even.
    """

    params = {}

    # Global Parameters
    params['N_CLASSES'] = n_classes_widget.value
    params['GLOBAL_DIM'] = global_dim_widget.value
    GLOBAL_DIM = params['GLOBAL_DIM']

    # Initial Input (Layer 1 Input)
    params['C1'] = c1_widget.value
    params['H1'] = h1_widget.value

    # Layer-independent checks run once, up front.
    if params['N_CLASSES'] < 1:
        raise ValueError("N_CLASSES must be 1 or greater.")
    if params['C1'] < 1:
        raise ValueError("Input C1 (Channels) must be 1 or greater.")
    if params['H1'] < 1:
        raise ValueError("Input H1 (Size) must be 1 or greater.")

    H_prev = params['H1']
    M_prev = params['C1']

    for i in range(1, num_layers + 1):
        # M_i, R_i, S_i are retrieved from the widgets
        m_i, r_i, s_i = layer_widgets[i]
        M_curr, R_curr, S_curr = m_i.value, r_i.value, s_i.value
        H_curr = H_prev

        # --- Validation (before any derived value is computed) ---
        for symbol, value in (('M', M_curr), ('R', R_curr), ('S', S_curr)):
            if value < 1:
                raise ValueError(f"Layer {i}: {symbol}{i} must be 1 or greater.")
        if R_curr > H_curr and H_curr > 1:
            raise ValueError(f"Layer {i}: Kernel size R{i} must be less than or equal to input size H{i}.")
        # 2D Specific Validation
        if GLOBAL_DIM == 2 and R_curr % 2 == 0:
            raise ValueError(f"Layer {i}: 2D kernel R{i} must be odd to ensure symmetric padding in HLS style (R=3, 5, 7...).")

        params[f'M{i}'] = M_curr
        params[f'R{i}'] = R_curr
        params[f'S{i}'] = S_curr

        # --- Input C_i and H_i for Layer i come from the previous layer ---
        params[f'C{i}'] = M_prev
        params[f'H{i}'] = H_curr

        # Calculated Output Size (E_i)
        E_curr = calculate_output_size(H_curr, R_curr, S_curr, 0)
        params[f'E{i}'] = E_curr

        # Padded size that yields E_i outputs for this stride and kernel
        F_i = E_curr * S_curr + R_curr - 1
        params[f'F{i}'] = F_i

        # Padding is calculated assuming symmetric padding to enforce output size E_i
        params[f'PAD{i}'] = (F_i - H_curr) // 2

        # --- Setup for next layer (i+1) ---
        H_prev = E_curr
        M_prev = M_curr

    return params


# --- 4. GUI and Execution ---

# Top-level controls: layer-count slider, generate button, output pane and the
# container that hosts the parameter form.
layer_slider = widgets.IntSlider(
    value=1,
    min=1,
    max=5,
    step=1,
    description='CNN Layers:',
    continuous_update=False,
    style={'description_width': 'initial'},
)
generate_button = widgets.Button(
    description='Generate Code (HLS INF + x86 BKP)',
    button_style='success',
)
output_area = widgets.Output()
code_controls_vbox = widgets.VBox()

# Build the parameter form for the slider's starting value and show it.
(
    param_vbox,
    layer_widgets_map,
    c1_input,
    h1_input,
    n_classes_input,
    global_dim_input,
) = generate_parameter_widgets(layer_slider.value)
code_controls_vbox.children = (param_vbox,)

def update_widgets(change):
    """Rebuild the parameter form for the slider's current layer count.

    Replaces the module-level widget handles with the freshly created ones,
    swaps the form shown in ``code_controls_vbox`` and posts a short status
    message in ``output_area``. The ``change`` argument is not inspected.
    """
    global layer_widgets_map, c1_input, h1_input, n_classes_input, global_dim_input

    layer_count = layer_slider.value
    (
        refreshed_vbox,
        layer_widgets_map,
        c1_input,
        h1_input,
        n_classes_input,
        global_dim_input,
    ) = generate_parameter_widgets(layer_count)

    code_controls_vbox.children = (refreshed_vbox,)

    status = Markdown(f"Parameters updated for **{layer_count} layers**. Click 'Generate Code'.")
    with output_area:
        clear_output()
        display(status)

# Rebuild the parameter form whenever the slider's 'value' trait changes.
layer_slider.observe(update_widgets, names='value')

def _write_text_file(path, content):
    """Write *content* to *path* as UTF-8 text, replacing any existing file."""
    with open(path, 'w', encoding='utf-8') as f:
        f.write(content)


def _build_param_summary(num_layers, params):
    """Return the Markdown table rows summarising the calculated CNN parameters.

    Args:
        num_layers: Number of convolution layers described in *params*.
        params: Mapping holding ``GLOBAL_DIM``, ``C1``, ``H1``, ``N_CLASSES``
            and, for every layer ``j`` in ``1..num_layers``, the keys
            ``C{j}``, ``H{j}``, ``M{j}``, ``R{j}``, ``S{j}`` and ``E{j}``.

    Returns:
        A list of strings: the header row, the separator row, one row for the
        input, one row per convolution layer and a final row for the FC layer.
    """
    dim = params['GLOBAL_DIM']

    def size(n):
        # Spatial sizes are shown as NxN in 2D mode and as N otherwise.
        return f"{n}x{n}" if dim == 2 else f"{n}"

    rows = [
        "| Layer | Type | D (Dim) | C (In) | H (In) | M (Out) | R (K) | S (Str) | E (Out) | Activation |",
        "|---|---|---|---|---|---|---|---|---|---|",
        f"| Input | Image | **{dim}** | {params['C1']} | {size(params['H1'])} | N/A | N/A | N/A | N/A | No |",
    ]

    for j in range(1, num_layers + 1):
        rows.append(
            f"| {j} | CONV | **{dim}** | {params[f'C{j}']} | {size(params[f'H{j}'])} | "
            f"{params[f'M{j}']} | {params[f'R{j}']} | {params[f'S{j}']} | "
            f"{size(params[f'E{j}'])} | **ReLU** |"
        )

    # Flattened FC input: channels times output length (squared in 2D mode).
    fc_inputs = params[f'M{num_layers}'] * params[f'E{num_layers}']
    if dim == 2:
        fc_inputs *= params[f'E{num_layers}']

    rows.append(f"| N+1 | **FC** | N/A | {fc_inputs} (flat) | N/A | **{params['N_CLASSES']}** | N/A | N/A | N/A | **Softmax** |")
    return rows


def on_button_click(b):
    """Generate all project files for the current GUI settings and report the result.

    Reads the layer count and parameter widgets, creates the ``src``, ``bin``
    and ``backprop`` folders under ``./repo/generatedCNN_<N>Layers_<D>D``,
    writes the HLS inference sources, the Vitis HLS Tcl script and the
    backpropagation sources, then renders a parameter table and execution
    instructions into ``output_area``.

    Errors are rendered into ``output_area`` instead of propagating:
    ``ValueError`` is shown as a plain error message; any other exception is
    shown together with its exception type.

    Args:
        b: The argument supplied by the button's click event (unused).
    """
    with output_area:
        clear_output()

        num_layers = layer_slider.value

        try:
            # 1. Collect and calculate parameters
            params = collect_and_calculate_params(num_layers, layer_widgets_map, c1_input, h1_input, n_classes_input, global_dim_input)
            dim = params['GLOBAL_DIM']

            # 2. Set up the output folders
            root_folder = f'./repo/generatedCNN_{num_layers}Layers_{dim}D'
            source_folder = os.path.join(root_folder, 'src')
            bin_folder = os.path.join(root_folder, 'bin')
            backprop_folder = os.path.join(root_folder, 'backprop')

            for folder in (source_folder, bin_folder, backprop_folder):
                os.makedirs(folder, exist_ok=True)

            # 3. Generate the HLS inference files.
            # Each generator runs before its file is opened, so a failing
            # generator does not leave a truncated file behind.
            _write_text_file(os.path.join(source_folder, f'Makefile{num_layers}'), generate_makefile_code(num_layers))
            _write_text_file(os.path.join(source_folder, f'conv_tb{num_layers}.cpp'), generate_testbench_code(num_layers, params))
            _write_text_file(os.path.join(source_folder, f'conv_tb{num_layers}.h'), generate_testbench_header_code(num_layers))
            _write_text_file(os.path.join(source_folder, f'conv{num_layers}.h'), generate_convh_code(num_layers, params))
            _write_text_file(os.path.join(source_folder, f'conv{num_layers}.cpp'), generate_conv_code(num_layers, params))

            # 4. Generate the Vitis HLS Tcl script
            print("Generating Vitis HLS Tcl script...")
            _write_text_file(os.path.join(source_folder, 'run_hls.tcl'), generate_vitis_tcl_script(num_layers, params))

            # 5. Generate the backpropagation (x86 training) files
            _write_text_file(os.path.join(backprop_folder, 'Makefile'), generate_backprop_makefile_code())
            _write_text_file(os.path.join(backprop_folder, 'backprop.h'), generate_backprop_header_code(num_layers, params))
            _write_text_file(os.path.join(backprop_folder, 'backprop_main.cpp'), generate_backprop_main_code(num_layers, params))
            _write_text_file(os.path.join(backprop_folder, 'backprop.cpp'), generate_backprop_cpp_code(num_layers, params))

            display(Markdown(f'## ✅ Success! All files for **{num_layers} {dim}D layers** generated.'))
            display(Markdown(f'HLS Inference code in `{source_folder}`. **x86 Training code in `{backprop_folder}`.**'))

            # 6. Show a summary of the calculated parameters
            param_summary = _build_param_summary(num_layers, params)
            display(Markdown('### Calculated CNN Parameters\n' + '\n'.join(param_summary)))

            # 7. Show how to build and run the generated code
            display(Markdown('### Execution Instructions (SRC/BIN/BACKPROP Structure)'))
            display(Markdown(
                f"1. **HLS Inference Target (SRC Folder):**\n"
                f"   * Navigate to the Source: `cd {source_folder}`\n"
                f"   * Compile: `make -f Makefile{num_layers} conv{num_layers}`\n"
                f"   * Run: Go to the parent directory: `cd ../` and execute: `./bin/conv{num_layers}`\n\n"
                f"2. **x86 Training Target (BACKPROP Folder):**\n"
                f"   * Navigate to Backprop: `cd {backprop_folder}`\n"
                f"   * Compile (Use the correct target!): `make all`\n"
                f"   * Run: Go to the parent directory: `cd ../` and execute: `./bin/cnn_trainer`\n"
            ))

        except ValueError as e:
            # Shown as a plain message without the exception type.
            display(Markdown(f'## ❌ Error: {e}'))

        except Exception as e:
            # Top-level UI boundary: include the exception type, because the
            # message alone can be meaningless (e.g. KeyError shows only the key).
            display(Markdown(f'## ❌ An unexpected error occurred: {type(e).__name__}: {e}'))


# Run the generator whenever the button is clicked.
generate_button.on_click(on_button_click)

# FINAL DISPLAY: render every top-level component exactly once, in layout order.
display(
    layer_slider,
    code_controls_vbox,
    generate_button,
    output_area,
)



IntSlider(value=1, continuous_update=False, description='CNN Layers:', max=5, min=1, style=SliderStyle(descrip…

VBox(children=(VBox(children=(HTML(value='<h3>Global Configuration:</h3>'), Dropdown(description='GLOBAL DIMEN…

Button(button_style='success', description='Generate Code (HLS INF + x86 BKP)', style=ButtonStyle())

Output()