<a href="https://colab.research.google.com/github/vbonato/cnnTestBench/blob/main/cnnTestBench.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [23]:
import ipywidgets as widgets
from IPython.display import display, clear_output, Markdown
import os
import math

# --- 1. Core Code Generation Functions (HLS INFERENCE - SRC Folder) ---

def generate_vitis_tcl_script(num_layers, params):
    """Build the Tcl script that drives a complete Vitis HLS run for the CNN.

    The script resets/opens the project, registers the generated source,
    header and testbench, copies every weight/bias/test-data file into the
    csim build directory, then runs csim, csynth, cosim and the IP export.

    Args:
        num_layers: Number of convolution layers; selects the conv{N}.* file
            names and the W{i}.bin / B{i}.bin files to copy.
        params: Network description; only 'NUM_FC_LAYERS' is read here.

    Returns:
        The Tcl script as a single newline-joined string.
    """
    project = f"cnn_{num_layers}_layer_vitis_project"
    solution = "solution1"
    part = "xczu7ev-ffvc1156-2-e"
    csim_build_dir = f"{project}/{solution}/csim/build"

    # Files copied into the csim build directory: the test samples first,
    # then a weight/bias pair per conv layer, then a pair per FC layer.
    runtime_files = ["test_windowed.dat"]
    runtime_files += [
        f"{kind}{i}.bin"
        for i in range(1, num_layers + 1)
        for kind in ("W", "B")
    ]
    runtime_files += [
        f"{kind}_fc{i}.bin"
        for i in range(1, params['NUM_FC_LAYERS'] + 1)
        for kind in ("W", "B")
    ]

    project_setup = [
        '# --- Vitis HLS Automation Script ---',
        'set original_dir [pwd]',
        f'open_project -reset "{project}"',
        'set_top cnn',
        f'add_files "$original_dir/conv{num_layers}.cpp"',
        f'add_files "$original_dir/conv{num_layers}.h"',
        f'add_files -tb "$original_dir/conv_tb{num_layers}.cpp"',
        f'open_solution -flow_target vitis "{solution}"',
        # Doubled braces escape the literal Tcl braces around the part name.
        f'set_part {{{part}}}',
        'create_clock -period 10ns -name default',
        f'set sim_dir "{csim_build_dir}"',
        'file mkdir $sim_dir',
    ]
    copy_commands = [
        f'file copy -force "$original_dir/{name}" $sim_dir'
        for name in runtime_files
    ]
    flow_commands = [
        'csim_design',
        'csynth_design',
        'cosim_design -trace_level all',
        'export_design -format ip_catalog',
        'exit',
    ]
    return '\n'.join(project_setup + copy_commands + flow_commands)


def generate_c_style_loader_function():
    """Return the C source of ``load_and_convert_weights``.

    The emitted function reads ``num_elements`` 32-bit floats from a binary
    file into a temporary buffer and assigns them one by one into a
    ``type_t`` destination buffer. It returns 1 on success and 0 on any
    failure (file open, allocation, or short read).

    Returns:
        The C function text, terminated by a newline.
    """
    # Raw string: the backslash-n sequences below must reach the C compiler
    # unchanged, as escape sequences inside the printf format strings.
    return r'''// --- WEIGHT LOADING & CONVERSION FUNCTION ---
// Loads 32-bit floats from a binary file and converts them to type_t
int load_and_convert_weights(const char* file_path, type_t* dest_buffer, size_t num_elements) {
    FILE* fp = fopen(file_path, "rb");
    if (!fp) {
        printf("ERROR: Could not open file for reading: %s\n", file_path);
        return 0; // Failure
    }

    // Create a temporary buffer to hold the float data from the file
    float* temp_buffer = (float*) malloc(num_elements * sizeof(float));
    if (!temp_buffer) {
        printf("ERROR: Could not allocate memory for temporary float buffer.\n");
        fclose(fp);
        return 0; // Failure
    }

    // Read the entire block of floats
    size_t elements_read = fread(temp_buffer, sizeof(float), num_elements, fp);
    fclose(fp);

    if (elements_read != num_elements) {
        printf("ERROR: Expected to read %zu elements from %s, but got %zu.\n", num_elements, file_path, elements_read);
        free(temp_buffer);
        return 0; // Failure
    }

    // Convert floats to type_t by direct assignment. The ap_fixed library handles this.
    for (size_t i = 0; i < num_elements; ++i) {
        dest_buffer[i] = temp_buffer[i];
    }

    free(temp_buffer);
    printf("Successfully loaded and converted %zu elements from %s\n", num_elements, file_path);
    return 1; // Success
}
'''


def generate_makefile_code(num_layers):
    """Return the Makefile that builds the conv{N} design with its testbench.

    The Makefile compiles conv{N}.cpp and conv_tb{N}.cpp with clang++, links
    them into ../bin/conv{N} (creating ../bin first via ``check_dirs``), and
    provides a ``clean`` target.

    Args:
        num_layers: Layer count used as the numeric suffix of all file names.

    Returns:
        The Makefile text; recipe lines start with a hard tab.
    """
    exe = f'conv{num_layers}'
    exe_obj = f'{exe}.o'
    tb = f'conv_tb{num_layers}'
    tb_obj = f'{tb}.o'

    # Each rule is a complete "target: deps / recipe" chunk ending in a
    # newline; joining the chunks with '\n' leaves one blank line between them.
    link_rule = (
        f'{exe}: check_dirs {exe_obj} {tb_obj}\n'
        f'\tclang++ {exe_obj} {tb_obj} -o ../bin/{exe} -lm\n'
    )
    testbench_rule = (
        f'{tb_obj}: {tb}.cpp {tb}.h {exe}.h\n'
        f'\tclang++ -c {tb}.cpp -o {tb_obj}\n'
    )
    design_rule = (
        f'{exe_obj}: {exe}.cpp {exe}.h\n'
        f'\tclang++ -c {exe}.cpp -o {exe_obj}\n'
    )
    dirs_rule = (
        'check_dirs:\n'
        '\t@mkdir -p ../bin\n'
    )
    clean_rule = (
        '.PHONY: clean\n'
        'clean:\n'
        f'\trm -f {exe_obj} {tb_obj}\n'
        f'\trm -f ../bin/{exe}\n'
        '\trm -f ../bin/output.bin\n'
    )
    return '\n'.join((link_rule, testbench_rule, design_rule, dirs_rule, clean_rule))

def generate_testbench_header_code(num_layers):
    """Return the testbench header (conv_tbX.h): an include guard plus RANDROOF.

    Args:
        num_layers: Accepted for signature parity with the other generators;
            the header text does not depend on it.

    Returns:
        The header text (no trailing newline).
    """
    header_lines = (
        "#ifndef CONV_TB_H",
        "#define CONV_TB_H",
        "",
        "// ** GLOBAL TESTBENCH CONSTANTS **",
        "const int RANDROOF = 256; // Max value for randomized input/weights/bias",
        "",
        "#endif",
    )
    return "\n".join(header_lines)


def generate_testbench_code(num_layers, params):
    """
    Generates the complete C++ testbench code (conv_tb.cpp) that verifies the CNN on real data.

    The emitted program loads the trained weights from W*.bin / B*.bin,
    parses labelled samples from test_windowed.dat, normalizes them with
    fixed training statistics, runs ``cnn()`` on every sample and prints the
    classification accuracy.

    Args:
        num_layers: Number of convolution layers; also the numeric suffix of
            the included headers (conv{N}.h, conv_tb{N}.h).
        params: Network description. Reads 'N_CLASSES', 'GLOBAL_DIM', 'C1',
            'H1', 'NUM_FC_LAYERS' and 'FC{i}_IN' / 'FC{i}_OUT' per FC layer.

    Returns:
        The testbench source as a single string.

    Raises:
        KeyError: If a required entry is missing from ``params``.
    """
    N_CLASSES = params['N_CLASSES']
    GLOBAL_DIM = params['GLOBAL_DIM']
    test_data_file = "test_windowed.dat"

    # Number of feature values expected on each sample line.
    # NOTE(review): for GLOBAL_DIM == 2 this (C1*H1) is smaller than the
    # C1*H1*H1 input buffer; the generated loop leaves the remainder zeroed.
    # Confirm that this is the intended 2D input layout.
    INPUT_W = params['C1'] * params['H1']

    # C-style helper that loads float32 weight files into type_t buffers.
    loader_code = generate_c_style_loader_function()

    # Sample-file parser. These are plain (non-f) strings, so every brace
    # below is emitted literally; the doubled brace in find() is the real
    # search pattern. The parser locates the first doubled open brace, the
    # first "}," and the last "}" on each line. The find() results are
    # compared against npos BEFORE any offset is added: npos + 2 wraps to a
    # small value and would make a not-found check impossible.
    data_loader_code = '\n'.join([
        '// --- DATA LOADING FUNCTION for Testbench ---',
        '#include <fstream>', '#include <sstream>', '#include <string>', '#include <vector>',
        'int load_test_data(const std::string& file_path, std::vector<float>& features, std::vector<int>& labels, int num_features_expected) {',
        '    std::ifstream file(file_path);',
        '    if (!file.is_open()) { std::cerr << "ERROR: Could not open test data file: " << file_path << std::endl; return 0; }',
        '    std::string line;',
        '    int num_samples = 0;',
        '    while (std::getline(file, line)) {',
        '        size_t features_open = line.find("{{");',
        '        size_t features_end = line.find("},");',
        '        size_t label_end = line.rfind("}");',
        '        // Check the raw find() results first: adding an offset to npos would hide a miss',
        '        if (features_open == std::string::npos || features_end == std::string::npos || label_end == std::string::npos) continue;',
        '        size_t features_start = features_open + 2;',
        '        size_t label_start = features_end + 2;',
        '        // Skip lines whose delimiters are out of order (would underflow the substr lengths)',
        '        if (features_end < features_start || label_end < label_start) continue;',
        '        std::string features_str = line.substr(features_start, features_end - features_start);',
        '        std::string label_str = line.substr(label_start, label_end - label_start);',
        '        if (label_str.empty()) continue;',
        '        std::stringstream ss(features_str);',
        '        std::string feature_val_str;',
        '        int features_read = 0;',
        '        size_t initial_feature_size = features.size();',
        '        while (std::getline(ss, feature_val_str, \',\')) {',
        '            features.push_back(std::stof(feature_val_str));',
        '            features_read++;',
        '        }',
        '        if (features_read != num_features_expected) {',
        '            features.resize(initial_feature_size); // Roll back if feature count mismatches',
        '            continue;',
        '        }',
        '        labels.push_back(std::stoi(label_str));',
        '        num_samples++;',
        '    }',
        '    return num_samples;',
        '}\n'
    ])

    # Per-layer buffer sizes, computed once and reused for allocation,
    # loading and freeing. Conv sizes are symbolic (header constants); FC
    # sizes are numeric.
    conv_layers = [
        (i, f"M{i} * C{i} * R{i}" + (f" * R{i}" if GLOBAL_DIM == 2 else ""))
        for i in range(1, num_layers + 1)
    ]
    fc_layers = [
        (i, params[f'FC{i}_IN'] * params[f'FC{i}_OUT'], params[f'FC{i}_OUT'])
        for i in range(1, params['NUM_FC_LAYERS'] + 1)
    ]

    # --- Start building the main C++ code ---
    code_lines = [
        '#include <cstdio>', '#include <cstdlib>', '#include <iostream>',
        '#include <cmath>', '#include <cstring>', '#include <vector>',
        f'#include "conv_tb{num_layers}.h"', f'#include "conv{num_layers}.h"\n',
        loader_code,
        data_loader_code,
        'int main(void) {'
    ]

    # --- 1. Memory Allocation ---
    H1_size = "H1 * H1" if GLOBAL_DIM == 2 else "H1"
    code_lines.append(f' \tconst size_t INPUT_BUFFER_SIZE = C1 * {H1_size};')
    code_lines.append(f' \tconst size_t FEATURES_PER_SAMPLE = {INPUT_W};')
    code_lines.append(' \ttype_t *I1 = (type_t *) malloc(INPUT_BUFFER_SIZE * sizeof(type_t));')

    for i, w_size in conv_layers:
        code_lines.append(f' \ttype_t *W{i} = (type_t *) malloc({w_size} * sizeof(type_t));')
        code_lines.append(f' \ttype_t *B{i} = (type_t *) malloc(M{i} * sizeof(type_t));')

    for i, fc_w_size, fc_b_size in fc_layers:
        code_lines.append(f' \ttype_t *W_fc{i} = (type_t *) malloc({fc_w_size} * sizeof(type_t));')
        code_lines.append(f' \ttype_t *B_fc{i} = (type_t *) malloc({fc_b_size} * sizeof(type_t));')

    code_lines.append(' \tfloat *O_final = (float *) calloc(N_CLASSES, sizeof(float));\n')

    # --- 2. Load Trained Weights ---
    code_lines.append(' \t// --- Load and Convert Trained Weights ---')
    code_lines.append(' \tint all_weights_loaded = 1;')

    for i, w_size in conv_layers:
        code_lines.append(f' \tall_weights_loaded &= load_and_convert_weights("W{i}.bin", W{i}, {w_size});')
        code_lines.append(f' \tall_weights_loaded &= load_and_convert_weights("B{i}.bin", B{i}, M{i});')

    for i, fc_w_size, fc_b_size in fc_layers:
        code_lines.append(f' \tall_weights_loaded &= load_and_convert_weights("W_fc{i}.bin", W_fc{i}, {fc_w_size});')
        code_lines.append(f' \tall_weights_loaded &= load_and_convert_weights("B_fc{i}.bin", B_fc{i}, {fc_b_size});')

    code_lines.append(' \tif (!all_weights_loaded) { return EXIT_FAILURE; }\n')

    # --- 3. Load Real Test Data ---
    code_lines.extend([
        ' \t// --- Load Real Test Data ---',
        ' \tstd::vector<float> test_features;',
        ' \tstd::vector<int> test_labels;',
        f' \tint num_samples = load_test_data("{test_data_file}", test_features, test_labels, {INPUT_W});',
        ' \tif (num_samples == 0) {',
        # The message names the file that was actually opened.
        f' \t \tprintf("FATAL: No samples loaded from {test_data_file}. Exiting.\\n");',
        ' \t \treturn EXIT_FAILURE;',
        ' \t}\n'
    ])

    # Plain strings again: single braces, so the emitted loop has one block.
    code_lines.extend([
        ' \t// --- Normalize the Test Data Using Training Stats ---',
        ' \tconst double train_mean = 3.04044;',
        ' \tconst double train_std_dev = 7.01583;',
        ' \tprintf("Applying normalization (mean=%.3f, std_dev=%.3f) to %zu features...\\n", train_mean, train_std_dev, test_features.size());',
        ' \tfor (auto& val : test_features) {',
        ' \t \tval = (val - train_mean) / train_std_dev;',
        ' \t}',
        ' \t'      # Adds a blank line for readability
    ])

    # --- 4. Run Inference Loop ---
    code_lines.extend([
        ' \t// --- Run Inference on All Test Samples ---',
        ' \tint correct_predictions = 0;',
        ' \tprintf("Running inference on %d test samples...\\n", num_samples);',
        ' \tfor (int i = 0; i < num_samples; ++i) {',
        ' \t \t// 1. Get a pointer to the start of the current sample\'s features',
        ' \t \tfloat* current_sample_features = &test_features[i * FEATURES_PER_SAMPLE];',
        ' \t \t// 2. Zero-out the entire input buffer to handle padding',
        ' \t \tmemset(I1, 0, INPUT_BUFFER_SIZE * sizeof(type_t));',
        ' \t \t// 3. Copy and convert the available features into the start of the buffer',
        ' \t \tfor (size_t j = 0; j < FEATURES_PER_SAMPLE; ++j) {',
        ' \t \t \tI1[j] = current_sample_features[j];',
        ' \t \t}',
        ''
    ])

    # Argument order must match the cnn() prototype: input, conv W/B pairs,
    # FC W/B pairs, output.
    cnn_call = ' \t \t// 4. Perform CNN inference\n \t \tcnn(I1'
    for i, _ in conv_layers:
        cnn_call += f', W{i}, B{i}'
    for i, _, _ in fc_layers:
        cnn_call += f', W_fc{i}, B_fc{i}'
    cnn_call += ', O_final);'
    code_lines.append(cnn_call)

    code_lines.extend([
        '\n \t \t// 5. Find the predicted class',
        ' \t \tfloat max_val = O_final[0];',
        ' \t \tint predicted_class = 0;',
        f' \t \tfor (int k = 1; k < {N_CLASSES}; k++) {{',
        ' \t \t \tif (O_final[k] > max_val) {',
        ' \t \t \t \tmax_val = O_final[k];',
        ' \t \t \t \tpredicted_class = k;',
        ' \t \t \t}',
        ' \t \t}',
        ' \t \t// 6. Compare with the true label',
        ' \t \tint true_label = test_labels[i];',
        ' \t \tif (predicted_class == true_label) {',
        ' \t \t \tcorrect_predictions++;',
        ' \t \t}',
        ' \t}\n'
    ])

    # --- 5. Report Final Accuracy ---
    code_lines.extend([
        ' \t// --- Report Final Accuracy ---',
        ' \tfloat accuracy = (float)correct_predictions / num_samples * 100.0f;',
        ' \tprintf("\\n--- HLS Model Verification Result ---\\n");',
        ' \tprintf("Correctly Classified: %d / %d\\n", correct_predictions, num_samples);',
        ' \tprintf("Accuracy: %.2f%%\\n", accuracy);',
        ' \tprintf("-------------------------------------\\n");'
    ])

    # --- 6. Free Allocated Memory ---
    code_lines.append('\n \t// Free allocated memory')
    code_lines.append(' \tif(I1) free(I1);')
    for i, _ in conv_layers:
        code_lines.append(f' \tif(W{i}) free(W{i});')
        code_lines.append(f' \tif(B{i}) free(B{i});')
    for i, _, _ in fc_layers:
        code_lines.append(f' \tif(W_fc{i}) free(W_fc{i});')
        code_lines.append(f' \tif(B_fc{i}) free(B_fc{i});')
    code_lines.append(' \tif(O_final) free(O_final);')

    # --- 7. Return ---
    code_lines.extend([
        '\n \t// Return success code (0) for C-sim pass, or failure (1) if needed',
        ' \treturn EXIT_SUCCESS;',
        '}'
    ])

    return '\n'.join(code_lines)


def generate_conv_code(num_layers, params):
    """
    Generates the complete HLS C++ implementation code (conv.cpp) for the entire CNN,
    including CONV, optional MaxPool (1D and 2D), and multiple FC layers.

    The emitted file contains relu/softmax, one conv_1d_{i}/conv_2d_{i} pair
    per conv layer (selected at compile time by GLOBAL_DIM), a
    maxpool_1d_{i}/maxpool_2d_{i} pair for each layer with pooling enabled,
    one fc_layer_{i} per FC layer, and the top-level ``cnn`` wrapper that
    chains them.

    Args:
        num_layers: Number of convolution layers.
        params: Network description. Reads 'GLOBAL_DIM', 'NUM_FC_LAYERS',
            'M{i}' and 'E{i}' per conv layer, and the optional
            'L{i}_POOL_ON' flags (default False).

    Returns:
        The C++ source as a single string.

    Raises:
        KeyError: If a required entry is missing from ``params``.
    """
    GLOBAL_DIM = params['GLOBAL_DIM']

    code = [
        f'#include "conv{num_layers}.h"',
        '#include <math.h>',
        '#include <stdio.h>\n',
        '// --- ACTIVATION & OUTPUT FUNCTIONS ---',
        'type_t relu(type_t x) {',
        '    #pragma HLS INLINE',
        ' \treturn (x > 0) ? x : (type_t)0;',
        '}\n',
        'void softmax(type_t input[N_CLASSES], float output[N_CLASSES]) {',
        ' \tfloat sum = 0.0f;',
        ' \tfloat max_val = (float)input[0];',
        ' \tfor (int k = 1; k < N_CLASSES; k++) {',
        ' \t\tif ((float)input[k] > max_val) max_val = (float)input[k];',
        ' \t}',
        ' \tfor (int k = 0; k < N_CLASSES; k++) {',
        ' \t\toutput[k] = expf((float)input[k] - max_val);',
        ' \t\tsum += output[k];',
        ' \t}',
        ' \tfor (int k = 0; k < N_CLASSES; k++) {',
        ' \t\toutput[k] /= sum;',
        ' \t}',
        '}\n'
    ]

    # --- 1. GENERATE SPECIALIZED CONV FUNCTIONS ---
    # Both variants are emitted; the preprocessor keeps the one matching
    # GLOBAL_DIM. The header declares S/PAD/H as size_t, so they are cast to
    # int before the padding subtraction: otherwise a negative offset would
    # be computed as a wrapped-around unsigned value.
    for i in range(1, num_layers + 1):
        code.append("\n#if GLOBAL_DIM == 1")
        code.extend([
            f"// ** Specialized 1D Convolution Layer {i} **",
            f"void conv_1d_{i}(type_t I[C{i} * H{i}], type_t W[M{i} * C{i} * R{i}], type_t B[M{i}], type_t O[M{i} * E{i}]) {{",
            f" \tfor(int m = 0; m < M{i}; m++) {{",
            f" \t\tfor(int x = 0; x < E{i}; x++) {{",
            f" \t\t\tO[x + m * E{i}] = B[m];",
            f" \t\t\t#pragma HLS PIPELINE II=1",
            f" \t\t\tfor(int c = 0; c < C{i}; c++) {{",
            f" \t\t\t\tfor(int l = 0; l < R{i}; l++) {{",
            f" \t\t\t\t\tint h2 = x * (int)S{i} - (int)PAD{i} + l;",
            f" \t\t\t\t\ttype_t val = (h2 < 0 || h2 >= (int)H{i}) ? (type_t)0 : I[h2 + c * H{i}];",
            f" \t\t\t\t\tO[x + m * E{i}] += val * W[l + c * R{i} + m * C{i} * R{i}];",
            f" \t\t\t\t}}",
            f" \t\t\t}}",
            f" \t\t}}",
            f" \t}}",
            f"}}"
        ])
        code.append("#endif // GLOBAL_DIM == 1\n")

        code.append("\n#if GLOBAL_DIM == 2")
        code.extend([
            f"// ** Specialized 2D Convolution Layer {i} **",
            f"void conv_2d_{i}(type_t I[C{i} * H{i} * H{i}], type_t W[M{i} * C{i} * R{i} * R{i}], type_t B[M{i}], type_t O[M{i} * E{i} * E{i}]) {{",
            f" \tfor(int m = 0; m < M{i}; m++) {{",
            f" \t\tfor(int y = 0; y < E{i}; y++) {{",
            f" \t\t\tfor(int x = 0; x < E{i}; x++) {{",
            f" \t\t\t\tO[x + (y + (m * E{i})) * E{i}] = B[m];",
            f" \t\t\t\t#pragma HLS PIPELINE II=1",
            f" \t\t\t\tfor(int c = 0; c < C{i}; c++) {{",
            f" \t\t\t\t\tfor(int k = 0; k < R{i}; k++) {{",
            f" \t\t\t\t\t\tfor(int l = 0; l < R{i}; l++) {{",
            f" \t\t\t\t\t\t\tint h1 = y * (int)S{i} - (int)PAD{i} + k;",
            f" \t\t\t\t\t\t\tint h2 = x * (int)S{i} - (int)PAD{i} + l;",
            f" \t\t\t\t\t\t\ttype_t val = (h1 < 0 || h1 >= (int)H{i} || h2 < 0 || h2 >= (int)H{i}) ? (type_t)0 : I[h2 + (h1 + (c * H{i})) * H{i}];",
            f" \t\t\t\t\t\t\tO[x + (y + (m * E{i})) * E{i}] += val * W[l + (k + (c + (m * C{i})) * R{i}) * R{i}];",
            f" \t\t\t\t\t\t}}",
            f" \t\t\t\t\t}}",
            f" \t\t\t\t}}",
            f" \t\t\t}}",
            f" \t\t}}",
            f" \t}}",
            f"}}"
        ])
        code.append("#endif // GLOBAL_DIM == 2\n")

    # --- 2. GENERATE SPECIALIZED MAXPOOL FUNCTIONS ---
    for i in range(1, num_layers + 1):
        if params.get(f'L{i}_POOL_ON', False):
            pool_out_1d_size = f"M{i}*L{i}_POOL_E"
            code.extend([
                f"// ** Specialized 1D MaxPool for Layer {i} **",
                f"void maxpool_1d_{i}(type_t I[M{i}*E{i}], type_t O[{pool_out_1d_size}]) {{",
                f" \tfor (int m = 0; m < M{i}; ++m) {{",
                f" \t\tfor (int x_out = 0; x_out < L{i}_POOL_E; ++x_out) {{",
                f" \t\t\ttype_t max_val = -32768;",
                f" \t\t\t#pragma HLS PIPELINE II=1",
                f" \t\t\tfor (int k = 0; k < L{i}_POOL_K; ++k) {{",
                f" \t\t\t\tint h_in = x_out * L{i}_POOL_S + k;",
                f" \t\t\t\tif (I[h_in + m * E{i}] > max_val) {{",
                f" \t\t\t\t\tmax_val = I[h_in + m * E{i}];",
                f" \t\t\t\t}}",
                f" \t\t\t}}",
                f" \t\t\tO[x_out + m * L{i}_POOL_E] = max_val;",
                f" \t\t}}",
                f" \t}}",
                f"}}\n"
            ])

            # The wrapper below calls maxpool_2d_{i} when GLOBAL_DIM == 2, so
            # the definition must exist. Indexing follows conv_2d's output
            # layout: x + (y + m * width) * width.
            pool_out_2d_size = f"M{i}*L{i}_POOL_E*L{i}_POOL_E"
            code.append("#if GLOBAL_DIM == 2")
            code.extend([
                f"// ** Specialized 2D MaxPool for Layer {i} **",
                f"void maxpool_2d_{i}(type_t I[M{i}*E{i}*E{i}], type_t O[{pool_out_2d_size}]) {{",
                f" \tfor (int m = 0; m < M{i}; ++m) {{",
                f" \t\tfor (int y_out = 0; y_out < L{i}_POOL_E; ++y_out) {{",
                f" \t\t\tfor (int x_out = 0; x_out < L{i}_POOL_E; ++x_out) {{",
                f" \t\t\t\ttype_t max_val = -32768;",
                f" \t\t\t\t#pragma HLS PIPELINE II=1",
                f" \t\t\t\tfor (int ky = 0; ky < L{i}_POOL_K; ++ky) {{",
                f" \t\t\t\t\tfor (int kx = 0; kx < L{i}_POOL_K; ++kx) {{",
                f" \t\t\t\t\t\tint v_in = y_out * L{i}_POOL_S + ky;",
                f" \t\t\t\t\t\tint h_in = x_out * L{i}_POOL_S + kx;",
                f" \t\t\t\t\t\tif (I[h_in + (v_in + m * E{i}) * E{i}] > max_val) {{",
                f" \t\t\t\t\t\t\tmax_val = I[h_in + (v_in + m * E{i}) * E{i}];",
                f" \t\t\t\t\t\t}}",
                f" \t\t\t\t\t}}",
                f" \t\t\t\t}}",
                f" \t\t\t\tO[x_out + (y_out + m * L{i}_POOL_E) * L{i}_POOL_E] = max_val;",
                f" \t\t\t}}",
                f" \t\t}}",
                f" \t}}",
                f"}}"
            ])
            code.append("#endif // GLOBAL_DIM == 2\n")

    # --- 3. GENERATE SPECIALIZED FC FUNCTIONS ---
    for i in range(1, params['NUM_FC_LAYERS'] + 1):
        code.extend([
            f"// ** Specialized Fully Connected Layer {i} **",
            f"void fc_layer_{i}(type_t input[FC{i}_IN_SIZE], type_t W[FC{i}_IN_SIZE * FC{i}_OUT_SIZE], type_t B[FC{i}_OUT_SIZE], type_t output[FC{i}_OUT_SIZE]) {{",
            f" \tfor (int k = 0; k < FC{i}_OUT_SIZE; k++) {{",
            f" \t\toutput[k] = B[k];",
            f" \t\tfor (int j = 0; j < FC{i}_IN_SIZE; j++) {{",
            f" \t\t\t#pragma HLS PIPELINE II=1",
            f" \t\t\toutput[k] += input[j] * W[j + k * FC{i}_IN_SIZE];",
            f" \t\t}}",
            f" \t}}",
            f"}}\n"
        ])

    # --- 4. GENERATE WRAPPER CNN FUNCTION (HLS DATAFLOW) ---
    cnn_proto = "// ** Wrapper CNN Function (HLS Inference) **\nvoid cnn(type_t *input"
    for i in range(1, num_layers + 1):
        cnn_proto += f", type_t *W{i}, type_t *B{i}"
    for i in range(1, params['NUM_FC_LAYERS'] + 1):
        cnn_proto += f", type_t *W_fc{i}, type_t *B_fc{i}"
    cnn_proto += ", float *output) {"
    code.append(cnn_proto)
    code.append("//#pragma HLS DATAFLOW\n")

    # Declare all intermediate buffers
    for i in range(1, num_layers + 1):
        conv_out_size = f"M{i} * E{i}" + (f" * E{i}" if GLOBAL_DIM == 2 else "")
        code.append(f" \tstatic type_t O{i}_conv_relu[{conv_out_size}];")
        if params.get(f'L{i}_POOL_ON', False):
            pool_out_size = f"M{i} * L{i}_POOL_E" + (f" * L{i}_POOL_E" if GLOBAL_DIM == 2 else "")
            code.append(f" \tstatic type_t O{i}_pool[{pool_out_size}];")

    code.append(" \tstatic type_t O_conv_flat[CONV_FLAT_SIZE];")
    for i in range(1, params['NUM_FC_LAYERS'] + 1):
        code.append(f" \tstatic type_t O_fc{i}_raw[FC{i}_OUT_SIZE];")
        # The last FC layer feeds softmax directly and needs no ReLU buffer.
        if i < params['NUM_FC_LAYERS']:
            code.append(f" \tstatic type_t O_fc{i}_relu[FC{i}_OUT_SIZE];")
    code.append("")

    # --- Chain all layers together ---
    for i in range(1, num_layers + 1):
        # A stage reads the previous stage's pooled output when that stage
        # pools, else its ReLU'd conv output; stage 1 reads the raw input.
        prev_stage_output = "input"
        if i > 1:
            prev_stage_output = f"O{i-1}_pool" if params.get(f'L{i-1}_POOL_ON', False) else f"O{i-1}_conv_relu"

        code.append(f"\n \t// --- Stage {i}: CONV -> ReLU -> Optional-MaxPool ---")
        code.append(" #if GLOBAL_DIM == 1")
        code.append(f" \tconv_1d_{i}((type_t*){prev_stage_output}, W{i}, B{i}, O{i}_conv_relu);")
        code.append(" #elif GLOBAL_DIM == 2")
        code.append(f" \tconv_2d_{i}((type_t*){prev_stage_output}, W{i}, B{i}, O{i}_conv_relu);")
        code.append(" #endif")

        # ReLU is applied in place over the whole conv output buffer.
        conv_out_size = params[f'M{i}'] * params[f'E{i}'] * (params[f'E{i}'] if GLOBAL_DIM == 2 else 1)
        code.append(f"\tfor (int j = 0; j < {conv_out_size}; j++) O{i}_conv_relu[j] = relu(O{i}_conv_relu[j]);")

        if params.get(f'L{i}_POOL_ON', False):
            code.append(" #if GLOBAL_DIM == 1")
            code.append(f" \tmaxpool_1d_{i}(O{i}_conv_relu, O{i}_pool);")
            code.append(" #elif GLOBAL_DIM == 2")
            code.append(f" \tmaxpool_2d_{i}(O{i}_conv_relu, O{i}_pool);")
            code.append(" #endif")

    last_stage_out = f"O{num_layers}_pool" if params.get(f'L{num_layers}_POOL_ON', False) else f"O{num_layers}_conv_relu"
    code.append("\n \t// --- Flatten Stage ---")
    code.append(f"\tfor (int j = 0; j < CONV_FLAT_SIZE; j++) O_conv_flat[j] = ((type_t*){last_stage_out})[j];")

    for i in range(1, params['NUM_FC_LAYERS'] + 1):
        input_name = "O_conv_flat" if i == 1 else f"O_fc{i-1}_relu"
        is_last_fc_layer = (i == params['NUM_FC_LAYERS'])
        activation_str = "ReLU" if not is_last_fc_layer else "None"
        code.append(f"\n \t// --- FC Layer {i} (Activation: {activation_str}) ---")
        code.append(f" \tfc_layer_{i}({input_name}, W_fc{i}, B_fc{i}, O_fc{i}_raw);")

        if not is_last_fc_layer:
            code.append(f"\tfor (int j = 0; j < FC{i}_OUT_SIZE; j++) O_fc{i}_relu[j] = relu(O_fc{i}_raw[j]);")

    last_fc_out_buffer = f"O_fc{params['NUM_FC_LAYERS']}_raw"
    code.extend([
        "\n \t// --- Final Layer: Softmax ---",
        f" \tsoftmax({last_fc_out_buffer}, output);"
    ])

    code.append("}")
    return '\n'.join(code)


def generate_convh_code(num_layers, params):
    """
    Build the HLS C++ header (conv.h): the type_t typedef, all size constants,
    and the prototypes for the CONV, optional MaxPool, FC, softmax and cnn functions.

    Args:
        num_layers: Number of convolution layers.
        params: Network description. Reads 'GLOBAL_DIM', 'DATA_TYPE',
            'N_CLASSES', 'CONV_FLAT_SIZE', 'NUM_FC_LAYERS', the per-layer
            'C/H/M/R/S/E/PAD{i}' values, 'FC{i}_IN' / 'FC{i}_OUT', and, for
            layers whose 'L{i}_POOL_ON' flag is set, 'L{i}_POOL_K/S/E'.

    Returns:
        The header text as a single string.
    """
    GLOBAL_DIM = params['GLOBAL_DIM']
    conv_ids = range(1, num_layers + 1)

    out = [
        "#ifndef CONV_H\n#define CONV_H\n",
        "#include <cstddef>",
        "#include <stdio.h>"
    ]

    # Any DATA_TYPE other than 'fixed' selects the float typedef.
    if params['DATA_TYPE'] == 'fixed':
        out += [
            '#include "ap_fixed.h"',
            '// Using fixed-point for HLS synthesis',
            'typedef ap_fixed<32, 16> type_t;',
        ]
    else:
        out += [
            '// Using floating-point for C-simulation verification',
            'typedef float type_t;',
        ]

    # --- 1. Global Parameters ---
    out += [
        "// ** GLOBAL PARAMETERS **",
        f"const size_t N_CLASSES = {params['N_CLASSES']};",
        f"#define GLOBAL_DIM {GLOBAL_DIM}\n",
    ]

    # --- 2. CONV Layer Parameters ---
    for i in conv_ids:
        out.append(f"// ** CONV LAYER {i} **")
        for key in ('C', 'H', 'M', 'R', 'S', 'E'):
            out.append(f"const size_t {key}{i} = {params[f'{key}{i}']};")
        out.append(f"const size_t PAD{i} = {params[f'PAD{i}']};\n")

    # --- 3. MaxPool Layer Parameters (only for layers that pool) ---
    pooled_ids = [i for i in conv_ids if params.get(f'L{i}_POOL_ON', False)]
    for i in pooled_ids:
        out.append(f"// ** MAXPOOL for LAYER {i} **")
        out.append(f"const size_t L{i}_POOL_K = {params[f'L{i}_POOL_K']};")
        out.append(f"const size_t L{i}_POOL_S = {params[f'L{i}_POOL_S']};")
        out.append(f"const size_t L{i}_POOL_E = {params[f'L{i}_POOL_E']};\n")

    # --- 4. Flatten and FC Layer Parameters ---
    out.append("// ** FLATTEN & FC SIZES **")
    out.append(f"const size_t CONV_FLAT_SIZE = {params['CONV_FLAT_SIZE']};\n")
    fc_ids = range(1, params['NUM_FC_LAYERS'] + 1)
    for i in fc_ids:
        out.append(f"// ** FC LAYER {i} **")
        out.append(f"const size_t FC{i}_IN_SIZE = {params[f'FC{i}_IN']};")
        out.append(f"const size_t FC{i}_OUT_SIZE = {params[f'FC{i}_OUT']};\n")

    # --- 5. Function Prototypes ---
    out.append("// ** FUNCTION PROTOTYPES **")
    for i in conv_ids:
        out.append(f"void conv_1d_{i}(type_t I[C{i}*H{i}], type_t W[M{i}*C{i}*R{i}], type_t B[M{i}], type_t O[M{i}*E{i}]);")
        out.append(f"void conv_2d_{i}(type_t I[C{i}*H{i}*H{i}], type_t W[M{i}*C{i}*R{i}*R{i}], type_t B[M{i}], type_t O[M{i}*E{i}*E{i}]);")

    for i in pooled_ids:
        side = f"L{i}_POOL_E"
        out.append(f"void maxpool_1d_{i}(type_t I[M{i}*E{i}], type_t O[M{i}*{side}]);")
        out.append(f"void maxpool_2d_{i}(type_t I[M{i}*E{i}*E{i}], type_t O[M{i}*{side}*{side}]);")

    for i in fc_ids:
        out.append(f"void fc_layer_{i}(type_t input[FC{i}_IN_SIZE], type_t W[FC{i}_IN_SIZE * FC{i}_OUT_SIZE], type_t B[FC{i}_OUT_SIZE], type_t output[FC{i}_OUT_SIZE]);")

    out.append("void softmax(type_t input[N_CLASSES], float output[N_CLASSES]);")

    # Top-level prototype: input, conv W/B pairs, FC W/B pairs, output.
    conv_args = "".join(f", type_t *W{i}, type_t *B{i}" for i in conv_ids)
    fc_args = "".join(f", type_t *W_fc{i}, type_t *B_fc{i}" for i in fc_ids)
    out.append(f"\nvoid cnn(type_t *input{conv_args}{fc_args}, float *output);")

    out.append("\n#endif // CONV_H")
    return '\n'.join(out)



# --- 2. BACKPROPAGATION TRAINING CODE (X86 CPU - BACKPROP Folder) ---

def generate_vector_norm_function():
    """Return the C++ source of ``calculate_l2_norm``.

    The emitted helper sums the squares of ``size`` dtype_t values and
    returns the square root of that sum.

    Returns:
        The C++ text, terminated by a newline.
    """
    return (
        '// --- DIAGNOSTIC HELPER FUNCTION ---\n'
        '#include <numeric> // For std::inner_product\n'
        '\n'
        '// Calculates the L2 Norm (magnitude) of a vector of floats.\n'
        'dtype_t calculate_l2_norm(const dtype_t* data, size_t size) {\n'
        '    dtype_t sum_sq = 0.0f;\n'
        '    for (size_t i = 0; i < size; ++i) {\n'
        '        sum_sq += data[i] * data[i];\n'
        '    }\n'
        '    return sqrt(sum_sq);\n'
        '}\n'
    )


def generate_normalization_function():
    """Return the C++ source of ``calculate_stats`` and ``apply_normalization``.

    ``calculate_stats`` writes the mean and population standard deviation of
    a feature vector into its output references (0.0 / 1.0 for an empty
    vector) and prints them. ``apply_normalization`` rescales a vector in
    place with a given mean/std_dev, skipping the rescale with a warning when
    std_dev is not above 1e-6.

    Returns:
        The C++ text, terminated by a newline.
    """
    unit = ' \t'  # one indentation level in the emitted C++ (space + tab)

    # (indent depth, text) for every emitted line; blank lines are (0, '').
    spec = [
        (0, '// --- DATA NORMALIZATION FUNCTIONS ---'),
        (0, '#include <numeric> // For std::accumulate'),
        (0, ''),
        (0, '// 1. Calculates mean and stddev from a given dataset (should be TRAINING data)'),
        (0, 'void calculate_stats(const std::vector<dtype_t>& features, double& mean, double& std_dev) {'),
        (1, 'if (features.empty()) {'),
        (2, 'mean = 0.0;'),
        (2, 'std_dev = 1.0;'),
        (2, 'return;'),
        (1, '}'),
        (0, ''),
        (1, '// --- THIS IS THE FIX ---'),
        (1, 'double sum = 0.0;'),
        (1, '// --- END FIX ---'),
        (0, ''),
        (1, 'sum = std::accumulate(features.begin(), features.end(), 0.0);'),
        (1, 'mean = sum / features.size();'),
        (1, 'double sq_sum = 0.0;'),
        (1, 'for(const auto& val : features) {'),
        (2, 'sq_sum += (val - mean) * (val - mean);'),
        (1, '}'),
        (1, 'std_dev = std::sqrt(sq_sum / features.size());'),
        (1, 'std::cout << "Calculated Stats from Training Data: Mean=" << mean << ", StdDev=" << std_dev << std::endl;'),
        (0, '}'),
        (0, ''),
        (0, '// 2. Applies pre-calculated mean and stddev to a dataset'),
        (0, 'void apply_normalization(std::vector<dtype_t>& features, const double mean, const double std_dev) {'),
        (1, 'if (features.empty()) return;'),
        (1, 'if (std_dev > 1e-6) {'),
        (2, 'for(auto& val : features) {'),
        (3, 'val = (val - mean) / std_dev;'),
        (2, '}'),
        (1, '} else {'),
        (2, 'std::cout << "Warning: Standard deviation is near zero. Skipping normalization." << std::endl;'),
        (1, '}'),
        (0, '}'),
    ]
    return '\n'.join(unit * depth + text for depth, text in spec) + '\n'

def generate_calibration_code(num_layers, params):
    """Return a C++ snippet that finds the largest |value| over all weights and biases.

    The snippet declares ``max_abs_value`` and a ``find_max`` lambda, calls
    the lambda on every conv and FC weight/bias array, and prints the result
    with a ``CALIBRATION_INFO`` prefix.

    Args:
        num_layers: Number of convolution layers.
        params: Network description. Reads 'GLOBAL_DIM' (when there is at
            least one conv layer), 'NUM_FC_LAYERS' and 'FC{i}_IN' /
            'FC{i}_OUT' per FC layer.

    Returns:
        The snippet as a single string (it starts with a blank line).
    """
    # (array name, element count) for every array to scan, in emit order.
    targets = []
    for i in range(1, num_layers + 1):
        kernel_elems = f"M{i} * C{i} * R{i}"
        if params['GLOBAL_DIM'] == 2:
            kernel_elems += f" * R{i}"
        targets.append((f"W{i}", kernel_elems))
        targets.append((f"B{i}", f"M{i}"))
    for i in range(1, params['NUM_FC_LAYERS'] + 1):
        n_in = params[f'FC{i}_IN']
        n_out = params[f'FC{i}_OUT']
        targets.append((f"W_fc{i}", n_in * n_out))
        targets.append((f"B_fc{i}", n_out))

    snippet = [
        '\n \t// --- CALIBRATION: Find max absolute weight value ---',
        ' \tfloat max_abs_value = 0.0f;',
        ' \t// Helper lambda to check an array',
        ' \tauto find_max = [&](const dtype_t* arr, size_t size) {',
        ' \t \tfor (size_t i = 0; i < size; ++i) {',
        ' \t \t \tif (fabs(arr[i]) > max_abs_value) {',
        ' \t \t \t \tmax_abs_value = fabs(arr[i]);',
        ' \t \t \t}',
        ' \t \t}',
        ' \t};',
    ]
    snippet += [f' \tfind_max({name}, {count});' for name, count in targets]
    snippet.append(' \tcout << "CALIBRATION_INFO: Maximum absolute weight value is: " << max_abs_value << endl;')
    return '\n'.join(snippet)


def generate_save_weights_function():
    """Return the C++ source of ``save_weights``.

    The emitted function opens ``file_path`` in binary mode, reports an error
    on ``std::cerr`` and returns if the file cannot be opened, otherwise writes
    ``num_elements * sizeof(dtype_t)`` raw bytes and logs a confirmation.

    Returns:
        The C++ text as a single string ending with a newline.
    """
    # Adjacent literals are concatenated by the parser; every C++ line carries
    # its own trailing newline.
    return (
        '// --- WEIGHT SAVING FUNCTION ---\n'
        '#include <fstream>\n'
        '\n'
        'void save_weights(const std::string& file_path, const dtype_t* data, size_t num_elements) {\n'
        '    std::ofstream out_file(file_path, std::ios::binary);\n'
        '    if (!out_file.is_open()) {\n'
        '        std::cerr << "ERROR: Could not open file for writing: " << file_path << std::endl;\n'
        '        return;\n'
        '    }\n'
        '    // Write the raw bytes of the array to the file\n'
        '    out_file.write(reinterpret_cast<const char*>(data), num_elements * sizeof(dtype_t));\n'
        '    out_file.close();\n'
        '    std::cout << "Saved " << num_elements << " elements to " << file_path << std::endl;\n'
        '}\n'
    )

def generate_backprop_makefile_code():
    """Return the Makefile text used to build the x86 trainer.

    The Makefile defines ``TARGET`` and ``SRC``, declares ``all``, ``$(TARGET)``
    and ``clean`` as phony, compiles with ``clang++`` into ``../bin`` under
    ``all`` and removes the binary under ``clean``. Recipe lines begin with a
    hard TAB, as make requires.
    """
    variables = (
        'TARGET = cnn_trainer\n'
        'SRC = backprop_main.cpp backprop.cpp\n'
    )
    phony_decl = '\n.PHONY: all $(TARGET) clean\n'
    # Each recipe line below starts with "\t": make rejects space indentation.
    build_rule = (
        'all:\n'
        '\tclang++ -O3 -Wall -std=c++11 $(SRC) -o ../bin/$(TARGET) -lm\n'
        '\n'
    )
    clean_rule = (
        'clean:\n'
        '\trm -f ../bin/$(TARGET)\n'
    )
    return variables + phony_decl + build_rule + clean_rule


def generate_backprop_header_code(num_layers, params):
    """
    Build the text of backprop.h for the training program.

    The header contains, in order: the include guard and standard includes,
    the ``dtype_t`` typedef, one group of ``const size_t`` constants per conv
    layer, pooling constants for every layer whose ``L{i}_POOL_ON`` entry is
    truthy, the flatten/FC sizes, the ``cnn_fwd`` and ``cnn_bwd`` prototypes
    and four helper prototypes.

    Args:
        num_layers: number of convolutional layers (numbered from 1).
        params: dict holding GLOBAL_DIM, N_CLASSES, NUM_FC_LAYERS,
            CONV_FLAT_SIZE, the per-layer C/H/M/E/R/S/PAD values, the optional
            L{i}_POOL_* values and the FC{i}_IN / FC{i}_OUT sizes.

    Returns:
        The header source as one newline-joined string.
    """
    dim = params['GLOBAL_DIM']
    two_d = dim == 2
    conv_ids = range(1, num_layers + 1)
    # Separator that starts each continuation line of a prototype.
    brk = ",\n                "

    def has_pool(k):
        # A layer is pooled when its L{k}_POOL_ON flag is present and truthy.
        return params.get(f'L{k}_POOL_ON', False)

    def kernel_elems(k):
        # Array-bound expression for the conv weights of layer k.
        return f"M{k}*C{k}*R{k}" + (f" * R{k}" if two_d else "")

    out = [
        "#ifndef BACKPROP_H\n#define BACKPROP_H\n",
        "#include <cstddef>",
        "#include <cmath>",
        "#include <iostream>\n",
        "typedef float dtype_t;\n",
    ]

    # --- Size constants: global + per conv layer ---
    out += [
        "// --- GLOBAL & CONV CONSTANTS ---",
        f"const size_t N_CLASSES = {params['N_CLASSES']};",
        f"const size_t GLOBAL_DIM = {dim};\n",
    ]
    for k in conv_ids:
        c, h = params[f'C{k}'], params[f'H{k}']
        m, e = params[f'M{k}'], params[f'E{k}']
        r, s, pad = params[f'R{k}'], params[f'S{k}'], params[f'PAD{k}']
        fmap = f"M{k} * E{k}" + (f" * E{k}" if two_d else "")
        out += [
            f"// Layer {k}",
            f"const size_t C{k} = {c}; const size_t H{k} = {h};",
            f"const size_t M{k} = {m}; const size_t E{k} = {e};",
            f"const size_t R{k} = {r}; const size_t S{k} = {s}; const size_t PAD{k} = {pad};",
            f"const size_t L{k}_FMAP_SIZE = {fmap};\n",
        ]

    # --- Size constants: pooling (only for layers that have it) ---
    out.append("// --- MAXPOOL CONSTANTS ---")
    for k in conv_ids:
        if not has_pool(k):
            continue
        pool_e = params[f'L{k}_POOL_E']
        pool_fmap = f"M{k} * {pool_e}" + (f" * {pool_e}" if two_d else "")
        pool_k = params[f'L{k}_POOL_K']
        pool_s = params[f'L{k}_POOL_S']
        out += [
            f"const size_t L{k}_POOL_K = {pool_k};",
            f"const size_t L{k}_POOL_S = {pool_s};",
            f"const size_t L{k}_POOL_E = {pool_e};",
            f"const size_t L{k}_POOL_FMAP_SIZE = {pool_fmap};\n",
        ]

    # --- Size constants: flatten + FC ---
    out.append("// --- FLATTEN & FC CONSTANTS ---")
    out.append(f"const size_t CONV_FLAT_SIZE = {params['CONV_FLAT_SIZE']};")
    fc_ids = range(1, params['NUM_FC_LAYERS'] + 1)
    for j in fc_ids:
        fc_in = params[f'FC{j}_IN']
        fc_out = params[f'FC{j}_OUT']
        out.append(f"const size_t FC{j}_IN_SIZE = {fc_in};")
        out.append(f"const size_t FC{j}_OUT_SIZE = {fc_out};\n")

    out.append("// --- MAIN FUNCTION PROTOTYPES ---")

    # --- cnn_fwd: conv args, FC args, then the pooling index outputs ---
    fwd = ["void cnn_fwd(const dtype_t I[CONV_FLAT_SIZE]"]
    for k in conv_ids:
        fwd.append(
            f"{brk}const dtype_t W{k}[{kernel_elems(k)}], const dtype_t B{k}[M{k}], dtype_t O{k}[L{k}_FMAP_SIZE]"
        )
        if has_pool(k):
            fwd.append(f", dtype_t O{k}_pool[L{k}_POOL_FMAP_SIZE]")
    for j in fc_ids:
        fwd.append(
            f"{brk}const dtype_t W_fc{j}[FC{j}_IN_SIZE*FC{j}_OUT_SIZE], const dtype_t B_fc{j}[FC{j}_OUT_SIZE], dtype_t O_fc{j}[FC{j}_OUT_SIZE]"
        )
    fwd += [f"{brk}int max_indices{k}[L{k}_POOL_FMAP_SIZE]" for k in conv_ids if has_pool(k)]
    fwd.append(");\n")
    out.append("".join(fwd))

    # --- cnn_bwd: activations, weights + gradients, intermediate gradients, indices ---
    bwd = ["void cnn_bwd(const dtype_t I[CONV_FLAT_SIZE], const size_t LABEL[1]"]
    for k in conv_ids:
        bwd.append(f"{brk}const dtype_t O{k}[L{k}_FMAP_SIZE]")
        if has_pool(k):
            bwd.append(f", const dtype_t O{k}_pool[L{k}_POOL_FMAP_SIZE]")
    bwd += [f"{brk}const dtype_t O_fc{j}[FC{j}_OUT_SIZE]" for j in fc_ids]
    for k in conv_ids:
        w_dims = kernel_elems(k)
        bwd.append(f"{brk}const dtype_t W{k}[{w_dims}], dtype_t dW{k}[{w_dims}], dtype_t dB{k}[M{k}]")
    for j in fc_ids:
        fc_dims = f"FC{j}_IN_SIZE*FC{j}_OUT_SIZE"
        bwd.append(
            f"{brk}const dtype_t W_fc{j}[{fc_dims}], dtype_t dW_fc{j}[{fc_dims}], dtype_t dB_fc{j}[FC{j}_OUT_SIZE]"
        )
    bwd.append(f"{brk}dtype_t dI_fc1[CONV_FLAT_SIZE], dtype_t dI1[C1*H1*(GLOBAL_DIM==2?H1:1)]")
    # Remaining input gradients are listed from the last layer down to layer 2.
    bwd += [f", dtype_t dI{k}[L{k - 1}_FMAP_SIZE]" for k in range(num_layers, 1, -1)]
    bwd += [f", dtype_t dI_fc{j}[FC{j - 1}_OUT_SIZE]" for j in range(params['NUM_FC_LAYERS'], 1, -1)]
    bwd += [f"{brk}const int max_indices{k}[L{k}_POOL_FMAP_SIZE]" for k in conv_ids if has_pool(k)]
    bwd.append(");\n")
    out.append("".join(bwd))

    # --- Helper prototypes ---
    out += [
        "\n// --- HELPER FUNCTION PROTOTYPES ---",
        "void adam_update(dtype_t* W, const dtype_t* dW, dtype_t* m, dtype_t* v, size_t size, dtype_t lr, dtype_t beta1, dtype_t beta2, dtype_t epsilon, size_t t);",
        "void cross_entropy_softmax_bwd(size_t num_classes, const dtype_t* scores, const size_t* labels, dtype_t* dO_raw);",
        "void relu_bwd(size_t size, const dtype_t* O_store, const dtype_t* dO, dtype_t* dI);",
        "void maxpool_1d_bwd(const dtype_t* dO, const int* max_indices, dtype_t* dI, size_t in_fmap_size, size_t out_fmap_size);",
        "\n#endif // BACKPROP_H",
    ]
    return '\n'.join(out)


def generate_load_data_function(N_CLASSES, INPUT_W):
    """
    Generates a C++ function to load data from a .dat text file
    with the format: {{features...},label}

    The emitted ``load_data`` reads the file line by line, skips lines whose
    delimiters are missing or out of order, skips samples whose feature count
    differs from ``num_features``, and appends the features, the raw label and
    a one-hot label vector to the output vectors. It returns the number of
    samples kept (0 if the file cannot be opened).

    Args:
        N_CLASSES: unused while generating; the C++ function receives the
            class count as a runtime argument. Kept for caller compatibility.
        INPUT_W: unused while generating; the C++ function receives the
            feature count as a runtime argument. Kept for caller compatibility.

    Returns:
        A list of C++ source lines (the caller joins them with newlines).
    """
    code = [
        '// --- DATA LOADING FUNCTION ---',
        '#include <fstream>', '#include <sstream>', '#include <string>', '#include <vector>',
        'long load_data(const std::string& file_path, std::vector<dtype_t>& X_out, std::vector<dtype_t>& Y_out_one_hot, std::vector<size_t>& Y_out_raw, int N_CLASSES, int num_features) {',
        '    std::ifstream file(file_path);',
        '    if (!file.is_open()) { std::cerr << "ERROR: Could not open file: " << file_path << std::endl; return 0; }',
        '    std::string line;',
        '    long num_samples = 0;',
        '    while (std::getline(file, line)) {',
        # The npos tests must run on the raw find() results: adding an offset
        # first wraps npos around to a small value and hides the failure.
        '        // Locate the delimiters first; npos must be tested before any offset is added.',
        '        size_t open_pos = line.find("{{");',
        '        size_t features_end = line.find("},");',
        '        size_t label_end = line.rfind("}");',
        '        if (open_pos == std::string::npos || features_end == std::string::npos || label_end == std::string::npos) continue;',
        '        size_t features_start = open_pos + 2;',
        '        size_t label_start = features_end + 2;',
        # Guards the unsigned subtractions used as substr() lengths below.
        '        if (features_end < features_start || label_end < label_start) continue;',
        '        std::string features_str = line.substr(features_start, features_end - features_start);',
        '        std::string label_str = line.substr(label_start, label_end - label_start);',
        '        if (label_str.empty()) continue;',

        '        std::stringstream ss(features_str);',
        '        std::string feature_val_str;',
        '        std::vector<dtype_t> current_features;',
        '        while (std::getline(ss, feature_val_str, \',\')) { current_features.push_back(std::stof(feature_val_str)); }',
        # Cast avoids a signed/unsigned comparison warning under -Wall.
        '        if (current_features.size() != static_cast<size_t>(num_features)) {',
        '            std::cerr << "WARNING: Mismatch in feature count. Expected " << num_features << ", got " << current_features.size() << ". Skipping sample." << std::endl;',
        '            continue;',
        '        }',
        '        X_out.insert(X_out.end(), current_features.begin(), current_features.end());',
        '        int label = std::stoi(label_str);',
        '        Y_out_raw.push_back(label);',
        '        std::vector<dtype_t> one_hot(N_CLASSES, 0.0f);',
        '        if (label >= 0 && label < N_CLASSES) { one_hot[label] = 1.0f; }',
        '        Y_out_one_hot.insert(Y_out_one_hot.end(), one_hot.begin(), one_hot.end());',
        '        num_samples++;',
        '    }',
        '    std::cout << "Successfully loaded " << num_samples << " samples from " << file_path << std::endl;',
        '    return num_samples;',
        '}'
    ]
    return code


def generate_backprop_cpp_code(num_layers, params):
    """
    Generates the complete backprop.cpp with full implementations for all layer types
    and the main cnn_fwd and cnn_bwd wrapper functions.

    The output has three parts:
      1. Fixed C++ helpers: softmax/cross-entropy gradient, ReLU forward/backward,
         FC forward/backward, 1-D conv forward/backward, 1-D max-pool
         forward (recording arg-max indices) and backward.
      2. ``cnn_fwd``: conv -> ReLU -> optional max-pool per conv layer, then the
         FC layers with ReLU after every FC layer except the last.
      3. ``cnn_bwd``: softmax/cross-entropy gradient on the last FC output, then
         the FC layers and conv layers in reverse order.

    Args:
        num_layers: number of convolutional layers (numbered from 1).
        params: dict providing ``GLOBAL_DIM``, ``NUM_FC_LAYERS`` and the optional
            ``L{i}_POOL_ON`` flags; all sizes in the emitted code are symbolic
            constants expected to come from backprop.h.

    Returns:
        The C++ source as one newline-joined string.

    NOTE(review): the wrappers always emit calls to the ``conv_1d_*`` and
    ``maxpool_1d_*`` kernels; in this function ``GLOBAL_DIM == 2`` only changes
    the weight-array bounds written in the signatures.
    """
    GLOBAL_DIM = params['GLOBAL_DIM']
    code = ['#include "backprop.h"\n']

    # --- 1. REUSABLE HELPER & LAYER IMPLEMENTATIONS ---
    # Everything in this list is literal C++ that does not depend on the
    # network configuration.
    code.extend([
        '// --- UTILITY & ACTIVATION FUNCTIONS ---',
        # Softmax is computed on scores shifted by their maximum; the result
        # minus 1 at the label index is written to dO_raw.
        'void cross_entropy_softmax_bwd(size_t num_classes, const dtype_t* scores, const size_t* labels, dtype_t* dO_raw) {',
        ' \tdtype_t max_score = scores[0];',
        ' \tfor (size_t k = 1; k < num_classes; ++k) { if (scores[k] > max_score) max_score = scores[k]; }',
        ' \tdtype_t exp_sum = 0.0f;',
        ' \tfor (size_t k = 0; k < num_classes; ++k) { exp_sum += expf(scores[k] - max_score); }',
        ' \tfor (size_t k = 0; k < num_classes; ++k) {',
        ' \t \tdO_raw[k] = expf(scores[k] - max_score) / exp_sum;',
        ' \t \tif (k == labels[0]) { dO_raw[k] -= 1.0f; }',
        ' \t}',
        '}',
        # relu_fwd works in place; relu_bwd passes dO through only where the
        # stored (post-ReLU) output is positive.
        'void relu_fwd(size_t size, dtype_t* x) { for (size_t i = 0; i < size; ++i) { x[i] = (x[i] > 0.0f) ? x[i] : 0.0f; }}',
        'void relu_bwd(size_t size, const dtype_t* O_store, const dtype_t* dO, dtype_t* dI) { for (size_t i = 0; i < size; ++i) { dI[i] = (O_store[i] > 0.0f) ? dO[i] : 0.0f; }}\n',

        '// --- FULLY CONNECTED (FC) LAYER ---',
        # FC weights are indexed as W[i + k * in_size] (one row of in_size
        # values per output k).
        'void fc_layer_fwd(const dtype_t* input, const dtype_t* W, const dtype_t* B, dtype_t* output, size_t in_size, size_t out_size) {',
        ' \tfor (size_t k = 0; k < out_size; k++) {',
        ' \t \toutput[k] = B[k];',
        ' \t \tfor (size_t i = 0; i < in_size; i++) { output[k] += input[i] * W[i + k * in_size]; }',
        ' \t}',
        '}',
        # dW and dB are accumulated (+=) so the caller can sum over a batch;
        # dI is zeroed and then rebuilt on every call.
        'void fc_layer_bwd(const dtype_t* input, const dtype_t* W, const dtype_t* dO, dtype_t* dI, dtype_t* dW, dtype_t* dB, size_t in_size, size_t out_size) {',
        ' \tfor (size_t k = 0; k < out_size; ++k) { dB[k] += dO[k]; }',
        ' \tfor (size_t k = 0; k < out_size; ++k) { for (size_t i = 0; i < in_size; ++i) { dW[i + k * in_size] += input[i] * dO[k]; }}',
        ' \tfor (size_t i = 0; i < in_size; ++i) { dI[i] = 0.0f; }',
        ' \tfor (size_t k = 0; k < out_size; ++k) { for (size_t i = 0; i < in_size; ++i) { dI[i] += W[i + k * in_size] * dO[k]; }}',
        '}\n',

        '// --- CONVOLUTIONAL (CONV) LAYER ---',
        # Output length E is recomputed from H, R, PAD and S; input positions
        # outside [0, H) are skipped, which acts as zero padding.
        'void conv_1d_fwd(size_t H, size_t C, size_t R, size_t M, size_t S, size_t PAD, const dtype_t* I, const dtype_t* W, const dtype_t* B, dtype_t* O) {',
        ' \tsize_t E = (H - R + 2 * PAD) / S + 1;',
        ' \tfor(size_t m = 0; m < M; m++) { for(size_t x = 0; x < E; x++) {',
        ' \t \tO[x + m * E] = B[m];',
        ' \t \tfor(size_t c = 0; c < C; c++) { for(size_t l = 0; l < R; l++) {',
        ' \t \t \tlong h2 = (long)x * S - PAD + l;',
        ' \t \t \tif (h2 >= 0 && h2 < H) { O[x + m * E] += I[h2 + c * H] * W[l + c * R + m * C * R]; }',
        ' \t \t}}',
        ' \t}}',
        '}',
        # As for the FC layer: dW/dB accumulate, dI is overwritten.
        'void conv_1d_bwd(size_t H, size_t C, size_t R, size_t M, size_t S, size_t PAD, const dtype_t* I, const dtype_t* dO, const dtype_t* W, dtype_t* dI, dtype_t* dW, dtype_t* dB) {',
        ' \tsize_t E = (H - R + 2 * PAD) / S + 1;',
        ' \tfor (size_t m = 0; m < M; ++m) { for (size_t c = 0; c < C; ++c) { for (size_t l = 0; l < R; ++l) {',
        ' \t \tdtype_t grad_w = 0.0f;',
        ' \t \tfor (size_t x = 0; x < E; ++x) { long h2 = (long)x * S - PAD + l; if (h2 >= 0 && h2 < H) { grad_w += I[h2 + c * H] * dO[x + m * E]; } }',
        ' \t \tdW[l + c * R + m * C * R] += grad_w;',
        ' \t}}}',
        ' \tfor (size_t m = 0; m < M; ++m) { dtype_t grad_b = 0.0f; for (size_t x = 0; x < E; ++x) { grad_b += dO[x + m * E]; } dB[m] += grad_b; }',
        ' \tfor (size_t i = 0; i < C * H; ++i) { dI[i] = 0.0f; }',
        ' \tfor (size_t m = 0; m < M; ++m) { for (size_t c = 0; c < C; ++c) { for (size_t l = 0; l < R; ++l) {',
        ' \t \tfor (size_t x = 0; x < E; ++x) { long h2 = (long)x * S - PAD + l; if (h2 >= 0 && h2 < H) { dI[h2 + c * H] += W[l + c * R + m * C * R] * dO[x + m * E]; } }',
        ' \t}}}',
        '}\n',

        '// --- MAXPOOL LAYER ---',
        # The forward pass stores, per output element, the flat input index
        # (h_in + m * E_in) of the maximum so the backward pass can route
        # gradients to it.
        'void maxpool_1d_fwd_with_indices(const dtype_t* I, dtype_t* O, int* max_indices, size_t M, size_t E_in, size_t E_out, size_t K, size_t S) {',
        ' \tfor (size_t m = 0; m < M; ++m) {',
        ' \t \tfor (size_t x_out = 0; x_out < E_out; ++x_out) {',
        # NOTE(review): the emitted C++ comment says FLT_MIN, but the literal
        # is the most negative finite float (-FLT_MAX); string left untouched.
        ' \t \t \tdtype_t max_val = -3.4028235E+38f; // FLT_MIN',
        ' \t \t \tint max_idx = -1;',
        ' \t \t \tfor (size_t k = 0; k < K; ++k) {',
        ' \t \t \t \tsize_t h_in = x_out * S + k;',
        ' \t \t \t \tif (I[h_in + m * E_in] > max_val) {',
        ' \t \t \t \t \tmax_val = I[h_in + m * E_in];',
        ' \t \t \t \t \tmax_idx = h_in + m * E_in;',
        ' \t \t \t \t}',
        ' \t \t \t}',
        ' \t \t \tO[x_out + m * E_out] = max_val;',
        ' \t \t \tmax_indices[x_out + m * E_out] = max_idx;',
        ' \t \t}',
        ' \t}',
        '}',
        # Backward pass: zero dI, then add each output gradient at its
        # recorded index (entries equal to -1 are skipped).
        'void maxpool_1d_bwd(const dtype_t* dO, const int* max_indices, dtype_t* dI, size_t in_fmap_size, size_t out_fmap_size) {',
        ' \tfor(size_t i = 0; i < in_fmap_size; ++i) { dI[i] = 0.0f; }',
        ' \tfor(size_t i = 0; i < out_fmap_size; ++i) {',
        ' \t \tint index = max_indices[i];',
        ' \t \tif(index != -1) { dI[index] += dO[i]; }',
        ' \t}',
        '}\n',
    ])

    # --- 2. DYNAMICALLY BUILD cnn_fwd WRAPPER ---
    # Signature: per conv layer W/B/O (plus O_pool when pooled), then per FC
    # layer W/B/O, then one max_indices array per pooled layer.
    fwd_proto = "void cnn_fwd(const dtype_t I[CONV_FLAT_SIZE]"
    for i in range(1, num_layers + 1):
        W_size = f" * R{i}" if GLOBAL_DIM == 2 else ""
        fwd_proto += f",\n                const dtype_t W{i}[M{i}*C{i}*R{i}{W_size}], const dtype_t B{i}[M{i}], dtype_t O{i}[L{i}_FMAP_SIZE]"
        if params.get(f'L{i}_POOL_ON', False): fwd_proto += f", dtype_t O{i}_pool[L{i}_POOL_FMAP_SIZE]"
    for i in range(1, params['NUM_FC_LAYERS'] + 1):
        fwd_proto += f",\n                const dtype_t W_fc{i}[FC{i}_IN_SIZE*FC{i}_OUT_SIZE], const dtype_t B_fc{i}[FC{i}_OUT_SIZE], dtype_t O_fc{i}[FC{i}_OUT_SIZE]"
    for i in range(1, num_layers + 1):
        if params.get(f'L{i}_POOL_ON', False): fwd_proto += f",\n                int max_indices{i}[L{i}_POOL_FMAP_SIZE]"
    fwd_proto += ") {"
    code.append(fwd_proto)

    # Body: conv -> in-place ReLU -> optional max-pool for each conv layer.
    for i in range(1, num_layers + 1):
        # Layer 1 reads the network input; later layers read the previous
        # layer's pooled output when that layer has pooling, else its raw output.
        I_name = "I" if i == 1 else (f"O{i-1}_pool" if params.get(f'L{i-1}_POOL_ON', False) else f"O{i-1}")
        code.append(f'\tconv_1d_fwd(H{i}, C{i}, R{i}, M{i}, S{i}, PAD{i}, (const dtype_t*){I_name}, W{i}, B{i}, O{i});')
        code.append(f'\trelu_fwd(L{i}_FMAP_SIZE, O{i});')
        if params.get(f'L{i}_POOL_ON', False):
            code.append(f'\tmaxpool_1d_fwd_with_indices(O{i}, O{i}_pool, max_indices{i}, M{i}, E{i}, L{i}_POOL_E, L{i}_POOL_K, L{i}_POOL_S);')

    # The first FC layer consumes the output of the last conv stage directly.
    last_conv_stage_out = f"O{num_layers}_pool" if params.get(f'L{num_layers}_POOL_ON', False) else f"O{num_layers}"
    for i in range(1, params['NUM_FC_LAYERS'] + 1):
        input_name = last_conv_stage_out if i == 1 else f"O_fc{i-1}"
        code.append(f'\tfc_layer_fwd({input_name}, W_fc{i}, B_fc{i}, O_fc{i}, FC{i}_IN_SIZE, FC{i}_OUT_SIZE);')
        # No ReLU after the last FC layer: its raw scores feed the softmax.
        if i < params['NUM_FC_LAYERS']:
            code.append(f'\trelu_fwd(FC{i}_OUT_SIZE, O_fc{i});')
    code.append('}\n')

    # --- 3. DYNAMICALLY BUILD cnn_bwd WRAPPER ---
    # Signature order: forward activations, weights with their gradient
    # buffers, intermediate input gradients, then the max-pool indices.
    bwd_proto = "void cnn_bwd(const dtype_t I[CONV_FLAT_SIZE], const size_t LABEL[1]"
    for i in range(1, num_layers + 1):
        bwd_proto += f",\n             const dtype_t O{i}[L{i}_FMAP_SIZE]"
        if params.get(f'L{i}_POOL_ON', False): bwd_proto += f", const dtype_t O{i}_pool[L{i}_POOL_FMAP_SIZE]"
    for i in range(1, params['NUM_FC_LAYERS'] + 1): bwd_proto += f",\n             const dtype_t O_fc{i}[FC{i}_OUT_SIZE]"
    for i in range(1, num_layers + 1):
        W_size = f" * R{i}" if GLOBAL_DIM == 2 else ""
        bwd_proto += f",\n             const dtype_t W{i}[M{i}*C{i}*R{i}{W_size}], dtype_t dW{i}[M{i}*C{i}*R{i}{W_size}], dtype_t dB{i}[M{i}]"
    for i in range(1, params['NUM_FC_LAYERS'] + 1):
        bwd_proto += f",\n             const dtype_t W_fc{i}[FC{i}_IN_SIZE*FC{i}_OUT_SIZE], dtype_t dW_fc{i}[FC{i}_IN_SIZE*FC{i}_OUT_SIZE], dtype_t dB_fc{i}[FC{i}_OUT_SIZE]"
    bwd_proto += f",\n             dtype_t dI_fc1[CONV_FLAT_SIZE], dtype_t dI1[C1*H1*(GLOBAL_DIM==2?H1:1)]"
    for i in range(num_layers, 1, -1): bwd_proto += f", dtype_t dI{i}[L{i-1}_FMAP_SIZE]"
    for i in range(params['NUM_FC_LAYERS'], 1, -1): bwd_proto += f", dtype_t dI_fc{i}[FC{i-1}_OUT_SIZE]"
    for i in range(1, num_layers + 1):
        if params.get(f'L{i}_POOL_ON', False): bwd_proto += f",\n             const int max_indices{i}[L{i}_POOL_FMAP_SIZE]"
    bwd_proto += ") {"
    code.append(bwd_proto)

    # Start of the backward pass: loss gradient w.r.t. the last FC output.
    last_fc_out = f"O_fc{params['NUM_FC_LAYERS']}"
    code.append(f'\tdtype_t dO_raw[N_CLASSES];')
    code.append(f'\tcross_entropy_softmax_bwd(N_CLASSES, {last_fc_out}, LABEL, dO_raw);')

    # FC layers, last to first. Each layer's incoming gradient is dO_raw for
    # the last layer, otherwise the dI produced by the layer after it.
    last_conv_stage_out = f"O{num_layers}_pool" if params.get(f'L{num_layers}_POOL_ON', False) else f"O{num_layers}"
    for i in range(params['NUM_FC_LAYERS'], 0, -1):
        dO_name = "dO_raw" if i == params['NUM_FC_LAYERS'] else f"dI_fc{i+1}"
        dI_name = f"dI_fc{i}" if i > 1 else "dI_fc1"
        O_prev_name = f"O_fc{i-1}" if i > 1 else last_conv_stage_out

        if i < params['NUM_FC_LAYERS']:
            # Hidden FC layers had a ReLU in the forward pass, so the gradient
            # goes through relu_bwd (into a local array) before fc_layer_bwd.
            code.append(f'\tdtype_t d_relu_out_fc{i}[FC{i}_OUT_SIZE];')
            code.append(f'\trelu_bwd(FC{i}_OUT_SIZE, O_fc{i}, {dO_name}, d_relu_out_fc{i});')
            code.append(f'\tfc_layer_bwd({O_prev_name}, W_fc{i}, d_relu_out_fc{i}, {dI_name}, dW_fc{i}, dB_fc{i}, FC{i}_IN_SIZE, FC{i}_OUT_SIZE);')
        else:
            code.append(f'\tfc_layer_bwd({O_prev_name}, W_fc{i}, {dO_name}, {dI_name}, dW_fc{i}, dB_fc{i}, FC{i}_IN_SIZE, FC{i}_OUT_SIZE);')

    # Conv layers, last to first: undo pooling (if any), then ReLU, then conv.
    # last_conv_grad_in names the buffer holding the gradient entering layer i.
    last_conv_grad_in = 'dI_fc1'
    for i in range(num_layers, 0, -1):
        grad_in = last_conv_grad_in
        if params.get(f'L{i}_POOL_ON', False):
            code.append(f'\tdtype_t dI{i}_pool[L{i}_FMAP_SIZE];')
            code.append(f'\tmaxpool_1d_bwd({grad_in}, max_indices{i}, dI{i}_pool, L{i}_FMAP_SIZE, L{i}_POOL_FMAP_SIZE);')
            grad_in = f'dI{i}_pool'

        code.append(f'\tdtype_t dI{i}_relu[L{i}_FMAP_SIZE];')
        code.append(f'\trelu_bwd(L{i}_FMAP_SIZE, O{i}, {grad_in}, dI{i}_relu);')

        # Same input selection rule as in cnn_fwd.
        I_name = "(const dtype_t*)I" if i == 1 else (f"O{i-1}_pool" if params.get(f'L{i-1}_POOL_ON', False) else f"O{i-1}")
        dI_name = f"dI{i}" if i > 1 else "dI1"
        code.append(f'\tconv_1d_bwd(H{i}, C{i}, R{i}, M{i}, S{i}, PAD{i}, {I_name}, dI{i}_relu, W{i}, {dI_name}, dW{i}, dB{i});')
        last_conv_grad_in = dI_name

    code.append('}')
    return '\n'.join(code)


def generate_adam_update_function():
    """Return the C++ source of ``adam_update``.

    The emitted function computes ``beta1^t`` and ``beta2^t`` once, then for
    each of the ``size`` elements updates the first and second moment
    estimates ``m``/``v``, bias-corrects them, and applies the step
    ``lr * m_hat / (sqrtf(v_hat) + epsilon)`` to ``W``.

    Returns:
        The C++ text as a single string ending with a newline.
    """
    signature = (
        'void adam_update(dtype_t* W, const dtype_t* dW, dtype_t* m, dtype_t* v, size_t size,',
        '                 dtype_t lr, dtype_t beta1, dtype_t beta2, dtype_t epsilon, size_t t) {',
    )
    bias_correction_terms = (
        '    // t is the 1-based timestep for bias correction',
        '    dtype_t beta1_t = powf(beta1, t);',
        '    dtype_t beta2_t = powf(beta2, t);',
        '',
    )
    element_loop = (
        '    for (size_t i = 0; i < size; ++i) {',
        '        // Update biased first and second moment estimates',
        '        m[i] = beta1 * m[i] + (1.0f - beta1) * dW[i];',
        '        v[i] = beta2 * v[i] + (1.0f - beta2) * (dW[i] * dW[i]);',
        '',
        '        // Compute bias-corrected moment estimates',
        '        dtype_t m_hat = m[i] / (1.0f - beta1_t);',
        '        dtype_t v_hat = v[i] / (1.0f - beta2_t);',
        '',
        '        // Update weights',
        '        W[i] -= lr * m_hat / (sqrtf(v_hat) + epsilon);',
        '    }',
        '}',
    )
    banner = (
        '// --- ADAM OPTIMIZER FUNCTION ---',
        '// Implements the Adam weight update rule.',
    )
    cpp_lines = banner + signature + bias_correction_terms + element_loop
    # One trailing newline after the closing brace.
    return '\n'.join(cpp_lines) + '\n'


def generate_backprop_main_code(num_layers, params):
    """
    Generates the complete main training loop (backprop_main.cpp) for text-based
    .dat files, correctly handling the full window size.
    """
    N_CLASSES = params['N_CLASSES']

    # This calculation is the critical fix: C1 * H1 (e.g., 3 * 10 = 30)
    INPUT_W = params['C1'] * params['H1']

    padded_input_size = f"C1 * H1" + (" * H1" if params['GLOBAL_DIM'] == 2 else "")

    # Inject all C++ helper function code
    load_data_code = '\n'.join(generate_load_data_function(N_CLASSES, INPUT_W))
    save_weights_code = generate_save_weights_function()
    vector_norm_code = generate_vector_norm_function()
    normalization_code = generate_normalization_function()
    adam_update_code = generate_adam_update_function()

    code = [
        '#include "backprop.h"',
        '#include <stdlib.h>', '#include <time.h>', '#include <stdio.h>',
        '#include <string.h>', '#include <iostream>', '#include <vector>',
        '#include <algorithm>', '#include <random>', '#include <numeric>',
        'using namespace std;\n',
        load_data_code,
        save_weights_code,
        vector_norm_code,
        normalization_code,
        adam_update_code,
        'int main() {',
        ' \tsrand(time(0));',
        ' \tconst dtype_t LEARNING_RATE = 0.0001f;',
        ' \tconst dtype_t ADAM_BETA1 = 0.9f;',
        ' \tconst dtype_t ADAM_BETA2 = 0.999f;',
        ' \tconst dtype_t ADAM_EPSILON = 1e-8f;',
        ' \tconst size_t NUM_EPOCHS = 5;',
        ' \tconst size_t BATCH_SIZE = 32;',
        ' \tconst std::string TRAIN_DATA_FILE = "train_windowed.dat";',
        ' \tconst std::string TEST_DATA_FILE = "test_windowed.dat";\n',
        ' \t// --- Data Loading ---',
        ' \tstd::vector<dtype_t> X_train_vec, Y_train_vec_one_hot, X_test_vec, Y_test_vec_one_hot;',
        ' \tstd::vector<size_t> Y_train_vec_raw, Y_test_vec_raw;',
        f' \tlong N_TRAIN_SAMPLES = load_data(TRAIN_DATA_FILE, X_train_vec, Y_train_vec_one_hot, Y_train_vec_raw, {N_CLASSES}, {INPUT_W});',
        f' \tlong N_TEST_SAMPLES = load_data(TEST_DATA_FILE, X_test_vec, Y_test_vec_one_hot, Y_test_vec_raw, {N_CLASSES}, {INPUT_W});',
        ' \tif (N_TRAIN_SAMPLES == 0) { cerr << "FATAL ERROR: No training data loaded." << endl; return 1; }\n',
        ' \t// --- Correct Data Normalization ---',
        ' \tdouble train_mean, train_std_dev;',
        ' \tcalculate_stats(X_train_vec, train_mean, train_std_dev);',
        ' \tapply_normalization(X_train_vec, train_mean, train_std_dev);',
        ' \tcout << "Applying same stats to test data..." << std::endl;',
        ' \tapply_normalization(X_test_vec, train_mean, train_std_dev);\n',
        ' \t// --- Get Raw Data Pointers ---',
        ' \tdtype_t* in_data_train = X_train_vec.data();',
        ' \tsize_t* out_data_train_raw = Y_train_vec_raw.data();',
        ' \tdtype_t* in_data_test = X_test_vec.data();',
        ' \tsize_t* out_data_test_raw = Y_test_vec_raw.data();',
    ]

    # --- 1. MEMORY ALLOCATION ---
    code.append('\n \t// --- Allocate Memory for CONV Layers ---')
    for i in range(1, num_layers + 1):
        W_size = f" * R{i}" if params['GLOBAL_DIM'] == 2 else ""
        size_str = f"M{i} * C{i} * R{i}{W_size}"
        code.append(f' \tdtype_t W{i}[{size_str}]; dtype_t B{i}[M{i}];')
        code.append(f' \tdtype_t dW{i}[{size_str}]; dtype_t dB{i}[M{i}];')
        code.append(f' \tdtype_t m_W{i}[{size_str}]; dtype_t v_W{i}[{size_str}];')
        code.append(f' \tdtype_t m_B{i}[M{i}]; dtype_t v_B{i}[M{i}];')
        code.append(f' \tdtype_t O{i}[L{i}_FMAP_SIZE];')
        if params.get(f'L{i}_POOL_ON', False):
            code.append(f' \tdtype_t O{i}_pool[L{i}_POOL_FMAP_SIZE];')

    code.append('\n \t// --- Allocate Memory for FC Layers ---')
    for i in range(1, params['NUM_FC_LAYERS'] + 1):
        fc_w_size = params[f'FC{i}_IN'] * params[f'FC{i}_OUT']
        fc_b_size = params[f'FC{i}_OUT']
        code.append(f' \tdtype_t W_fc{i}[{fc_w_size}]; dtype_t B_fc{i}[{fc_b_size}];')
        code.append(f' \tdtype_t dW_fc{i}[{fc_w_size}]; dtype_t dB_fc{i}[{fc_b_size}];')
        code.append(f' \tdtype_t m_W_fc{i}[{fc_w_size}]; dtype_t v_W_fc{i}[{fc_w_size}];')
        code.append(f' \tdtype_t m_B_fc{i}[{fc_b_size}]; dtype_t v_B_fc{i}[{fc_b_size}];')
        code.append(f' \tdtype_t O_fc{i}[{fc_b_size}];')

    code.append(f'\n \t// --- Allocate Memory for Input & Intermediate Gradients ---')
    code.append(f' \tdtype_t padded_input[{padded_input_size}];')
    for i in range(1, params['NUM_FC_LAYERS'] + 1):
        if i > 1: code.append(f' \tdtype_t dI_fc{i}[{params[f"FC{i-1}_OUT"]}];')
    code.append(f' \tdtype_t dI_fc1[{params["CONV_FLAT_SIZE"]}];')
    code.append(f' \tdtype_t dI1[C1*H1*(GLOBAL_DIM==2?H1:1)];')
    for i in range(num_layers, 1, -1):
        code.append(f' \tdtype_t dI{i}[L{i-1}_FMAP_SIZE];')

    code.append('\n \t// --- Allocate Memory for MaxPool Indices ---')
    for i in range(1, num_layers + 1):
        if params.get(f'L{i}_POOL_ON', False):
            code.append(f' \tint max_indices{i}[L{i}_POOL_FMAP_SIZE];')

    # --- 2. WEIGHT INITIALIZATION ---
    code.append('\n \tcout << "Initializing weights using He initialization..." << endl;')
    for i in range(1, num_layers + 1):
        fan_in = params[f'C{i}'] * params[f'R{i}'] * (params[f'R{i}'] if params['GLOBAL_DIM'] == 2 else 1)
        w_bound = f"sqrt(6.0f / {fan_in})"
        W_size_str = f"M{i}*C{i}*R{i}" + (f" * R{i}" if params['GLOBAL_DIM'] == 2 else "")
        code.append(f' \tfloat w_bound_{i} = {w_bound};')
        code.append(f' \tfor (size_t j = 0; j < {W_size_str}; ++j) W{i}[j] = ((float)rand()/(float)RAND_MAX * 2.0f - 1.0f) * w_bound_{i};')
        code.append(f' \tfor (size_t j = 0; j < M{i}; ++j) B{i}[j] = 0.0f;')

    for i in range(1, params['NUM_FC_LAYERS'] + 1):
        fan_in = params[f'FC{i}_IN']
        w_bound = f"sqrt(6.0f / {fan_in})"
        fc_w_size = params[f'FC{i}_IN'] * params[f'FC{i}_OUT']
        fc_b_size = params[f'FC{i}_OUT']
        code.append(f' \tfloat w_bound_fc{i} = {w_bound};')
        code.append(f' \tfor (size_t j = 0; j < {fc_w_size}; ++j) W_fc{i}[j] = ((float)rand()/(float)RAND_MAX * 2.0f - 1.0f) * w_bound_fc{i};')
        code.append(f' \tfor (size_t j = 0; j < {fc_b_size}; ++j) B_fc{i}[j] = 0.0f;')

    # --- 3. ADAM STATE INITIALIZATION ---
    code.append('\n \tcout << "Initializing Adam optimizer state..." << endl;')
    for i in range(1, num_layers + 1):
        code.append(f' \tmemset(m_W{i}, 0, sizeof(m_W{i})); memset(v_W{i}, 0, sizeof(v_W{i}));')
        code.append(f' \tmemset(m_B{i}, 0, sizeof(m_B{i})); memset(v_B{i}, 0, sizeof(v_B{i}));')
    for i in range(1, params['NUM_FC_LAYERS'] + 1):
        code.append(f' \tmemset(m_W_fc{i}, 0, sizeof(m_W_fc{i})); memset(v_W_fc{i}, 0, sizeof(v_W_fc{i}));')
        code.append(f' \tmemset(m_B_fc{i}, 0, sizeof(m_B_fc{i})); memset(v_B_fc{i}, 0, sizeof(v_B_fc{i}));')

    # --- 4. TRAINING LOOP ---
    code.extend([
        '\n \tcout << "Starting training for " << NUM_EPOCHS << " epochs..." << endl;',
        ' \tstd::vector<size_t> indices(N_TRAIN_SAMPLES); std::iota(indices.begin(), indices.end(), 0);',
        ' \tstd::mt19937 g(rand());',
        ' \tsize_t t = 0;\n',
        ' \tfor (size_t epoch = 0; epoch < NUM_EPOCHS; ++epoch) {',
        ' \t \tstd::shuffle(indices.begin(), indices.end(), g);',
        ' \t \tlong total_batches = (N_TRAIN_SAMPLES + BATCH_SIZE - 1) / BATCH_SIZE; int batch_count = 0;',
        ' \t \tfor (long i = 0; i < N_TRAIN_SAMPLES; i += BATCH_SIZE) {',
        ' \t \t \tsize_t current_batch_size = (i + BATCH_SIZE <= N_TRAIN_SAMPLES) ? BATCH_SIZE : (N_TRAIN_SAMPLES - i);'
    ])

    for i in range(1, num_layers + 1): code.append(f' \t \t \tmemset(dW{i}, 0, sizeof(dW{i})); memset(dB{i}, 0, sizeof(dB{i}));')
    for i in range(1, params['NUM_FC_LAYERS'] + 1): code.append(f' \t \t \tmemset(dW_fc{i}, 0, sizeof(dW_fc{i})); memset(dB_fc{i}, 0, sizeof(dB_fc{i}));')

    code.extend([
        '\n \t \t \tfor (size_t sample_idx = 0; sample_idx < current_batch_size; ++sample_idx) {',
        ' \t \t \t \tsize_t shuffled_index = indices[i + sample_idx];',
        f' \t \t \t \tdtype_t* current_sample_features = in_data_train + shuffled_index * {INPUT_W};',
        ' \t \t \t \tsize_t* current_sample_LABEL = out_data_train_raw + shuffled_index;',
        ' \t \t \t \tmemset(padded_input, 0, sizeof(padded_input));',
        f' \t \t \t \tmemcpy(padded_input, current_sample_features, {INPUT_W} * sizeof(dtype_t));'
    ])

    fwd_call = ' \t \t \t \tcnn_fwd(padded_input'
    for i in range(1, num_layers + 1):
        fwd_call += f', W{i}, B{i}, O{i}'
        if params.get(f'L{i}_POOL_ON', False): fwd_call += f', O{i}_pool'
    for i in range(1, params['NUM_FC_LAYERS'] + 1): fwd_call += f', W_fc{i}, B_fc{i}, O_fc{i}'
    for i in range(1, num_layers + 1):
        if params.get(f'L{i}_POOL_ON', False): fwd_call += f', max_indices{i}'
    fwd_call += ');'
    code.append(fwd_call)

    bwd_call = ' \t \t \t \tcnn_bwd(padded_input, current_sample_LABEL'
    for i in range(1, num_layers + 1):
        bwd_call += f', O{i}'
        if params.get(f'L{i}_POOL_ON', False): bwd_call += f', O{i}_pool'
    for i in range(1, params['NUM_FC_LAYERS'] + 1): bwd_call += f', O_fc{i}'
    for i in range(1, num_layers + 1): bwd_call += f', W{i}, dW{i}, dB{i}'
    for i in range(1, params['NUM_FC_LAYERS'] + 1): bwd_call += f', W_fc{i}, dW_fc{i}, dB_fc{i}'
    bwd_call += f', dI_fc1, dI1'
    for i in range(num_layers, 1, -1): bwd_call += f', dI{i}'
    for i in range(params['NUM_FC_LAYERS'], 1, -1): bwd_call += f', dI_fc{i}'
    for i in range(1, num_layers + 1):
        if params.get(f'L{i}_POOL_ON', False): bwd_call += f', max_indices{i}'
    bwd_call += ');'
    code.append(bwd_call)

    code.append(' \t \t \t}')

    code.append('\n \t \t \tt++;')
    for i in range(params['NUM_FC_LAYERS'], 0, -1):
        fc_w_size = params[f'FC{i}_IN'] * params[f'FC{i}_OUT']
        fc_b_size = params[f'FC{i}_OUT']
        code.append(f' \t \t \tadam_update(W_fc{i}, dW_fc{i}, m_W_fc{i}, v_W_fc{i}, {fc_w_size}, LEARNING_RATE, ADAM_BETA1, ADAM_BETA2, ADAM_EPSILON, t);')
        code.append(f' \t \t \tadam_update(B_fc{i}, dB_fc{i}, m_B_fc{i}, v_B_fc{i}, {fc_b_size}, LEARNING_RATE, ADAM_BETA1, ADAM_BETA2, ADAM_EPSILON, t);')
    for i in range(num_layers, 0, -1):
        W_size_str = f"M{i}*C{i}*R{i}" + (f" * R{i}" if params['GLOBAL_DIM'] == 2 else "")
        code.append(f' \t \t \tadam_update(W{i}, dW{i}, m_W{i}, v_W{i}, {W_size_str}, LEARNING_RATE, ADAM_BETA1, ADAM_BETA2, ADAM_EPSILON, t);')
        code.append(f' \t \t \tadam_update(B{i}, dB{i}, m_B{i}, v_B{i}, M{i}, LEARNING_RATE, ADAM_BETA1, ADAM_BETA2, ADAM_EPSILON, t);')

    code.append(' \t \t \tbatch_count++; if (batch_count % 50 == 0) { printf("Epoch %zu, Batch %d / %ld...\\r", epoch + 1, batch_count, total_batches); fflush(stdout); }')
    code.append(' \t \t} // End of batch loop')

    # --- 5. EVALUATION ---
    code.append('\n \t \t// --- EVALUATION on Test Set ---')
    code.append(' \t \tif (N_TEST_SAMPLES > 0) {')
    code.append(' \t \t \tint correct_predictions = 0;')
    code.append(' \t \t \tfor (long j = 0; j < N_TEST_SAMPLES; ++j) {')
    code.append(f' \t \t \t \tdtype_t* current_test_features = in_data_test + j * {INPUT_W};')
    code.append(' \t \t \t \tsize_t true_label = out_data_test_raw[j];')
    code.append(f' \t \t \t \tmemset(padded_input, 0, sizeof(padded_input));')
    code.append(f' \t \t \t \tmemcpy(padded_input, current_test_features, {INPUT_W} * sizeof(dtype_t));')

    eval_fwd_call = ' \t \t \t \tcnn_fwd(padded_input'
    for i in range(1, num_layers + 1):
        eval_fwd_call += f', W{i}, B{i}, O{i}'
        if params.get(f'L{i}_POOL_ON', False): eval_fwd_call += f', O{i}_pool'
    for i in range(1, params['NUM_FC_LAYERS'] + 1): eval_fwd_call += f', W_fc{i}, B_fc{i}, O_fc{i}'
    for i in range(1, num_layers + 1):
        if params.get(f'L{i}_POOL_ON', False): eval_fwd_call += f', max_indices{i}'
    eval_fwd_call += ');'
    code.append(eval_fwd_call)

    last_fc_out_name = f'O_fc{params["NUM_FC_LAYERS"]}'
    code.append(f' \t \t \t \tint predicted_class = 0; dtype_t max_score = {last_fc_out_name}[0];')
    code.append(f' \t \t \t \tfor (size_t k = 1; k < N_CLASSES; ++k) {{ if ({last_fc_out_name}[k] > max_score) {{ max_score = {last_fc_out_name}[k]; predicted_class = k; }} }}')
    code.append(' \t \t \t \tif (predicted_class == true_label) { correct_predictions++; }')
    code.append(' \t \t \t}')
    code.append(' \t \t \tfloat test_accuracy = (float)correct_predictions / N_TEST_SAMPLES * 100.0f;')

    last_fc_w_size = params[f'FC{params["NUM_FC_LAYERS"]}_IN'] * params[f'FC{params["NUM_FC_LAYERS"]}_OUT']
    last_fc_w = f'W_fc{params["NUM_FC_LAYERS"]}'
    last_fc_dw = f'dW_fc{params["NUM_FC_LAYERS"]}'

    code.append(f' \t \t \tdtype_t grad_norm = calculate_l2_norm({last_fc_dw}, {last_fc_w_size});')
    code.append(f' \t \t \tdtype_t weight_norm = calculate_l2_norm({last_fc_w}, {last_fc_w_size});')
    code.append(' \t \t \tprintf("\\nEpoch %zu | Test Accuracy: %.2f%% | Grad Norm: %e | Weight Norm: %.4f\\n", epoch + 1, test_accuracy, grad_norm, weight_norm);')
    code.append(' \t \t}')

    code.append(' \t} // End of epoch loop')

    # --- 6. SAVE FINAL WEIGHTS ---
    code.append('\n \tcout << "\\nTraining finished." << endl;')
    code.append(generate_calibration_code(num_layers, params))
    code.append(' \tcout << "Saving final model weights..." << endl;')
    for i in range(1, num_layers + 1):
        W_size_str = f"M{i} * C{i} * R{i}" + (f" * R{i}" if params['GLOBAL_DIM'] == 2 else "")
        code.append(f' \tsave_weights("W{i}.bin", W{i}, {W_size_str});')
        code.append(f' \tsave_weights("B{i}.bin", B{i}, M{i});')
    for i in range(1, params['NUM_FC_LAYERS'] + 1):
        fc_w_size = params[f'FC{i}_IN'] * params[f'FC{i}_OUT']
        fc_b_size = params[f'FC{i}_OUT']
        code.append(f' \tsave_weights("W_fc{i}.bin", W_fc{i}, {fc_w_size});')
        code.append(f' \tsave_weights("B_fc{i}.bin", B_fc{i}, {fc_b_size});')

    code.append('\n \treturn 0;\n}')
    return '\n'.join(code)


# --- 3. Parameter Calculation and GUI Setup ---

def calculate_output_size(H_in, R, S, P):
    """Return the layer output size E = ceil(H_in / S) (HLS-style padding/stride).

    Only the input size ``H_in`` and the stride ``S`` take part in the result;
    the kernel size ``R`` and padding ``P`` are accepted but not used.
    """
    stride_ratio = H_in / S
    output_size = math.ceil(stride_ratio)
    return output_size


def generate_parameter_widgets(num_conv_layers):
    """
    Build the parameter-entry form for a CNN with ``num_conv_layers`` conv layers.

    The form contains the global settings, the layer-1 input shape, one group
    of CONV / optional MaxPool controls per conv layer, and a resizable list of
    FC layer size fields whose last (disabled) entry mirrors N_CLASSES.

    Returns a 9-tuple:
        (params_vbox, layer_widgets, c1_widget, h1_widget, n_classes_widget,
         global_dim_widget, num_fc_layers_widget, fc_layer_widgets,
         data_type_widget)
    where ``layer_widgets`` maps a 1-based conv layer index to the tuple
    ``(M, R, S, pool_checkbox, pool_kernel, pool_stride)`` of widgets and
    ``fc_layer_widgets`` maps a 1-based FC layer index to its size widget.
    """
    def wide_label():
        # Every widget receives its own style dict, as in a literal per call.
        return {'description_width': 'initial'}

    def int_field(value, description, **extra):
        # NOTE(review): IntText may not enforce min/max (BoundedIntText does);
        # confirm before relying on these bounds for validation.
        return widgets.IntText(value=value, description=description, style=wide_label(), **extra)

    def pool_toggle_for(kernel_box, stride_box):
        """Return an observer that enables the pool fields only while the box is ticked."""
        def apply_toggle(change):
            pool_off = not change['new']
            kernel_box.disabled = pool_off
            stride_box.disabled = pool_off
        return apply_toggle

    # --- Global parameters ---
    data_type_widget = widgets.Dropdown(
        options=['float', 'fixed'],
        value='float',
        description='HLS Data Type:',
        style=wide_label(),
    )
    n_classes_widget = int_field(6, 'N_CLASSES:', min=1)
    global_dim_widget = widgets.Dropdown(
        options=[(1, 1), (2, 2)],
        value=1,
        description='GLOBAL DIMENSION:',
        style=wide_label(),
    )
    bias_on_widget = int_field(1, 'Bias ON (1=Yes):', min=1, max=1, disabled=True)

    # --- Input feature map (layer 1 input) ---
    c1_widget = int_field(3, 'Input C1 (Channels):', min=1)
    h1_widget = int_field(10, 'Input H1 (Size):', min=1)

    form_rows = [
        widgets.HTML(value="<h3>Global Configuration:</h3>"),
        data_type_widget,
        global_dim_widget,
        widgets.HBox([n_classes_widget, bias_on_widget]),
        widgets.HTML(value="<h3>Input Feature Map (Layer 1 Input):</h3>"),
        c1_widget,
        h1_widget,
        widgets.HTML(value=f"<h3>Convolution Layers (1 to {num_conv_layers}):</h3>"),
    ]

    # --- Convolution layers, each with an optional MaxPool stage ---
    layer_widgets = {}
    for layer in range(1, num_conv_layers + 1):
        default_channels = 16 if layer == 1 else 32
        channels_box = int_field(default_channels, f'L{layer} M (Ch Out):', min=1)
        kernel_box = int_field(3, f'L{layer} R (Kernel):', min=1)
        stride_box = int_field(1, f'L{layer} S (Stride):', min=1)

        # Only the first layer starts with pooling switched on.
        pool_checkbox = widgets.Checkbox(value=(layer == 1), description='Add MaxPool', indent=False)
        pool_fields_off = not pool_checkbox.value
        pool_kernel_box = int_field(2, 'Pool Kernel:', disabled=pool_fields_off)
        pool_stride_box = int_field(2, 'Pool Stride:', disabled=pool_fields_off)
        pool_checkbox.observe(pool_toggle_for(pool_kernel_box, pool_stride_box), names='value')

        layer_widgets[layer] = (
            channels_box, kernel_box, stride_box,
            pool_checkbox, pool_kernel_box, pool_stride_box,
        )

        form_rows.append(widgets.VBox([
            widgets.HTML(value=f"<h4>Conv Layer {layer}</h4>"),
            widgets.HBox([channels_box, kernel_box, stride_box]),
            widgets.HBox(
                [pool_checkbox, pool_kernel_box, pool_stride_box],
                layout=widgets.Layout(margin='0 0 0 20px'),
            ),
        ]))

    # --- Fully connected layers ---
    form_rows.append(widgets.HTML(value="<h3>Fully Connected (Dense) Layers:</h3>"))
    num_fc_layers_widget = widgets.IntSlider(
        value=2, min=1, max=4, step=1,
        description='# FC Layers:',
        style=wide_label(),
    )
    form_rows.append(num_fc_layers_widget)

    fc_layer_widgets = {}
    fc_widgets_vbox = widgets.VBox()

    def rebuild_fc_fields(change):
        # The dict is cleared and refilled in place because callers keep a
        # reference to it through the returned tuple.
        fc_count = change['new']
        fc_layer_widgets.clear()
        for idx in range(1, fc_count + 1):
            is_final = idx == fc_count
            if is_final:
                label, size = f'FC Layer {idx} (Final Output):', n_classes_widget.value
            else:
                label, size = f'FC Layer {idx} Out Size:', 64
            fc_layer_widgets[idx] = int_field(size, label, disabled=is_final)
        fc_widgets_vbox.children = tuple(fc_layer_widgets.values())

    def mirror_n_classes(change):
        # Keep the final FC layer's size equal to N_CLASSES.
        final_idx = num_fc_layers_widget.value
        if final_idx > 0 and final_idx in fc_layer_widgets:
            fc_layer_widgets[final_idx].value = change['new']

    num_fc_layers_widget.observe(rebuild_fc_fields, names='value')
    n_classes_widget.observe(mirror_n_classes, names='value')
    rebuild_fc_fields({'new': num_fc_layers_widget.value})
    form_rows.append(fc_widgets_vbox)

    # --- Final assembly ---
    params_vbox = widgets.VBox(form_rows)
    return (
        params_vbox, layer_widgets, c1_widget, h1_widget, n_classes_widget,
        global_dim_widget, num_fc_layers_widget, fc_layer_widgets, data_type_widget,
    )


def _require_positive(label, value):
    """Raise ValueError unless ``value`` is 1 or greater."""
    if value < 1:
        raise ValueError(f"{label} must be 1 or greater (got {value}).")


def collect_and_calculate_params(num_conv_layers, layer_widgets, c1_widget, h1_widget, n_classes_widget, global_dim_widget, num_fc_layers_widget, fc_layer_widgets, data_type_widget):
    """
    Read every user input from the form widgets and derive the dependent
    parameters of the whole CNN (CONV, optional MaxPool, and FC layers).

    Args:
        num_conv_layers: Number of convolution layers to read from ``layer_widgets``.
        layer_widgets: Maps a 1-based conv layer index to the 6-tuple
            ``(M, R, S, pool_checkbox, pool_kernel, pool_stride)``; only the
            ``.value`` of each entry is read.
        c1_widget, h1_widget: Input channel count and input size of layer 1.
        n_classes_widget: Number of output classes.
        global_dim_widget: Dimensionality of the network (1 or 2).
        num_fc_layers_widget: Number of fully connected layers.
        fc_layer_widgets: Maps a 1-based FC layer index to its output-size widget.
        data_type_widget: Selected HLS data type.

    Returns:
        dict with the keys ``DATA_TYPE``, ``N_CLASSES``, ``GLOBAL_DIM``,
        ``CONV_FLAT_SIZE``, ``NUM_FC_LAYERS``; per conv layer ``C{i}``,
        ``H{i}``, ``M{i}``, ``R{i}``, ``S{i}``, ``E{i}``, ``F{i}``, ``PAD{i}``,
        ``L{i}_POOL_ON`` (plus ``L{i}_POOL_K``, ``L{i}_POOL_S``,
        ``L{i}_POOL_E`` when pooling is on); per FC layer ``FC{i}_IN`` and
        ``FC{i}_OUT``.

    Raises:
        ValueError: If any size, stride or count is below 1, a kernel exceeds
            its input, a 2D kernel is even, a MaxPool stage yields no output,
            or the last FC layer does not output N_CLASSES values.
    """
    params = {}

    # Read the value from the dropdown at the very beginning
    params['DATA_TYPE'] = data_type_widget.value

    # --- 1. Collect Global and Input Parameters ---
    params['N_CLASSES'] = n_classes_widget.value
    params['GLOBAL_DIM'] = global_dim_widget.value
    GLOBAL_DIM = params['GLOBAL_DIM']

    if params['N_CLASSES'] < 1:
        raise ValueError("N_CLASSES must be 1 or greater.")

    params['C1'] = c1_widget.value
    params['H1'] = h1_widget.value
    # Nothing upstream guarantees positive entries, so they are checked here
    # and reported as validation errors instead of failing later.
    _require_positive("Input C1 (channels)", params['C1'])
    _require_positive("Input H1 (size)", params['H1'])

    # Initialize tracking variables for chaining layers
    H_prev = params['H1']
    M_prev = params['C1']

    # --- 2. Loop Through CONV and MaxPool Layers ---
    for i in range(1, num_conv_layers + 1):
        # Unpack all 6 widgets for the current layer
        m_i, r_i, s_i, pool_cb, pool_k, pool_s = layer_widgets[i]

        # Input dimensions for this CONV layer are the output of the previous stage
        params[f'C{i}'] = M_prev
        params[f'H{i}'] = H_prev

        # Collect and validate the CONV layer parameters before any arithmetic:
        # a zero stride would otherwise surface as a ZeroDivisionError.
        params[f'M{i}'] = m_i.value
        params[f'R{i}'] = r_i.value
        params[f'S{i}'] = s_i.value
        _require_positive(f"Conv Layer {i}: M{i} (output channels)", params[f'M{i}'])
        _require_positive(f"Conv Layer {i}: R{i} (kernel size)", params[f'R{i}'])
        _require_positive(f"Conv Layer {i}: S{i} (stride)", params[f'S{i}'])

        E_conv = calculate_output_size(params[f'H{i}'], params[f'R{i}'], params[f'S{i}'], 0)
        params[f'E{i}'] = E_conv

        # Calculate necessary padding to achieve the output size
        F_i = (E_conv * params[f'S{i}'] + params[f'R{i}'] - 1)
        params[f'F{i}'] = F_i
        params[f'PAD{i}'] = (F_i - params[f'H{i}']) // 2

        # The output of the CONV stage becomes the default input for the next stage
        H_prev = E_conv

        # Calculate optional MaxPool layer parameters
        if pool_cb.value:
            _require_positive(f"Conv Layer {i}: MaxPool kernel", pool_k.value)
            _require_positive(f"Conv Layer {i}: MaxPool stride", pool_s.value)
            params[f'L{i}_POOL_ON'] = True
            params[f'L{i}_POOL_K'] = pool_k.value
            params[f'L{i}_POOL_S'] = pool_s.value

            # Standard (floor) pooling output size.
            E_pool = (E_conv - pool_k.value) // pool_s.value + 1
            if E_pool < 1:
                raise ValueError(f"Layer {i} MaxPool resulted in a non-positive output size ({E_pool}). Please decrease kernel size or increase input size.")
            params[f'L{i}_POOL_E'] = E_pool

            # CRUCIAL: The output of the pool now becomes the input for the next layer
            H_prev = E_pool
        else:
            params[f'L{i}_POOL_ON'] = False

        # The output channels of this CONV layer are the input channels for the next
        M_prev = params[f'M{i}']

        # Validation
        if params[f'R{i}'] > params[f'H{i}'] and params[f'H{i}'] > 1:
            raise ValueError(f"Conv Layer {i}: Kernel size R{i} ({params[f'R{i}']}) must be <= input size H{i} ({params[f'H{i}']}).")
        if GLOBAL_DIM == 2 and params[f'R{i}'] % 2 == 0:
            raise ValueError(f"Conv Layer {i}: 2D kernel R{i} must be odd for symmetric padding.")

    # --- 3. Calculate Flattened Size ---
    # The size is based on the output of the final CONV or POOL stage
    conv_flat_size = M_prev * H_prev * (H_prev if GLOBAL_DIM == 2 else 1)
    params['CONV_FLAT_SIZE'] = conv_flat_size

    # --- 4. Loop Through FC Layers ---
    params['NUM_FC_LAYERS'] = num_fc_layers_widget.value
    _require_positive("NUM_FC_LAYERS", params['NUM_FC_LAYERS'])
    current_input_size = conv_flat_size

    for i in range(1, params['NUM_FC_LAYERS'] + 1):
        params[f'FC{i}_IN'] = current_input_size
        output_size = fc_layer_widgets[i].value
        _require_positive(f"FC Layer {i} output size", output_size)
        params[f'FC{i}_OUT'] = output_size
        current_input_size = output_size

    # --- 5. Final Validation ---
    if params[f"FC{params['NUM_FC_LAYERS']}_OUT"] != params['N_CLASSES']:
        raise ValueError("Mismatch! The output size of the last FC layer must be equal to N_CLASSES.")

    return params


# --- 4. GUI and Execution ---
# on_button_click reads the widget handles that setup_ui() stores in module-level globals.

def _write_generated_file(folder, filename, content):
    """Write ``content`` to ``folder/filename`` as UTF-8 text, replacing any existing file."""
    with open(os.path.join(folder, filename), 'w', encoding='utf-8') as f:
        f.write(content)


def _architecture_summary_table(num_conv_layers, params):
    """Return the Markdown table (as one string) describing the calculated architecture."""
    dim = params['GLOBAL_DIM']
    num_fc_layers = params['NUM_FC_LAYERS']

    def spatial(size):
        # In 2D mode every feature map is square, so it is shown as "NxN".
        return f"{size}x{size}" if dim == 2 else f"{size}"

    rows = [
        "| Layer | Type | D (Dim) | In Shape | Out Shape | Details / Activation |",
        "|---|---|---|---|---|---|",
        f"| Input | Image | **{dim}** | - | {params['C1']} x {spatial(params['H1'])} | None |",
    ]

    for j in range(1, num_conv_layers + 1):
        in_shape = f"{params[f'C{j}']} x {spatial(params[f'H{j}'])}"
        out_shape = f"{params[f'M{j}']} x {spatial(params[f'E{j}'])}"
        rows.append(f"| Conv {j} | CONV | **{dim}** | {in_shape} | {out_shape} | **ReLU** |")

        if params.get(f'L{j}_POOL_ON', False):
            pool_k = params[f'L{j}_POOL_K']
            pool_s = params[f'L{j}_POOL_S']
            # The pooled map is square in 2D as well (the flatten size uses
            # E*E), so it goes through the same formatter as the conv shapes.
            out_shape_pool = f"{params[f'M{j}']} x {spatial(params[f'L{j}_POOL_E'])}"
            rows.append(f"| | MaxPool | **{dim}** | {out_shape} | {out_shape_pool} | K={pool_k}, S={pool_s} |")

    rows.append(f"| | Flatten | - | - | {params['CONV_FLAT_SIZE']} | None |")

    for j in range(1, num_fc_layers + 1):
        activation = "ReLU" if j < num_fc_layers else "Softmax"
        rows.append(f"| FC {j} | Dense | - | {params[f'FC{j}_IN']} | {params[f'FC{j}_OUT']} | **{activation}** |")

    return '\n'.join(rows)


def on_button_click(b):
    """
    Handle a click on the 'Generate All Code' button.

    Collects the parameters from the module-level widget handles, creates the
    project folders, writes the HLS inference and x86 training sources, and
    shows an architecture summary plus run instructions in ``output_area``.

    Args:
        b: The clicked button (unused).

    Errors are not propagated: a ValueError is shown as a validation error
    and any other exception as an unexpected error, both inside ``output_area``.
    """
    with output_area:
        clear_output()

        num_conv_layers = layer_slider.value

        try:
            # 1. Collect and Calculate All Network Parameters
            print("INFO: Collecting and calculating all network parameters...")
            params = collect_and_calculate_params(
                num_conv_layers, layer_widgets_map, c1_input, h1_input,
                n_classes_input, global_dim_input,
                num_fc_layers_input, fc_layer_widgets_map,
                data_type_input
            )

            GLOBAL_DIM = params['GLOBAL_DIM']
            NUM_FC_LAYERS = params['NUM_FC_LAYERS']
            print("INFO: Parameter calculation successful.")

            # 2. Setup Folder Structure
            root_folder = f'./generatedCNN_{num_conv_layers}Conv_{NUM_FC_LAYERS}FC_{GLOBAL_DIM}D'
            source_folder = os.path.join(root_folder, 'src')
            bin_folder = os.path.join(root_folder, 'bin')
            backprop_folder = os.path.join(root_folder, 'backprop')

            for folder in (source_folder, bin_folder, backprop_folder):
                os.makedirs(folder, exist_ok=True)
            print(f"INFO: Created project directory: {root_folder}")

            # 3. Generate HLS Inference Files (SRC Folder)
            # Each generator runs before its file is opened, so a failing
            # generator does not leave an empty file behind.
            print("INFO: Generating HLS inference files...")
            _write_generated_file(source_folder, 'Makefile', generate_makefile_code(num_conv_layers))
            _write_generated_file(source_folder, f'conv_tb{num_conv_layers}.cpp', generate_testbench_code(num_conv_layers, params))
            _write_generated_file(source_folder, f'conv_tb{num_conv_layers}.h', generate_testbench_header_code(num_conv_layers))
            _write_generated_file(source_folder, f'conv{num_conv_layers}.h', generate_convh_code(num_conv_layers, params))
            _write_generated_file(source_folder, f'conv{num_conv_layers}.cpp', generate_conv_code(num_conv_layers, params))
            _write_generated_file(source_folder, 'run_hls.tcl', generate_vitis_tcl_script(num_conv_layers, params))
            print("INFO: HLS inference files generated successfully.")

            # 4. Generate Backpropagation Training Files (BACKPROP Folder)
            print("INFO: Generating x86 backpropagation training files...")
            _write_generated_file(backprop_folder, 'Makefile', generate_backprop_makefile_code())
            _write_generated_file(backprop_folder, 'backprop.h', generate_backprop_header_code(num_conv_layers, params))
            _write_generated_file(backprop_folder, 'backprop_main.cpp', generate_backprop_main_code(num_conv_layers, params))
            _write_generated_file(backprop_folder, 'backprop.cpp', generate_backprop_cpp_code(num_conv_layers, params))
            print("INFO: Backpropagation files generated successfully.")

            # 5. Display Success and Summary to User
            display(Markdown('## ✅ Success! All files generated.'))
            display(Markdown(f'HLS Inference code in `{source_folder}`. **x86 Training code in `{backprop_folder}`.**'))

            display(Markdown('### Calculated CNN Architecture\n' + _architecture_summary_table(num_conv_layers, params)))

            # The data file named below is the one the generated run_hls.tcl
            # copies into the simulation directory (test_windowed.dat).
            display(Markdown('### Next Steps: Execution Instructions'))
            display(Markdown(
                f"**To train the model on your CPU:**\n"
                f"1. Open a new terminal.\n"
                f"2. Navigate to the training directory: `cd {backprop_folder}`\n"
                f"3. Compile the trainer: `make`\n"
                f"4. Run the trainer: `./cnn_trainer`\n"
                f"   *(This creates the `.bin` weight files needed for HLS)*\n\n"
                f"**To synthesize the hardware with Vitis HLS:**\n"
                f"1. Ensure the `.bin` files and `test_windowed.dat` are in the `{source_folder}` directory.\n"
                f"2. Navigate to the HLS source directory: `cd {source_folder}`\n"
                f"3. Launch Vitis HLS with the script: `vitis_hls -f run_hls.tcl`"
            ))

        except ValueError as e:
            display(Markdown(f'## ❌ Validation Error\nAn invalid parameter was detected. Please correct the input.\n\n**Details:** {e}'))
        except Exception as e:
            # Top-level UI boundary: report the failure in the output area
            # rather than letting it escape the widget callback.
            display(Markdown(f'## ❌ An Unexpected Error Occurred\n\n**Details:** {e}'))


# Top-level controls: the conv-layer count slider, the generate button, the
# output pane, and the container that hosts the parameter form.
layer_slider = widgets.IntSlider(
    value=1,
    min=1,
    max=5,
    step=1,
    description='CNN Layers:',
    continuous_update=False,
    style={'description_width': 'initial'},
)
generate_button = widgets.Button(description='Generate All Code', button_style='success')
output_area = widgets.Output()
code_controls_vbox = widgets.VBox()

# Module-level widget handles; they stay None until setup_ui() fills them in.
param_vbox = layer_widgets_map = c1_input = h1_input = n_classes_input = None
global_dim_input = num_fc_layers_input = fc_layer_widgets_map = data_type_input = None

def setup_ui(num_layers):
    """Rebuild the parameter form for ``num_layers`` conv layers.

    The nine handles returned by generate_parameter_widgets() are stored in
    the module-level globals, and the new form replaces the contents of
    ``code_controls_vbox``.
    """
    global param_vbox, layer_widgets_map, c1_input, h1_input, n_classes_input, global_dim_input, num_fc_layers_input, fc_layer_widgets_map, data_type_input

    form_handles = generate_parameter_widgets(num_layers)
    (
        param_vbox,
        layer_widgets_map,
        c1_input,
        h1_input,
        n_classes_input,
        global_dim_input,
        num_fc_layers_input,
        fc_layer_widgets_map,
        data_type_input,
    ) = form_handles

    code_controls_vbox.children = (param_vbox,)

def update_widgets_on_slider_change(change):
    """Rebuild the form for the slider's new layer count and show a status note."""
    layer_count = change.new
    setup_ui(layer_count)
    with output_area:
        clear_output()
        status_text = f"Parameters updated for **{layer_count} conv layers**. Configure and click 'Generate'."
        display(Markdown(status_text))

# Build the parameter form once for the slider's starting value, then wire the
# handlers: a slider change rebuilds the form, a button click runs generation.
setup_ui(layer_slider.value)
layer_slider.observe(update_widgets_on_slider_change, names='value')
generate_button.on_click(on_button_click)

# Show the complete UI: slider, parameter form, generate button, output pane.
display(layer_slider, code_controls_vbox, generate_button, output_area)



IntSlider(value=1, continuous_update=False, description='CNN Layers:', max=5, min=1, style=SliderStyle(descrip…

VBox(children=(VBox(children=(HTML(value='<h3>Global Configuration:</h3>'), Dropdown(description='HLS Data Typ…

Button(button_style='success', description='Generate All Code', style=ButtonStyle())

Output()