In [14]:
# 09_fusion_layer.ipynb

import pandas as pd
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score



In [15]:
# Notebook configuration: input locations, client list and output directory.
#
# The two input directories default to the original author's machine-specific
# absolute paths. Set MBFT_XGB_RESULTS_DIR / MBFT_TINYBERT_RESULTS_DIR in the
# environment to run the notebook elsewhere without editing this cell.
xgb_path = os.environ.get(
    "MBFT_XGB_RESULTS_DIR",
    "/Users/tvishakhanna/MBFT_LITE_FL/results/xgboost",
)
tinybert_path = os.environ.get(
    "MBFT_TINYBERT_RESULTS_DIR",
    "/Users/tvishakhanna/MBFT_LITE_FL/fl_tinybert/fl_tinybert/results",
)

# Client identifiers; each one is used to build the per-client file names.
clients = ["client_1", "client_2", "client_3", "client_4"]

# Output directory for plots and the summary CSV (relative to the notebook's
# working directory). Created up front so later cells can write into it.
fusion_results_dir = "../results/fusion"
os.makedirs(fusion_results_dir, exist_ok=True)

# Filled by the sweep cell: client -> {"best_alpha": ..., "accuracy": ...}.
fusion_summary = {}


In [16]:
# Alpha grid shared by every client: 0.00, 0.05, ..., 1.00 (21 points).
# linspace + round gives clean two-decimal weights; np.arange with a float
# step produces entries such as 0.15000000000000002, which would otherwise be
# recorded as "best_alpha" and written to the summary CSV.
alpha_range = np.round(np.linspace(0.0, 1.0, 21), 2)

# For each client, blend the TinyBERT and XGBoost probabilities as
#   fused = alpha * tinybert + (1 - alpha) * xgb
# threshold at 0.5, and keep the alpha with the highest accuracy.
# NOTE(review): alpha is selected and scored on the same predictions, so the
# reported accuracy is likely optimistic - confirm whether a held-out split
# is available for choosing alpha.
for client in clients:
    print(f"\nðŸŽ¯ Sweeping alpha values for {client}...")

    tinybert_file = os.path.join(tinybert_path, client, "predictions.csv")
    xgb_file = os.path.join(xgb_path, f"{client}_xgb_predictions.csv")

    df_tinybert = pd.read_csv(tinybert_file)
    df_xgb = pd.read_csv(xgb_file)

    # Fusion is element-wise, so both files must describe the same rows.
    # Fail with a clear message instead of an opaque broadcasting error.
    if len(df_tinybert) != len(df_xgb):
        raise ValueError(
            f"Row count mismatch for {client}: "
            f"{tinybert_file} has {len(df_tinybert)} rows, "
            f"{xgb_file} has {len(df_xgb)} rows"
        )

    # Labels come from the TinyBERT file; both files expose a "Probability"
    # column. Presumably rows are in the same order in both files - verify
    # against the code that writes them.
    y_true = df_tinybert["Actual"].values
    tinybert_probs = df_tinybert["Probability"].values
    xgb_probs = df_xgb["Probability"].values

    # Accuracy of the thresholded blend at every alpha on the grid.
    alpha_accs = [
        accuracy_score(
            y_true,
            (alpha * tinybert_probs + (1 - alpha) * xgb_probs >= 0.5).astype(int),
        )
        for alpha in alpha_range
    ]

    # argmax returns the first maximum, i.e. the smallest alpha on ties (same
    # tie-break as a strict ">" scan), and always yields a valid alpha even
    # when every accuracy is 0.
    best_idx = int(np.argmax(alpha_accs))
    best_alpha = float(alpha_range[best_idx])
    best_acc = float(alpha_accs[best_idx])

    # Store result
    fusion_summary[client] = {"best_alpha": best_alpha, "accuracy": best_acc}

    # Plot accuracy vs alpha on an explicit figure so it can be closed
    # deterministically (one figure per client).
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.plot(alpha_range, alpha_accs, marker='o')
    ax.set_title(f"Fusion Accuracy vs Alpha - {client}")
    ax.set_xlabel("Alpha (TinyBERT weight)")
    ax.set_ylabel("Accuracy")
    ax.grid(True)
    fig.tight_layout()
    plot_path = os.path.join(fusion_results_dir, f"{client}_fusion_plot.png")
    fig.savefig(plot_path)
    plt.close(fig)
    print(f"âœ… Saved accuracy plot to: {plot_path}")




ðŸŽ¯ Sweeping alpha values for client_1...
âœ… Saved accuracy plot to: ../results/fusion/client_1_fusion_plot.png

ðŸŽ¯ Sweeping alpha values for client_2...
âœ… Saved accuracy plot to: ../results/fusion/client_2_fusion_plot.png

ðŸŽ¯ Sweeping alpha values for client_3...
âœ… Saved accuracy plot to: ../results/fusion/client_3_fusion_plot.png

ðŸŽ¯ Sweeping alpha values for client_4...
âœ… Saved accuracy plot to: ../results/fusion/client_4_fusion_plot.png


In [18]:
# Tabulate the per-client winners: one row per client, with the client id
# moved from the index into a regular "Client" column.
summary_df = (
    pd.DataFrame.from_dict(fusion_summary, orient='index')
    .rename_axis("Client")
    .reset_index()
)

# Persist the table in the fusion results directory.
summary_file = os.path.join(fusion_results_dir, "fusion_summary.csv")
summary_df.to_csv(summary_file, index=False)

# Echo the table and where it was written.
print("\nðŸ“ˆ Fusion Summary")
print(summary_df)
print(f"âœ… Saved summary CSV to: {summary_file}")



ðŸ“ˆ Fusion Summary
     Client  best_alpha  accuracy
0  client_1         0.5  0.998000
1  client_2         0.5  1.000000
2  client_3         0.5  1.000000
3  client_4         0.5  0.998547
âœ… Saved summary CSV to: ../results/fusion/fusion_summary.csv
