In [10]:

import pandas as pd
import numpy as np
import os
import json
from sklearn.metrics import recall_score

# Paths
TINY_METRICS_PATH = "../fl_tinybert/results"
TINY_PRED_PATH = "../fl_tinybert/fl_tinybert/results"
XGB_PATH = "../results/xgboost"
FUSION_PATH = "../results/fusion/fusion_summary.csv"

clients = ["client_1", "client_2", "client_3", "client_4"]

records = []

for client in clients:
    print(f"\n📥 Processing {client}...")

    # Load TinyBERT accuracy from metrics.json
    tiny_metrics_path = os.path.join(TINY_METRICS_PATH, client, "metrics.json")
    with open(tiny_metrics_path, "r") as f:
        tiny_metrics = json.load(f)
        tiny_acc = tiny_metrics.get("eval_accuracy", 0)

    # Load predictions for TinyBERT
    tiny_preds = pd.read_csv(os.path.join(TINY_PRED_PATH, client, "predictions.csv"))
    tiny_phish_recall = recall_score(tiny_preds["true"], tiny_preds["pred"]) if 1 in tiny_preds["true"].unique() else 0

    # Load XGBoost predictions
    xgb_preds = pd.read_csv(os.path.join(XGB_PATH, f"{client}_xgb_predictions.csv"))
    xgb_acc = (xgb_preds["Actual"] == xgb_preds["Predicted"]).mean()
    xgb_phish_recall = recall_score(xgb_preds["Actual"], xgb_preds["Predicted"]) if 1 in xgb_preds["Actual"].unique() else 0

    # Load Fusion accuracy
    fusion_df = pd.read_csv(FUSION_PATH)
    fusion_acc = fusion_df[fusion_df["Client"] == client]["accuracy"].values[0]

    # Estimate Fusion Recall
    fusion_probs = 0.5 * tiny_preds["Probability"].values + 0.5 * xgb_preds["Probability"].values
    fusion_pred = (fusion_probs >= 0.5).astype(int)
    fusion_recall = recall_score(tiny_preds["true"], fusion_pred) if 1 in tiny_preds["true"].unique() else 0

    # Fix count (both wrong, fusion right)
    both_wrong = (tiny_preds["pred"] != tiny_preds["true"]) & (xgb_preds["Predicted"] != xgb_preds["Actual"])
    fusion_correct = fusion_pred == tiny_preds["true"]
    fixed = np.sum(both_wrong & fusion_correct)

    # Confidence diff
    conf_diff = np.mean(tiny_preds["Probability"] - xgb_preds["Probability"])

    records.append({
        "Client": client,
        "TinyBERT Acc": round(tiny_acc, 4),
        "XGB Acc": round(xgb_acc, 4),
        "Fusion Acc": round(fusion_acc, 4),
        "Phish Recall Tiny": round(tiny_phish_recall, 4),
        "Phish Recall XGB": round(xgb_phish_recall, 4),
        "Phish Recall Fusion": round(fusion_recall, 4),
        "Fusion Fixed X Samples": fixed,
        "Confidence Gap": round(conf_diff, 4)
    })

# Create final summary table
summary_df = pd.DataFrame(records)
summary_df.set_index("Client", inplace=True)

# Display or save
import ace_tools as tools
tools.display_dataframe_to_user(name="Final Evaluation Table with Phish Recall", dataframe=summary_df)



📥 Processing client_1...


KeyError: 'true'