In [10]:
import pandas as pd
import numpy as np

from pathlib import Path
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

In [11]:
# === Load all model summary CSVs === #
# The summaries are read from the "3 - evaluation" folder, resolved relative
# to the parent of the current working directory.
project = Path.cwd().parent
current_folder = project / "3 - evaluation"

# Display name of each model -> file name of its summary CSV.
summary_files = {
    "Random Forest (Base Model)": "random_forest_base_model_summary.csv",
    "Random Forest (Tuned Model)": "random_forest_tuned_model_summary.csv",
    "Logistic Regression": "logistic_regression_summary.csv",
    "Decision Tree (Base Model)": "decision_tree_base_model_summary.csv",
    "Decision Tree (Tuned Model)": "decision_tree_tuned_model_summary.csv",
    "Neural Network": "neural_network_summary.csv",
    "XGBoost (Base Model)": "xgboost_base_model_summary.csv",
    "XGBoost (Tuned Model)": "xgboost_tuned_model_summary.csv",
    "Support Vector Machine (Base Model)": "support_vector_machine_base_model_summary.csv",
    "Support Vector Machine (Tuned Model)": "support_vector_machine_tuned_model_summary.csv",
}
paths = {
    model_name: current_folder / file_name
    for model_name, file_name in summary_files.items()
}

# Read every summary and tag its rows with the model they belong to, so the
# frames can be stacked into one table and split per model again later.
dfs = [
    pd.read_csv(csv_path).assign(Model=model_name)
    for model_name, csv_path in paths.items()
]

all_results = pd.concat(dfs, ignore_index=True)
print("✅ Loaded model summaries:", all_results["Model"].unique())

all_results.head()

✅ Loaded model summaries: ['Random Forest (Base Model)' 'Random Forest (Tuned Model)'
 'Logistic Regression' 'Decision Tree (Base Model)'
 'Decision Tree (Tuned Model)' 'Neural Network' 'XGBoost (Base Model)'
 'XGBoost (Tuned Model)' 'Support Vector Machine (Base Model)'
 'Support Vector Machine (Tuned Model)']


Unnamed: 0,Time Taken (mins),Complexity (1-5),Frequency,Tool Used,Department,Error Rate (%),Rule-Based Indicator,Process Stability,Data Structure,Actual,Predicted,Feasibility %,Model
0,0.380295,2,Quarterly,Trello,Customer Service,-1.058714,Yes,Medium,Semi-Structured,Yes,Yes,86.53,Random Forest (Base Model)
1,-0.652825,2,Ad-Hoc,Slack,Procurement,-1.529068,Yes,Low,Unstructured,No,No,39.33,Random Forest (Base Model)
2,0.896855,4,Monthly,Jira,Human Resource,-0.552179,No,Medium,Semi-Structured,Yes,Yes,72.73,Random Forest (Base Model)
3,-1.418758,3,Quarterly,SAP,Compliance,-0.552179,Yes,Low,Unstructured,No,No,21.03,Random Forest (Base Model)
4,-0.243139,2,Ad-Hoc,Google Sheets,IT,0.460891,No,Medium,Structured,No,No,15.98,Random Forest (Base Model)


In [12]:
def compute_basic_metrics(df_model):
    """Summarise one model's classification quality from its prediction rows.

    Rows with a missing "Actual", "Predicted" or "Feasibility %" value are
    dropped first. Labels are compared as strings, and "Feasibility %" is
    divided by 100 and clipped to [0, 1] to act as the score of the "Yes"
    class when computing ROC AUC.

    Returns a dict with accuracy, ROC AUC (NaN unless "Actual" contains both
    "Yes" and non-"Yes" rows), precision and recall for the "No" and "Yes"
    classes, and the macro-averaged F1 score.
    """
    required_columns = ["Actual", "Predicted", "Feasibility %"]
    scored = df_model.dropna(subset=required_columns)

    actual = scored["Actual"].astype(str)
    predicted = scored["Predicted"].astype(str)

    # Binary target and positive-class score used only for ROC AUC.
    actual_is_yes = (actual == "Yes").astype(int)
    yes_score = np.clip(scored["Feasibility %"].astype(float) / 100.0, 0, 1)

    accuracy = accuracy_score(actual, predicted)
    report = classification_report(actual, predicted, output_dict=True, zero_division=0)

    # ROC AUC is only computed when both outcomes occur in the true labels.
    if actual_is_yes.nunique() == 2:
        auc = roc_auc_score(actual_is_yes, yes_score)
    else:
        auc = np.nan

    def report_value(section, metric):
        # A section or metric missing from the report falls back to NaN.
        return report.get(section, {}).get(metric, np.nan)

    return {
        "Accuracy": accuracy,
        "ROC AUC": auc,
        "Precision (No)": report_value("No", "precision"),
        "Precision (Yes)": report_value("Yes", "precision"),
        "Recall (No)": report_value("No", "recall"),
        "Recall (Yes)": report_value("Yes", "recall"),
        "F1 Score (Macro Average)": report_value("macro avg", "f1-score"),
    }

def error_examples(df_model, n=1):
    """Pick the most confidently wrong predictions of one model.

    Returns a ``(false_positives, false_negatives)`` pair of DataFrames with
    at most ``n`` rows each:

    * false positives (Actual "No", Predicted "Yes"), highest
      "Feasibility %" first;
    * false negatives (Actual "Yes", Predicted "No"), lowest
      "Feasibility %" first.
    """
    actual = df_model["Actual"]
    predicted = df_model["Predicted"]

    false_positives = df_model[(actual == "No") & (predicted == "Yes")]
    false_negatives = df_model[(actual == "Yes") & (predicted == "No")]

    worst_fp = false_positives.sort_values("Feasibility %", ascending=False).head(n)
    worst_fn = false_negatives.sort_values("Feasibility %", ascending=True).head(n)
    return worst_fp, worst_fn

def confidence_summary(df_model):
    """Average "Feasibility %" and row count per confusion-matrix cell.

    Rows are grouped by their ("Actual", "Predicted") pair into true/false
    positives and negatives, with "Yes" as the positive label. For each group
    the mean "Feasibility %" (NaN when the group is empty) and the number of
    rows are returned in a single dict. The input frame is not modified.
    """
    actual = df_model["Actual"]
    predicted = df_model["Predicted"]
    feasibility = df_model["Feasibility %"].astype(float)

    def scores_for(truth, guess):
        # Feasibility values of the rows in one confusion-matrix cell.
        return feasibility[(actual == truth) & (predicted == guess)]

    def mean_or_nan(values):
        if len(values):
            return values.mean()
        return np.nan

    true_pos = scores_for("Yes", "Yes")
    false_pos = scores_for("No", "Yes")
    true_neg = scores_for("No", "No")
    false_neg = scores_for("Yes", "No")

    return {
        "TP Avg Feasibility %": mean_or_nan(true_pos),
        "FP Avg Feasibility %": mean_or_nan(false_pos),
        "TN Avg Feasibility %": mean_or_nan(true_neg),
        "FN Avg Feasibility %": mean_or_nan(false_neg),
        "TP Count": len(true_pos),
        "FP Count": len(false_pos),
        "TN Count": len(true_neg),
        "FN Count": len(false_neg)
    }

In [13]:
# === Build Model Comparison table === #
models = all_results["Model"].unique()

# One row of headline metrics per model, computed from that model's rows only.
rows = [
    {"Model": m, **compute_basic_metrics(all_results[all_results["Model"] == m])}
    for m in models
]

comparison_df = pd.DataFrame(rows)

# Sort by Accuracy (desc), breaking ties by ROC AUC (desc).
# A single-column sort uses a non-stable algorithm by default, so models tied
# on accuracy could land in any order; because the first row of this table is
# later reported as the best model, the ranking has to be deterministic.
comparison_df = (
    comparison_df
    .sort_values(["Accuracy", "ROC AUC"], ascending=False)
    .reset_index(drop=True)
)

comparison_df

Unnamed: 0,Model,Accuracy,ROC AUC,Precision (No),Precision (Yes),Recall (No),Recall (Yes),F1 Score (Macro Average)
0,XGBoost (Tuned Model),0.98,0.994364,1.0,0.958333,0.962963,1.0,0.979928
1,Random Forest (Tuned Model),0.97,0.994364,1.0,0.938776,0.944444,1.0,0.969925
2,Random Forest (Base Model),0.96,0.992351,0.980769,0.9375,0.944444,0.978261,0.959855
3,XGBoost (Base Model),0.95,0.990338,0.980392,0.918367,0.925926,0.978261,0.949875
4,Logistic Regression,0.92,0.968196,1.0,0.851852,0.851852,1.0,0.92
5,Support Vector Machine (Base Model),0.92,0.964976,1.0,0.851852,0.851852,1.0,0.92
6,Support Vector Machine (Tuned Model),0.92,0.964976,1.0,0.851852,0.851852,1.0,0.92
7,Decision Tree (Tuned Model),0.9,0.972625,0.958333,0.846154,0.851852,0.956522,0.89996
8,Decision Tree (Base Model),0.88,0.947665,0.92,0.84,0.851852,0.913043,0.879808
9,Neural Network,0.82,0.921095,0.833333,0.804348,0.833333,0.804348,0.818841


In [14]:
# === Build Confidence table === #
# One confidence breakdown per model, computed from that model's rows only.
conf_rows = [
    {"Model": m, **confidence_summary(all_results[all_results["Model"] == m])}
    for m in models
]

# Rank models by how confident they are on their true positives, highest first.
confidence_df = pd.DataFrame(conf_rows)
confidence_df = confidence_df.sort_values("TP Avg Feasibility %", ascending=False)
confidence_df = confidence_df.reset_index(drop=True)
confidence_df

Unnamed: 0,Model,TP Avg Feasibility %,FP Avg Feasibility %,TN Avg Feasibility %,FN Avg Feasibility %,TP Count,FP Count,TN Count,FN Count
0,Decision Tree (Base Model),94.835952,79.1075,0.869565,25.4175,42,8,46,4
1,XGBoost (Tuned Model),94.549348,83.615,4.253654,,46,2,52,0
2,Decision Tree (Tuned Model),91.488409,71.65875,0.936522,32.375,44,8,46,2
3,Logistic Regression,82.223913,70.11625,12.023913,,46,8,46,0
4,XGBoost (Base Model),80.319556,63.4875,19.8028,33.55,45,4,50,1
5,Support Vector Machine (Base Model),80.148913,67.585,9.928261,,46,8,46,0
6,Support Vector Machine (Tuned Model),80.148913,67.585,9.928261,,46,8,46,0
7,Random Forest (Tuned Model),80.019348,64.23,18.414706,,46,3,51,0
8,Random Forest (Base Model),78.758222,62.456667,21.137451,49.65,45,3,51,1
9,Neural Network,58.105946,53.121111,37.756,46.853333,37,9,45,9


In [15]:
# === Show examples for the best model === #
# The comparison table is sorted best-first, so its first row names the winner.
best_model = comparison_df["Model"].iloc[0]
print("✅ Best Model:", best_model)

is_best = all_results["Model"] == best_model
df_best = all_results[is_best]

# Only the single most extreme false positive / false negative is requested.
fp_examples, fn_examples = error_examples(df_best, n=1)

print("\n--- False Positives (Pred Yes, Actual No) ---")
fp_examples

✅ Best Model: XGBoost (Tuned Model)

--- False Positives (Pred Yes, Actual No) ---


Unnamed: 0,Time Taken (mins),Complexity (1-5),Frequency,Tool Used,Department,Error Rate (%),Rule-Based Indicator,Process Stability,Data Structure,Actual,Predicted,Feasibility %,Model
742,-1.276259,1,Weekly,Jira,Procurement,0.750339,Yes,Medium,Semi-Structured,No,Yes,92.38,XGBoost (Tuned Model)


In [16]:
# === Save results to CSV (Comparison & Confidence table) === #
# Each table is written next to the model summaries, without its row index.
tables_to_save = [
    (comparison_df, "model_comparison_table.csv"),
    (confidence_df, "model_confidence_table.csv"),
]
for table, file_name in tables_to_save:
    table.to_csv(current_folder / file_name, index=False)

print("✅ Saved:")
print("3 - evaluation/model_comparison_table.csv")
print("3 - evaluation/model_confidence_table.csv")

✅ Saved:
3 - evaluation/model_comparison_table.csv
3 - evaluation/model_confidence_table.csv
