In [None]:
import pandas as pd
from scipy.stats import wilcoxon

# Load the paired reliability scores and order the rows by model, then dataset.
# The sort fixes the order in which models are reported in the per-model loop.
df = (
    pd.read_csv("./../../dataset/reliability_pairs_64.csv")
    .sort_values(by=["Model", "Dataset"])
    .reset_index(drop=True)
)

# Global two-sided Wilcoxon signed-rank (WSR) test over every row: each row
# contributes one (Current_Reliability, Proposed_Reliability) pair.
global_stat, global_p = wilcoxon(
    df["Current_Reliability"],
    df["Proposed_Reliability"],
    alternative="two-sided",
)

print("========== GLOBAL WSR TEST ==========")
print(f"W-Statistic : {global_stat:.4f}")
print(f"p-Value     : {global_p:.12f}")
# Significance is judged at the 0.05 level; a NaN p-value falls through to the
# "no significant difference" message because `NaN < 0.05` is False.
print(
    "✅ Significant difference between Current and Proposed reliability approaches (p < 0.05)"
    if global_p < 0.05
    else "❌ No significant difference between Current and Proposed reliability approaches (p ≥ 0.05)"
)
print("====================================\n")

# Repeat the same paired test separately for each model.
# NOTE(review): the per-model p-values are compared to 0.05 without any
# multiple-comparison adjustment — confirm this is intended.
print("========== MODEL-WISE WSR TESTS ==========")
results = []  # one (model, W statistic, p-value) tuple per model
models = df["Model"].unique()

for model in models:
    subset = df.loc[df["Model"] == model]
    stat, p = wilcoxon(
        subset["Current_Reliability"],
        subset["Proposed_Reliability"],
        alternative="two-sided",
    )
    results.append((model, stat, p))
    verdict = "✅ Significant" if p < 0.05 else "❌ Not Significant"
    print(f"{model:<18} | W = {stat:.4f} | p = {p:.6f} | {verdict}")

W-Statistic : 204.0000
p-Value     : 0.000000022609
✅ Significant difference between Current and Proposed reliability approaches (p < 0.05)

Adaboost           | W = 4.0000 | p = 0.054688 | ❌ Not Significant
CatBoost           | W = 4.0000 | p = 0.054688 | ❌ Not Significant
Extra Trees        | W = 1.0000 | p = 0.015625 | ✅ Significant
Gradient Boosting  | W = 4.0000 | p = 0.054688 | ❌ Not Significant
LightGBM           | W = 3.0000 | p = 0.039062 | ✅ Significant
MLP                | W = 8.0000 | p = 0.195312 | ❌ Not Significant
Random Forest      | W = 4.0000 | p = 0.054688 | ❌ Not Significant
XGBoost            | W = 1.0000 | p = 0.015625 | ✅ Significant
