In [1]:
import pandas as pd
from itertools import combinations
from scipy.stats import ttest_rel

# Result CSV for every model under comparison, keyed by the model's display name.
# The dict keeps this entry order, which in turn fixes the order in which the
# files are read and the models are paired further down.
# The "Tunning" spelling is part of the file names as given; leave it untouched.
file_paths = dict(
    [
        ("Linear Regression", "Linear Regression Baseline.csv"),
        ("Neural Network", "NN.csv"),
        ("Random Forest", "RandomForest.csv"),
        ("Support Vector Regression", "SVR.csv"),
        ("SVM with Tuning", "SVM with Tunning.csv"),
        ("Voting Regressor", "Voting Regressor EL.csv"),
        ("Voting Regressor with Tuning", "Voting Regressor EL with Tunning.csv"),
        ("XGBoost", "XGBR.csv"),
    ]
)

# Load every model's results file and keep only its "Average MAE" column,
# stored under the model's display name.
mae_data = {}
for model_name, csv_path in file_paths.items():
    results_frame = pd.read_csv(csv_path)
    # Progress trace: printed only once the file has been read successfully.
    print(csv_path)
    mae_data[model_name] = results_frame["Average MAE"].values

# Perform paired t-tests between all model pairs.
#
# For every unordered pair of models, scipy's ttest_rel is run on their two
# "Average MAE" arrays, and one record per pair is appended to `results`
# (keys: "Model A", "Model B", "T-Statistic", "P-Value", "Significant").
#
# NOTE(review): a paired test presumably requires entry i of every array to come
# from the same fold/run — confirm that all CSVs are row-aligned.
# NOTE(review): every pair is tested at the same threshold; no multiple-comparison
# correction (e.g. Bonferroni/Holm) is applied.

# Significance threshold applied to each individual comparison.
ALPHA = 0.05

results = []
model_names = list(mae_data.keys())
for model_a, model_b in combinations(model_names, 2):
    scores_a = mae_data[model_a]
    scores_b = mae_data[model_b]

    # Samples are matched element by element, so a length mismatch means the
    # inputs are not comparable; fail with the offending model names rather
    # than a generic error from inside scipy.
    if len(scores_a) != len(scores_b):
        raise ValueError(
            "Cannot run a paired t-test for {!r} vs {!r}: "
            "{} vs {} 'Average MAE' values".format(
                model_a, model_b, len(scores_a), len(scores_b)
            )
        )

    stat, pval = ttest_rel(scores_a, scores_b)

    # A NaN p-value (e.g. NaN inputs under ttest_rel's default nan_policy)
    # compares False against ALPHA and is therefore reported as "No".
    significance = "Yes" if pval < ALPHA else "No"
    results.append({
        "Model A": model_a,
        "Model B": model_b,
        "T-Statistic": stat,
        "P-Value": pval,
        "Significant": significance
    })

# Collect the per-pair records into a table: one row per model pair, one column
# per record key.
ttest_results_df = pd.DataFrame(results)

# Print via to_string() so the whole table is emitted in one piece. A plain
# print(df) wraps wide frames into separate column chunks, which pushes the
# "P-Value" and "Significant" columns away from the rows they belong to.
print(ttest_results_df.to_string())

Linear Regression Baseline.csv
NN.csv
RandomForest.csv
SVR.csv
SVM with Tunning.csv
Voting Regressor EL.csv
Voting Regressor EL with Tunning.csv
XGBR.csv
                         Model A                       Model B  T-Statistic  \
0              Linear Regression                Neural Network     2.405673   
1              Linear Regression                 Random Forest     2.504993   
2              Linear Regression     Support Vector Regression    -1.998717   
3              Linear Regression               SVM with Tuning    -1.831752   
4              Linear Regression              Voting Regressor     2.483518   
5              Linear Regression  Voting Regressor with Tuning     2.456247   
6              Linear Regression                       XGBoost     2.476711   
7                 Neural Network                 Random Forest     2.423361   
8                 Neural Network     Support Vector Regression    -2.164899   
9                 Neural Network               SVM with 