# Calcualte performance rankings from cleaned results

In [1]:
# read cleaned results
import pandas as pd
from pathlib import Path
import pdb, os, sys
root_dir = os.path.dirname(os.getcwd())
sys.path.append(root_dir)

agg_df_with_default = pd.read_csv(Path("./cleaned_results/tuned_aggregated_results_with_default.csv"))
agg_df = pd.read_csv(Path("./cleaned_results/tuned_aggregated_results.csv"))

fold_df_with_default = pd.read_csv(Path("./cleaned_results/tuned_fold_results_with_default.csv"))
fold_df = pd.read_csv(Path("./cleaned_results/tuned_fold_results.csv"))

# make sure output folder exists
output_folder = Path("./performance_rankings")
output_folder.mkdir(exist_ok=True)  

In [2]:
import numpy as np

def get_rank_table(df, metric, table_idx=4):
    """
    NOTE: the df needs to have the following columns defined:
    - {metric}_rank_mean
    - normalized_{metric}__test_mean
    """
    if table_idx == 1:
        from analysis.table1 import DATASETS
    elif table_idx == 2:
        from analysis.table2 import DATASETS
    elif table_idx == 4:
        from analysis.table4 import DATASETS
        
    df = df[df["dataset_name"].isin(DATASETS)]

    overall_ranks = df.groupby("alg_name").agg(
        {
            f"{metric}_rank_mean": ["min", "max", "mean", "median", "count"],
            f"normalized_{metric}__test_mean": ["mean", "median"],
            f"normalized_{metric}__test_std": ["mean", "median"],
            f"train_per_1000_inst_mean_{metric}": ["mean", "median"],        
        }
    ).reset_index().sort_values([(f"{metric}_rank_mean", "mean")])

    # format min/max rank columns to be ints

    overall_ranks.loc[:, (f"{metric}_rank_mean", "count")] = overall_ranks.loc[:, (f"{metric}_rank_mean", "count")].astype(int)

    # overall_ranks.loc[:, "alg_name"] = overall_ranks.loc[:, "alg_name"].apply(lambda x: "\rot{" + x + "}")
    overall_ranks.loc[:, (f"{metric}_rank_mean", "min")] = overall_ranks.loc[:, (f"{metric}_rank_mean", "min")].apply(lambda x: "{:d}".format(int(x)))
    overall_ranks.loc[:, (f"{metric}_rank_mean", "max")] = overall_ranks.loc[:, (f"{metric}_rank_mean", "max")].apply(lambda x: "{:d}".format(int(x)))

    # mean/median mean-rank
    overall_ranks.loc[:, (f"{metric}_rank_mean", "mean")] = overall_ranks.loc[:, (f"{metric}_rank_mean", "mean")].apply(lambda x: "{:.2f}".format(x))
    overall_ranks.loc[:, (f"{metric}_rank_mean", "median")] = overall_ranks.loc[:, (f"{metric}_rank_mean", "median")].apply(lambda x: "{:d}".format(int(x)) if int(x) == x else "{:.1f}".format(x))
    
    # normalized metric - mean and std over folds
    overall_ranks.loc[:, (f"normalized_{metric}__test_mean", "mean")] = overall_ranks.loc[:,(f"normalized_{metric}__test_mean", "mean")].apply(lambda x: "{:.2f}".format(x))
    overall_ranks.loc[:, (f"normalized_{metric}__test_mean", "median")] = overall_ranks.loc[:,(f"normalized_{metric}__test_mean", "median")].apply(lambda x: "{:.2f}".format(x))
    overall_ranks.loc[:, (f"normalized_{metric}__test_std", "mean")] = overall_ranks.loc[:,(f"normalized_{metric}__test_std", "mean")].apply(lambda x: "{:.2f}".format(x))
    overall_ranks.loc[:, (f"normalized_{metric}__test_std", "median")] = overall_ranks.loc[:,(f"normalized_{metric}__test_std", "median")].apply(lambda x: "{:.2f}".format(x))


    # normalized runtime
    overall_ranks.loc[:, (f"train_per_1000_inst_mean_{metric}", "mean")] = overall_ranks.loc[:,(f"train_per_1000_inst_mean_{metric}", "mean")].apply(lambda x: "{:.2f}".format(x))
    overall_ranks.loc[:, (f"train_per_1000_inst_mean_{metric}", "median")] = overall_ranks.loc[:,(f"train_per_1000_inst_mean_{metric}", "median")].apply(lambda x: "{:.2f}".format(x))
   


    final_table = overall_ranks.set_index("alg_name")

    return final_table


### Save rank tables to file

In [7]:
# save rank tables to csv and latex
from analysis_utils import ALG_DISPLAY_NAMES
table_idx = 4

metric_list = [
    "Accuracy",
    "F1",
    "Log Loss",
    "AUC",
]

final_tables = {}

# best, worst, and average performance for each alg, over all datasets
for metric in metric_list:

    agg_df_with_default

    # first with default hparams as its own alg
    final_tables[metric] = get_rank_table(agg_df_with_default, metric, table_idx = table_idx)

    # save to csv, latex
    final_tables[metric].to_csv(output_folder / f"{metric}_rank_with_default.csv", index=True)
    final_tables[metric].to_latex(output_folder / f"{metric}_rank_with_default.tex", index=True, escape=False)

    # now without default hparams as its own alg
    final_tables[metric] = get_rank_table(agg_df, metric, table_idx=table_idx)

    # save to csv, latex
    final_tables[metric].to_csv(output_folder / f"{metric}_rank.csv", index=True)
    final_tables[metric].to_latex(output_folder / f"{metric}_rank.tex", index=True, escape=False)

 '1' '1' '1' '7' '1' '1' '1' '3' '3' '1' '9' '6' '5' '2' '2' '5' '12' '6'
 '7' '4' '8' '6' '7' '5' '4' '8' '9' '8' '15' '16' '31']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  overall_ranks.loc[:, (f"{metric}_rank_mean", "min")] = overall_ranks.loc[:, (f"{metric}_rank_mean", "min")].apply(lambda x: "{:d}".format(int(x)))
 '44' '41' '41' '42' '48' '48' '42' '36' '46' '46' '41' '41' '49' '40'
 '46' '41' '43' '41' '41' '46' '42' '43' '45' '46' '47' '49' '47' '48'
 '48' '49' '46' '49' '47' '49' '49']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  overall_ranks.loc[:, (f"{metric}_rank_mean", "max")] = overall_ranks.loc[:, (f"{metric}_rank_mean", "max")].apply(lambda x: "{:d}".format(int(x)))
 '15.20' '15.48' '15.97' '16.23' '16.91' '17.00' '17.33' '17.33' '19.37'
 '20.00' '20.00' '21.28' '21.29' '22.44' '22.44' '22.55' '22.61' '22.91'
 '22.93' '23.29' '24.54' '25.11' '25.75' '25.75' '26.31' '26.44' '

In [18]:
final_tables['AUC']

Unnamed: 0_level_0,AUC_rank_mean,AUC_rank_mean,AUC_rank_mean,AUC_rank_mean,AUC_rank_mean,normalized_AUC__test_mean,normalized_AUC__test_mean,normalized_AUC__test_std,normalized_AUC__test_std,train_per_1000_inst_mean_AUC,train_per_1000_inst_mean_AUC
Unnamed: 0_level_1,min,max,mean,median,count,mean,median,mean,median,mean,median
alg_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
XGBoost,1,19,4.58,2.0,36,0.94,0.98,0.08,0.05,1.82,0.29
CatBoost,1,18,5.31,4.0,35,0.93,0.96,0.09,0.06,51.02,1.47
ResNet,1,16,7.29,6.0,35,0.87,0.91,0.09,0.05,7.91,5.38
LightGBM,1,19,7.5,5.5,32,0.92,0.95,0.11,0.06,1.27,0.45
SAINT,1,23,8.19,7.0,27,0.8,0.91,0.11,0.07,122.95,67.8
RandomForest,1,21,8.23,7.0,35,0.86,0.88,0.09,0.06,0.47,0.3
Ours,1,20,9.47,10.0,36,0.76,0.87,0.08,0.05,0.0,0.0
DANet,2,21,10.56,10.0,27,0.86,0.9,0.11,0.08,60.75,54.44
FTTransformer,1,20,10.59,11.0,29,0.79,0.86,0.11,0.08,18.68,13.54
TabPFN,1,25,10.61,10.0,36,0.64,0.82,0.11,0.07,0.0,0.0


In [16]:
final_tables['AUC'][final_tables['AUC']['AUC_rank_mean']['count'] == 36]

Unnamed: 0_level_0,AUC_rank_mean,AUC_rank_mean,AUC_rank_mean,AUC_rank_mean,AUC_rank_mean,normalized_AUC__test_mean,normalized_AUC__test_mean,normalized_AUC__test_std,normalized_AUC__test_std,train_per_1000_inst_mean_AUC,train_per_1000_inst_mean_AUC
Unnamed: 0_level_1,min,max,mean,median,count,mean,median,mean,median,mean,median
alg_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
XGBoost,1,19,4.58,2.0,36,0.94,0.98,0.08,0.05,1.82,0.29
Ours,1,20,9.47,10.0,36,0.76,0.87,0.08,0.05,0.0,0.0
TabPFN,1,25,10.61,10.0,36,0.64,0.82,0.11,0.07,0.0,0.0
MLP-rtdl,1,22,10.64,10.5,36,0.78,0.84,0.09,0.06,6.84,4.29
TabFast,2,24,12.31,12.5,36,0.65,0.78,0.1,0.06,0.0,0.0
TabFlex,6,21,14.17,14.0,36,0.62,0.74,0.09,0.09,0.0,0.0
MLP,4,23,14.47,14.0,36,0.67,0.69,0.11,0.06,8.84,5.42
