# Calcualte performance rankings from cleaned results

In [1]:
# read cleaned results
import pandas as pd
from pathlib import Path
import pdb, os, sys
root_dir = os.path.dirname(os.getcwd())
sys.path.append(root_dir)

agg_df_with_default = pd.read_csv(Path("./cleaned_results/tuned_aggregated_results_with_default.csv"))
agg_df = pd.read_csv(Path("./cleaned_results/tuned_aggregated_results.csv"))

fold_df_with_default = pd.read_csv(Path("./cleaned_results/tuned_fold_results_with_default.csv"))
fold_df = pd.read_csv(Path("./cleaned_results/tuned_fold_results.csv"))

# make sure output folder exists
output_folder = Path("./performance_rankings")
output_folder.mkdir(exist_ok=True)  

In [2]:
import numpy as np

def get_rank_table(df, metric, table_idx=4):
    """
    NOTE: the df needs to have the following columns defined:
    - {metric}_rank_mean
    - normalized_{metric}__test_mean
    """
    if table_idx == 1:
        from analysis.table1 import DATASETS
    elif table_idx == 2:
        from analysis.table2 import DATASETS
    elif table_idx == 4:
        from analysis.table4 import DATASETS
        
    df = df[df["dataset_name"].isin(DATASETS)]

    overall_ranks = df.groupby("alg_name").agg(
        {
            f"{metric}_rank_mean": ["min", "max", "mean", "median", "count"],
            f"normalized_{metric}__test_mean": ["mean", "median"],
            f"normalized_{metric}__test_std": ["mean", "median"],
            f"train_per_1000_inst_mean_{metric}": ["mean", "median"],        
        }
    ).reset_index().sort_values([(f"{metric}_rank_mean", "mean")])

    # format min/max rank columns to be ints

    # overall_ranks.loc[:, "count"] = overall_ranks.loc[:, (f"{metric}_rank_mean", "count")].astype(int)
    overall_ranks.drop(columns=(f"{metric}_rank_mean", "count"), inplace=True)

    # overall_ranks.loc[:, "alg_name"] = overall_ranks.loc[:, "alg_name"].apply(lambda x: "\rot{" + x + "}")
    overall_ranks.loc[:, (f"{metric}_rank_mean", "min")] = overall_ranks.loc[:, (f"{metric}_rank_mean", "min")].apply(lambda x: "{:d}".format(int(x)))
    overall_ranks.loc[:, (f"{metric}_rank_mean", "max")] = overall_ranks.loc[:, (f"{metric}_rank_mean", "max")].apply(lambda x: "{:d}".format(int(x)))

    # mean/median mean-rank
    overall_ranks.loc[:, (f"{metric}_rank_mean", "mean")] = overall_ranks.loc[:, (f"{metric}_rank_mean", "mean")].apply(lambda x: "{:.2f}".format(x))
    overall_ranks.loc[:, (f"{metric}_rank_mean", "median")] = overall_ranks.loc[:, (f"{metric}_rank_mean", "median")].apply(lambda x: "{:d}".format(int(x)) if int(x) == x else "{:.1f}".format(x))
    
    # normalized metric - mean and std over folds
    overall_ranks.loc[:, (f"normalized_{metric}__test_mean", "mean")] = overall_ranks.loc[:,(f"normalized_{metric}__test_mean", "mean")].apply(lambda x: "{:.2f}".format(x))
    overall_ranks.loc[:, (f"normalized_{metric}__test_mean", "median")] = overall_ranks.loc[:,(f"normalized_{metric}__test_mean", "median")].apply(lambda x: "{:.2f}".format(x))
    overall_ranks.loc[:, (f"normalized_{metric}__test_std", "mean")] = overall_ranks.loc[:,(f"normalized_{metric}__test_std", "mean")].apply(lambda x: "{:.2f}".format(x))
    overall_ranks.loc[:, (f"normalized_{metric}__test_std", "median")] = overall_ranks.loc[:,(f"normalized_{metric}__test_std", "median")].apply(lambda x: "{:.2f}".format(x))


    # normalized runtime
    overall_ranks.loc[:, (f"train_per_1000_inst_mean_{metric}", "mean")] = overall_ranks.loc[:,(f"train_per_1000_inst_mean_{metric}", "mean")].apply(lambda x: "{:.2f}".format(x))
    overall_ranks.loc[:, (f"train_per_1000_inst_mean_{metric}", "median")] = overall_ranks.loc[:,(f"train_per_1000_inst_mean_{metric}", "median")].apply(lambda x: "{:.2f}".format(x))
   


    final_table = overall_ranks.set_index("alg_name")

    return final_table


### Save rank tables to file

In [5]:
# save rank tables to csv and latex
from analysis_utils import ALG_DISPLAY_NAMES
table_idx = 1

metric_list = [
    "Accuracy",
    "F1",
    "Log Loss",
    "AUC",
]

final_tables = {}

# best, worst, and average performance for each alg, over all datasets
for metric in metric_list:

    agg_df_with_default

    # first with default hparams as its own alg
    final_tables[metric] = get_rank_table(agg_df_with_default, metric, table_idx = table_idx)

    # save to csv, latex
    final_tables[metric].to_csv(output_folder / f"{metric}_rank_with_default.csv", index=True)
    final_tables[metric].to_latex(output_folder / f"{metric}_rank_with_default.tex", index=True, escape=False)

    # now without default hparams as its own alg
    final_tables[metric] = get_rank_table(agg_df, metric, table_idx=table_idx)

    # save to csv, latex
    final_tables[metric].to_csv(output_folder / f"{metric}_rank.csv", index=True)
    final_tables[metric].to_latex(output_folder / f"{metric}_rank.tex", index=True, escape=False)

 '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'
 '1' '1' '1' '1' '3' '1' '1' '1' '9']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  overall_ranks.loc[:, (f"{metric}_rank_mean", "min")] = overall_ranks.loc[:, (f"{metric}_rank_mean", "min")].apply(lambda x: "{:d}".format(int(x)))
 '42' '43' '44' '42' '39' '38' '41' '39' '44' '41' '44' '44' '42' '42'
 '42' '45' '43' '39' '45' '43' '43' '44' '44' '45' '45' '45' '43' '45'
 '43' '45' '45']' has dtype incompatible with float64, please explicitly cast to a compatible dtype first.
  overall_ranks.loc[:, (f"{metric}_rank_mean", "max")] = overall_ranks.loc[:, (f"{metric}_rank_mean", "max")].apply(lambda x: "{:d}".format(int(x)))
 '14.99' '15.50' '16.15' '16.66' '16.66' '16.81' '17.06' '17.28' '17.43'
 '17.49' '18.23' '18.70' '19.07' '20.05' '20.73' '22.26' '22.68' '22.74'
 '22.97' '22.98' '23.21' '23.59' '23.73' '24.77' '24.84' '25.84' '27.02'
 '27.85' '28.03' '28.92' '28.97' 

In [7]:
final_tables['Accuracy']

Unnamed: 0_level_0,Accuracy_rank_mean,Accuracy_rank_mean,Accuracy_rank_mean,Accuracy_rank_mean,normalized_Accuracy__test_mean,normalized_Accuracy__test_mean,normalized_Accuracy__test_std,normalized_Accuracy__test_std,train_per_1000_inst_mean_Accuracy,train_per_1000_inst_mean_Accuracy
Unnamed: 0_level_1,min,max,mean,median,mean,median,mean,median,mean,median
alg_name,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
TabPFN,1,21,5.45,4.0,0.85,0.93,0.32,0.24,0.0,0.0
CatBoost,1,20,5.84,4.0,0.88,0.94,0.26,0.16,21.7,2.08
XGBoost,1,21,7.51,6.0,0.83,0.92,0.29,0.17,0.81,0.37
ResNet,1,22,8.18,8.0,0.78,0.86,0.27,0.16,16.01,9.34
SAINT,1,22,8.51,7.0,0.76,0.87,0.27,0.2,169.54,146.16
NODE,1,22,8.53,8.0,0.77,0.86,0.23,0.15,138.36,117.04
FTTransformer,1,19,8.71,8.5,0.79,0.84,0.27,0.17,27.67,18.4
RandomForest,1,20,8.92,8.0,0.8,0.86,0.27,0.19,0.35,0.24
LightGBM,1,23,9.32,9.0,0.79,0.86,0.31,0.18,0.87,0.34
TabFast,1,23,9.71,9.5,0.73,0.86,0.26,0.17,3.7,1.48
