In [1]:
import pandas as pd
import os, sys, json
root_dir = os.path.dirname(os.getcwd())
sys.path.append(root_dir)

# Datasets

In [2]:

metadataset_df = pd.read_csv(f"{root_dir}/TabZilla/tutorials/metadataset_clean.csv")

def read_json(file):
    with open(file, "r") as f:
        return json.load(f)
    
eval = {}

In [3]:
def get_datasets(table_idx):
    if table_idx == 1:
        from analysis.table1 import DATASETS
    elif table_idx == 2:
        from analysis.table2 import DATASETS
    elif table_idx == 4:
        from analysis.table4 import DATASETS
        
    return DATASETS

In [4]:
table_idx = 4
DATASETS = get_datasets(table_idx)

# get the results of TabFlex
tabflex_results = {}
for dataset in DATASETS:
    result_dir = f"{root_dir}/results/TabFlexModel/{dataset}"
    try:
            tabflex_results[dataset] = read_json(f"{result_dir}/default_trial0_results.json")
    except FileNotFoundError:
        print(f"TabFlexModel: {dataset} not found")
            

TabFlexModel: openml__ada_agnostic__3896openml__airlines__189354 not found


In [5]:
eval['tabflex'] = {}
for dataset in tabflex_results:
    eval['tabflex'][dataset] = {}
    accs = tabflex_results[dataset]['scorers']['test']['Accuracy']
    accs = pd.Series(accs)
    eval['tabflex'][dataset]['median_acc'] = accs.median()
    eval['tabflex'][dataset]['mean_acc'] = accs.mean()
    

In [6]:
def get_baselines(method, eval):
    eval[method] = {}
    for dataset in DATASETS:
        eval[method][dataset] = {}
        result = metadataset_df.loc[
            (metadataset_df["alg_name"] == method) & 
            (metadataset_df["hparam_source"] == "default") &
            (metadataset_df["dataset_name"] == dataset),
            [
                "dataset_fold_id", 
                "alg_name", 
                "hparam_source", 
                "Accuracy__test", 
                "training_time"]
        ]
        if result.empty:
            eval[method][dataset]['median_acc'] = 0
            eval[method][dataset]['mean_acc'] = 0
        else:
            eval[method][dataset]['median_acc'] = result["Accuracy__test"].median()
            eval[method][dataset]['mean_acc'] = result["Accuracy__test"].mean()
    

In [7]:
baselines = [
    'LinearModel', 
    'KNN', 
    'SVM', 
    'DecisionTree', 
    'RandomForest', 
    'XGBoost', 
    'CatBoost', 
    'LightGBM', 
    'MLP', 
    'TabNet', 
    'VIME', 
    'TabTransformer', 
    'NODE', 
    'DeepGBM', 
    'STG', 
    'NAM', 
    'DeepFM', 
    'SAINT', 
    'DANet', 
    'TabPFNModel', 
    'rtdl_MLP', 
    'rtdl_ResNet', 
    'rtdl_FTTransformer',
]

In [8]:
metadataset_df.columns

Index(['dataset_fold_id', 'dataset_name', 'target_type', 'alg_name',
       'hparam_source', 'Log Loss__train', 'Log Loss__val', 'Log Loss__test',
       'AUC__train', 'AUC__val', 'AUC__test', 'Accuracy__train',
       'Accuracy__val', 'Accuracy__test', 'F1__train', 'F1__val', 'F1__test',
       'training_time', 'eval-time__train', 'eval-time__val',
       'eval-time__test'],
      dtype='object')

In [9]:
for baseline in baselines: 
    try:
        get_baselines(baseline, eval)
    except:
        print(f"Error in {baseline}")

In [13]:
methods = baselines + ['tabflex']
final_results = {}

for method in methods:
    result = pd.DataFrame(eval[method]).T
    final_results[method] = result['mean_acc'].mean()
    
sorted_methods = sorted(methods, key=lambda x: final_results[x], reverse=True)
for method in sorted_methods:
    print(f"| {method} | {final_results[method]} |")

| XGBoost | 0.7968499617887653 |
| rtdl_ResNet | 0.7692302871971485 |
| CatBoost | 0.7622398414701581 |
| rtdl_MLP | 0.7459065643369371 |
| RandomForest | 0.7289414599410395 |
| MLP | 0.7239250990129568 |
| LightGBM | 0.7190597129172941 |
| tabflex | 0.7102846863951989 |
| DecisionTree | 0.6816234816254743 |
| LinearModel | 0.6812733340787344 |
| KNN | 0.6585315186712242 |
| NODE | 0.657115893399617 |
| TabNet | 0.637308688106541 |
| STG | 0.5903941316474768 |
| SVM | 0.5866270139733308 |
| rtdl_FTTransformer | 0.5784647697260324 |
| VIME | 0.5734227612285091 |
| DANet | 0.5722235937783006 |
| TabTransformer | 0.513612110499139 |
| SAINT | 0.47709956658658653 |
| DeepFM | 0.4693975434294968 |
| NAM | 0.30645967177914923 |
| TabPFNModel | 0.29361079602194096 |
| DeepGBM | 0.0 |


In [11]:
result.T['mean_acc'].mean()

KeyError: 'mean_acc'