In [1]:
import pandas as pd
import os, sys, json
from tqdm import tqdm
root_dir = os.path.dirname(os.getcwd())
sys.path.append(root_dir)

# Datasets & Baselines

In [2]:

# Benchmark records queried by get_baselines, which filters them on the
# alg_name, hparam_source and dataset_name columns.
metadataset_df = pd.read_csv(f"{root_dir}/TabZilla/tutorials/metadataset_clean.csv")

def read_json(file):
    """Load and return the parsed contents of a JSON file.

    Args:
        file: Path of the JSON file to read.

    Returns:
        The deserialized document, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so non-ASCII content reads the same on every machine.
    with open(file, "r", encoding="utf-8") as f:
        return json.load(f)
    
# Shared accumulator filled by get_tabmodels / get_baselines:
# method name -> dataset name -> summary statistics.
# NOTE: this name shadows the built-in ``eval`` for the rest of the notebook.
eval = dict()

# Short model key -> name of that model's directory under ``results/``.
model_dict = dict(
    tabpfn='TabPFNModel',
    tabflex='TabFlexModel',
    tabfast='TabFastModel',
)

# Baseline algorithm names, matched against the metadataset's ``alg_name`` column.
baselines = [
    # linear and classical models
    'LinearModel', 'KNN', 'SVM', 'DecisionTree', 'RandomForest',
    # gradient-boosted trees
    'XGBoost', 'CatBoost', 'LightGBM',
    # neural models
    'MLP', 'TabNet', 'VIME', 'TabTransformer', 'NODE', 'DeepGBM',
    'STG', 'NAM', 'DeepFM', 'SAINT', 'DANet',
    'rtdl_MLP', 'rtdl_ResNet', 'rtdl_FTTransformer',
]

In [3]:
def get_datasets(table_idx):
    """Return the ``DATASETS`` object defined for a given table.

    Args:
        table_idx: Table number. 1, 2 and 4 are supported and map to
            ``analysis.table1``, ``analysis.table2`` and ``analysis.table4``.

    Returns:
        The ``DATASETS`` object exported by the matching ``analysis.table*`` module.

    Raises:
        ValueError: If ``table_idx`` is not one of the supported table numbers.
    """
    # The imports stay local so only the requested table module is loaded.
    if table_idx == 1:
        from analysis.table1 import DATASETS
    elif table_idx == 2:
        from analysis.table2 import DATASETS
    elif table_idx == 4:
        from analysis.table4 import DATASETS
    else:
        # Without this branch the function fell through to ``return DATASETS``
        # and failed with an opaque UnboundLocalError.
        raise ValueError(
            f"Unsupported table_idx {table_idx!r}; expected one of 1, 2 or 4"
        )

    return DATASETS

In [4]:
def get_dataset_stat(dataset):
    """Read basic dataset statistics from a TabFlexModel result file.

    Looks for ``default_trial0_results.json`` directly inside
    ``{root_dir}/results/TabFlexModel/{dataset}`` first, then inside each of its
    subdirectories in sorted name order, and uses the first file that exists.

    Args:
        dataset: Dataset directory name under ``results/TabFlexModel``.

    Returns:
        A dict with the ``num_classes``, ``num_features`` and ``num_instances``
        values taken from the result file's ``dataset`` section.

    Raises:
        FileNotFoundError: If the dataset directory does not exist, or no
            result file is found in it or in any of its subdirectories.
    """
    result_name = 'default_trial0_results.json'
    result_dir = f"{root_dir}/results/TabFlexModel/{dataset}"

    # os.listdir returns entries in arbitrary order; sort so the chosen file is
    # deterministic across machines and runs.
    items = sorted(os.listdir(result_dir))

    candidates = [f"{result_dir}/{result_name}"]
    candidates.extend(
        f"{result_dir}/{item}/{result_name}"
        for item in items
        if os.path.isdir(f"{result_dir}/{item}")
    )

    for path in candidates:
        # Skip run directories that have no result file instead of failing on
        # the first one encountered.
        if os.path.isfile(path):
            stats = read_json(path)['dataset']
            return {
                'num_classes': stats['num_classes'],
                'num_features': stats['num_features'],
                'num_instances': stats['num_instances'],
            }

    raise FileNotFoundError(f"No {result_name} found under {result_dir}")

In [5]:
def get_results(dataset, model, metric = 'Accuracy'):
    """Return the stored run of ``model`` on ``dataset`` with the best test metric.

    Candidate files are ``default_trial0_results.json`` directly inside
    ``{root_dir}/results/{model_dict[model]}/{dataset}`` and inside each of its
    subdirectories. Candidates are ranked by ``scorers.test[metric]``; when that
    value is a list (one score per fold, which is how get_tabmodels later
    aggregates it), the mean of the list is used so that the selection agrees
    with the reported mean rather than comparing lists element by element.

    Args:
        dataset: Dataset directory name.
        model: Key of ``model_dict`` identifying the model.
        metric: Name of the test metric used for ranking.

    Returns:
        The parsed result dict of the best candidate, or ``None`` when no
        candidate could be read and scored. On ties the first candidate wins.

    Raises:
        KeyError: If ``model`` is not a key of ``model_dict``.
        FileNotFoundError: If the dataset directory does not exist.

    Side effects:
        Prints the error and the offending path for every candidate that cannot
        be read or lacks the metric; such candidates are skipped.
    """
    result_name = 'default_trial0_results.json'
    result_dir = f"{root_dir}/results/{model_dict[model]}/{dataset}"

    def fold_mean(value):
        # A per-fold list is reduced to its mean; a bare number is used as-is.
        if isinstance(value, (list, tuple)):
            return sum(value) / len(value)
        return value

    best_eval = None
    best_score = None
    for item in os.listdir(result_dir):
        if item == result_name:
            path = f"{result_dir}/{item}"
        elif os.path.isdir(f"{result_dir}/{item}"):
            path = f"{result_dir}/{item}/{result_name}"
        else:
            continue

        try:
            candidate = read_json(path)
            # Score before accepting, so a file without the metric can never
            # become the "best" result.
            score = fold_mean(candidate['scorers']['test'][metric])
            if best_score is None or score > best_score:
                best_eval, best_score = candidate, score
        except Exception as e:
            # Best effort: report and move on. KeyboardInterrupt is not an
            # Exception subclass, so it still propagates.
            print(e)
            print(f"Error reading {result_dir}/{item}")

    return best_eval

In [59]:
def get_tabmodels(method, eval, raw_results, metric = 'Accuracy', time_divisor = 96):
    """Summarise one method's raw results into ``eval[method]``.

    For every dataset in ``raw_results[method]`` this stores the median and mean
    of the test metric plus mean train / test / total times.

    Args:
        method: Key into ``raw_results`` and into ``eval``.
        eval: Dict updated in place; ``eval[method]`` is replaced by a fresh dict.
        raw_results: ``raw_results[method][dataset]`` must provide
            ``['scorers']['test'][metric]`` and ``['timers']['train' | 'test']``.
        metric: Name of the test metric to summarise.
        time_divisor: Value that the mean train and test timers are divided by.
            Defaults to 96, the constant previously hard-coded here.
            NOTE(review): the meaning of 96 is not visible in this notebook - confirm.

    Returns:
        None. Results are written to ``eval[method][dataset]`` under the keys
        ``median_{metric}``, ``mean_{metric}``, ``train_time``, ``test_time``
        and ``total_time``.

    Raises:
        ValueError: If the raw result of a dataset is ``None`` (no result loaded).
    """
    eval[method] = {}
    for dataset in raw_results[method]:
        raw = raw_results[method][dataset]
        if raw is None:
            # Fail with the offending pair named, instead of an opaque
            # "'NoneType' object is not subscriptable".
            raise ValueError(
                f"No raw results for method {method!r} on dataset {dataset!r}"
            )

        summary = eval[method][dataset] = {}

        scores = pd.Series(raw['scorers']['test'][metric])
        summary[f'median_{metric}'] = scores.median()
        summary[f'mean_{metric}'] = scores.mean()

        timers = raw['timers']
        # The first train timer entry is excluded from the average
        # (presumably a warm-up run - TODO confirm).
        summary['train_time'] = pd.Series(timers['train'][1:]).mean()/time_divisor
        summary['test_time'] = pd.Series(timers['test']).mean()/time_divisor
        summary['total_time'] = summary['train_time'] + summary['test_time']
    

In [16]:
def get_baselines(method, eval, DATASETS, metric = 'Accuracy', missing_value = 0):
    """Summarise one baseline's default-hyperparameter runs into ``eval[method]``.

    Rows are taken from the notebook-level ``metadataset_df`` where ``alg_name``
    equals ``method`` and ``hparam_source`` equals ``"default"``.

    Args:
        method: Algorithm name, matched against the ``alg_name`` column.
        eval: Dict updated in place; ``eval[method]`` is replaced by a fresh dict.
        DATASETS: Iterable of dataset names, matched against ``dataset_name``.
        metric: Metric name; the column read is ``f"{metric}__test"``.
        missing_value: Value stored for every field of a dataset that has no
            matching rows. Defaults to 0, the previous behaviour. Note that 0
            lowers any average taken over datasets; pass ``float("nan")`` to
            mark such datasets as missing instead.

    Returns:
        None. ``eval[method][dataset]`` receives ``median_{metric}``,
        ``mean_{metric}``, ``train_time``, ``test_time`` and ``total_time``.

    Raises:
        KeyError: If a required column is absent from ``metadataset_df``.
    """
    eval[method] = {}
    metric_col = f"{metric}__test"

    # The algorithm / hyperparameter filter does not depend on the dataset, so
    # scan the full metadataset once here instead of once per dataset.
    method_rows = metadataset_df.loc[
        (metadataset_df["alg_name"] == method) &
        (metadataset_df["hparam_source"] == "default"),
        ["dataset_name", metric_col, "training_time", "eval-time__test"],
    ]

    for dataset in DATASETS:
        summary = eval[method][dataset] = {}
        result = method_rows.loc[method_rows["dataset_name"] == dataset]
        if result.empty:
            # No default run recorded for this (method, dataset) pair.
            summary[f'median_{metric}'] = missing_value
            summary[f'mean_{metric}'] = missing_value
            summary['train_time'] = missing_value
            summary['test_time'] = missing_value
            summary['total_time'] = missing_value
        else:
            summary[f'median_{metric}'] = result[metric_col].median()
            summary[f'mean_{metric}'] = result[metric_col].mean()
            summary['train_time'] = result['training_time'].mean()
            summary['test_time'] = result['eval-time__test'].mean()
            summary['total_time'] = summary['train_time'] + summary['test_time']

# Get the Results of TabModels

In [60]:
# Which table's dataset list to use, and which test metric to report.
table_idx = 4
metric = 'AUC'

DATASETS = get_datasets(table_idx)
raw_results = {}

# Pass 1: load the selected raw result for every (model, dataset) pair.
# get_results prints unreadable candidates and may return None for a pair.
tqdm_bar = tqdm(model_dict)
for model in tqdm_bar:
    tqdm_bar.set_description(f"Reading results of {model}...")
    raw_results[model] = {}
    for dataset in DATASETS:
        raw_results[model][dataset] = get_results(dataset, model, metric = metric)            
        
# Pass 2: reduce the raw results into per-dataset summaries stored in `eval`.
tqdm_bar = tqdm(model_dict)
for model in tqdm_bar:
    tqdm_bar.set_description(f"Computing results of {model}...")
    get_tabmodels(model, eval, raw_results, metric)

Reading results of tabfast...: 100%|██████████| 3/3 [00:00<00:00, 105.17it/s]


[Errno 2] No such file or directory: '/datadrive/tabzilla/results/TabFlexModel/openml__credit-g__31/100000/default_trial0_results.json'
Error reading /datadrive/tabzilla/results/TabFlexModel/openml__credit-g__31/100000


Computing results of tabfast...: 100%|██████████| 3/3 [00:00<00:00, 105.88it/s]


In [61]:
# One entry per dataset: its statistics plus, for each tab model, the mean
# metric and the total time, both rounded to 4 decimals.
table_dict = {}

for dataset in DATASETS:
    table_dict[dataset] = get_dataset_stat(dataset)
    for method in model_dict:
        table_dict[dataset][f'{method}_{metric}'] = round(eval[method][dataset][f'mean_{metric}'], 4)
        table_dict[dataset][f'{method}_time'] = round(eval[method][dataset]['total_time'], 4)
        
# Transpose so datasets become rows and the statistics/metrics become columns.
table = pd.DataFrame(table_dict).T

table

Unnamed: 0,num_classes,num_features,num_instances,tabpfn_AUC,tabpfn_time,tabflex_AUC,tabflex_time,tabfast_AUC,tabfast_time
openml__Australian__146818,1.0,14.0,690.0,0.9318,0.3905,0.9259,0.0674,0.9298,0.0733
openml__Bioresponse__9910,1.0,1776.0,3751.0,0.5073,0.8971,0.8096,0.2406,0.4906,0.1624
openml__GesturePhaseSegmentationProcessed__14969,5.0,32.0,9873.0,0.8542,1.2962,0.7759,0.2838,0.8126,0.3212
openml__MiniBooNE__168335,1.0,50.0,130064.0,0.9769,1.3416,0.9658,0.4192,0.9727,0.4646
openml__SpeedDating__146607,1.0,120.0,8378.0,0.5829,0.893,0.8481,0.3438,0.5631,0.1914
openml__ada_agnostic__3896,1.0,48.0,4562.0,0.8969,1.2261,0.8863,0.2097,0.8956,0.2518
openml__airlines__189354,1.0,7.0,539383.0,0.6277,0.665,0.621,0.3803,0.6398,0.5012
openml__albert__189356,1.0,78.0,425240.0,0.686,1.3729,0.692,0.5923,0.7014,1.1137
openml__artificial-characters__14964,10.0,7.0,10218.0,0.9618,0.4729,0.8991,0.2477,0.95,0.2682
openml__audiology__7,24.0,69.0,226.0,0.8182,0.032,0.8452,0.1493,0.8222,0.1752


In [22]:
# Datasets where TabPFN's metric is at least as high as both TabFlex's and TabFast's (ties included).
table[(table[f'tabpfn_{metric}'] >= table[f'tabflex_{metric}']) & (table[f'tabpfn_{metric}'] >= table[f'tabfast_{metric}'])]

Unnamed: 0,num_classes,num_features,num_instances,tabpfn_AUC,tabpfn_time,tabflex_AUC,tabflex_time,tabfast_AUC,tabfast_time
openml__Australian__146818,1.0,14.0,690.0,0.9318,44.5841,0.9259,7.5286,0.9298,8.6669
openml__GesturePhaseSegmentationProcessed__14969,5.0,32.0,9873.0,0.8542,130.7998,0.7545,18.3481,0.8126,31.8087
openml__MiniBooNE__168335,1.0,50.0,130064.0,0.9769,134.4315,0.9631,50.953,0.9727,45.5417
openml__ada_agnostic__3896,1.0,48.0,4562.0,0.8969,124.3009,0.8866,13.6821,0.8956,25.2711
openml__artificial-characters__14964,10.0,7.0,10218.0,0.9618,52.1905,0.8568,24.4703,0.95,27.329
openml__balance-scale__11,3.0,4.0,625.0,0.9993,37.4801,0.9952,4.0068,0.9974,4.109
openml__credit-approval__29,1.0,15.0,690.0,0.9353,53.6654,0.935,8.0043,0.9333,7.3571
openml__credit-g__31,1.0,20.0,1000.0,0.7651,51.5782,0.7296,12.2756,0.7616,13.5104
openml__electricity__219,1.0,8.0,45312.0,0.8886,68.5184,0.8641,37.3372,0.8872,35.8761
openml__heart-h__50,1.0,13.0,294.0,0.8819,9.569,0.8746,4.3212,0.8774,6.533


In [23]:
# Datasets where TabFlex's metric is at least as high as both TabPFN's and TabFast's (ties included).
table[(table[f'tabflex_{metric}'] >= table[f'tabpfn_{metric}']) & (table[f'tabflex_{metric}'] >= table[f'tabfast_{metric}'])]

Unnamed: 0,num_classes,num_features,num_instances,tabpfn_AUC,tabpfn_time,tabflex_AUC,tabflex_time,tabfast_AUC,tabfast_time
openml__Bioresponse__9910,1.0,1776.0,3751.0,0.5073,92.9538,0.8096,24.3545,0.4906,16.6968
openml__SpeedDating__146607,1.0,120.0,8378.0,0.5829,92.327,0.8435,20.3257,0.5631,19.3026
openml__cnae-9__9981,9.0,856.0,1080.0,0.5005,91.1681,0.9167,32.3921,0.499,13.1144
openml__colic__25,1.0,26.0,368.0,0.8784,9.851,0.8847,9.8801,0.8751,7.953
openml__guillermo__168337,1.0,4296.0,20000.0,0.4993,90.66,0.5089,59.1614,0.499,17.5223
openml__jasmine__168911,1.0,144.0,2984.0,0.751,89.5438,0.8514,24.0621,0.6946,17.9377
openml__nomao__9977,1.0,118.0,34465.0,0.6642,93.1088,0.985,43.6235,0.7748,22.2089
openml__one-hundred-plants-texture__9956,100.0,64.0,1599.0,0.5452,37.374,0.5489,27.6299,0.5453,21.1102


In [24]:
# Datasets where TabFast's metric is at least as high as both TabPFN's and TabFlex's (ties included).
table[(table[f'tabfast_{metric}'] >= table[f'tabpfn_{metric}']) & (table[f'tabfast_{metric}'] >= table[f'tabflex_{metric}'])]

Unnamed: 0,num_classes,num_features,num_instances,tabpfn_AUC,tabpfn_time,tabflex_AUC,tabflex_time,tabfast_AUC,tabfast_time
openml__airlines__189354,1.0,7.0,539383.0,0.6277,70.3585,0.6124,60.0918,0.6398,49.1933
openml__albert__189356,1.0,78.0,425240.0,0.686,138.5892,0.689,118.5427,0.7014,107.7514
openml__audiology__7,24.0,69.0,226.0,0.8182,9.9912,0.7484,15.5756,0.8222,17.7511
openml__elevators__3711,1.0,18.0,16599.0,0.945,131.1623,0.9256,12.1635,0.9492,35.1914
openml__higgs__146606,1.0,28.0,98050.0,0.7241,130.1115,0.676,46.6693,0.756,46.1001
openml__poker-hand__9890,10.0,10.0,1025009.0,0.7178,49.3634,0.5078,44.4131,0.8362,41.553
openml__profb__3561,1.0,9.0,672.0,0.6617,51.3491,0.6414,4.6467,0.6644,4.8287


In [26]:
# Subset with at least 50,000 instances and at most 100 features; the TabFlex
# columns are dropped so only TabPFN and TabFast remain.
table_fast = table[(table['num_instances'] >= 50000) & (table['num_features'] <= 100)].drop(columns = ['tabflex_time', f'tabflex_{metric}'])
table_fast

Unnamed: 0,num_classes,num_features,num_instances,tabpfn_AUC,tabpfn_time,tabfast_AUC,tabfast_time
openml__MiniBooNE__168335,1.0,50.0,130064.0,0.9769,134.4315,0.9727,45.5417
openml__airlines__189354,1.0,7.0,539383.0,0.6277,70.3585,0.6398,49.1933
openml__albert__189356,1.0,78.0,425240.0,0.686,138.5892,0.7014,107.7514
openml__higgs__146606,1.0,28.0,98050.0,0.7241,130.1115,0.756,46.1001
openml__poker-hand__9890,10.0,10.0,1025009.0,0.7178,49.3634,0.8362,41.553


In [28]:
# Subset with more than 100 features, or a feature-to-instance ratio of at
# least 0.1; the TabFast columns are dropped so only TabPFN and TabFlex remain.
table_flex = table[(table['num_features'] > 100) | (table['num_features']/table['num_instances'] >= 0.1)].drop(columns = ['tabfast_time', f'tabfast_{metric}'])
table_flex

Unnamed: 0,num_classes,num_features,num_instances,tabpfn_AUC,tabpfn_time,tabflex_AUC,tabflex_time
openml__Bioresponse__9910,1.0,1776.0,3751.0,0.5073,92.9538,0.8096,24.3545
openml__SpeedDating__146607,1.0,120.0,8378.0,0.5829,92.327,0.8435,20.3257
openml__audiology__7,24.0,69.0,226.0,0.8182,9.9912,0.7484,15.5756
openml__cnae-9__9981,9.0,856.0,1080.0,0.5005,91.1681,0.9167,32.3921
openml__guillermo__168337,1.0,4296.0,20000.0,0.4993,90.66,0.5089,59.1614
openml__jasmine__168911,1.0,144.0,2984.0,0.751,89.5438,0.8514,24.0621
openml__lymph__10,4.0,18.0,148.0,0.9087,9.35,0.8721,4.8102
openml__nomao__9977,1.0,118.0,34465.0,0.6642,93.1088,0.985,43.6235


# Compare with Other Baselines

In [29]:
# Summarise every baseline from the metadataset. A failure in one baseline is
# reported and does not stop the remaining ones.
tqdm_bar = tqdm(baselines)
for baseline in tqdm_bar:
    tqdm_bar.set_description(f"Computing results of {baseline}...")
    try:
        get_baselines(baseline, eval, DATASETS, metric)
    except Exception as exc:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still stop the cell, and show the
        # cause instead of hiding it.
        print(f"Error in {baseline}: {exc!r}")

methods = baselines + list(model_dict.keys())
final_results = {}

# Collapse each method's per-dataset mean metric into a single average.
# Datasets without baseline results are stored as 0 by get_baselines' default,
# so they pull that baseline's average down.
for method in methods:
    result = pd.DataFrame(eval[method]).T
    final_results[method] = result[f'mean_{metric}'].mean()

# Print the ranking, best first, as markdown table rows.
sorted_methods = sorted(methods, key=lambda x: final_results[x], reverse=True)
for method in sorted_methods:
    print(f"| {method} | {final_results[method]} |")

Computing results of rtdl_FTTransformer...: 100%|██████████| 22/22 [02:07<00:00,  5.82s/it]

| XGBoost | 0.89626674151231 |
| CatBoost | 0.8710780319364649 |
| rtdl_ResNet | 0.865423274822542 |
| RandomForest | 0.8517725838051355 |
| rtdl_MLP | 0.8389464580267786 |
| tabflex | 0.8266660559220623 |
| tabpfn | 0.8236528393585038 |
| tabfast | 0.823597170219736 |
| MLP | 0.8144863457854493 |
| LightGBM | 0.8064431201510455 |
| DecisionTree | 0.7904649113211598 |
| LinearModel | 0.7844321715770098 |
| KNN | 0.7474054796492654 |
| NODE | 0.7247265454837093 |
| TabNet | 0.7219269236127556 |
| VIME | 0.688526351681953 |
| rtdl_FTTransformer | 0.6791009939981121 |
| STG | 0.6748985179731256 |
| DANet | 0.660580705297897 |
| SVM | 0.6507085992457963 |
| TabTransformer | 0.5891746586557063 |
| SAINT | 0.5367639924497778 |
| DeepFM | 0.5026993451840268 |
| NAM | 0.32484285783465794 |
| DeepGBM | 0.0 |





In [37]:
# Spot-check of a single total_time entry. `method` and `dataset` are whatever
# earlier cells left them bound to, so the value depends on cell execution order.
round(eval[method][dataset]['total_time'], 4)

0.0293

In [62]:
# Rebuild every table_dict entry, this time with the baselines included
# alongside the tab models (each dataset's previous entry is overwritten).
for dataset in DATASETS:
    table_dict[dataset] = get_dataset_stat(dataset)
    for method in baselines + list(model_dict.keys()):
        table_dict[dataset][f'{method}_{metric}'] = round(eval[method][dataset][f'mean_{metric}'], 4)
        table_dict[dataset][f'{method}_time'] = round(eval[method][dataset]['total_time'], 4)

In [63]:
# Transposed so datasets are rows, with one metric and one time column per method.
full_table = pd.DataFrame(table_dict).T

In [64]:
# Show all columns when displaying wide frames (this changes the global pandas option).
pd.set_option('display.max_columns', None)
# Rows of the table_flex subset, compared against CatBoost and XGBoost:
# metric columns first, then the matching time columns.
# The column names below hard-code "AUC" and so assume metric == 'AUC'.
this_table = full_table.loc[table_flex.index].drop(columns = ['tabfast_time', f'tabfast_{metric}'])
this_table[['num_classes', 'num_features', 'num_instances', 'tabpfn_AUC', 'tabflex_AUC', 'CatBoost_AUC', 'XGBoost_AUC', 'tabpfn_time', 'tabflex_time', 'CatBoost_time', 'XGBoost_time']]

Unnamed: 0,num_classes,num_features,num_instances,tabpfn_AUC,tabflex_AUC,CatBoost_AUC,XGBoost_AUC,tabpfn_time,tabflex_time,CatBoost_time,XGBoost_time
openml__Bioresponse__9910,1.0,1776.0,3751.0,0.5073,0.8096,0.8555,0.87,0.8971,0.2406,2.6371,2.4921
openml__SpeedDating__146607,1.0,120.0,8378.0,0.5829,0.8481,0.8637,0.8753,0.893,0.3438,12.0404,2.443
openml__audiology__7,24.0,69.0,226.0,0.8182,0.8452,0.8752,0.942,0.032,0.1493,54.5559,1.3723
openml__cnae-9__9981,9.0,856.0,1080.0,0.5005,0.9136,0.9899,0.9939,0.8801,0.2972,1.7671,5.5404
openml__guillermo__168337,1.0,4296.0,20000.0,0.4993,0.5089,0.0,0.8874,0.8726,0.6053,0.0,13.1522
openml__jasmine__168911,1.0,144.0,2984.0,0.751,0.8511,0.8712,0.8734,0.865,0.1951,1.6779,0.3858
openml__lymph__10,4.0,18.0,148.0,0.9087,0.8721,0.897,0.8976,0.0013,0.0433,2.1286,0.3264
openml__nomao__9977,1.0,118.0,34465.0,0.6642,0.985,0.9915,0.9931,0.8979,0.4431,4.2106,0.8566


In [50]:
# Rows of the table_fast subset, compared against CatBoost and XGBoost:
# metric columns first, then the matching time columns.
# The column names below hard-code "AUC" and so assume metric == 'AUC'.
this_table = full_table.loc[table_fast.index].drop(columns = ['tabflex_time', f'tabflex_{metric}'])
this_table[['num_classes', 'num_features', 'num_instances', 'tabpfn_AUC', 'tabfast_AUC', 'CatBoost_AUC', 'XGBoost_AUC', 'tabpfn_time', 'tabfast_time', 'CatBoost_time', 'XGBoost_time']]

Unnamed: 0,num_classes,num_features,num_instances,tabpfn_AUC,tabfast_AUC,CatBoost_AUC,XGBoost_AUC,tabpfn_time,tabfast_time,CatBoost_time,XGBoost_time
openml__MiniBooNE__168335,1.0,50.0,130064.0,0.9769,0.9727,0.9778,0.9811,134.4315,45.5417,2.6046,0.9711
openml__airlines__189354,1.0,7.0,539383.0,0.6277,0.6398,0.6977,0.7026,70.3585,49.1933,4.5096,0.9884
openml__albert__189356,1.0,78.0,425240.0,0.686,0.7014,0.7677,0.7455,138.5892,107.7514,33.9801,3.9613
openml__higgs__146606,1.0,28.0,98050.0,0.7241,0.756,0.7871,0.7988,130.1115,46.1001,1.9992,0.608
openml__poker-hand__9890,10.0,10.0,1025009.0,0.7178,0.8362,0.8385,0.67,49.3634,41.553,355.3651,7.7824


In [58]:
# Ad-hoc check: mean training and evaluation time recorded in the metadataset
# for one algorithm / dataset pair with default hyperparameters.
# Note that this rebinds the notebook-level `method` and `dataset` names.
method = 'TabPFNModel'
dataset = 'openml__lymph__10'

for time_type in ['training_time', 'eval-time__test']:
    print(time_type, metadataset_df.loc[
        (metadataset_df["alg_name"] == method) & 
        (metadataset_df["hparam_source"] == "default") &
        (metadataset_df["dataset_name"] == dataset),
        [
            "dataset_fold_id", 
            "alg_name", 
            "hparam_source", 
            f"{metric}__test", 
            "training_time",
            "eval-time__test"]
    ][time_type].mean())

training_time 0.00043345699999989006
eval-time__test 0.1568050029


In [None]:
# Look up the summary stored in `eval` for the current `method` / `dataset`.
# NOTE(review): the cells in this notebook only add model_dict keys and baseline
# names to `eval`; confirm the current `method` is one of them on a fresh run.
eval[method][dataset]

{'median_AUC': 0.9779434631355163, 'mean_AUC': 0.9778310827003283}

In [None]:
# Individual eval-time__test values of the default-hyperparameter rows for the
# current `method` / `dataset`.
metadataset_df.loc[
    (metadataset_df["alg_name"] == method) & 
    (metadataset_df["hparam_source"] == "default") &
    (metadataset_df["dataset_name"] == dataset),
    [
        "dataset_fold_id", 
        "alg_name", 
        "hparam_source", 
        f"{metric}__test", 
        "training_time",
        "eval-time__test"]
]["eval-time__test"]

66390    0.031840
66646    0.014282
66902    0.022972
67158    0.030814
67414    0.030835
67670    0.021542
67926    0.032461
68182    0.022469
68438    0.029921
68694    0.023271
Name: eval-time__test, dtype: float64

In [None]:
# Individual training_time values of the default-hyperparameter rows for the
# current `method` / `dataset`.
metadataset_df.loc[
    (metadataset_df["alg_name"] == method) & 
    (metadataset_df["hparam_source"] == "default") &
    (metadataset_df["dataset_name"] == dataset),
    [
        "dataset_fold_id", 
        "alg_name", 
        "hparam_source", 
        f"{metric}__test", 
        "training_time",
        "eval-time__test"]
]["training_time"]

66390    3.497594
66646    2.381082
66902    2.475568
67158    2.398105
67414    2.505245
67670    2.597106
67926    2.478758
68182    2.374573
68438    2.570568
68694    2.507408
Name: training_time, dtype: float64