In [1]:
import pandas as pd
import os, sys, json
from tqdm import tqdm
# The repository root is taken to be the parent of the current working
# directory; it is put on sys.path so that project packages (e.g. `analysis`)
# can be imported from the notebook.
root_dir = os.path.dirname(os.getcwd())
# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if root_dir not in sys.path:
    sys.path.append(root_dir)

# Datasets & Baselines

In [2]:

# Baseline results table, loaded once from the TabZilla tutorials folder under
# root_dir. get_baselines() filters it on the `alg_name`, `hparam_source` and
# `dataset_name` columns and reads the `<metric>__test` column.
metadataset_df = pd.read_csv(f"{root_dir}/TabZilla/tutorials/metadataset_clean.csv")

def read_json(file):
    """Load and return the parsed contents of the JSON file at ``file``.

    The file is opened explicitly as UTF-8 (the standard JSON encoding) so the
    result does not depend on the platform's default text encoding.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
    """
    with open(file, "r", encoding="utf-8") as f:
        return json.load(f)
    
# Shared accumulator: get_tabmodels() and get_baselines() store their
# per-method, per-dataset summaries here (eval[method][dataset][...]).
# NOTE: the name shadows the builtin `eval`; it is kept because other cells
# refer to it by this name.
eval = dict()

# Short model key -> name of that model's folder under `results/`.
model_dict = dict(
    tabpfn='TabPFNModel',
    tabflex='TabFlexModel',
    tabfast='TabFastModel',
)

# Baseline algorithm names, matched against `alg_name` in metadataset_df.
# The order is preserved from the original list.
baselines = [
    'LinearModel', 'KNN', 'SVM', 'DecisionTree', 'RandomForest',
    'XGBoost', 'CatBoost', 'LightGBM',
    'MLP', 'TabNet', 'VIME', 'TabTransformer', 'NODE',
    'DeepGBM', 'STG', 'NAM', 'DeepFM', 'SAINT', 'DANet',
    'rtdl_MLP', 'rtdl_ResNet', 'rtdl_FTTransformer',
]

In [3]:
def get_datasets(table_idx):
    """Return the ``DATASETS`` object defined for the given analysis table.

    The matching ``analysis.table<N>`` module is imported lazily, so only the
    requested table's module is loaded.

    Args:
        table_idx: table number; 1, 2 and 4 are supported.

    Returns:
        The ``DATASETS`` attribute of the selected ``analysis.table<N>`` module.

    Raises:
        ValueError: if ``table_idx`` is not one of the supported tables
            (previously this surfaced as an UnboundLocalError on ``DATASETS``).
        ImportError: if the selected module or its ``DATASETS`` is unavailable.
    """
    if table_idx == 1:
        from analysis.table1 import DATASETS
    elif table_idx == 2:
        from analysis.table2 import DATASETS
    elif table_idx == 4:
        from analysis.table4 import DATASETS
    else:
        raise ValueError(
            f"Unsupported table_idx {table_idx!r}; expected one of 1, 2, 4"
        )

    return DATASETS

In [4]:
def get_dataset_stat(dataset, model_name = 'TabFlexModel'):
    """Return basic statistics of ``dataset`` as recorded in a results file.

    Looks in ``{root_dir}/results/{model_name}/{dataset}`` for a
    ``default_trial0_results.json`` file, either directly in that folder or
    inside one of its sub-folders, and reads the ``dataset`` section of the
    first one found.

    Args:
        dataset: dataset folder name.
        model_name: results folder of the model whose files are read
            (defaults to ``'TabFlexModel'``, the previously hard-coded value).

    Returns:
        dict with the keys ``num_classes``, ``num_features`` and
        ``num_instances`` copied from the result file.

    Raises:
        FileNotFoundError: if the results folder does not exist or contains no
            ``default_trial0_results.json`` (previously the latter case
            surfaced as an UnboundLocalError).
    """
    result_file = 'default_trial0_results.json'
    result_dir = f"{root_dir}/results/{model_name}/{dataset}"

    result = None
    # os.listdir returns entries in arbitrary order; sorting makes the choice
    # of which run is read reproducible.
    for item in sorted(os.listdir(result_dir)):
        if item == result_file:
            candidate = f"{result_dir}/{item}"
        elif os.path.isdir(f"{result_dir}/{item}"):
            candidate = f"{result_dir}/{item}/{result_file}"
        else:
            continue
        # Skip sub-folders that hold no result file instead of failing on them.
        if os.path.isfile(candidate):
            result = read_json(candidate)
            break

    if result is None:
        raise FileNotFoundError(f"No {result_file} found under {result_dir}")

    stats = result['dataset']
    return {
        'num_classes': stats['num_classes'],
        'num_features': stats['num_features'],
        'num_instances': stats['num_instances'],
    }

In [5]:
def get_results(dataset, model, metric = 'Accuracy'):
    """Return the stored result with the highest test ``metric`` for a model.

    Scans ``{root_dir}/results/{model_dict[model]}/{dataset}`` for
    ``default_trial0_results.json`` files (directly in the folder or one level
    down in a sub-folder) and keeps the one whose
    ``['scorers']['test'][metric]`` compares greatest.

    Args:
        dataset: dataset folder name.
        model: key of ``model_dict``.
        metric: name of the test metric used to rank the candidate results.

    Returns:
        The parsed JSON of the best result, or ``None`` if no result could be
        read. Unreadable or malformed candidates are reported with ``print``
        and skipped.

    NOTE(review): the values are compared with plain ``>``. get_tabmodels()
    wraps the same field in a ``pd.Series``, so it may be a list of per-fold
    scores; if so, this comparison is lexicographic (first fold first) rather
    than by mean. Confirm that is intended.
    NOTE(review): the winner is selected on *test* scores. Confirm that is
    acceptable for the reported numbers.
    """
    result_file = 'default_trial0_results.json'
    result_dir = f"{root_dir}/results/{model_dict[model]}/{dataset}"

    best_result = None
    # Sorted so that ties are broken reproducibly (os.listdir order is arbitrary).
    for item in sorted(os.listdir(result_dir)):
        try:
            if item == result_file:
                candidate_path = f"{result_dir}/{item}"
            elif os.path.isdir(f"{result_dir}/{item}"):
                candidate_path = f"{result_dir}/{item}/{result_file}"
            else:
                continue

            candidate = read_json(candidate_path)
            if (
                best_result is None
                or candidate['scorers']['test'][metric] > best_result['scorers']['test'][metric]
            ):
                best_result = candidate
        # KeyboardInterrupt is not an Exception subclass, so it propagates
        # untouched and the user can still abort the scan.
        except Exception as e:
            # print the error message
            print(e)
            print(f"Error reading {result_dir}/{item}")

    return best_result

In [6]:
def get_tabmodels(method, eval, raw_results, metric = 'Accuracy'):
    """Summarise the test ``metric`` of ``method`` for every dataset.

    For each dataset in ``raw_results[method]`` the value found at
    ``['scorers']['test'][metric]`` is turned into a ``pd.Series`` and its
    median and mean are written to ``eval[method][dataset]`` under the keys
    ``median_<metric>`` and ``mean_<metric>``. ``eval[method]`` is reset first.
    The function works by mutating ``eval`` and returns nothing.
    """
    per_dataset = eval[method] = {}
    for name, run in raw_results[method].items():
        entry = per_dataset[name] = {}
        scores = pd.Series(run['scorers']['test'][metric])
        entry[f'median_{metric}'] = scores.median()
        entry[f'mean_{metric}'] = scores.mean()
    

In [7]:
def get_baselines(method, eval, DATASETS, metric = 'Accuracy'):
    """Summarise a baseline's default-hyperparameter test ``metric`` per dataset.

    Rows of the module-level ``metadataset_df`` with ``alg_name == method`` and
    ``hparam_source == "default"`` are selected; for each dataset in
    ``DATASETS`` the median and mean of the ``<metric>__test`` column are
    written to ``eval[method][dataset]`` under ``median_<metric>`` and
    ``mean_<metric>``. ``eval[method]`` is reset first. The function works by
    mutating ``eval`` and returns nothing.

    Datasets with no matching rows get 0 for both statistics, so a missing
    baseline run lowers any average computed over these values.

    Raises:
        KeyError: if ``metadataset_df`` has no ``<metric>__test`` column.
    """
    metric_col = f"{metric}__test"
    eval[method] = {}

    # The method / hparam_source filter does not depend on the dataset, so it
    # is applied once here instead of re-scanning the full table per dataset.
    method_rows = metadataset_df.loc[
        (metadataset_df["alg_name"] == method) &
        (metadataset_df["hparam_source"] == "default"),
        ["dataset_name", metric_col],
    ]

    for dataset in DATASETS:
        scores = method_rows.loc[method_rows["dataset_name"] == dataset, metric_col]
        if scores.empty:
            eval[method][dataset] = {
                f'median_{metric}': 0,
                f'mean_{metric}': 0,
            }
        else:
            eval[method][dataset] = {
                f'median_{metric}': scores.median(),
                f'mean_{metric}': scores.mean(),
            }
    

# Get the Results of TabModels

In [14]:
# Which analysis table's DATASETS to use (see get_datasets) and which metric
# is summarised by get_tabmodels and reported in the cells below.
table_idx = 4
metric = 'AUC'

DATASETS = get_datasets(table_idx)
raw_results = {}

# Pass 1: for every model, load the best stored result of each dataset.
tqdm_bar = tqdm(model_dict)
for model in tqdm_bar:
    tqdm_bar.set_description(f"Reading results of {model}...")
    raw_results[model] = {}
    for dataset in DATASETS:
        # NOTE(review): the best run is selected by test 'Accuracy' even though
        # `metric` above is what gets reported -- confirm this is intentional.
        raw_results[model][dataset] = get_results(dataset, model, metric = 'Accuracy')            
        
# Pass 2: reduce the loaded results to median/mean of `metric`, stored in `eval`.
tqdm_bar = tqdm(model_dict)
for model in tqdm_bar:
    tqdm_bar.set_description(f"Computing results of {model}...")
    get_tabmodels(model, eval, raw_results, metric)

Reading results of tabfast...: 100%|██████████| 3/3 [00:00<00:00, 121.68it/s]
Computing results of tabfast...: 100%|██████████| 3/3 [00:00<00:00, 180.87it/s]


In [15]:
# Build one record per dataset: its statistics plus each model's mean `metric`
# rounded to two decimals.
table = {}

for dataset in DATASETS:
    table[dataset] = get_dataset_stat(dataset)
    for method in model_dict:
        # Rounding happens here, so the `>=` comparisons in the following
        # cells operate on the rounded values.
        table[dataset][method] = round(eval[method][dataset][f'mean_{metric}'], 2)
# `table` is rebound from the dict to a DataFrame; the transpose puts datasets
# on the rows and statistics / models on the columns.
table = pd.DataFrame(table).T

table

Unnamed: 0,num_classes,num_features,num_instances,tabpfn,tabflex,tabfast
openml__Australian__146818,1.0,14.0,690.0,0.93,0.93,0.93
openml__Bioresponse__9910,1.0,1776.0,3751.0,0.51,0.81,0.49
openml__GesturePhaseSegmentationProcessed__14969,5.0,32.0,9873.0,0.85,0.75,0.81
openml__MiniBooNE__168335,1.0,50.0,130064.0,0.98,0.96,0.97
openml__SpeedDating__146607,1.0,120.0,8378.0,0.58,0.84,0.56
openml__ada_agnostic__3896,1.0,48.0,4562.0,0.9,0.89,0.9
openml__airlines__189354,1.0,7.0,539383.0,0.63,0.61,0.64
openml__albert__189356,1.0,78.0,425240.0,0.69,0.69,0.7
openml__artificial-characters__14964,10.0,7.0,10218.0,0.96,0.86,0.95
openml__audiology__7,24.0,69.0,226.0,0.82,0.75,0.82


In [16]:
# Rows where tabpfn's value is >= both tabflex's and tabfast's (ties included).
table.loc[table['tabpfn'].ge(table['tabflex']) & table['tabpfn'].ge(table['tabfast'])]

Unnamed: 0,num_classes,num_features,num_instances,tabpfn,tabflex,tabfast
openml__GesturePhaseSegmentationProcessed__14969,5.0,32.0,9873.0,0.85,0.75,0.81
openml__MiniBooNE__168335,1.0,50.0,130064.0,0.98,0.96,0.97
openml__artificial-characters__14964,10.0,7.0,10218.0,0.96,0.86,0.95
openml__credit-g__31,1.0,20.0,1000.0,0.77,0.73,0.76
openml__jungle_chess_2pcs_raw_endgame_complete__167119,3.0,6.0,44819.0,0.95,0.87,0.93
openml__kc1__3917,1.0,21.0,2109.0,0.83,0.8,0.81
openml__lymph__10,4.0,18.0,148.0,0.91,0.87,0.89
openml__monks-problems-2__146065,1.0,6.0,601.0,1.0,0.63,0.96
openml__phoneme__9952,1.0,5.0,5404.0,0.94,0.91,0.93
openml__qsar-biodeg__9957,1.0,41.0,1055.0,0.95,0.93,0.94


In [17]:
# Rows where tabflex's value is >= both tabpfn's and tabfast's (ties included).
table.loc[table['tabflex'].ge(table['tabpfn']) & table['tabflex'].ge(table['tabfast'])]

Unnamed: 0,num_classes,num_features,num_instances,tabpfn,tabflex,tabfast
openml__Bioresponse__9910,1.0,1776.0,3751.0,0.51,0.81,0.49
openml__SpeedDating__146607,1.0,120.0,8378.0,0.58,0.84,0.56
openml__cnae-9__9981,9.0,856.0,1080.0,0.5,0.92,0.5
openml__guillermo__168337,1.0,4296.0,20000.0,0.5,0.51,0.5
openml__jasmine__168911,1.0,144.0,2984.0,0.75,0.85,0.69
openml__nomao__9977,1.0,118.0,34465.0,0.66,0.99,0.77


In [13]:
# Rows where tabfast's value is >= both tabpfn's and tabflex's (ties included).
table.loc[table['tabfast'].ge(table['tabpfn']) & table['tabfast'].ge(table['tabflex'])]

Unnamed: 0,num_classes,num_features,num_instances,tabpfn,tabflex,tabfast
openml__ada_agnostic__3896,1.0,48.0,4562.0,0.84,0.83,0.85
openml__airlines__189354,1.0,7.0,539383.0,0.59,0.58,0.61
openml__albert__189356,1.0,78.0,425240.0,0.64,0.64,0.65
openml__higgs__146606,1.0,28.0,98050.0,0.66,0.63,0.69
openml__jungle_chess_2pcs_raw_endgame_complete__167119,3.0,6.0,44819.0,0.8,0.74,0.81


# Compare with Other Baselines

In [10]:
# Summarise every baseline into `eval`, next to the tab-model entries.
tqdm_bar = tqdm(baselines)
for baseline in tqdm_bar:
    tqdm_bar.set_description(f"Computing results of {baseline}...")
    try:
        get_baselines(baseline, eval, DATASETS, metric)
    # Catch Exception rather than using a bare `except:` so that Ctrl-C
    # (KeyboardInterrupt) still aborts this long loop, and report the cause
    # of the failure instead of hiding it.
    except Exception as e:
        print(e)
        print(f"Error in {baseline}")

methods = baselines + list(model_dict.keys())
final_results = {}

# Overall score of a method = average over datasets of its per-dataset mean.
for method in methods:
    result = pd.DataFrame(eval[method]).T
    final_results[method] = result[f'mean_{metric}'].mean()
    
# Print the ranking, best first, as markdown table rows.
sorted_methods = sorted(methods, key=lambda x: final_results[x], reverse=True)
for method in sorted_methods:
    print(f"| {method} | {final_results[method]} |")

Computing results of rtdl_FTTransformer...: 100%|██████████| 22/22 [02:01<00:00,  5.53s/it]

| XGBoost | 0.8166618488039813 |
| rtdl_ResNet | 0.7889798721315184 |
| CatBoost | 0.7829282619838743 |
| rtdl_MLP | 0.7662391170637552 |
| RandomForest | 0.749623273657127 |
| MLP | 0.7439781326456066 |
| LightGBM | 0.7412342972503079 |
| tabpfn | 0.7329701309100504 |
| tabfast | 0.7154438572660874 |
| tabflex | 0.7100851215882988 |
| DecisionTree | 0.7039372670440281 |
| LinearModel | 0.7011188142309125 |
| KNN | 0.6804262994870354 |
| NODE | 0.680162669798773 |
| TabNet | 0.6427192801995545 |
| rtdl_FTTransformer | 0.603225789055825 |
| STG | 0.5959023253558243 |
| SVM | 0.592952329205104 |
| DANet | 0.5791497874748692 |
| VIME | 0.5783794566153639 |
| TabTransformer | 0.5208634078763127 |
| SAINT | 0.4860593724545725 |
| DeepFM | 0.47933240005782235 |
| NAM | 0.31928851998597413 |
| DeepGBM | 0.0 |



