In [1]:
import matplotlib.pyplot as plt

from scripts import tabular_baselines

import seaborn as sns
import numpy as np

from datasets import load_openml_list, valid_dids_classification, test_dids_classification, open_cc_dids
from scripts.tabular_baselines import *
from scripts.tabular_evaluation import evaluate
from scripts.tabular_metrics import calculate_score, make_ranks_and_wins_table, make_metric_matrix
from scripts import tabular_metrics

In [2]:
from notebook_utils import *

In [3]:
%load_ext autoreload

%autoreload 2

# Datasets

In [4]:
cc_test_datasets_multiclass, cc_test_datasets_multiclass_df = load_openml_list(open_cc_dids, multiclass=True, shuffled=True, filter_for_nan=False, max_samples = 10000, num_feats=100, return_capped=True)


Number of datasets: 30
Loading balance-scale 11 ..
Loading mfeat-fourier 14 ..
Loading breast-w 15 ..
Loading mfeat-karhunen 16 ..
Loading mfeat-morphological 18 ..
Loading mfeat-zernike 22 ..
Loading cmc 23 ..
Loading credit-approval 29 ..
Loading credit-g 31 ..
Loading diabetes 37 ..
Loading tic-tac-toe 50 ..
Loading vehicle 54 ..
Loading eucalyptus 188 ..
Loading analcatdata_authorship 458 ..
Loading analcatdata_dmft 469 ..
Loading pc4 1049 ..
Loading pc3 1050 ..
Loading kc2 1063 ..
Loading pc1 1068 ..
Loading banknote-authentication 1462 ..
Loading blood-transfusion-service-center 1464 ..
Loading ilpd 1480 ..
Loading qsar-biodeg 1494 ..
Loading wdbc 1510 ..
Loading cylinder-bands 6332 ..
Loading dresses-sales 23381 ..
Loading MiceProtein 40966 ..
Loading car 40975 ..
Loading steel-plates-fault 40982 ..
Loading climate-model-simulation-crashes 40994 ..


In [5]:
def get_datasets(selector, task_type, suite='openml'):
    if task_type == 'binary':
        ds = valid_datasets_binary if selector == 'valid' else test_datasets_binary
    else:
        if suite == 'openml':
            ds = valid_datasets_multiclass if selector == 'valid' else test_datasets_multiclass
        elif suite == 'cc':
            ds = valid_datasets_multiclass if selector == 'valid' else cc_test_datasets_multiclass
        else:
            raise Exception("Unknown suite")
    return ds

# Setting parameters

In [14]:
eval_positions = [1000]
max_features = 100
bptt = 2000
selector = 'test'
base_path = os.path.join('.')
overwrite=False
max_times = [0.5, 1, 5, 30, 60, 60*5, 60*30]
metric_used = tabular_metrics.auc_metric
task_type = 'multiclass'

methods = ['tab_transformer',
           'i_transformer_2_1',
           'logistic',
           'knn',
           'gp',
           'lgbm',
           'xgb',
           'autosklearn',
           'autosklearn2',
           ]

In [15]:
suite = 'cc'
test_datasets = get_datasets('test',task_type, suite=suite)

In [16]:
clf_dict= {
            'gp': gp_metric 
          , 'knn': knn_metric
          , 'xgb': xgb_metric
          , 'i_transformer_2_1': transformer_metric # our iTabPFN - trained 12h
          , 'tab_transformer': transformer_metric # original TabPFN - trained 12h
          , 'logistic': logistic_metric
          , 'autosklearn': autosklearn_metric
          , 'autosklearn2': autosklearn2_metric
          , 'lgbm': lightgbm_metric 
          , 'catboost': catboost_metric # not used
          , 'autogluon': autogluon_metric # not used
          }

In [7]:
device = 'cpu'

def eval_method(task_type, method, dids, selector, eval_positions, max_time, metric_used, split_number, append_metric=True, fetch_only=False, verbose=False):
    
    dids = dids if type(dids) is list else [dids]
    
    for did in dids:

        ds = get_datasets(selector, task_type, suite=suite)

        ds = ds if did is None else ds[did:did+1]

        clf = clf_dict[method]

        time_string = '_time_'+str(max_time) if max_time else ''
        metric_used_string = '_'+tabular_baselines.get_scoring_string(metric_used, usage='') if append_metric else ''

        result = evaluate(datasets=ds
                          , model=clf
                          , method=method+time_string+metric_used_string
                          , bptt=bptt, base_path=base_path
                          , eval_positions=eval_positions
                          , device=device, max_splits=1
                          , overwrite=overwrite
                          , save=True
                          , metric_used=metric_used
                          , path_interfix=task_type
                          , fetch_only=fetch_only
                          , split_number=split_number
                          , verbose=verbose
                          , max_time=max_time)
    
    return result

# Baseline Evaluation
This section runs baselines and saves results locally.

In [12]:
!mkdir {base_path}/results
!mkdir {base_path}/results/tabular/
!mkdir {base_path}/results/tabular/multiclass/

mkdir: {base_path}: No such file or directory
mkdir: {base_path}/results: No such file or directory
mkdir: {base_path}/results/tabular: No such file or directory


In [17]:
# RUN ONE METHOD ON ONE DATASET AND SPLIT
overwrite=True
dataset_id = 0
split_number = 1
maximum_runtime = 1
r = eval_method(task_type, 'knn', dataset_id, 'test', eval_positions, maximum_runtime, metric_used, split_number)

dids
[0]


Calculating splits<function knn_metric at 0x7fd424f5cf80> 312:   0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 1/1000 [00:00<01:06, 15.01trial/s, best loss: -0.7700197956577267]

Calculating splits<function knn_metric at 0x7fd424f5cf80> 312:   0%|          | 0/1 [00:04<?, ?it/s]


saved results to ./results/tabular/multiclass/results_knn_time_1_roc_auc_balance-scale_312_624_1.npy





In [18]:
# RUN ALL METHODS, SPLITS AND DATASETS
test_datasets = get_datasets('test',task_type, suite=suite)

methods = [
           #'tab_transformer',
           #'i_transformer_2_1',
           #'logistic',
           'knn',
           #'gp',
           #'lgbm',
           #'xgb',
           #'autosklearn',
           #'autosklearn2',
           ]

# max_times = [0.5, 1, 5, 30, 60, 60*5, 60*30]
max_times = [0.5]

overwrite=True
jobs = [
    eval_method(task_type, m, did, selector, eval_positions, max_time, metric_used, split_number)
    for did in range(0, len(test_datasets))
    for selector in ['test']
    for m in methods
    for max_time in max_times
    for split_number in [1, 2, 3, 4, 5]
]

dids
[0]


Calculating splits<function knn_metric at 0x7fd424f5cf80> 312:   0%|          | 0/1 [00:00<?, ?it/s]

  1%|          | 8/1000 [00:00<00:53, 18.49trial/s, best loss: -0.8888663260962112]


Calculating splits<function knn_metric at 0x7fd424f5cf80> 312:   0%|          | 0/1 [00:00<?, ?it/s]


saved results to ./results/tabular/multiclass/results_knn_time_0.5_roc_auc_balance-scale_312_624_1.npy
dids
[0]


Calculating splits<function knn_metric at 0x7fd424f5cf80> 312:   0%|          | 0/1 [00:00<?, ?it/s]

  1%|          | 10/1000 [00:00<00:47, 20.85trial/s, best loss: -0.8783904107640668]


Calculating splits<function knn_metric at 0x7fd424f5cf80> 312:   0%|          | 0/1 [00:00<?, ?it/s]


saved results to ./results/tabular/multiclass/results_knn_time_0.5_roc_auc_balance-scale_312_624_2.npy
dids
[0]


Calculating splits<function knn_metric at 0x7fd424f5cf80> 312:   0%|          | 0/1 [00:00<?, ?it/s]

  1%|          | 9/1000 [00:00<00:50, 19.43trial/s, best loss: -0.8517916545260296]


Calculating splits<function knn_metric at 0x7fd424f5cf80> 312:   0%|          | 0/1 [00:00<?, ?it/s]


saved results to ./results/tabular/multiclass/results_knn_time_0.5_roc_auc_balance-scale_312_624_3.npy
dids
[0]


Calculating splits<function knn_metric at 0x7fd424f5cf80> 312:   0%|          | 0/1 [00:00<?, ?it/s]

  1%|          | 10/1000 [00:00<00:45, 21.78trial/s, best loss: -0.9096907498631637]


Calculating splits<function knn_metric at 0x7fd424f5cf80> 312:   0%|          | 0/1 [00:00<?, ?it/s]


saved results to ./results/tabular/multiclass/results_knn_time_0.5_roc_auc_balance-scale_312_624_4.npy
dids
[0]


Calculating splits<function knn_metric at 0x7fd424f5cf80> 312:   0%|          | 0/1 [00:00<?, ?it/s]

  1%|          | 10/1000 [00:00<00:46, 21.23trial/s, best loss: -0.8401243078075515]


Calculating splits<function knn_metric at 0x7fd424f5cf80> 312:   0%|          | 0/1 [00:00<?, ?it/s]


saved results to ./results/tabular/multiclass/results_knn_time_0.5_roc_auc_balance-scale_312_624_5.npy
dids
[1]


Calculating splits<function knn_metric at 0x7fd424f5cf80> 1000:   0%|          | 0/1 [00:00<?, ?it/s]


KeyboardInterrupt: 