In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
import os
import pprint
import sys
from pathlib import Path
from collections import OrderedDict

import numpy as np
from scipy.stats import wilcoxon

REPO_DIR = os.path.abspath('..')  # path to the root of the repository
sys.path.append(REPO_DIR)
os.environ["PROJECT_DIR"] = REPO_DIR
import lib
import lib.env as env

In [3]:
CALIFORNIA = 'california_housing'  
ADULT = 'adult' 
HELENA = 'helena'
JANNIS = 'jannis'
HIGGS = 'higgs_small'
ALOI = 'aloi' 
EPSILON = 'epsilon' 
YEAR = 'year'
COVTYPE = 'covtype'
YAHOO = 'yahoo'
MICROSOFT = 'microsoft'  
SMALL_DATASETS = [CALIFORNIA, ADULT, HELENA, JANNIS, HIGGS]
LARGE_DATASETS = [ALOI, EPSILON, YEAR, COVTYPE, YAHOO, MICROSOFT]
ALL_DATASETS = SMALL_DATASETS + LARGE_DATASETS

ABLATION_DATASETS = [CALIFORNIA, HELENA, JANNIS, HIGGS, ALOI, YEAR, COVTYPE, MICROSOFT]

In [6]:
baseline_nn = set([
    'snn/tuned',
    'tabnet/tuned',
    'grownet/tuned',
    'dcn2/tuned',
    'autoint/tuned',
    'node/tuned',
    'mlp/tuned',
    'resnet/tuned',
])

gbdt = set([
    'catboost/default',
    'xgboost/default',
    'catboost/tuned',
    'xgboost/tuned',
])

In [7]:
eval_baseline_nn = {
    CALIFORNIA: baseline_nn,
    ADULT: (baseline_nn),
    HELENA: (baseline_nn - {'node/tuned', 'grownet/tuned'}) | {'node/default'},
    JANNIS: baseline_nn - {'grownet/tuned'},
    HIGGS: baseline_nn,
    ALOI: (baseline_nn - {'node/tuned', 'grownet/tuned'}) | {'node/default'},
    EPSILON: (baseline_nn - {'autoint/tuned'}) | {'autoint/tuned'},
    YEAR: baseline_nn,
    COVTYPE: baseline_nn - {'grownet/tuned'},
    YAHOO: (baseline_nn - {'autoint/tuned'}) | {'autoint/tuned'},
    MICROSOFT: baseline_nn,
}

eval_baseline_nn_tr = {
    CALIFORNIA: baseline_nn | {'ft_transformer/tuned', 'ft_transformer/default'},
    ADULT: (baseline_nn) | {'ft_transformer/tuned', 'ft_transformer/default'},
    HELENA: (baseline_nn - {'node/tuned', 'grownet/tuned'}) | {'ft_transformer/tuned', 'ft_transformer/default'},
    JANNIS: (baseline_nn - {'grownet/tuned'}) | {'ft_transformer/tuned', 'ft_transformer/default'}, 
    HIGGS: baseline_nn | {'ft_transformer/tuned', 'ft_transformer/default'},
    ALOI: (baseline_nn - {'node/tuned', 'grownet/tuned'}) | {'ft_transformer/tuned', 'ft_transformer/default'},
    EPSILON: (baseline_nn - {'autoint/tuned'}) | {'autoint/tuned', 'ft_transformer/tuned', 'ft_transformer/default'},
    YEAR: baseline_nn | {'ft_transformer/tuned', 'ft_transformer/default'},
    COVTYPE: (baseline_nn - {'grownet/tuned'}) | {'ft_transformer/tuned', 'ft_transformer/default'},
    YAHOO: (baseline_nn - {'autoint/tuned'}) | {'autoint/tuned', 'ft_transformer/tuned', 'ft_transformer/default'},
    MICROSOFT: baseline_nn | {'ft_transformer/tuned', 'ft_transformer/default'},
}

eval_all = {
    CALIFORNIA: baseline_nn | gbdt | {'ft_transformer/tuned', 'ft_transformer/default'},
    ADULT: (baseline_nn) | {'ft_transformer/tuned', 'ft_transformer/default'} | gbdt,
    HELENA: (baseline_nn - {'node/tuned', 'grownet/tuned'}) | {'node/default'} | gbdt | {'ft_transformer/tuned', 'ft_transformer/default'},
    JANNIS: (baseline_nn - {'grownet/tuned'}) | gbdt | {'ft_transformer/tuned', 'ft_transformer/default'}, 
    HIGGS: baseline_nn | gbdt | {'ft_transformer/tuned', 'ft_transformer/default'},
    ALOI: (baseline_nn - {'node/tuned', 'grownet/tuned'}) | (gbdt - {'catboost/tuned', 'xgboost/tuned'}) | {'ft_transformer/tuned', 'ft_transformer/default'},
    EPSILON: (baseline_nn - {'autoint/tuned'}) | gbdt | {'autoint/tuned', 'ft_transformer/tuned', 'ft_transformer/default'},
    YEAR: baseline_nn | gbdt | {'ft_transformer/tuned', 'ft_transformer/default'},
    COVTYPE: (baseline_nn - {'grownet/tuned'}) | gbdt | {'ft_transformer/tuned', 'ft_transformer/default'},
    YAHOO: (baseline_nn - {'autoint/tuned'}) | gbdt | {'autoint/tuned', 'ft_transformer/tuned', 'ft_transformer/default'},
    MICROSOFT: baseline_nn | gbdt | {'ft_transformer/tuned', 'ft_transformer/default'},
}

eval_ablation = {
    CALIFORNIA: {'autoint/tuned', 'ft_transformer/tuned_nobias', 'ft_transformer/tuned'},
    HELENA: {'autoint/tuned', 'ft_transformer/tuned_nobias', 'ft_transformer/tuned'},
    JANNIS: {'autoint/tuned', 'ft_transformer/tuned_nobias', 'ft_transformer/tuned'},
    HIGGS: {'autoint/tuned', 'ft_transformer/tuned_nobias', 'ft_transformer/tuned'},
    ALOI: {'autoint/tuned', 'ft_transformer/tuned_nobias', 'ft_transformer/tuned'},
    YEAR: {'autoint/tuned', 'ft_transformer/tuned_nobias', 'ft_transformer/tuned'},
    COVTYPE: {'autoint/tuned', 'ft_transformer/tuned_nobias', 'ft_transformer/tuned'},
    MICROSOFT: {'autoint/tuned', 'ft_transformer/tuned_nobias', 'ft_transformer/tuned'},
}

In [8]:
alpha = 0.01

In [9]:
def eval_algos(ds_algos, ensemble=False):
    out = {}
    for ds in ds_algos:
        print(f"\n{ds}\n")

        if ensemble:
            algos_score = OrderedDict([
                (alg, [lib.load_json(env.OUTPUT_DIR/ds/f"{alg}_ensemble/{i}_{i+4}/stats.json")["metrics"]["test"]["score"] for i in range(0,15,5)])
                for alg in ds_algos[ds]
            ])
        else:
            algos_score = OrderedDict([
                (alg, [lib.load_json(env.OUTPUT_DIR/ds/f"{alg}/{i}/stats.json")["metrics"]["test"]["score"] for i in range(0,15)])
                for alg in ds_algos[ds]
            ])

        algos_mean_score = OrderedDict([
            (alg, np.mean(score)) for alg, score in algos_score.items()
        ])

        best_name = sorted(list(algos_mean_score.items()), key=lambda x: x[1], reverse=True)[0][0]
        pprint.pprint(sorted(list(algos_mean_score.items()), key=lambda x: x[1], reverse=True))

        print("\nbest algos:")
        best_algos = [best_name]
        if ensemble:
            out[ds] = best_algos
            continue

        print(best_name)

        for name, scores in list(algos_score.items()):
            if name == best_name: continue

            s, p = wilcoxon(
                algos_score[best_name], scores, 
                alternative='greater', 
                zero_method='zsplit', 
                mode='auto'
            )

            if p > alpha:
                best_algos.append(name)
                print(name)
                
        out[ds] = best_algos
    return out

In [10]:
baseline_nn_best_algos = eval_algos(eval_baseline_nn)
baseline_nn_tr_best_algos = eval_algos(eval_baseline_nn_tr)
all_best_algos = eval_algos(eval_all)


california_housing

[('node/tuned', -0.46418097891832194),
 ('autoint/tuned', -0.4787448456249333),
 ('dcn2/tuned', -0.48554570407429154),
 ('resnet/tuned', -0.4866866196712799),
 ('mlp/tuned', -0.4944019630749705),
 ('grownet/tuned', -0.4997131082124803),
 ('snn/tuned', -0.5067156099585352),
 ('tabnet/tuned', -0.5128057526591715)]

best algos:
node/tuned

adult

[('snn/tuned', 0.8163252030684363),
 ('resnet/tuned', 0.8162524900674354),
 ('autoint/tuned', 0.8011928444970526),
 ('mlp/tuned', 0.7959205758660582),
 ('tabnet/tuned', 0.7958362057843792),
 ('node/tuned', 0.7935806345675621),
 ('grownet/tuned', 0.7934302596068459),
 ('dcn2/tuned', 0.7840384936668073)]

best algos:
snn/tuned
resnet/tuned

helena

[('resnet/tuned', 0.3959969325153375),
 ('dcn2/tuned', 0.38527096114519427),
 ('mlp/tuned', 0.383241308793456),
 ('tabnet/tuned', 0.3781697341513292),
 ('snn/tuned', 0.3727709611451942),
 ('autoint/tuned', 0.372239263803681),
 ('node/default', 0.3592740286298569)]

best algos:
resnet

In [11]:
baseline_nn_best_algos_ens = eval_algos(eval_baseline_nn, ensemble=True)
baseline_nn_tr_best_algos_ens = eval_algos(eval_baseline_nn_tr, ensemble=True)
all_best_algos_ens = eval_algos(eval_all, ensemble=True)


california_housing

[('autoint/tuned', -0.46132644858747446),
 ('node/tuned', -0.461444634464033),
 ('dcn2/tuned', -0.47757468992898516),
 ('resnet/tuned', -0.4781371106445265),
 ('grownet/tuned', -0.4826446964775039),
 ('snn/tuned', -0.48457810878212354),
 ('mlp/tuned', -0.4885223695605175),
 ('tabnet/tuned', -0.4915098951807906)]

best algos:

adult

[('snn/tuned', 0.8218651600909231),
 ('resnet/tuned', 0.8175798185928241),
 ('tabnet/tuned', 0.8092007612753244),
 ('mlp/tuned', 0.8055258535230835),
 ('autoint/tuned', 0.8045825709184379),
 ('node/tuned', 0.7966370524543539),
 ('grownet/tuned', 0.7941248104286857),
 ('dcn2/tuned', 0.7860548068886969)]

best algos:

helena

[('resnet/tuned', 0.3981083844580777),
 ('tabnet/tuned', 0.39084867075664625),
 ('mlp/tuned', 0.3902351738241309),
 ('dcn2/tuned', 0.3883691206543967),
 ('autoint/tuned', 0.382157464212679),
 ('snn/tuned', 0.3803936605316973),
 ('node/default', 0.3609406952965235)]

best algos:

jannis

[('tabnet/tuned', 0.7338229732

In [12]:
ablation_best_algos = eval_algos(eval_ablation)


california_housing

[('ft_transformer/tuned', -0.46386973448925317),
 ('ft_transformer/tuned_nobias', -0.4720844349853418),
 ('autoint/tuned', -0.4787448456249333)]

best algos:
ft_transformer/tuned

helena

[('ft_transformer/tuned', 0.3913292433537832),
 ('ft_transformer/tuned_nobias', 0.3811145194274028),
 ('autoint/tuned', 0.372239263803681)]

best algos:
ft_transformer/tuned

jannis

[('ft_transformer/tuned', 0.7305228797197507),
 ('ft_transformer/tuned_nobias', 0.724432236619494),
 ('autoint/tuned', 0.7162397245277761)]

best algos:
ft_transformer/tuned

higgs_small

[('ft_transformer/tuned', 0.7277718320076882),
 ('autoint/tuned', 0.7256000301245393),
 ('ft_transformer/tuned_nobias', 0.7234556220921753)]

best algos:
ft_transformer/tuned
autoint/tuned

aloi

[('ft_transformer/tuned', 0.9604691358024692),
 ('ft_transformer/tuned_nobias', 0.9579166666666665),
 ('autoint/tuned', 0.9452314814814814)]

best algos:
ft_transformer/tuned

year

[('ft_transformer/tuned', -8.8196175278882

Ablation table (table 4)

In [31]:
for alg, name in zip(
    ['autoint/tuned', 'ft_transformer/tuned_nobias', 'ft_transformer/tuned'],
    ['AutoInt', 'Transformer', '\\architecture'],
):
    print(f"{name:<15}", end=" & ")
    for ds in ABLATION_DATASETS:
        score = abs(np.mean([lib.load_json(env.OUTPUT_DIR/ds/f"{alg}/{i}/stats.json")["metrics"]["test"]["score"] for i in range(15)]))
        if alg in ablation_best_algos[ds]:
            out_str = "$\\mathbf{" f"{score:.3f}" "}$"
        else:
            out_str = f"{score:.3f}"

        print(out_str, end=" & " if ds != ABLATION_DATASETS[-1] else " \\\\\n")

AutoInt         & 0.479 & 0.372 & 0.716 & $\mathbf{0.726}$ & 0.945 & 8.875 & 0.931 & 0.752 \\
Transformer     & 0.472 & 0.381 & 0.724 & 0.723 & 0.958 & $\mathbf{8.821}$ & 0.962 & 0.751 \\
\architecture   & $\mathbf{0.464}$ & $\mathbf{0.391}$ & $\mathbf{0.731}$ & $\mathbf{0.728}$ & $\mathbf{0.960}$ & $\mathbf{8.820}$ & $\mathbf{0.964}$ & $\mathbf{0.747}$ \\


In [14]:
precisions = [3, 3, 4, 3, 3, 3, 4, 3, 4, 3, 4]

Single models table (table 1)

In [32]:
print("""\midrule
\multicolumn{12}{c}{Baseline Neural Networks}\\\\
\midrule""")

for alg, name in zip([
    'snn/tuned',
    'tabnet/tuned',
    'grownet/tuned',
    'dcn2/tuned',
    'autoint/tuned',
    'mlp/tuned',
    'node/tuned',
    'resnet/tuned',
    'ft_transformer/default',
    'ft_transformer/tuned',
    'catboost/default',
    'catboost/tuned',
    'xgboost/default',
    'xgboost/tuned',
], [
    'SNN',
    'TabNet',
    'GrowNet',
    'DCN2',
    'AutoInt',
    'MLP',
    'NODE',
    'ResNet',
    '\\architecture\\textsubscript{d}',
    '\\architecture',
    'CatBoost\\textsubscript{d}',
    'CatBoost',
    'XGBoost\\textsubscript{d}',
    'XGBoost',
]):

    if alg == "ft_transformer/default": print("""\midrule
\multicolumn{12}{c}{\\architecture}\\\\
\midrule""")

    if alg == "catboost/default": print("""\midrule
\multicolumn{12}{c}{GBDT}\\\\
\midrule""")

    print(f"{name:<40}", end=" & ")
    old_alg = alg
    for ds, pr in zip(ALL_DATASETS[:1] + [ADULT] + ALL_DATASETS[1:], precisions):
        if ds in [HELENA, ALOI] and alg in ["node/tuned"]: old_alg = alg; alg = "node/default"
        if ds in [JANNIS, ALOI, HELENA, COVTYPE] and alg in ["grownet/tuned"]: print("--", end=" &  "); continue
        if ds in [YAHOO, EPSILON] and alg in ["autoint/tuned"]: old_alg = alg; alg = "autoint/tuned"
        if ds in [YAHOO, EPSILON] and alg in ["ft_transformer/tuned"]: old_alg = alg; alg = "ft_transformer/tuned"
        if ds in [ALOI] and alg in ["catboost/tuned", "xgboost/tuned"]: print("--", end=" &  "); continue

        score = abs(np.mean([lib.load_json(env.OUTPUT_DIR/ds/f"{alg}/{i}/stats.json")["metrics"]["test"]["score"] for i in range(15)]))
        if alg in all_best_algos[ds]:
            out_str = "$\\mathbf{\\textcolor{red}{" f"{score:.{pr}f}" "}}$"
        elif alg in baseline_nn_tr_best_algos[ds]:
            out_str = "$\\mathbf{\\textcolor{blue}{" f"{score:.{pr}f}" "}}$"
        elif alg in baseline_nn_best_algos[ds]:
            out_str = "$\\mathbf{" f"{score:.{pr}f}" "}$"
        else:
            out_str = f"${score:.{pr}f}$"

        print(f"{out_str:>5}", end=" ")
        if ds == ALL_DATASETS[-1]:
            print("\\\\")
        else:
            print("", end = "& ")
        alg = old_alg

\midrule
\multicolumn{12}{c}{Baseline Neural Networks}\\
\midrule
SNN                                      & $0.507$ & $\mathbf{\textcolor{red}{0.816}}$ & $0.3728$ & $0.718$ & $0.721$ & $0.954$ & $0.8970$ & $8.881$ & $0.9465$ & $0.769$ & $0.7521$ \\
TabNet                                   & $0.513$ & $0.796$ & $0.3782$ & $0.724$ & $0.717$ & $0.954$ & $0.8902$ & $9.032$ & $0.9335$ & $0.819$ & $0.7565$ \\
GrowNet                                  & $0.500$ & $0.793$ & -- &  -- &  $0.724$ & -- &  $\mathbf{0.8977}$ & $8.866$ & -- &  $0.775$ & $0.7549$ \\
DCN2                                     & $0.486$ & $0.784$ & $0.3853$ & $0.714$ & $0.720$ & $0.955$ & $\mathbf{0.8975}$ & $8.939$ & $0.9491$ & $0.766$ & $0.7500$ \\
AutoInt                                  & $0.479$ & $0.801$ & $0.3722$ & $0.716$ & $\mathbf{\textcolor{red}{0.726}}$ & $0.945$ & $0.8948$ & $8.875$ & $0.9312$ & $0.795$ & $0.7517$ \\
MLP                                      & $0.494$ & $0.796$ & $0.3832$ & $0.719$ & $0.721$ 

Ensembles table (table 2)

In [34]:
print("""\midrule
\multicolumn{12}{c}{Baseline Neural Networks}\\\\
\midrule""")

for alg, name in zip([
    'snn/tuned',
    'tabnet/tuned',
    'grownet/tuned',
    'dcn2/tuned',
    'autoint/tuned',
    'mlp/tuned',
    'node/tuned',
    'resnet/tuned',
    'ft_transformer/default',
    'ft_transformer/tuned',
    'catboost/default',
    'catboost/tuned',
    'xgboost/default',
    'xgboost/tuned',
], [
    'SNN',
    'TabNet',
    'GrowNet',
    'DCN2',
    'AutoInt',
    'MLP',
    'NODE',
    'ResNet',
    '\\architecture\\textsubscript{d}',
    '\\architecture',
    'CatBoost\\textsubscript{d}',
    'CatBoost',
    'XGBoost\\textsubscript{d}',
    'XGBoost',
]):
    if alg == "ft_transformer/default": print("""\midrule
\multicolumn{12}{c}{\\architecture}\\\\
\midrule""")

    if alg == "catboost/default": print("""\midrule
\multicolumn{12}{c}{GBDT}\\\\
\midrule""")

    print(f"{name:<30}", end=" & ")
    old_alg = alg
    for ds, pr in zip(ALL_DATASETS[:1] + [ADULT] + ALL_DATASETS[1:], precisions):
        if ds in [HELENA, ALOI]   and alg in ["node/tuned"]: old_alg = alg; alg = "node/default"
        if ds in [YAHOO, EPSILON] and alg in ["ft_transformer/tuned"]: old_alg = alg; alg = "ft_transformer/tuned"
        if ds in [YAHOO, EPSILON] and alg in ["autoint/tuned"]: old_alg = alg; alg = "autoint/tuned"
        if ds in [JANNIS, ALOI, HELENA, COVTYPE] and alg in ["grownet/tuned"]: print("--", end=" &  "); continue
        if ds in [ALOI] and alg in ["catboost/tuned", "xgboost/tuned"]: print("--", end=" &  "); continue

        score = abs(
            np.mean([lib.load_json(env.OUTPUT_DIR/ds/f"{alg}_ensemble/{i}_{i+4}/stats.json")["metrics"]["test"]["score"] for i in range(0,15,5)]))

        if alg in all_best_algos_ens[ds]:
            out_str = "$\\mathbf{\\textcolor{red}{" f"{score:.{pr}f}" "}}$"
        elif alg in baseline_nn_tr_best_algos_ens[ds]:
            out_str = "$\\mathbf{\\textcolor{blue}{" f"{score:.{pr}f}" "}}$"
        elif alg in baseline_nn_best_algos_ens[ds]:
            out_str = "$\\mathbf{" f"{score:.{pr}f}" "}$"
        else:
            out_str = f"${score:.{pr}f}$"

        print(f"{out_str}", end=" ")
        if ds == ALL_DATASETS[-1]:
            print("\\\\", end="")
        else:
            print("", end = "& ")
        alg = old_alg

    print("")

\midrule
\multicolumn{12}{c}{Baseline Neural Networks}\\
\midrule
SNN                            & $0.485$ & $\mathbf{\textcolor{red}{0.822}}$ & $0.3804$ & $0.722$ & $0.725$ & $0.962$ & $0.8971$ & $8.747$ & $0.9542$ & $0.762$ & $0.7487$ \\
TabNet                         & $0.492$ & $0.809$ & $0.3908$ & $\mathbf{0.734}$ & $0.724$ & $0.961$ & $0.8952$ & $8.773$ & $0.9497$ & $0.814$ & $0.7505$ \\
GrowNet                        & $0.483$ & $0.794$ & -- &  -- &  $0.731$ & -- &  $\mathbf{\textcolor{red}{0.8986}}$ & $8.718$ & -- &  $0.766$ & $0.7504$ \\
DCN2                           & $0.478$ & $0.786$ & $0.3884$ & $0.721$ & $0.721$ & $0.960$ & $0.8977$ & $8.764$ & $0.9575$ & $0.762$ & $0.7488$ \\
AutoInt                        & $\mathbf{0.461}$ & $0.805$ & $0.3822$ & $0.728$ & $\mathbf{\textcolor{red}{0.732}}$ & $0.959$ & $0.8967$ & $8.730$ & $0.9519$ & $0.782$ & $0.7477$ \\
MLP                            & $0.489$ & $0.806$ & $0.3902$ & $0.722$ & $0.726$ & $0.960$ & $0.8970$ & $8.716$ & $

Dataset description

In [None]:
import inspect

x,y,z = 1,2,3

def retrieve_name(var):
    callers_local_vars = inspect.currentframe().f_back.f_locals.items()
    return [var_name for var_name, var_val in callers_local_vars if var_val is var]


for ds in ALL_DATASETS:
    D = lib.Dataset.from_dir(env.DATA_DIR/"tabular"/ds)
    # print(D.info)
    print(
        " ".join(s.capitalize() for s in D.info["basename"].split('_')[:2]), 
        retrieve_name(ds)[0][:2], 
        f"${D.info['train_size']}$",
        f"${D.info['val_size']}$",
        f"${D.info['test_size']}$",
        f"${D.info['n_num_features']}$",
        f"${D.info['n_cat_features']}$",
        D.info['task_type'].capitalize(),
        lib.load_toml(env.OUTPUT_DIR/f"{ds}/resnet/tuned/0.toml")["training"]["batch_size"],
        sep=" & ", end=" "
    )
    print("\\\\")