In [1]:
import json
import os
import sys
from copy import deepcopy
from pathlib import Path

import pandas as pd

# Repository root, resolved relative to the current working directory
# (the notebook is expected to be run from a direct sub-directory of the repo).
REPO_DIR = os.path.abspath('..')  # path to the root of the repository
# Make the repository root importable so that `import lib` below resolves.
sys.path.append(REPO_DIR)
# NOTE(review): presumably `lib` reads PROJECT_DIR while it is being imported,
# which is why the variable is set before the import -- confirm before reordering.
os.environ["PROJECT_DIR"] = REPO_DIR
import lib

# Render report tables in full: never truncate rows or columns.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [2]:
# Dataset identifiers. They are used both as keys of DATASET_NAMES and as the
# first path component of every experiment directory.
CALIFORNIA = 'california_housing'
ADULT = 'adult'
HELENA = 'helena'
JANNIS = 'jannis'
HIGGS = 'higgs_small'
ALOI = 'aloi'
EPSILON = 'epsilon'
YEAR = 'year'
COVTYPE = 'covtype'
YAHOO = 'yahoo'
MICROSOFT = 'microsoft'

# Human-readable dataset titles shown in the report tables.
DATASET_NAMES = {
    CALIFORNIA: 'California Housing',
    ADULT: 'Adult',
    HELENA: 'Helena',
    JANNIS: 'Jannis',
    HIGGS: 'Higgs Small',
    ALOI: 'ALOI',
    EPSILON: 'Epsilon',
    YEAR: 'Year',
    COVTYPE: 'Covertype',
    YAHOO: 'Yahoo',
    MICROSOFT: 'Microsoft',
}
# Every dataset identifier, in the declaration order of DATASET_NAMES.
ALL_DATASETS = list(DATASET_NAMES)
# Datasets whose stored task type equals lib.REGRESSION.
REGRESSION_DATASETS = [
    name
    for name in ALL_DATASETS
    if lib.load_dataset_info(name)['task_type'] == lib.REGRESSION
]
# Dataset-level columns that become part of the report index.
DETAILS = ['task_type', 'n_objects', 'n_features']
# Data splits for which scores are looked up.
PARTS = ['test', 'val', 'train']


def format_scores(df, precision):
    """Prepare score columns of a report frame for display.

    For every row whose ``task_type`` equals ``lib.REGRESSION``, the
    ``{part}_best`` and ``{part}_score`` values (for each part in ``PARTS``,
    when the column exists) are multiplied by -1.
    NOTE(review): presumably regression scores are stored negated so that
    "higher is better" holds for all tasks -- confirm against ``lib``.

    Afterwards every ``float`` value in the row is rounded to ``precision``
    decimal digits. Negative zero is normalised to ``0.0`` so that a
    zero-filled regression score is not rendered as ``-0.0``.

    Args:
        df: DataFrame with a ``task_type`` column.
        precision: number of decimal digits passed to ``round``.

    Returns:
        A new DataFrame; ``df`` itself is not modified.
    """
    sign_flipped = [
        f'{part}_{suffix}' for part in PARTS for suffix in ('best', 'score')
    ]

    def f(record):
        # Work on a copy so that the caller's frame is never mutated through
        # a row that happens to be a view.
        record = record.copy()
        if record['task_type'] == lib.REGRESSION:
            for key in sign_flipped:
                if key in record:
                    record[key] *= -1
        for k, v in list(record.items()):
            if isinstance(v, float):
                # `+ 0.0` turns -0.0 into 0.0 and leaves every other value as is.
                record[k] = round(v, precision) + 0.0
        return record

    return df.apply(f, axis=1)


def load_record(output):
    """Build one report record from an experiment output directory.

    The directory must live under ``lib.env.OUTPUT_DIR`` as
    ``<dataset>/<algorithm>/<experiment>/<suffix>`` and contain ``stats.json``.

    Args:
        output: path (``str`` or ``Path``) to the output directory.

    Returns:
        ``None`` when the directory does not exist, when ``stats.json`` is
        missing (a warning is printed in that case) or when the stats have no
        ``'metrics'`` entry. Otherwise a dict with the dataset title, the
        dataset details, the algorithm label, the suffix (key ``'s'``) and one
        ``'{part}_score'`` entry for each part of ``PARTS`` found in the metrics.

    Raises:
        ValueError: if ``output`` is not located under ``lib.env.OUTPUT_DIR``
            or has fewer than four path components below it.
    """
    output = Path(output)
    if not output.exists():
        return None
    path = output / 'stats.json'
    if not path.exists():
        print(f'WARNING! This path does not exist: {path}')
        return None
    stats = lib.load_json(path)
    metrics = stats.get('metrics')
    if metrics is None:
        return None

    # Dataset details are looked up by the dataset name recorded in the stats.
    info = lib.load_dataset_info(Path(stats['dataset']).name)
    # as_posix() guarantees '/' separators on every platform; maxsplit=3 yields
    # at most four fields, so any deeper nesting stays inside `suffix` instead
    # of breaking the unpacking.
    relative = output.relative_to(lib.env.OUTPUT_DIR).as_posix()
    dataset, algorithm, experiment, suffix = relative.split('/', 3)
    r = {
        'dataset': DATASET_NAMES[dataset],
        'task_type': info['task_type'],
        'n_objects': info['size'],
        'n_features': info['n_num_features'] + info['n_cat_features'],
        'algorithm': f'{algorithm} | {experiment}',
        's': suffix,
    }
    for part in PARTS:
        if part in metrics:
            r[f'{part}_score'] = metrics[part]['score']
    return r


def sort(df, by):
    """Order report rows by dataset size and then by the given columns.

    Rows are sorted by ``n_objects`` in ascending order first. Every column in
    ``by`` is sorted ascending as well, unless its name contains ``'score'``,
    in which case it is sorted descending. The index of the result is reset.

    Args:
        df: DataFrame with an ``n_objects`` column.
        by: a column name or a list of column names.
    """
    keys = [by] if isinstance(by, str) else by
    columns = ['n_objects'] + keys
    directions = [True]
    for key in keys:
        directions.append('score' not in key)
    ordered = df.sort_values(columns, ascending=directions)
    return ordered.reset_index(drop=True)


def make_df(outputs_and_names):
    """Load all available records into one DataFrame.

    Args:
        outputs_and_names: iterable of ``(output, algorithm_name)`` pairs.
            Outputs for which ``load_record`` returns nothing are skipped.
            A non-``None`` ``algorithm_name`` replaces the record's
            ``'algorithm'`` value.

    Returns:
        DataFrame with missing values replaced by ``0.0``, ordered with
        ``sort`` by ``'val_score'``.
    """
    records = []
    for output, algorithm_name in outputs_and_names:
        record = load_record(output)
        if record:
            if algorithm_name is not None:
                record['algorithm'] = algorithm_name
            records.append(record)
    frame = pd.DataFrame(records).fillna(0.0)
    return sort(frame, 'val_score').reset_index(drop=True)


def collect_outputs(experiment_dir, filter_info=None):
    """List the sub-directories of an experiment directory.

    Args:
        experiment_dir: a ``Path`` used as is, or any other value that is
            joined onto ``lib.env.OUTPUT_DIR``.
        filter_info: selects which sub-directories are kept:
            * ``None`` -- keep all of them;
            * ``int`` n -- keep those named ``'0'`` ... ``str(n - 1)``;
            * ``list`` of ``str`` -- keep those whose name is in the list;
            * callable -- keep those for which it returns a true value.

    Returns:
        List of sub-directory paths; empty when the directory does not exist.
    """
    if isinstance(filter_info, int):
        filter_info = [str(index) for index in range(filter_info)]

    if isinstance(filter_info, list):
        assert all(isinstance(name, str) for name in filter_info)

        def keep(path):
            return path.name in filter_info
    elif callable(filter_info):
        keep = filter_info
    else:
        assert filter_info is None

        def keep(path):
            return True

    if not isinstance(experiment_dir, Path):
        experiment_dir = lib.env.OUTPUT_DIR / experiment_dir
    if not experiment_dir.exists():
        return []
    return [
        entry
        for entry in experiment_dir.iterdir()
        if entry.is_dir() and keep(entry)
    ]


def aggregate(df):
    """Summarise per-run scores for every (dataset, algorithm) pair.

    For each group the dataset details are taken from the first row, the test
    and validation scores are reduced to their mean and standard deviation,
    and ``count`` holds the number of test scores. Train statistics are added
    only when ``df`` has a ``train_score`` column.

    Returns:
        DataFrame with ``dataset`` and ``algorithm`` as regular columns and
        missing values (e.g. the std of a single-run group) replaced by 0.0.
    """
    spec = {name: (name, 'first') for name in ('task_type', 'n_objects', 'n_features')}
    for part in ('test', 'val'):
        spec[f'{part}_score'] = (f'{part}_score', 'mean')
        spec[f'{part}_std'] = (f'{part}_score', 'std')
    spec['count'] = ('test_score', 'count')
    if 'train_score' in df.columns:
        spec['train_score'] = ('train_score', 'mean')
        spec['train_std'] = ('train_score', 'std')

    summary = df.groupby(['dataset', 'algorithm']).agg(**spec)
    summary['count'] = summary['count'].astype(int)
    summary = summary.reset_index()
    return summary.fillna(0.0)


def build_report(outputs_and_names):
    """Build the final report table for a collection of experiment outputs.

    The records are loaded with ``make_df``, summarised with ``aggregate``,
    ordered with ``sort`` by ``'test_score'`` and formatted with
    ``format_scores`` using 4 decimal digits.

    Args:
        outputs_and_names: iterable of ``(output, algorithm_name)`` pairs, as
            accepted by ``make_df``.

    Returns:
        DataFrame indexed by dataset, the ``DETAILS`` columns and algorithm,
        with the test/validation statistics, the train statistics (only when
        they are available) and the run count.
    """
    df = make_df(outputs_and_names)
    df = aggregate(df)
    df = sort(df, 'test_score')
    df = format_scores(df, 4)
    df = df.set_index(['dataset'] + DETAILS + ['algorithm'])

    columns = ['test_score', 'test_std', 'val_score', 'val_std']
    # `aggregate` emits the train statistics only when train scores exist, so
    # they are optional here as well instead of causing a KeyError.
    columns += [name for name in ('train_score', 'train_std') if name in df.columns]
    columns.append('count')
    return df[columns]

## Default configurations (GBDT and FT-Transformer)

In [3]:
# Report for the default configurations: single runs and their ensembles.
all_datasets = set(ALL_DATASETS)
n_seeds = 15  # single runs are read from the sub-directories '0' ... '14'
ensemble_names = ['0_4', '5_9', '10_14']  # sub-directories read from '<experiment>_ensemble'
outputs_and_names = []
for experiment, algorithm_name, datasets in [
    ('ft_transformer/default', 'FT-Transformer', all_datasets),
    ('catboost/default', 'CatBoost', all_datasets),
    ('xgboost/default', 'XGBoost', all_datasets),
]:
    for dataset in datasets:
        # Single runs first, then the ensembles (labelled with the '(e) ' prefix).
        outputs_and_names.extend(
            (output, algorithm_name)
            for output in collect_outputs(f'{dataset}/{experiment}', n_seeds)
        )
        outputs_and_names.extend(
            (output, f'(e) {algorithm_name}')
            for output in collect_outputs(f'{dataset}/{experiment}_ensemble', ensemble_names)
        )
build_report(outputs_and_names)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,test_score,test_std,val_score,val_std,train_score,train_std,count
dataset,task_type,n_objects,n_features,algorithm,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
California Housing,regression,20640,8,(e) CatBoost,0.4281,0.0001,0.4447,0.0006,0.29,0.0008,3
California Housing,regression,20640,8,CatBoost,0.4303,0.0008,0.4468,0.0013,0.2918,0.0007,15
California Housing,regression,20640,8,(e) FT-Transformer,0.4543,0.0014,0.4615,0.0019,0.3572,0.0056,3
California Housing,regression,20640,8,XGBoost,0.4622,0.0,0.4851,0.0,0.2391,0.0,15
California Housing,regression,20640,8,(e) XGBoost,0.4622,0.0,0.4851,0.0,0.2391,0.0,3
California Housing,regression,20640,8,FT-Transformer,0.4688,0.0039,0.4761,0.0044,0.3741,0.0241,15
Adult,binclass,48842,14,XGBoost,0.8741,0.0,0.8729,0.0,0.8942,0.0,15
Adult,binclass,48842,14,(e) XGBoost,0.8741,0.0,0.8729,0.0,0.8942,0.0,3
Adult,binclass,48842,14,(e) CatBoost,0.8735,0.0005,0.8749,0.001,0.8939,0.0029,3
Adult,binclass,48842,14,CatBoost,0.8727,0.001,0.8749,0.0007,0.8935,0.0038,15


## All Neural Networks

In [4]:
# Report for all neural networks (single runs only).
all_datasets = set(ALL_DATASETS)
n_seeds = 15  # single runs are read from the sub-directories '0' ... '14'
outputs_and_names = []
for experiment, algorithm_name, datasets in [
    # !!! TUTORIAL !!! Uncomment the following line for completing the tutorial:
    # ('mlp/tuned_reproduced', 'MLP | reproduced', [CALIFORNIA]),
    ('mlp/tuned', 'MLP', all_datasets),
    ('resnet/tuned', 'ResNet', all_datasets),
    ('snn/tuned', 'SNN', all_datasets),
    ('dcn2/tuned', 'DCN V2', all_datasets),
    ('tabnet/tuned', 'TabNet', all_datasets),
    ('grownet/tuned', 'GrowNet', all_datasets - {HELENA, JANNIS, ALOI, COVTYPE}),  # GrowNet does not support multiclass problems
    ('node/tuned', 'NODE', all_datasets - {HELENA, ALOI}),
    ('node/default', 'NODE', {HELENA, ALOI}),
    ('autoint/tuned', 'AutoInt', all_datasets),
    ('ft_transformer/tuned', 'FT-Transformer', all_datasets - {YAHOO}),
    ('ft_transformer/default', 'FT-Transformer | default', {YAHOO}),
]:
    for dataset in datasets:
        outputs_and_names.extend(
            (output, algorithm_name)
            for output in collect_outputs(f'{dataset}/{experiment}', n_seeds)
        )
build_report(outputs_and_names)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,test_score,test_std,val_score,val_std,train_score,train_std,count
dataset,task_type,n_objects,n_features,algorithm,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
California Housing,regression,20640,8,FT-Transformer,0.4591,0.0036,0.4639,0.0038,0.3697,0.0111,15
California Housing,regression,20640,8,NODE,0.4636,0.0016,0.4757,0.0015,-0.0,0.0,15
California Housing,regression,20640,8,AutoInt,0.4739,0.0034,0.4827,0.0053,0.4017,0.0118,15
California Housing,regression,20640,8,DCN V2,0.4837,0.0025,0.4846,0.0024,0.4143,0.0088,15
California Housing,regression,20640,8,ResNet,0.4856,0.0031,0.4899,0.0021,0.4212,0.0109,15
California Housing,regression,20640,8,GrowNet,0.4869,0.0074,0.4963,0.0062,0.4274,0.0117,15
California Housing,regression,20640,8,SNN,0.4933,0.0048,0.4999,0.0034,0.4086,0.0126,15
California Housing,regression,20640,8,MLP,0.4985,0.0031,0.507,0.0037,0.392,0.0185,15
California Housing,regression,20640,8,TabNet,0.51,0.0079,0.5172,0.0078,0.4681,0.0089,15
Adult,binclass,48842,14,AutoInt,0.8589,0.0016,0.8593,0.0013,0.8661,0.001,15


## Main Neural Networks and GBDT

In [5]:
# Report for the main neural networks and GBDT models: single runs and ensembles.
all_datasets = set(ALL_DATASETS)
n_seeds = 15  # single runs are read from the sub-directories '0' ... '14'
ensemble_names = ['0_4', '5_9', '10_14']  # sub-directories read from '<experiment>_ensemble'
outputs_and_names = []
for experiment, algorithm_name, datasets in [
    ('mlp/tuned', 'MLP', all_datasets),
    ('resnet/tuned', 'ResNet', all_datasets),
    ('ft_transformer/tuned', 'FT-Transformer', all_datasets - {YAHOO}),
    ('ft_transformer/default', 'FT-Transformer | default', all_datasets),
    ('catboost/tuned', 'CatBoost', all_datasets),
    ('xgboost/tuned', 'XGBoost', all_datasets),
    ('lightgbm_/tuned', 'LightGBM', {CALIFORNIA, ADULT, HIGGS}),
]:
    for dataset in datasets:
        # Single runs first, then the ensembles (labelled with the '(e) ' prefix).
        outputs_and_names.extend(
            (output, algorithm_name)
            for output in collect_outputs(f'{dataset}/{experiment}', n_seeds)
        )
        outputs_and_names.extend(
            (output, f'(e) {algorithm_name}')
            for output in collect_outputs(f'{dataset}/{experiment}_ensemble', ensemble_names)
        )
build_report(outputs_and_names)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,test_score,test_std,val_score,val_std,train_score,train_std,count
dataset,task_type,n_objects,n_features,algorithm,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
California Housing,regression,20640,8,(e) CatBoost,0.4234,0.0011,0.4371,0.0012,0.0748,0.0033,3
California Housing,regression,20640,8,(e) LightGBM,0.4268,0.0015,0.4418,0.0008,0.1343,0.0041,3
California Housing,regression,20640,8,CatBoost,0.4308,0.0016,0.4445,0.0023,0.0778,0.0149,15
California Housing,regression,20640,8,(e) XGBoost,0.4314,0.0004,0.4457,0.0005,0.1501,0.006,3
California Housing,regression,20640,8,LightGBM,0.4322,0.0036,0.4471,0.0036,0.1375,0.0213,15
California Housing,regression,20640,8,XGBoost,0.4334,0.0017,0.4478,0.0023,0.1512,0.0153,15
California Housing,regression,20640,8,(e) FT-Transformer,0.448,0.0009,0.4526,0.0016,0.3573,0.0085,3
California Housing,regression,20640,8,(e) FT-Transformer | default,0.4543,0.0014,0.4615,0.0019,0.3572,0.0056,3
California Housing,regression,20640,8,FT-Transformer,0.4591,0.0036,0.4639,0.0038,0.3697,0.0111,15
California Housing,regression,20640,8,FT-Transformer | default,0.4688,0.0039,0.4761,0.0044,0.3741,0.0241,15


## Ablation Study

In [6]:
# Report for the ablation study, restricted to the datasets listed below.
all_datasets = {
    CALIFORNIA,
    HELENA,
    JANNIS,
    HIGGS,
    ALOI,
    YEAR,
    COVTYPE,
    MICROSOFT,
}
n_seeds = 15  # single runs are read from the sub-directories '0' ... '14'
outputs_and_names = []
for experiment, algorithm_name, datasets in [
    ('autoint/tuned', 'AutoInt', all_datasets),
    ('ft_transformer/tuned_nobias', 'FT-Transformer | nobias', all_datasets),
    ('ft_transformer/tuned', 'FT-Transformer', all_datasets),
]:
    for dataset in datasets:
        outputs_and_names.extend(
            (output, algorithm_name)
            for output in collect_outputs(f'{dataset}/{experiment}', n_seeds)
        )
build_report(outputs_and_names)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,test_score,test_std,val_score,val_std,train_score,train_std,count
dataset,task_type,n_objects,n_features,algorithm,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
California Housing,regression,20640,8,FT-Transformer,0.4591,0.0036,0.4639,0.0038,0.3697,0.0111,15
California Housing,regression,20640,8,FT-Transformer | nobias,0.4701,0.006,0.4798,0.0046,0.4042,0.0157,15
California Housing,regression,20640,8,AutoInt,0.4739,0.0034,0.4827,0.0053,0.4017,0.0118,15
Helena,multiclass,65196,27,FT-Transformer,0.3913,0.0013,0.3883,0.0016,0.5162,0.0257,15
Helena,multiclass,65196,27,FT-Transformer | nobias,0.3811,0.0017,0.3783,0.0017,0.5147,0.0236,15
Helena,multiclass,65196,27,AutoInt,0.3722,0.0027,0.3674,0.0026,0.4488,0.016,15
Jannis,multiclass,83733,54,FT-Transformer,0.7323,0.0021,0.7373,0.0017,0.7761,0.0115,15
Jannis,multiclass,83733,54,FT-Transformer | nobias,0.7243,0.0041,0.7299,0.0042,0.751,0.0139,15
Jannis,multiclass,83733,54,AutoInt,0.7211,0.0024,0.7259,0.0019,0.7576,0.0091,15
Higgs Small,binclass,98050,28,FT-Transformer,0.729,0.0016,0.7388,0.0014,0.7566,0.0067,15
