In [1]:
import torch
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score, balanced_accuracy_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
import imblearn
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from collections import defaultdict
from tqdm import trange
import seaborn as sns
import os
import matplotlib.pyplot as plt
from io import BytesIO
import base64
import sklearn
import time
import datetime
import random

# Matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8*" and later
# releases dropped the old names, so prefer the new name and fall back to the legacy
# one on installations that do not know it yet.
plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")
# Make float32 the default dtype for newly created floating-point tensors.
torch.set_default_dtype(torch.float32)

In [2]:
from comet_ml import Experiment, Optimizer

In [3]:
import pyhopper

In [4]:
from loguru import logger

# Register log.txt as an additional loguru sink with a "timestamp | message" line format.
# logger.add returns the id of the new handler, which is the value echoed below the cell.
# NOTE(review): re-running this cell registers another sink for the same file each time,
# which would duplicate lines in log.txt — confirm whether a guard is wanted.
logger.add("log.txt", format='{time:YYYY-MM-DD HH:mm:ss.SSS} | {message}')

1

In [5]:
# Wildcard import. The helpers called below without a module prefix (set_seed, get_data,
# initial_split, pyhopper_best_params, test_model, print_mean_std_max, the
# get_parametrized_* factories, ...) are not defined anywhere in this notebook, so they
# presumably come from this module.
# NOTE(review): consider importing those names explicitly so their origin is visible.
from hypertab_benchmark_utils import *

# GLOBALS

In [6]:
# Device string handed to the torch-based model factories and searches below.
DEVICE="cpu"  # alternative kept for reference: "cuda:0"

In [7]:
# NOTE(review): GS_METRIC is not referenced by any cell visible in this part of the
# notebook; presumably it names the model-selection metric — confirm where it is read.
GS_METRIC = "balanced_accuracy"

In [8]:
# Seed passed to set_seed(...) at the start of each model section and to the XGBoost factory.
SEED = 42

In [9]:
# Presumably toggles a shortened smoke-test mode in the benchmark helpers.
TEST_RUN = False

# The flag is published through the environment as the *string* "True" / "False".
# NOTE(review): "False" is a non-empty (truthy) string, so the consumer has to compare
# the text rather than test truthiness — confirm how HYPERTAB_TEST_RUN is parsed. Several
# saved outputs below still print "THIS IS TEST RUN" although TEST_RUN is False here.
# NOTE(review): this assignment runs after hypertab_benchmark_utils was imported; if that
# module reads the variable at import time, the value set here is not picked up.
os.environ["HYPERTAB_TEST_RUN"] = str(TEST_RUN)

In [10]:
# Search budget passed as time= to the Random Subspace and Ensembles searches below; the
# other searches visible here hard-code their own budgets ("180m", "60m", "20m", "10m").
TIME_BUDGET = "30m"

In [11]:
# Dataset identifier: passed to get_data, to every search as DATA=, and to
# print_mean_std_max when results are summarised.
DATA = "Hill-Valley-with-noise"

# Load data

In [12]:
set_seed(SEED)

In [13]:
# Load the full feature matrix and labels for the selected dataset; the leading
# underscore marks them as the unsplit originals (the split is done further below).
_X, _y = get_data(DATA)

train (606, 100) 2
test (606, 100) 2


In [14]:
n_classes = get_n_classes(_X, _y)
n_features = get_n_features(_X, _y)

n_classes 2
n_features 100


In [15]:
get_each_class_counts(_X, _y)

class counts {0: 606, 1: 606}


{0: 606, 1: 606}

# Split

In [16]:
# Partition the full dataset into train and test parts.
X_train, X_test, y_train, y_test = initial_split(_X, _y)

# Bundle both partitions as (train, test) pairs; the evaluation cells hand these to
# test_model as data=(X, y).
X = (X_train, X_test)
y = (y_train, y_test)

In [17]:
# Recompute on the training split only. These assignments overwrite the values obtained
# from the full dataset above and are the ones passed to the model factories below.
n_classes = get_n_classes(X_train, y_train)
n_features = get_n_features(X_train, y_train)

n_classes 2
n_features 100


In [18]:
get_each_class_counts(X_train, y_train)

class counts {0: 307, 1: 299}


{0: 307, 1: 299}

In [19]:
# Two training-set sizes derived from X_train: eval_train_max_size is used as train_size
# in the final test_model evaluations, train_max_size in the hyperparameter searches.
eval_train_max_size, train_max_size = get_eval_and_benchmark_size(X_train=X_train)

eval_max_size 606
train_max_size 454


# TRAIN MODELS

## Common hyperparams

In [20]:
# Candidate mask sizes are fixed fractions of the feature count, plus a mask of 2.
_MASK_FRACTIONS = (0.1, 0.25, 0.5, 0.7, 0.9)

# Truncate each fraction to an integer size, then drop sizes below 1 (a fraction of a
# small feature count truncates to 0) and repeated sizes (which would be over-represented
# in pyhopper.choice). dict.fromkeys de-duplicates while keeping the original order.
_mask_sizes = list(
    dict.fromkeys(
        size
        for size in (int(n_features * fraction) for fraction in _MASK_FRACTIONS)
        if size >= 1
    )
)
# Always offer a mask of 2 features, placed first as before.
if 2 not in _mask_sizes:
    _mask_sizes.insert(0, 2)

print('_mask_sizes', _mask_sizes)

class CommonHyperparams:
    """Search-space definitions shared by the tuning grids in this notebook.

    Every attribute is a pyhopper parameter that the per-model ``param_grid``
    dictionaries reference, so the compared models are tuned over the same ranges.
    """

    # Learning-rate candidates, one per decade.
    lr = pyhopper.choice([3e-5, 3e-4, 3e-3, 3e-2, 3e-1])
    # Batch size between 32 and 512. The two trailing arguments were positional in the
    # original call; they are spelled out here as the initial value and the step.
    batch_size = pyhopper.int(32, 512, init=32, multiple_of=32)
    # Used as mask_size for the hypernetwork and as first_hidden_layer for the
    # Random Subspace / Ensembles grids.
    ht_mask_size = pyhopper.choice(_mask_sizes)
    # Used as target_size for the hypernetwork and as second_hidden_layer for the
    # Random Subspace / Ensembles grids.
    ht_target_size = pyhopper.choice([5, 10, 20, 50])
    # Used as masks_no for the hypernetwork and as n_models for the Ensembles grid.
    ht_mask_no = pyhopper.choice([5, 10, 50, 100, 150, 200, 300])
    # Used as epochs for the torch models and as max_iter for Random Subspace / Ensembles.
    ht_epochs = pyhopper.choice([100, 200])
    # Used as first_hidden_layer for the 3-layer dropout network and the hypernetwork.
    ht_first_hidden = pyhopper.choice([32, 64, 256, 512])
    

_mask_sizes [2, 10, 25, 50, 70, 90]


## NODE

In [21]:
# Reseed so the NODE section starts from the same seed as every other model section.
set_seed(SEED)

# Factory for NODE models; it is called with hyperparameters (node_fn(**params)) below.
node_fn = get_parametrized_node_fn(
    X_train=X_train,
    n_classes=n_classes,
    n_features=n_features,
    DEVICE=DEVICE,
)

#### Tune hyperparams

In [21]:
# Earlier definition of this space (it appears to use a hyperopt-style API), kept for reference:
# 'layer_dim': hp.quniform('layer_dim', 100, 1200, 100),
# 'num_layers': hp.quniform('num_layers', 1, 4, 1),
# 'depth': hp.quniform('depth', 2, 7, 1)

# NODE search space.
param_grid = dict(
    layer_dim=pyhopper.int(64, 1024, power_of=2),
    num_layers=pyhopper.int(1, 5),
    depth=pyhopper.int(2, 7),
    batch_size=pyhopper.choice([32, 64, 128]),
)

# Tune on the training split only; this search uses its own budget of "180m" instead of
# TIME_BUDGET.
node_best, node_history = pyhopper_best_params(
    model_fn=node_fn, param_grid=param_grid,
    data=(X_train, y_train), train_size=train_max_size,
    DATA=DATA, device=DEVICE,
    time="180m",
)

# Display the selected hyperparameters.
node_best

THIS IS TEST RUN
| DEVICE: cpu
| model_fn node_fn

pyhopper X.shape: (606, 100) y.shape: (606,) train_size: 454


  0%|          | [00:00<?]

Search is scheduled for 01:00 (m:s)
params {'layer_dim': 256, 'num_layers': 3, 'depth': 4, 'batch_size': 32}
iter 1 of 1 X_train shape torch.Size([454, 100])


  bin_codes = (indices.view(1, -1) // offsets.view(-1, 1) % 2).to(torch.float32)
  warn("Data-aware initialization is performed on less than 1000 data points. This may cause instability."
	add_(Number alpha, Tensor other)
Consider using one of the following signatures instead:
	add_(Tensor other, *, Number alpha) (Triggered internally at  ../torch/csrc/utils/python_arg_parser.cpp:1055.)
  exp_avg.mul_(beta1_adj).add_(1.0 - beta1_adj, d_p)


Mode              : Best f : Steps : Time       
----------------  : ----   : ----  : ----       
Initial solution  : 50     : 1     : 18:24 (m:s)
----------------  : ----   : ----  : ----       
Total             : 50     : 1     : 18:24 (m:s)
Hill-Valley-with-noise_node_fn_{'layer_dim': 256, 'num_layers': 3, 'depth': 4, 'batch_size': 32}


  _warn_prf(average, modifier, msg_start, len(result))


{'layer_dim': 256, 'num_layers': 3, 'depth': 4, 'batch_size': 32}

#### Use best hyperparams

In [22]:
%%capture --no-stdout

# The cell magic above captures this cell's output except stdout, which is still shown.
# Evaluate NODE with the tuned hyperparameters on the (train, test) pairs built in the
# Split section, using the evaluation-time training size and 10 requested iterations.
node_results = test_model(
    model_fn=node_fn(**node_best),
    data=(X, y),
    train_size=eval_train_max_size,
    iters=10
)

iter 1 of 1 X_train shape torch.Size([606, 100])


In [23]:
print_mean_std_max(node_results, DATA)

metric balanced_accuracy
dataset_name Hill-Valley-with-noise
Hill-Valley-with-noise: 50.00 ~ nan (max: 50.00)


## Dropout Neural network

#### Find Hyperparams

### Dropout 1 layer

In [24]:
# Reseed so this section starts from the same seed as every other model section.
set_seed(SEED)

# Factory for the 1-layer dropout network; called with hyperparameters below.
network_fn1 = get_parametrized_dropout_net1(
    DEVICE=DEVICE,
    n_features=n_features,
    n_classes=n_classes,
)

In [25]:
# Search space for the 1-layer dropout network.
# The original literal listed "batch_size" twice. Python keeps the last value of a
# repeated key, so CommonHyperparams.batch_size was silently discarded and the search
# only ever sampled 32 or 64. The dead entry is removed and the effective one is kept at
# the key's original position, so the resulting dictionary is unchanged.
# NOTE(review): the 2- and 3-layer grids use CommonHyperparams.batch_size — confirm
# whether the narrower range here is intentional.
param_grid = {
    "epochs": CommonHyperparams.ht_epochs,
    "lr": CommonHyperparams.lr,
    "batch_size": pyhopper.choice([32, 64]),
    "drop1": pyhopper.choice([0.1, 0.3, 0.5, 0.7], is_ordinal=True),
    "drop2": pyhopper.choice([0.1, 0.3, 0.5, 0.7], is_ordinal=True),
}

# Tune on the training split only, with a search budget of "60m".
nn_fn1_best_params, nn_fn1_history = pyhopper_best_params(
    model_fn=network_fn1,
    param_grid=param_grid,
    data=(X_train, y_train),
    train_size=train_max_size,
    DATA=DATA,
    device=DEVICE,
    time="60m",
)

# Display the selected hyperparameters.
nn_fn1_best_params

THIS IS TEST RUN
| DEVICE: cpu
| model_fn network_fn1

pyhopper X.shape: (606, 100) y.shape: (606,) train_size: 454


  0%|          | [00:00<?]

Search is scheduled for 01:00 (m:s)
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 32, 'drop1': 0.3, 'drop2': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.3, 'batch_size': 64, 'drop1': 0.7, 'drop2': 0.7}
iter 1 of 1 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'lr': 0.03, 'batch_size': 64, 'drop1': 0.7, 'drop2': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.03, 'batch_size': 64, 'drop1': 0.1, 'drop2': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.3, 'batch_size': 64, 'drop1': 0.7, 'drop2': 0.5}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 32, 'drop1': 0.1, 'drop2': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.0003, 'batch_size': 32, 'drop1': 0.1, 'drop2': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.0003, 'batch_size': 32, 'drop1': 0.1, 'drop2': 0.5}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.3, 'batch_size': 64, 'drop1': 0.1, 'drop2': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 64, 'drop1': 0.5, 'drop2': 0.1}
iter 1 of 1 X

  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'lr': 0.003, 'batch_size': 32, 'drop1': 0.1, 'drop2': 0.7}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.003, 'batch_size': 64, 'drop1': 0.3, 'drop2': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.0003, 'batch_size': 32, 'drop1': 0.3, 'drop2': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.0003, 'batch_size': 32, 'drop1': 0.1, 'drop2': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.3, 'batch_size': 32, 'drop1': 0.1, 'drop2': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.0003, 'batch_size': 64, 'drop1': 0.1, 'drop2': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.003, 'batch_size': 64, 'drop1': 0.1, 'drop2': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.003, 'batch_size': 32, 'drop1': 0.1, 'drop2': 0.1}
iter 1 o

  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'lr': 0.03, 'batch_size': 32, 'drop1': 0.1, 'drop2': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.3, 'batch_size': 64, 'drop1': 0.1, 'drop2': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 32, 'drop1': 0.3, 'drop2': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 64, 'drop1': 0.1, 'drop2': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 64, 'drop1': 0.1, 'drop2': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.3, 'batch_size': 64, 'drop1': 0.3, 'drop2': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'lr': 3e-05, 'batch_size': 64, 'drop1': 0.3, 'drop2': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.3, 'batch_size': 32, 'drop1': 0.1, 'drop2': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.03, 'batch_size': 64, 'drop1': 0.3, 'drop2': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
Mode              : Best f : Steps : Time  
----------------  : ----   : ----  : ----  
Initial solution  : 50     : 1     : 1.46 s
Random seeding    : 53.61  : 19    : 29 s  
Local sampling    : 54.94  : 17    : 29 s  
Duplicates        : -      : 58    : -     
----------------  : ----   : ----  : ----  
Total             : 54.94  : 95    : 60 s  
Hill-Valley-with-noise_network_fn1_{'epochs': 10, 'lr': 0.003, 'batch_size': 64, 'drop1': 0.1, 'drop2': 0.1}


{'epochs': 10, 'lr': 0.003, 'batch_size': 64, 'drop1': 0.1, 'drop2': 0.1}

In [26]:
# Evaluate the 1-layer dropout network with its tuned hyperparameters on the
# (train, test) pairs, using the evaluation-time training size.
nn1_results = test_model(
    model_fn=network_fn1(**nn_fn1_best_params), data=(X, y),
    train_size=eval_train_max_size, iters=10,
)

iter 1 of 1 X_train shape torch.Size([606, 100])


In [27]:
print_mean_std_max(nn1_results, DATA)

metric balanced_accuracy
dataset_name Hill-Valley-with-noise
Hill-Valley-with-noise: 50.15 ~ nan (max: 50.15)


### Dropout 2 layers

In [28]:
# Reseed so this section starts from the same seed as every other model section.
set_seed(SEED)

# Factory for the 2-layer dropout network; called with hyperparameters below.
network_fn2 = get_parametrized_dropout_net2(
    DEVICE=DEVICE,
    n_features=n_features,
    n_classes=n_classes,
)

In [29]:
# Search space for the 2-layer dropout network: the shared epochs / learning-rate /
# batch-size ranges first ...
param_grid = {
    "epochs": CommonHyperparams.ht_epochs,
    "lr": CommonHyperparams.lr,
    "batch_size": CommonHyperparams.batch_size,
}
# ... followed by one independent ordinal dropout-rate choice per dropout parameter
# (drop1, drop2, drop3), appended in that order.
param_grid.update(
    ("drop{}".format(layer), pyhopper.choice([0.1, 0.3, 0.5, 0.7], is_ordinal=True))
    for layer in (1, 2, 3)
)

# Tune on the training split only, with a search budget of "60m".
nn_fn2_best_params, nn_fn2_history = pyhopper_best_params(
    model_fn=network_fn2, param_grid=param_grid,
    data=(X_train, y_train), train_size=train_max_size,
    DATA=DATA, device=DEVICE,
    time="60m",
)
# Display the selected hyperparameters.
nn_fn2_best_params

THIS IS TEST RUN
| DEVICE: cpu
| model_fn network_fn2

pyhopper X.shape: (606, 100) y.shape: (606,) train_size: 454


  0%|          | [00:00<?]

Search is scheduled for 01:00 (m:s)
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 32, 'drop1': 0.3, 'drop2': 0.3, 'drop3': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.003, 'batch_size': 96, 'drop1': 0.1, 'drop2': 0.5, 'drop3': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.0003, 'batch_size': 256, 'drop1': 0.3, 'drop2': 0.7, 'drop3': 0.7}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.0003, 'batch_size': 480, 'drop1': 0.7, 'drop2': 0.1, 'drop3': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 384, 'drop1': 0.1, 'drop2': 0.3, 'drop3': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 96, 'drop1': 0.7, 'drop2': 0.1, 'drop3': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.0003, 'batch_size': 160, 'drop1': 0.3, 'drop2': 0.3, 'drop3': 0.1

  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'lr': 3e-05, 'batch_size': 64, 'drop1': 0.1, 'drop2': 0.5, 'drop3': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 480, 'drop1': 0.3, 'drop2': 0.3, 'drop3': 0.5}
iter 1 of 1 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'lr': 0.0003, 'batch_size': 160, 'drop1': 0.7, 'drop2': 0.3, 'drop3': 0.7}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.03, 'batch_size': 224, 'drop1': 0.3, 'drop2': 0.7, 'drop3': 0.5}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.03, 'batch_size': 96, 'drop1': 0.7, 'drop2': 0.5, 'drop3': 0.5}
iter 1 of 1 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'lr': 3e-05, 'batch_size': 224, 'drop1': 0.7, 'drop2': 0.7, 'drop3': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.03, 'batch_size': 416, 'drop1': 0.5, 'drop2': 0.5, 'drop3': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.03, 'batch_size': 288, 'drop1': 0.3, 'drop2': 0.3, 'drop3': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.03, 'batch_size': 160, 'drop1': 0.3, 'drop2': 0.3, 'drop3': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.0003, 'batch_size': 192, 'drop1': 0.3, 'drop2': 0.1, 'drop3': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.0003, 'batch_size': 160, 'drop1': 0.5, 'drop2': 0.3, 'drop3': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.0003, 'batch_size': 224, 'drop1': 0.3, 'drop2': 0.3, 'drop3': 0.1}
iter 1 of 1 X_train shape torch.Si

  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'lr': 0.003, 'batch_size': 160, 'drop1': 0.3, 'drop2': 0.3, 'drop3': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.0003, 'batch_size': 256, 'drop1': 0.3, 'drop2': 0.3, 'drop3': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.3, 'batch_size': 160, 'drop1': 0.3, 'drop2': 0.1, 'drop3': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.3, 'batch_size': 224, 'drop1': 0.1, 'drop2': 0.3, 'drop3': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.0003, 'batch_size': 416, 'drop1': 0.5, 'drop2': 0.3, 'drop3': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.0003, 'batch_size': 288, 'drop1': 0.3, 'drop2': 0.3, 'drop3': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.3, 'batch_size': 288, 'drop1': 0.3, 'drop2': 0.3, 'drop3': 0.3}
iter 1 of 1 X_train shape torch.Size(

  _warn_prf(average, modifier, msg_start, len(result))


{'epochs': 10,
 'lr': 0.0003,
 'batch_size': 256,
 'drop1': 0.3,
 'drop2': 0.3,
 'drop3': 0.1}

In [30]:
# Evaluate the 2-layer dropout network with its tuned hyperparameters on the
# (train, test) pairs, using the evaluation-time training size.
nn2_results = test_model(
    model_fn=network_fn2(**nn_fn2_best_params), data=(X, y),
    train_size=eval_train_max_size, iters=10,
)

iter 1 of 1 X_train shape torch.Size([606, 100])


In [31]:
print_mean_std_max(nn2_results, DATA)

metric balanced_accuracy
dataset_name Hill-Valley-with-noise
Hill-Valley-with-noise: 50.88 ~ nan (max: 50.88)


### Dropout 3 layers

In [32]:
# Reseed so this section starts from the same seed as every other model section.
set_seed(SEED)

# Factory for the 3-layer dropout network; called with hyperparameters below.
network_fn3 = get_parametrized_dropout_net3(
    DEVICE=DEVICE,
    n_features=n_features,
    n_classes=n_classes,
)

In [33]:
# Search space for the 3-layer dropout network: the shared ranges (including the width
# of the first hidden layer) first ...
param_grid = {
    "epochs": CommonHyperparams.ht_epochs,
    "lr": CommonHyperparams.lr,
    "batch_size": CommonHyperparams.batch_size,
    "first_hidden_layer": CommonHyperparams.ht_first_hidden,
}
# ... followed by one independent ordinal dropout-rate choice per dropout parameter
# (drop1 .. drop4), appended in that order.
param_grid.update(
    ("drop{}".format(layer), pyhopper.choice([0.1, 0.3, 0.5, 0.7], is_ordinal=True))
    for layer in (1, 2, 3, 4)
)

# Tune on the training split only, with a search budget of "60m".
nn_fn3_best_params, nn_fn3_history = pyhopper_best_params(
    model_fn=network_fn3, param_grid=param_grid,
    data=(X_train, y_train), train_size=train_max_size,
    DATA=DATA, device=DEVICE,
    time="60m",
)

# Display the selected hyperparameters.
nn_fn3_best_params

THIS IS TEST RUN
| DEVICE: cpu
| model_fn network_fn3

pyhopper X.shape: (606, 100) y.shape: (606,) train_size: 454


  0%|          | [00:00<?]

Search is scheduled for 01:00 (m:s)
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 32, 'drop1': 0.3, 'drop2': 0.3, 'drop3': 0.3, 'drop4': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 320, 'drop1': 0.7, 'drop2': 0.7, 'drop3': 0.3, 'drop4': 0.7}
iter 1 of 1 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'lr': 0.3, 'batch_size': 512, 'drop1': 0.5, 'drop2': 0.5, 'drop3': 0.5, 'drop4': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.3, 'batch_size': 128, 'drop1': 0.1, 'drop2': 0.1, 'drop3': 0.7, 'drop4': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.0003, 'batch_size': 64, 'drop1': 0.3, 'drop2': 0.3, 'drop3': 0.5, 'drop4': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 128, 'drop1': 0.7, 'drop2': 0.7, 'drop3': 0.1, 'drop4': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 320, 'drop1': 0.5, 'drop2': 0.7, 'drop3': 0.3, 'drop4': 0.7}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 512, 'drop1': 0.3, 'drop2': 0.5, 'drop3': 0.5, 'drop4': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 5

  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'lr': 0.003, 'batch_size': 256, 'drop1': 0.1, 'drop2': 0.5, 'drop3': 0.3, 'drop4': 0.5}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 288, 'drop1': 0.7, 'drop2': 0.3, 'drop3': 0.5, 'drop4': 0.5}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 320, 'drop1': 0.5, 'drop2': 0.3, 'drop3': 0.7, 'drop4': 0.7}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.03, 'batch_size': 224, 'drop1': 0.1, 'drop2': 0.7, 'drop3': 0.3, 'drop4': 0.5}
iter 1 of 1 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'lr': 0.03, 'batch_size': 224, 'drop1': 0.1, 'drop2': 0.7, 'drop3': 0.1, 'drop4': 0.5}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.003, 'batch_size': 288, 'drop1': 0.3, 'drop2': 0.7, 'drop3': 0.5, 'drop4': 0.1}
iter 1 of 1 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'lr': 0.03, 'batch_size': 384, 'drop1': 0.5, 'drop2': 0.5, 'drop3': 0.7, 'drop4': 0.5}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.0003, 'batch_size': 352, 'drop1': 0.3, 'drop2': 0.5, 'drop3': 0.1, 'drop4': 0.7}
iter 1 of 1 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'lr': 0.0003, 'batch_size': 320, 'drop1': 0.3, 'drop2': 0.1, 'drop3': 0.5, 'drop4': 0.5}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 32, 'drop1': 0.1, 'drop2': 0.7, 'drop3': 0.3, 'drop4': 0.7}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.003, 'batch_size': 32, 'drop1': 0.1, 'drop2': 0.7, 'drop3': 0.1, 'drop4': 0.7}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.3, 'batch_size': 64, 'drop1': 0.1, 'drop2': 0.7, 'drop3': 0.1, 'drop4': 0.7}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 0.3, 'batch_size': 32, 'drop1': 0.1, 'drop2': 0.7, 'drop3': 0.3, 'drop4': 0.7}
iter 1 of 1 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'lr': 0.003, 'batch_size': 32, 'drop1': 0.1, 'drop2': 0.7, 'drop3': 0.3, 'drop4': 0.7}
iter 1 of 1 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'lr': 3e-05, 'batch_size': 32, 'drop1': 0.3, 'drop2': 0.3, 'drop3': 0.5, 'drop4': 0.3}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'lr': 3e-05, 'batch_size': 96, 'drop1': 0.1, 'drop2': 0.7, 'drop3': 0.3, 'drop4': 0.7}
iter 1 of 1 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'lr': 3e-05, 'batch_size': 32, 'drop1': 0.1, 'drop2': 0.3, 'drop3': 0.3, 'drop4': 0.5}
iter 1 of 1 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'lr': 0.03, 'batch_size': 32, 'drop1': 0.1, 'drop2': 0.7, 'drop3': 0.1, 'drop4': 0.7}
iter 1 of 1 X_train shape torch.Size([454, 100])
Mode              : Best f : Steps : Time       
----------------  : ----   : ----  : ----       
Initial solution  : 50     : 1     : 3.45 s     
Random seeding    : 53.61  : 19    : 28 s       
Local sampling    : 50.93  : 8     : 30 s       
Duplicates        : -      : 4     : -          
----------------  : ----   : ----  : ----       
Total             : 53.61  : 32    : 01:01 (m:s)
Hill-Valley-with-noise_network_fn3_{'epochs': 10, 'lr': 3e-05, 'batch_size': 32, 'drop1': 0.1, 'drop2': 0.7, 'drop3': 0.3, 'drop4': 0.7}


  _warn_prf(average, modifier, msg_start, len(result))


{'epochs': 10,
 'lr': 3e-05,
 'batch_size': 32,
 'drop1': 0.1,
 'drop2': 0.7,
 'drop3': 0.3,
 'drop4': 0.7}

In [34]:
# Evaluate the 3-layer dropout network with its tuned hyperparameters on the
# (train, test) pairs, using the evaluation-time training size.
nn3_results = test_model(
    model_fn=network_fn3(**nn_fn3_best_params), data=(X, y),
    train_size=eval_train_max_size, iters=10,
)

iter 1 of 1 X_train shape torch.Size([606, 100])


  _warn_prf(average, modifier, msg_start, len(result))


In [35]:
print_mean_std_max(nn3_results, DATA)

metric balanced_accuracy
dataset_name Hill-Valley-with-noise
Hill-Valley-with-noise: 50.00 ~ nan (max: 50.00)


### Hypernetwork

In [20]:
# Reseed so this section starts from the same seed as every other model section.
set_seed(SEED)

# Factory for the hypernetwork (HyperTab) model; called with hyperparameters below.
network_hp_fn = get_parametrized_hypertab_fn(
    DEVICE=DEVICE,
    n_features=n_features,
    n_classes=n_classes,
)

#### Find hyperparams

In [21]:
# Hypernetwork search space, built entirely from the shared CommonHyperparams ranges.
param_grid = dict(
    epochs=CommonHyperparams.ht_epochs,
    masks_no=CommonHyperparams.ht_mask_no,
    mask_size=CommonHyperparams.ht_mask_size,
    target_size=CommonHyperparams.ht_target_size,
    first_hidden_layer=CommonHyperparams.ht_first_hidden,
    lr=CommonHyperparams.lr,
    batch_size=CommonHyperparams.batch_size,
)

# Tune on the training split only, with a search budget of "60m".
hp_best_params, hp_history = pyhopper_best_params(
    model_fn=network_hp_fn, param_grid=param_grid,
    data=(X_train, y_train), train_size=train_max_size,
    DATA=DATA, device=DEVICE,
    time="60m",
)

# Display the selected hyperparameters.
hp_best_params

THIS IS TEST RUN
| DEVICE: cpu
| model_fn network_hp_fn

pyhopper X.shape: (606, 100) y.shape: (606,) train_size: 454


  0%|          | [00:00<?]

Search is scheduled for 01:00 (m:s)
params {'epochs': 10, 'masks_no': 50, 'mask_size': 2, 'target_size': 5, 'first_hidden_layer': 32, 'lr': 3e-05, 'batch_size': 32}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 200, 'mask_size': 90, 'target_size': 5, 'first_hidden_layer': 32, 'lr': 0.03, 'batch_size': 384}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 100, 'mask_size': 2, 'target_size': 20, 'first_hidden_layer': 256, 'lr': 0.0003, 'batch_size': 448}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 200, 'mask_size': 2, 'target_size': 5, 'first_hidden_layer': 256, 'lr': 3e-05, 'batch_size': 192}
iter 1 of 1 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'masks_no': 200, 'mask_size': 2, 'target_size': 5, 'first_hidden_layer': 512, 'lr': 0.03, 'batch_size': 448}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 50, 'mask_size': 50, 'target_size': 10, 'first_hidden_layer': 256, 'lr': 3e-05, 'batch_size': 320}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 200, 'mask_size': 2, 'target_size': 50, 'first_hidden_layer': 512, 'lr': 0.3, 'batch_size': 480}
iter 1 of 1 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'masks_no': 100, 'mask_size': 50, 'target_size': 50, 'first_hidden_layer': 512, 'lr': 0.0003, 'batch_size': 224}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 100, 'mask_size': 5, 'target_size': 50, 'first_hidden_layer': 32, 'lr': 0.0003, 'batch_size': 320}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 300, 'mask_size': 20, 'target_size': 5, 'first_hidden_layer': 64, 'lr': 0.03, 'batch_size': 64}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 150, 'mask_size': 90, 'target_size': 50, 'first_hidden_layer': 32, 'lr': 0.003, 'batch_size': 224}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 300, 'mask_size': 5, 'target_size': 20, 'first_hidden_layer': 256, 'lr': 0.0003, 'batch_size': 96}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 300, 'mask_size': 2, 'target_size': 20, 'first_hidden_lay

  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'masks_no': 150, 'mask_size': 20, 'target_size': 10, 'first_hidden_layer': 32, 'lr': 0.0003, 'batch_size': 64}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 300, 'mask_size': 10, 'target_size': 5, 'first_hidden_layer': 64, 'lr': 0.3, 'batch_size': 32}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 300, 'mask_size': 5, 'target_size': 5, 'first_hidden_layer': 64, 'lr': 0.03, 'batch_size': 64}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 100, 'mask_size': 2, 'target_size': 5, 'first_hidden_layer': 64, 'lr': 0.03, 'batch_size': 64}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 100, 'mask_size': 50, 'target_size': 5, 'first_hidden_layer': 64, 'lr': 0.03, 'batch_size': 96}
iter 1 of 1 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


params {'epochs': 10, 'masks_no': 200, 'mask_size': 20, 'target_size': 20, 'first_hidden_layer': 64, 'lr': 0.03, 'batch_size': 64}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 100, 'mask_size': 20, 'target_size': 5, 'first_hidden_layer': 64, 'lr': 3e-05, 'batch_size': 128}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 100, 'mask_size': 20, 'target_size': 10, 'first_hidden_layer': 64, 'lr': 0.3, 'batch_size': 64}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 300, 'mask_size': 90, 'target_size': 5, 'first_hidden_layer': 64, 'lr': 0.03, 'batch_size': 96}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 100, 'mask_size': 2, 'target_size': 5, 'first_hidden_layer': 512, 'lr': 0.03, 'batch_size': 64}
iter 1 of 1 X_train shape torch.Size([454, 100])
params {'epochs': 10, 'masks_no': 200, 'mask_size': 90, 'target_size': 10, 'first_hidden_layer': 512, 

{'epochs': 10,
 'masks_no': 200,
 'mask_size': 90,
 'target_size': 10,
 'first_hidden_layer': 512,
 'lr': 0.003,
 'batch_size': 160}

In [22]:
hp_best_params

{'epochs': 10,
 'masks_no': 200,
 'mask_size': 90,
 'target_size': 10,
 'first_hidden_layer': 512,
 'lr': 0.003,
 'batch_size': 160}

#### Train using the best hyperparams

In [23]:
# Evaluate the hypernetwork with its tuned hyperparameters (verbose=True is passed to
# the factory in addition to them) on the (train, test) pairs, using the
# evaluation-time training size.
hyper_results = test_model(
    model_fn=network_hp_fn(**hp_best_params, verbose=True), data=(X, y),
    train_size=eval_train_max_size, iters=10,
)

iter 1 of 1 X_train shape torch.Size([606, 100])


100%|██████████| 10/10 [00:02<00:00,  4.07it/s]


In [24]:
print_mean_std_max(hyper_results, DATA)

metric balanced_accuracy
dataset_name Hill-Valley-with-noise
Hill-Valley-with-noise: 50.14 ~ nan (max: 50.14)


## Random Subspace

In [22]:
# Reseed so this section starts from the same seed as every other model section.
set_seed(SEED)

# Factory for the Random Subspace baseline; called with hyperparameters below.
get_bagged_fn = get_parametrized_bagged_fn()

#### Tune hyperparams

In [23]:
# Random Subspace search space. The hidden-layer sizes reuse the hypernetwork's
# mask-size and target-size ranges; learning rate and iteration count reuse the shared
# lr / epochs ranges.
param_grid = dict(
    first_hidden_layer=CommonHyperparams.ht_mask_size,
    second_hidden_layer=CommonHyperparams.ht_target_size,
    batch_size=CommonHyperparams.batch_size,
    learning_rate_init=CommonHyperparams.lr,
    max_iter=CommonHyperparams.ht_epochs,
)

In [None]:
# Tune the Random Subspace baseline on the training split only. The device is pinned to
# 'cpu' here rather than taken from DEVICE, and the budget comes from TIME_BUDGET.
bagged_best, bagged_history = pyhopper_best_params(
    model_fn=get_bagged_fn, param_grid=param_grid,
    data=(X_train, y_train), train_size=train_max_size,
    DATA=DATA, device='cpu',
    time=TIME_BUDGET,
)

| DEVICE: cpu
| model_fn bagged_fn

pyhopper X.shape: (606, 100) y.shape: (606,) train_size: 454


  0%|          | [00:00<?]

Search is scheduled for 30:00 (m:s)
params {'first_hidden_layer': 2, 'second_hidden_layer': 5, 'batch_size': 32, 'learning_rate_init': 3e-05, 'max_iter': 100}
iter 1 of 5 X_train shape torch.Size([454, 100])




iter 2 of 5 X_train shape torch.Size([454, 100])




iter 3 of 5 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


iter 4 of 5 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


iter 5 of 5 X_train shape torch.Size([454, 100])




params {'first_hidden_layer': 90, 'second_hidden_layer': 10, 'batch_size': 224, 'learning_rate_init': 0.3, 'max_iter': 200}
iter 1 of 5 X_train shape torch.Size([454, 100])
iter 2 of 5 X_train shape torch.Size([454, 100])


  _warn_prf(average, modifier, msg_start, len(result))


iter 3 of 5 X_train shape torch.Size([454, 100])
iter 4 of 5 X_train shape torch.Size([454, 100])
iter 5 of 5 X_train shape torch.Size([454, 100])
params {'first_hidden_layer': 2, 'second_hidden_layer': 50, 'batch_size': 288, 'learning_rate_init': 0.3, 'max_iter': 100}
iter 1 of 5 X_train shape torch.Size([454, 100])
iter 2 of 5 X_train shape torch.Size([454, 100])
iter 3 of 5 X_train shape torch.Size([454, 100])
iter 4 of 5 X_train shape torch.Size([454, 100])
iter 5 of 5 X_train shape torch.Size([454, 100])
params {'first_hidden_layer': 70, 'second_hidden_layer': 20, 'batch_size': 64, 'learning_rate_init': 0.003, 'max_iter': 200}
iter 1 of 5 X_train shape torch.Size([454, 100])
iter 2 of 5 X_train shape torch.Size([454, 100])
iter 3 of 5 X_train shape torch.Size([454, 100])


#### Use best hyperparams

In [None]:
# Evaluate the Random Subspace baseline with its tuned hyperparameters on the
# (train, test) pairs, using the evaluation-time training size.
bagging_results = test_model(
    model_fn=get_bagged_fn(**bagged_best), data=(X, y),
    train_size=eval_train_max_size, iters=10,
)

In [None]:
print_mean_std_max(bagging_results, DATA)

## Ensembles

In [None]:
# Reseed so this section starts from the same seed as every other model section.
set_seed(SEED)

# Factory for the Ensembles baseline; called with hyperparameters below.
get_ensembles = get_parametrized_ensemble_fn()

#### Tune

In [None]:
# Ensembles search space. The number of models reuses the hypernetwork's mask-count
# range; the remaining entries mirror the Random Subspace grid.
param_grid = dict(
    n_models=CommonHyperparams.ht_mask_no,
    first_hidden_layer=CommonHyperparams.ht_mask_size,
    second_hidden_layer=CommonHyperparams.ht_target_size,
    batch_size=CommonHyperparams.batch_size,
    learning_rate_init=CommonHyperparams.lr,
    max_iter=CommonHyperparams.ht_epochs,
)

In [None]:
# Hyperparameter search for the ensemble baseline. Only the training split is
# passed in, so the held-out test split is not used during tuning. The search
# runs for the notebook-wide TIME_BUDGET.
# NOTE(review): `device` is hard-coded to 'cpu' although the notebook defines a
# DEVICE global — confirm whether this should follow DEVICE.
ensemble_best, ensemble_history = pyhopper_best_params(
    model_fn=get_ensembles, 
    param_grid=param_grid,
    data=(X_train, y_train),
    train_size=train_max_size,
    DATA=DATA,
    device='cpu',
    time=TIME_BUDGET
)

#### Use best hyperparams

In [None]:
# Final evaluation of the ensemble built from the tuned hyperparameters
# (`ensemble_best`) on the (train, test) tuples `X` / `y`.
# NOTE(review): `iters=10` presumably repeats training/evaluation ten times —
# confirm against `test_model` in hypertab_benchmark_utils.
ensemble_results = test_model(
    model_fn=get_ensembles(**ensemble_best),
    data=(X, y),
    train_size=eval_train_max_size,
    iters=10
)

In [None]:
# Report the ensemble results for the current dataset.
# NOTE(review): judging by its name the helper prints mean/std/max — confirm in its definition.
print_mean_std_max(ensemble_results, DATA)

## XGBoost

In [None]:
# Re-seed at the start of the section so it begins from the same seed value
# regardless of which sections were executed before it.
set_seed(SEED)

# Parametrized XGBoost factory; the seed is forwarded to the factory as well.
get_xgboost = get_parametrized_xgboost_fn(seed=SEED)

#### Hyperparam tuning

In [None]:
# Stage 1 of the XGBoost search: number of trees, tree shape and learning rate.
param_grid = dict(
    n_estimators=pyhopper.int(50, 3000, multiple_of=50),
    max_depth=pyhopper.choice([2, 3, 5, 10, 15]),
    learning_rate=pyhopper.float(1e-5, 1e-1, log=True),
    min_child_weight=pyhopper.choice([1, 2, 4, 8, 16, 32]),
    gamma=pyhopper.choice([0, 0.001, 0.1, 1]),
)

# This stage gets a fixed 20-minute budget rather than TIME_BUDGET, and tunes
# on the training split only.
xgbt_best1, xgbt_history1 = pyhopper_best_params(
    model_fn=get_xgboost,
    param_grid=param_grid,
    data=(X_train, y_train),
    train_size=train_max_size,
    DATA=DATA,
    device='cpu',
    time="20m",
)

In [None]:
# Stage 2 of the XGBoost search: row subsampling and L2 / L1 regularisation.
# NOTE(review): `init=0` lies outside the log-scaled range [1e-5, 10] —
# presumably meant as "start with no regularisation"; confirm PyHopper accepts it.
param_grid = dict(
    subsample=pyhopper.choice([0.5, 0.6, 0.7, 0.8, 0.9, 1]),
    reg_lambda=pyhopper.float(1e-5, 10, init=0, log=True),
    reg_alpha=pyhopper.float(1e-5, 10, init=0, log=True),
)

# Fixed 10-minute budget for this stage. The stage-1 winners are handed over
# via `default_params`.
# NOTE(review): presumably they stay fixed while the parameters above are
# searched — confirm against `pyhopper_best_params`.
xgbt_best2, xgbt_history2 = pyhopper_best_params(
    model_fn=get_xgboost,
    param_grid=param_grid,
    data=(X_train, y_train),
    train_size=train_max_size,
    DATA=DATA,
    device='cpu',
    time="10m",
    default_params=xgbt_best1,
)

#### Best Params

In [None]:
# Merge both search stages into one parameter set; on a key collision the
# stage-2 value overrides the stage-1 value.
xgboost_best = dict(xgbt_best1)
xgboost_best.update(xgbt_best2)

In [None]:
# Final evaluation of XGBoost with the merged best parameters (`xgboost_best`)
# on the (train, test) tuples `X` / `y`.
# NOTE(review): `iters=10` presumably repeats training/evaluation ten times —
# confirm against `test_model` in hypertab_benchmark_utils.
xgb_dframe = test_model(
    model_fn=get_xgboost(**xgboost_best),
    data=(X, y),
    train_size=eval_train_max_size,
    iters=10
)

In [None]:
# Report the XGBoost results for the current dataset.
# NOTE(review): judging by its name the helper prints mean/std/max — confirm in its definition.
print_mean_std_max(xgb_dframe, DATA)

## Random forest

In [None]:
# Re-seed at the start of the section so it begins from the same seed value
# regardless of which sections were executed before it.
set_seed(SEED)

# Parametrized random-forest factory; the seed is forwarded to the factory as well.
get_rf = get_parametrized_rf_fn(seed=SEED)

#### Find hyperparams

In [None]:
# Random-forest search space: forest size, features considered per split,
# split criterion and maximum tree depth (None leaves the depth unlimited).
param_grid = dict(
    n_estimators=pyhopper.int(50, 3000, multiple_of=50),
    max_features=pyhopper.choice([None, 'sqrt', 0.2, 0.3, 0.5, 0.7]),
    criterion=pyhopper.choice(['gini', 'entropy']),
    max_depth=pyhopper.choice([None, 2, 4, 8, 16]),
)

# Tune on the training split only, for the notebook-wide TIME_BUDGET.
rf_best, rf_history = pyhopper_best_params(
    model_fn=get_rf,
    param_grid=param_grid,
    data=(X_train, y_train),
    train_size=train_max_size,
    DATA=DATA,
    device='cpu',
    time=TIME_BUDGET,
)

# Last expression of the cell: display the winning parameter set.
rf_best

#### Use best params

In [None]:
# Final evaluation of the random forest with the tuned parameters (`rf_best`)
# on the (train, test) tuples `X` / `y`.
# NOTE(review): `iters=10` presumably repeats training/evaluation ten times —
# confirm against `test_model` in hypertab_benchmark_utils.
rf_dframe = test_model(
    model_fn=get_rf(**rf_best), 
    data=(X, y),
    train_size=eval_train_max_size,
    iters=10
)

In [None]:
# Report the random-forest results for the current dataset.
# NOTE(review): judging by its name the helper prints mean/std/max — confirm in its definition.
print_mean_std_max(rf_dframe, DATA)

# Collect analytics

In [61]:
# Summary of the hypernetwork results: mean, std and max of 'Metric' within
# each 'Class' label (per-class scores plus the aggregate metrics).
# NOTE(review): the displayed std is NaN for every row, which is what pandas
# yields for single-observation groups — it looks like only one run was
# recorded in `hyper_results`; confirm before comparing models.
hyper_results.groupby("Class")['Metric'].agg(['mean', 'std', 'max'])

Unnamed: 0_level_0,mean,std,max
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,80.602007,,80.602007
1,17.263844,,17.263844
F1 score,25.358852,,25.358852
Precision,47.747748,,47.747748
Recall,17.263844,,17.263844
Total,48.514851,,48.514851
balanced_accuracy,48.932925,,48.932925
roc_auc,50.384016,,50.384016


In [62]:
# Collector for the per-model summary tables, keyed by the model's display name.
d = {}

In [63]:
# Summarise every model's results the same way: mean / std / max of 'Metric'
# within each 'Class' label. One loop replaces seven copy-pasted aggregation
# lines, so the statistics cannot drift apart between models. The order of the
# pairs below is the insertion order into `d`, and therefore the row order of
# any table later built from it.
# NOTE(review): `bagging_results` and `ensemble_results` are computed earlier in
# the notebook but are not collected here — confirm whether that is intentional.
for _model_name, _model_results in [
    ('Random forest', rf_dframe),
    ('Hypernet', hyper_results),
    ('Dropout_1', nn1_results),
    ('Dropout_2', nn2_results),
    ('Dropout_3', nn3_results),
    ('Node', node_results),
    ('XGBoost', xgb_dframe),
]:
    d[_model_name] = _model_results.groupby("Class")['Metric'].agg(['mean', 'std', 'max'])

In [64]:
# Stack the per-model summaries vertically; the keys of `d` (model names)
# become the outer level of the resulting row index.
all_models_df = pd.concat(d, axis="index")

# Last expression of the cell: display the combined table.
all_models_df

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std,max
Unnamed: 0_level_1,Class,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Random forest,0,54.849498,,54.849498
Random forest,1,47.557003,,47.557003
Random forest,F1 score,49.659864,,49.659864
Random forest,Precision,51.957295,,51.957295
Random forest,Recall,47.557003,,47.557003
Random forest,Total,51.155116,,51.155116
Random forest,balanced_accuracy,51.203251,,51.203251
Random forest,roc_auc,52.000697,,52.000697
Hypernet,0,80.602007,,80.602007
Hypernet,1,17.263844,,17.263844


In [65]:
# Fail fast when the Comet API key is not configured, but never echo the secret
# itself: a bare `os.environ['COMET_KEY']` expression would be rendered as cell
# output and stored in the saved notebook.
if not os.environ.get('COMET_KEY'):
    # Same exception type a direct `os.environ[...]` lookup raises for a missing key.
    raise KeyError('COMET_KEY')

'<redacted>'

In [66]:
# Persist the combined metrics table; the file name is derived from the dataset name in DATA.
all_models_df.to_csv(f"{DATA}_metrics.csv")

In [67]:
# Open a Comet experiment in the 'hypernet-uci-tune' project. The API key is
# read from the environment so that it is never written into the notebook.
exp = Experiment(
    api_key=os.environ.get("COMET_KEY"),
    project_name='hypernet-uci-tune',
)
# exp.log_parameters({"epochs": epochs, "mask_size": mask_size, "masks_no": masks_no, "data_size": data_size})

# Tag the run with the dataset name so experiments can be filtered per dataset.
exp.add_tag(f"hypernet-tune2{DATA}")

# Upload the combined metrics table; kept as the last expression so the
# returned asset information is displayed as the cell output.
exp.log_table(f"{DATA}_metrics.csv", all_models_df)

COMET INFO: Experiment is live on comet.ml https://www.comet.com/abulenok/hypernet-uci-tune/149575beb906407eacd784e140ed9789



{'web': 'https://www.comet.com/api/asset/download?assetId=ebb6e7a67520408ab4c331339ea83125&experimentKey=149575beb906407eacd784e140ed9789',
 'api': 'https://www.comet.com/api/rest/v2/experiment/asset/get-asset?assetId=ebb6e7a67520408ab4c331339ea83125&experimentKey=149575beb906407eacd784e140ed9789',
 'assetId': 'ebb6e7a67520408ab4c331339ea83125'}

### Replace some data in the existing metrics file

In [68]:
# tmp_df = pd.concat(d, axis=0)
# tmp_df = tmp_df.reset_index()
# tmp_df = tmp_df.rename(columns={tmp_df.columns[0]: DATA})

# tmp_df

In [69]:
# all_models_df = pd.read_csv(f"{DATA}_metrics.csv")
# all_models_df = all_models_df.rename(columns={all_models_df.columns[0]: DATA})
# all_models_df = all_models_df.drop(all_models_df[all_models_df.iloc[:, 0] == 'Hypernet'].index)
# all_models_df = all_models_df.drop(all_models_df[all_models_df.iloc[:, 0] == 'HypernetPCA'].index)

# all_models_df

In [70]:
# all_models_df = pd.concat([all_models_df, tmp_df])
# all_models_df

In [71]:
# Display the metrics file name used for the current dataset.
f"{DATA}_metrics.csv"

'Hill-Valley-with-noise_metrics.csv'

In [72]:
# Reload the metrics from disk. This rebinds `all_models_df` to whatever
# `read_csv` returns for the file, replacing the frame that was built in memory.
# No index column is specified here, so the first CSV column comes back as an
# ordinary column labelled 'Unnamed: 0' (visible in the outputs below).
all_models_df = pd.read_csv(f"{DATA}_metrics.csv")

In [73]:
# Display only: the result of reset_index() is not assigned, so the name
# `all_models_df` still refers to the frame loaded above.
all_models_df.reset_index()

Unnamed: 0.1,index,Unnamed: 0,Class,mean,std,max
0,0,Random forest,0,54.849498,,54.849498
1,1,Random forest,1,47.557003,,47.557003
2,2,Random forest,F1 score,49.659864,,49.659864
3,3,Random forest,Precision,51.957295,,51.957295
4,4,Random forest,Recall,47.557003,,47.557003
5,5,Random forest,Total,51.155116,,51.155116
6,6,Random forest,balanced_accuracy,51.203251,,51.203251
7,7,Random forest,roc_auc,52.000697,,52.000697
8,8,Hypernet,0,80.602007,,80.602007
9,9,Hypernet,1,17.263844,,17.263844


In [74]:
# all_models_df = all_models_df.drop(all_models_df.columns[0], axis=1)
# all_models_df

In [75]:
# Alias, not a copy: `tmp` refers to the same DataFrame object as `all_models_df`.
tmp = all_models_df

In [76]:
# Label the first column with the dataset name. rename() returns a new frame,
# so after this line `tmp` no longer aliases the frame it was renamed from.
tmp = tmp.rename({tmp.columns[0]: DATA}, axis="columns")

In [77]:
# Keep only the balanced-accuracy row of every model for a side-by-side comparison.
tmp.loc[tmp['Class'].eq("balanced_accuracy")]

Unnamed: 0,Hill-Valley-with-noise,Class,mean,std,max
6,Random forest,balanced_accuracy,51.203251,,51.203251
14,Hypernet,balanced_accuracy,48.932925,,48.932925
22,Dropout_1,balanced_accuracy,50.153062,,50.153062
30,Dropout_2,balanced_accuracy,50.876973,,50.876973
38,Dropout_3,balanced_accuracy,50.0,,50.0
46,Node,balanced_accuracy,50.0,,50.0
54,XGBoost,balanced_accuracy,56.317475,,56.317475
