In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import os
import time
import json
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L
import tensorflow.keras.models as M
import tensorflow_addons as tfa
from tensorflow.keras import losses, backend
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler, QuantileTransformer
from sklearn.model_selection import KFold
from sklearn.metrics import log_loss
from sklearn.utils import check_random_state
from sklearn.decomposition import PCA
from typing import Tuple, List, Callable, Any
from tqdm import tqdm
from datetime import datetime
from hyperopt import hp, tpe, space_eval
from hyperopt.fmin import fmin
# Report the runtime environment and pin the notebook to the first GPU.
print('tensorflow ver:', tf.__version__)
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
# Iterating an empty list is a no-op, so no explicit emptiness check is needed.
for gpu_device in gpu_devices:
    print('device available:', gpu_device)
# Show every column when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

In [None]:
# Run configuration. KAGGLE switches between the competition file layout and a
# local layout; VER tags the model directory of this experiment.
KAGGLE = False
VER = 'v53'
if KAGGLE:
    DATA_PATH = '../input/lish-moa'
    MODELS_PATH = f'../input/moa-models-{VER}'
else:
    DATA_PATH = './data'
    MODELS_PATH = f'./models_{VER}'
    # Idempotent directory creation (no check-then-create race on re-runs).
    os.makedirs(MODELS_PATH, exist_ok=True)
PARAMS = {
    'VER': VER,
    'SEED': 2053,        # base seed for seed_all and the fold splitters
    'SEEDS': 8,          # number of repeated CV runs in the final training loop
    'FOLDS': 8,          # n_splits of MultilabelStratifiedKFold
    'EPOCHS': 100,       # max epochs per model.fit call
    'BATCH_SIZE': 128,
    'DECAY': True,       # True: warmup/decay LR schedule, False: ReduceLROnPlateau
    'PATIENCE': 10,      # early-stopping patience (halved for ReduceLROnPlateau)
    'UNITS': 1024,       # base width of the dense layers
    'DROPOUT': .5,
    'FEAT_IMP': 1, # None or 1, 2, ..., n iterations 
    'PSEUDO_LBL': False, # retrain on train + pseudo-labelled test rows
    'LBL_SMOOTH': 1e-4,  # label smoothing of the loss; also the OOF clipping margin
    'REDUCE_COMPS': 12,  # PCA keeps len(cols) / REDUCE_COMPS components per group
    'THRESHOLD': 1e-2,   # quantile of the feature variances used as variance cut
    'PIPE': 3,           # architecture selector understood by get_model (0..3)
    'DATE': str(datetime.now()),
    'HOPT': False,       # True: run the hyperopt search instead of final training
    'PIPE_SCALER': 2,    # 1: RobustScaler, 2: MinMaxScaler, 3: StandardScaler
    'QTRANS': False      # apply QuantileTransformer to the g-/c- features first
}
# Persist the configuration next to the model checkpoints.
with open(f'{MODELS_PATH}/params.json', 'w') as file:
    json.dump(PARAMS, file)

def seed_all(seed):
    """Seed Python's `random`, the hash seed env var, NumPy and TensorFlow.

    Args:
        seed: integer seed applied to every random number source.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)

# Seed every RNG once, before any data processing or model construction.
seed_all(PARAMS['SEED'])
# Reference point for the "time elapsed" messages printed by later cells.
start_time = time.time()

In [None]:
# Load the raw competition tables; the target table drops its id column so it
# contains label columns only.
train_features_raw = pd.read_csv(f'{DATA_PATH}/train_features.csv')
train_targets_raw = pd.read_csv(f'{DATA_PATH}/train_targets_scored.csv').drop(columns='sig_id')
test_features_raw = pd.read_csv(f'{DATA_PATH}/test_features.csv')
ssubm = pd.read_csv(f'{DATA_PATH}/sample_submission.csv')
_load_report = [
    'train features loaded:', train_features_raw.shape,
    '\ntrain targets loaded:', train_targets_raw.shape,
    '\ntest features loaded:', test_features_raw.shape,
    '\nsubmission loaded:', ssubm.shape,
]
print(*_load_report)

In [None]:
class VarianceThreshold:
    """Drop feature columns whose variance does not exceed a fixed threshold.

    `fit_transform` decides which of the given feature columns to drop;
    `transform` removes the same columns from another frame.
    """

    def __init__(self, threshold):
        """Store the variance cut; columns need variance > threshold to survive."""
        self.threshold = threshold

    def fit_transform(self, df, feat_cols):
        """Select the columns to keep and return the reduced frame.

        Args:
            df: DataFrame containing at least the columns in `feat_cols`.
            feat_cols: list of candidate feature column names.

        Returns:
            Tuple of (df without the dropped columns, kept feature names in
            their original order). Columns outside `feat_cols` are untouched.
        """
        self.df = df  # attribute kept for backward compatibility
        self.var = self.df[feat_cols].var()
        # Build the keep-set once; the membership tests below are then O(1)
        # instead of rebuilding and scanning a list for every feature.
        keep = set(self.var[self.var > self.threshold].index)
        self.drop_cols = [x for x in feat_cols if x not in keep]
        self.valid_cols = [x for x in feat_cols if x in keep]
        return self.df.drop(self.drop_cols, axis=1), self.valid_cols

    def transform(self, df):
        """Drop from `df` the columns selected for removal by `fit_transform`."""
        return df.drop(self.drop_cols, axis=1)

def preprocess(df):
    """Encode the categorical columns of a raw feature frame.

    Works on a copy: `cp_type` and `cp_dose` are mapped to 0/1, `sig_id` is
    removed and `cp_time` is replaced by one indicator column per value.

    Args:
        df: raw feature DataFrame with `sig_id`, `cp_type`, `cp_dose` and
            `cp_time` columns.

    Returns:
        New DataFrame; the `cp_time_*` indicator columns are appended last.
    """
    df = df.copy()
    # Plain column assignment replaces the column (and its dtype). Writing
    # through `df.loc[:, col]` can keep the original object dtype on recent
    # pandas versions, which turns `df.values` into an object array.
    df['cp_type'] = df['cp_type'].map({'trt_cp': 0, 'ctl_vehicle': 1})
    df['cp_dose'] = df['cp_dose'].map({'D1': 0, 'D2': 1})
    del df['sig_id']
    # Explicit numeric dtype: newer pandas defaults to bool indicators, which
    # also degrade `df.values` to object when mixed with float columns.
    dummies = pd.get_dummies(df['cp_time'], drop_first=False, prefix='cp_time',
                             dtype=np.uint8)
    df = df.join(dummies)
    df = df.drop('cp_time', axis=1)
    return df
    
def _append_pca_features(train, test, cols, prefix, r_comps, seed):
    """Fit PCA on train[cols] and append the components to train and test.

    Returns (train, test, component column names); the component columns are
    named `{prefix}-{i}`.
    """
    n_comps = int(len(cols) / r_comps)
    names = [f'{prefix}-{i}' for i in range(n_comps)]
    pca = PCA(n_components=n_comps, random_state=seed)
    # Fit on train only; the test rows are projected with the same components.
    train_pca = pd.DataFrame(pca.fit_transform(train[cols]), columns=names, index=train.index)
    test_pca = pd.DataFrame(pca.transform(test[cols]), columns=names, index=test.index)
    train = pd.concat((train, train_pca), axis=1)
    test = pd.concat((test, test_pca), axis=1)
    return train, test, names


def pca_train_test(train, test, feat_cols, g_cols, c_cols, r_comps, seed):
    """Append PCA components of the g- and c- column groups to both frames.

    Each group gets `int(len(group) / r_comps)` components, fitted on the
    training rows.

    Args:
        train, test: DataFrames containing `g_cols` and `c_cols`.
        feat_cols: list of current feature names; it is NOT modified, an
            extended copy is returned instead.
        g_cols, c_cols: column names of the two feature groups.
        r_comps: reduction factor for the number of components.
        seed: random_state handed to PCA.

    Returns:
        Tuple (train, test, feat_cols) with the new `pca_g-*` / `pca_c-*`
        columns appended to the frames and to the feature list.
    """
    feat_cols = list(feat_cols)  # do not mutate the caller's list

    # pca-g feature PCA
    train, test, g_names = _append_pca_features(train, test, g_cols, 'pca_g', r_comps, seed)
    feat_cols += g_names
    print('added PCA features:', g_names)

    # pca-c feature PCA
    train, test, c_names = _append_pca_features(train, test, c_cols, 'pca_c', r_comps, seed)
    feat_cols += c_names
    print('\nadded PCA features:', c_names)
    return train, test, feat_cols

def get_feats_stats(df, g_cols, c_cols):
    """Add row-wise summary statistics of the g-, c- and combined columns.

    For each group (`g`, `c`, `gc`) the columns `stats_<group>_sum`, `_mean`,
    `_std`, `_kurt` and `_skew` are written into `df` in place.

    Args:
        df: DataFrame holding the feature columns; modified in place.
        g_cols, c_cols: pandas Index objects naming the two column groups
            (`.to_list()` is called on both).

    Returns:
        The same `df` object with the fifteen statistic columns added.
    """
    groups = (
        ('g', g_cols),
        ('c', c_cols),
        ('gc', g_cols.to_list() + c_cols.to_list()),
    )
    for tag, cols in groups:
        # Select the group once; the new stats_* columns are never part of it.
        block = df[cols]
        df[f'stats_{tag}_sum'] = block.sum(axis=1)
        df[f'stats_{tag}_mean'] = block.mean(axis=1)
        df[f'stats_{tag}_std'] = block.std(axis=1)
        df[f'stats_{tag}_kurt'] = block.kurtosis(axis=1)
        df[f'stats_{tag}_skew'] = block.skew(axis=1)
    return df

In [None]:
def get_train_test(train_features, train_targets, test_features,  
                   r_comps, q_flag, pipe_scaler, 
                   seed, threshold=None):
    """Build the model-ready train/test frames from the raw tables.

    Steps: optional quantile transform of the g-/c- columns, categorical
    encoding, removal of `cp_type == 1` rows from train, PCA features,
    row statistics, scaling and a variance-based feature filter.

    Args:
        train_features, train_targets, test_features: raw DataFrames.
        r_comps: PCA reduction factor (see pca_train_test).
        q_flag: apply a QuantileTransformer to the g-/c- columns when truthy.
        pipe_scaler: 1 = RobustScaler, 2 = MinMaxScaler, 3 = StandardScaler.
        seed: random_state for the quantile transform and PCA.
        threshold: quantile of the feature variances used as the variance
            cut; defaults to PARAMS['THRESHOLD'] when None.

    Returns:
        Tuple (train, train_targets, test, feat_cols).

    Raises:
        ValueError: if `pipe_scaler` is not 1, 2 or 3.
    """
    if threshold is None:
        threshold = PARAMS['THRESHOLD']
    scalers = {1: RobustScaler, 2: MinMaxScaler, 3: StandardScaler}
    # Fail early with a clear message instead of hitting an unbound `scaler`
    # after the expensive feature engineering below.
    if pipe_scaler not in scalers:
        raise ValueError(f'unknown pipe_scaler {pipe_scaler!r}, expected 1, 2 or 3')

    g_cols = train_features.columns[train_features.columns.str.startswith('g-')]
    c_cols = train_features.columns[train_features.columns.str.startswith('c-')]
    feat_cols = g_cols.to_list() + c_cols.to_list()
    print('g-cols:', len(g_cols), '| c-cols:', len(c_cols))
    
    if q_flag:
        qtrans = QuantileTransformer(n_quantiles=100, random_state=seed, output_distribution='normal')
        # Keep the source index on the transformed block so the column-wise
        # concat stays row-aligned even for a non-default index.
        train_features = pd.concat([
            train_features.drop(columns=feat_cols), 
            pd.DataFrame(qtrans.fit_transform(train_features[feat_cols]),
                         columns=feat_cols, index=train_features.index)], axis=1)
        test_features = pd.concat([
            test_features.drop(columns=feat_cols),
            pd.DataFrame(qtrans.transform(test_features[feat_cols]),
                         columns=feat_cols, index=test_features.index)], axis=1)

    train = preprocess(train_features)
    test = preprocess(test_features)
    # Only rows with cp_type == 0 are kept for training; targets are filtered
    # with the same mask before the index is reset.
    train_targets = train_targets.loc[train['cp_type'] == 0].reset_index(drop=True)
    train = train.loc[train['cp_type'] == 0].reset_index(drop=True)
    
    train, test, feat_cols = pca_train_test(train, test, feat_cols, 
                                            g_cols, c_cols,
                                            r_comps, seed)
    train = get_feats_stats(train, g_cols, c_cols)
    test = get_feats_stats(test, g_cols, c_cols)
    feat_cols.extend([x for x in train.columns if 'stats_' in x])
    print('features:', len(feat_cols))
    
    # Scaler is fitted on train and re-used for test.
    scaler = scalers[pipe_scaler]()
    train[feat_cols] = scaler.fit_transform(train[feat_cols])
    test[feat_cols] = scaler.transform(test[feat_cols])
    
    # Variance cut = the requested quantile of the per-feature variances.
    var_cut = train[feat_cols].var().quantile(threshold)
    print('threshold {:.4f}'.format(var_cut))
    print('features total:', len(feat_cols))
    var_thresh = VarianceThreshold(var_cut)
    train, feat_cols = var_thresh.fit_transform(train, feat_cols)
    test = var_thresh.transform(test)
    print(f'features total with variance threshold {var_cut:.4f}:', len(feat_cols))
    elapsed_time = time.time() - start_time
    print(f'time elapsed: {elapsed_time // 60:.0f} min {elapsed_time % 60:.0f} sec')
    
    return train, train_targets, test, feat_cols

In [None]:
def _hidden_layout(units, drop, pipe):
    """Return (hidden layer widths, dropout rate) for architecture `pipe`."""
    if pipe == 0:
        return [units], drop
    if pipe == 1:
        return [units, int(units / 2)], drop
    if pipe == 2:
        return [units, int(units / 2), int(units / 4)], drop
    if pipe == 3:
        # Four equal half-width layers with half the dropout rate.
        return [int(units / 2)] * 4, drop / 2
    raise AttributeError('Cannot recover attribute for model pipe')


def get_model(num_columns, num_columns_rs=0, units=1024, drop=.4, lbl_smooth=.001, pipe=1):
    """Build and compile the dense network selected by `pipe`.

    Every hidden stage is BatchNormalization -> Dropout -> weight-normalised
    Dense(elu); the output stage uses the same pattern with a 206-unit
    sigmoid layer (presumably one unit per scored target).

    Args:
        num_columns: number of input features.
        num_columns_rs: unused; kept for interface compatibility.
        units: base layer width.
        drop: dropout rate (halved for pipe 3).
        lbl_smooth: label smoothing of the binary cross-entropy loss.
        pipe: architecture id, 0..3 (1, 2, 3 or 4 hidden layers).

    Returns:
        Compiled tf.keras.Sequential model. The un-smoothed binary
        cross-entropy is tracked under the name 'score'.

    Raises:
        AttributeError: if `pipe` is not one of 0, 1, 2, 3.
    """
    hidden_sizes, drop_rate = _hidden_layout(units, drop, pipe)

    layers = [tf.keras.layers.Input(num_columns)]
    for size in hidden_sizes:
        layers.append(tf.keras.layers.BatchNormalization())
        layers.append(tf.keras.layers.Dropout(drop_rate))
        layers.append(
            tfa.layers.WeightNormalization(
                tf.keras.layers.Dense(size, activation='elu')
            )
        )
    # Output stage.
    layers.append(tf.keras.layers.BatchNormalization())
    layers.append(tf.keras.layers.Dropout(drop_rate))
    layers.append(
        tfa.layers.WeightNormalization(
            tf.keras.layers.Dense(206, activation='sigmoid')
        )
    )
    model = tf.keras.Sequential(layers)

    model.compile(
        optimizer=tfa.optimizers.Lookahead(
            tf.keras.optimizers.Adam(learning_rate=0.001),
            sync_period=10
        ),
        loss=losses.BinaryCrossentropy(label_smoothing=lbl_smooth),
        metrics=tf.keras.losses.BinaryCrossentropy(name='score')
    )
    return model

def metric(y_true, y_pred, smooth=.001):
    """Mean column-wise log loss between two DataFrames.

    Args:
        y_true: DataFrame of binary labels, one column per target.
        y_pred: DataFrame of predicted probabilities with the same columns.
        smooth: predictions are clipped to [smooth, 1 - smooth] first.

    Returns:
        Average of the per-target log losses.
    """
    clipped = np.clip(y_pred, smooth, 1 - smooth)
    per_target = [
        log_loss(
            y_true.loc[:, name],
            clipped.loc[:, name].astype(float),
            labels=[0, 1]
        )
        for name in y_true.columns
    ]
    return np.mean(per_target)

In [None]:
class LogPrintingCallback(tf.keras.callbacks.Callback):
    """Keras callback that prints a compact training log.

    Records `val_score` and `val_loss` after every epoch, prints a progress
    line every `min(100, PARAMS['PATIENCE'])` epochs (and on the last
    configured epoch), and reports the best epoch when training ends.
    """

    def on_train_begin(self, logs=None):
        """Reset the recorded history at the start of every fit call."""
        self.val_score = []
        self.val_loss = []

    def on_epoch_end(self, epoch, logs=None):
        """Record the validation metrics and print a periodic progress line."""
        self.val_score.append(logs['val_score'])
        self.val_loss.append(logs['val_loss'])
        # max(1, ...) avoids a modulo-by-zero when PATIENCE is 0.
        print_every = max(1, min(100, PARAMS['PATIENCE']))
        if epoch % print_every == 0 or epoch == (PARAMS['EPOCHS'] - 1):
            print(
                f"epoch {epoch + 1} | loss: {logs['loss']:.5f} | score: {logs['score']}",
                f"| val loss: {logs['val_loss']:.5f} | val score: {logs['val_score']}"
            )

    def on_train_end(self, logs=None):
        """Print the epoch with the lowest recorded validation score.

        The signature follows the Keras hook, which receives a single `logs`
        argument.
        """
        if not self.val_score:
            # Training ended before any epoch completed; nothing to report.
            return
        best_epoch = np.argmin(self.val_score) # np.argmin(self.val_loss)
        best_score = self.val_score[best_epoch] # self.val_loss[best_epoch]
        print(f'best model at epoch {best_epoch + 1} | score: {best_score}')
        
def get_lr_callback(batch_size=10, epochs=100, warmup=.025, plot=False):
    """Create a linear-warmup / exponential-decay learning-rate schedule.

    The rate ramps linearly from 1e-5 to 1e-2 over `epochs * warmup` epochs
    and then decays by a factor 0.95 per epoch towards 1e-6.

    Args:
        batch_size: accepted for interface compatibility; not used.
        epochs: total number of epochs, used to size the warmup phase.
        warmup: fraction of `epochs` spent ramping up.
        plot: when True return the plain schedule function instead of a
            Keras callback.

    Returns:
        `tf.keras.callbacks.LearningRateScheduler` wrapping the schedule, or
        the schedule function `epoch -> lr` itself when `plot` is True.
    """
    start_lr = 1e-5
    peak_lr = 1e-2 #10 * lr_start * batch_size
    floor_lr = start_lr / 10
    ramp_epochs = epochs * warmup
    hold_epochs = 0
    decay_rate = .95

    def lr_scheduler(epoch):
        # Warmup phase: straight line from start_lr to peak_lr.
        if epoch < ramp_epochs:
            return (peak_lr - start_lr) / ramp_epochs * epoch + start_lr
        # Optional plateau at the peak (disabled while hold_epochs is 0).
        if epoch < ramp_epochs + hold_epochs:
            return peak_lr
        # Exponential decay towards floor_lr.
        return (peak_lr - floor_lr) * decay_rate ** (epoch - ramp_epochs - hold_epochs) + floor_lr

    if plot:
        return lr_scheduler
    return tf.keras.callbacks.LearningRateScheduler(lr_scheduler, verbose=False)
    
# Visualise the warmup/decay schedule over the configured number of epochs.
if PARAMS['DECAY']:
    lr_scheduler_plot = get_lr_callback(
        batch_size=PARAMS['BATCH_SIZE'],
        epochs=PARAMS['EPOCHS'],
        plot=True,
    )
    xs = list(range(PARAMS['EPOCHS']))
    y = list(map(lr_scheduler_plot, xs))
    plt.plot(xs, y)
    plt.title(f'lr schedule from {y[0]:.5f} to {max(y):.3f} to {y[-1]:.8f}')
    plt.show()

In [None]:
def iter_shuffled(X, cols_to_shuffle=None, pre_shuffle=False, random_state=None):
    """Yield copies of `X` in which one column at a time is shuffled.

    The SAME array object is yielded on every iteration: one column is
    permuted, the array is yielded, and the column is restored from `X` once
    the consumer resumes the generator. Consumers must therefore use each
    yielded array before advancing and must not keep references to it.

    Args:
        X: 2-D array indexed as `X[:, col]`; it is copied, not modified.
        cols_to_shuffle: iterable of column indices; defaults to all columns.
        pre_shuffle: when True, a single shuffled copy of `X` is created up
            front and its columns are substituted one by one instead of
            shuffling each column separately.
        random_state: seed or random state accepted by `check_random_state`.

    Yields:
        The working copy of `X` with the current column shuffled.
    """
    rng = check_random_state(random_state)
    if cols_to_shuffle is None:
        cols_to_shuffle = range(X.shape[1])
    if pre_shuffle:
        # One shuffled copy shared by all columns.
        X_shuffled = X.copy()
        rng.shuffle(X_shuffled)
    X_res = X.copy()
    for col in tqdm(cols_to_shuffle):
        if pre_shuffle:
            X_res[:, col] = X_shuffled[:, col]
        else:
            # Shuffles the column of the working copy in place.
            rng.shuffle(X_res[:, col])
        yield X_res
        # Restore the original column before moving on to the next one.
        X_res[:, col] = X[:, col]

def get_score_importances(score_func, X, y, n_iter=5, cols_to_shuffle=None, random_state=None):
    """Compute permutation importances as score differences.

    Args:
        score_func: callable `(X, y) -> score`.
        X, y: evaluation data passed to `score_func`.
        n_iter: number of independent shuffling rounds.
        cols_to_shuffle: column indices to permute; all columns when None.
        random_state: seed or random state; one generator is shared by all
            rounds.

    Returns:
        Tuple (base_score, rounds) where `rounds` holds, per round, the list
        of `base_score - shuffled_score` values, one per shuffled column.
    """
    rng = check_random_state(random_state)
    base_score = score_func(X, y)
    scores_decreases = [
        _get_scores_shufled(
            score_func,
            X, y,
            cols_to_shuffle=cols_to_shuffle,
            random_state=rng,
            base_score=base_score
        )
        for _ in range(n_iter)
    ]
    return base_score, scores_decreases

def _get_scores_shufled(score_func, X, y, base_score, cols_to_shuffle=None, random_state=None):
    """Return `base_score - score` for every single-column shuffle of `X`."""
    # Each shuffled array is scored immediately, before the generator
    # advances and restores the column.
    return [
        -score_func(X_shuffled, y) + base_score
        for X_shuffled in iter_shuffled(X, cols_to_shuffle, random_state=random_state)
    ]

def _metric(y_true, y_pred, smooth=.001):
    """Mean column-wise log loss for array inputs.

    Only target columns with more than one positive label in `y_true`
    contribute to the average.

    Args:
        y_true: 2-D array of binary labels.
        y_pred: 2-D array of predicted probabilities, same shape.
        smooth: predictions are clipped to [smooth, 1 - smooth] first.

    Returns:
        Average log loss over the contributing columns.
    """
    clipped = np.clip(y_pred, smooth, 1 - smooth)
    column_losses = [
        log_loss(
            y_true[:, j],
            clipped[:, j].astype(float),
            labels=[0, 1]
        )
        for j in range(clipped.shape[1])
        if y_true[:, j].sum() > 1
    ]
    return np.mean(column_losses)

In [None]:
def train_model(mparams, X_trn, y_trn, X_val, y_val, n_fold, cols,
                X_tst=None, seed=False):
    """Train one fold model and return it with its best checkpoint loaded.

    Args:
        mparams: parameter dict (UNITS, DROPOUT, LBL_SMOOTH, PIPE, PATIENCE,
            DECAY, BATCH_SIZE, EPOCHS, PSEUDO_LBL).
        X_trn, y_trn: training features / targets.
        X_val, y_val: validation features / targets.
        n_fold: fold index, used in the checkpoint file name.
        cols: feature collection; only its length is used (input width).
        X_tst: test features, required when mparams['PSEUDO_LBL'] is set.
        seed: seed id used in the checkpoint name. `False` (default) or
            `None` selects the feature-importance checkpoint name; any other
            value, including 0, selects `seed_<seed>_fold_<n>`.

    Returns:
        The trained Keras model with the checkpointed weights loaded.

    Raises:
        ValueError: if pseudo-labelling is enabled without `X_tst`.
    """
    if mparams['PSEUDO_LBL'] and X_tst is None:
        # Fail before the expensive first training run rather than after it.
        raise ValueError('PSEUDO_LBL is enabled but X_tst was not provided')
    model = get_model(
            len(cols), 
            units=mparams['UNITS'], 
            drop=mparams['DROPOUT'], 
            lbl_smooth=mparams['LBL_SMOOTH'],
            pipe=mparams['PIPE']
    )
    # Identity check instead of truthiness: seed 0 is a valid seed and must
    # not fall through to (and overwrite) the feature-importance checkpoints.
    if seed is False or seed is None:
        checkpoint_path = f'{MODELS_PATH}/feat_imp_fold_{n_fold}.hdf5'
    else:
        checkpoint_path = f'{MODELS_PATH}/seed_{seed}_fold_{n_fold}.hdf5'
    earlystopper = EarlyStopping(
        monitor='val_score', 
        patience=mparams['PATIENCE'], 
        verbose=0,
        mode='min'
    )
    lrreducer = ReduceLROnPlateau(
        monitor='val_score', 
        factor=.1, 
        patience=int(mparams['PATIENCE'] / 2), 
        verbose=0, 
        min_lr=1e-5,
        mode='min'
    )
    checkpointer = ModelCheckpoint(
        checkpoint_path, 
        monitor='val_score', 
        verbose=0, 
        save_best_only=True,
        save_weights_only=True, 
        mode='min'
    )
    callbacks = [earlystopper, checkpointer, LogPrintingCallback()]
    if mparams['DECAY']:
        # Size the warmup from the configured epoch count so the schedule
        # follows EPOCHS instead of the function's default.
        callbacks.append(
            get_lr_callback(mparams['BATCH_SIZE'], epochs=mparams['EPOCHS'])
        )
        print('lr warmup and decay')
    else:
        callbacks.append(lrreducer)
        print('lr reduce on plateau')
    model.fit(
        X_trn, y_trn,
        validation_data=(X_val, y_val),
        epochs=mparams['EPOCHS'], 
        batch_size=mparams['BATCH_SIZE'],
        callbacks=callbacks, 
        verbose=0
    )
    if mparams['PSEUDO_LBL']:
        print('-' * 5, 'pseudo label training', '-' * 5)
        test_predict = model.predict(X_tst)
        # Rebuild with the SAME architecture; the checkpoint loaded below may
        # have been written by either training phase.
        model = get_model(
            len(cols), 
            units=mparams['UNITS'], 
            drop=mparams['DROPOUT'], 
            lbl_smooth=mparams['LBL_SMOOTH'],
            pipe=mparams['PIPE']
        )
        model.fit(
            np.vstack([X_trn, X_tst]),
            np.vstack([y_trn, test_predict]),
            validation_data=(X_val, y_val),
            epochs=mparams['EPOCHS'], 
            batch_size=mparams['BATCH_SIZE'],
            callbacks=callbacks, 
            verbose=0
        )
    model.load_weights(checkpoint_path)
    return model

In [None]:
# Build the modelling frames unconditionally: the training cells below need
# `train`, `train_targets`, `test` and `top_feats` even when the permutation
# importance step is switched off (FEAT_IMP = None).
train, train_targets, test, _ = get_train_test(
    train_features_raw, train_targets_raw, test_features_raw, 
    r_comps=PARAMS['REDUCE_COMPS'],
    q_flag=PARAMS['QTRANS'], pipe_scaler=PARAMS['PIPE_SCALER'],
    seed=PARAMS['SEED']
)
if PARAMS['FEAT_IMP']:
    perm_imps = np.zeros(train.shape[1])
    all_res = []
    mskf = MultilabelStratifiedKFold(
        n_splits=PARAMS['FOLDS'],     
        random_state=PARAMS['SEED'],         
        shuffle=True
    ).split(train_targets, train_targets)
    for n, (tr, te) in enumerate(mskf):
        print('=' * 10, f'feature importances | FOLD {n}', '=' * 10)
        model = train_model(
            mparams=PARAMS, 
            X_trn=train.values[tr], 
            y_trn=train_targets.values[tr], 
            X_val=train.values[te], 
            y_val=train_targets.values[te], 
            n_fold=n, 
            cols=train.columns,
            X_tst=None,
            seed=False
        )

        def _score(X, y):
            pred = model.predict(X)
            return _metric(y, pred, smooth=PARAMS['LBL_SMOOTH'])

        base_score, fold_imp = get_score_importances(
            _score, 
            train.values[te], train_targets.values[te], 
            n_iter=PARAMS['FEAT_IMP'], 
            random_state=PARAMS['SEED']
        )
        all_res.append(fold_imp)
        perm_imps += np.mean(fold_imp, axis=0)
        print('')
    # Importances are `base_score - shuffled_score` of a loss, so a negative
    # sum means that shuffling the column made the loss worse.
    top_feats = np.argwhere(perm_imps < 0).flatten()
    print('found features:', len(top_feats))
    if len(top_feats) == 0:
        # A model cannot be built on zero inputs; fall back to every column.
        top_feats = np.arange(train.shape[1])
        print('no feature selected, using all features:', len(top_feats))
    with open(f'{MODELS_PATH}/top_feats.npy', 'wb') as file:
        np.save(file, top_feats)
else:
    # Feature selection disabled: use every column of the training frame.
    top_feats = np.arange(train.shape[1])
    print('feature importance skipped, using all features:', len(top_feats))
elapsed_time = time.time() - start_time
print(f'time elapsed: {elapsed_time // 60:.0f} min {elapsed_time % 60:.0f} sec')

In [None]:
# Hyperopt search space. Keys match PARAMS entries, so a sampled point can be
# merged into a parameter dict with `dict.update`.
space = {
    # Dimensions currently excluded from the search:
    #'FOLDS': hp.choice('FOLDS', [8]),
    #'BATCH_SIZE': hp.choice('BATCH_SIZE', [32, 64, 128]),
    #'DECAY': hp.choice('DECAY', [True]),
    #'PATIENCE': hp.choice('PATIENCE', [10]),
    #'UNITS': hp.choice('UNITS', [1024]),
    'DROPOUT': hp.quniform('DROPOUT', 0.1, 0.5, 0.01),
    'LBL_SMOOTH': hp.quniform('LBL_SMOOTH', 1e-4, 5e-3, 1e-5),
    'REDUCE_COMPS': hp.choice('REDUCE_COMPS', [20, 10]),
    'THRESHOLD': hp.quniform('THRESHOLD', 0, 0.05, 0.001),
    'PIPE_SCALER': hp.choice('PIPE_SCALER', [1, 2, 3]),
    'QTRANS': hp.choice('QTRANS', [True, False]),
}
# Number of search points evaluated by fmin.
MAX_EVALS = 50

In [None]:
if PARAMS['HOPT']:
    PARAMS_OPT = PARAMS.copy()

    def objective(params):
        """Hyperopt objective: out-of-fold metric of one CV run for `params`.

        The sampled values are merged into PARAMS_OPT, the data pipeline is
        rebuilt with them and one model per fold is trained.
        """
        print('=' * 50, '\n', '=' * 50)
        PARAMS_OPT.update(params)
        # get_train_test takes its variance quantile from the global PARAMS,
        # so the sampled THRESHOLD is exposed there for the duration of the
        # call; otherwise this search dimension would have no effect.
        saved_threshold = PARAMS['THRESHOLD']
        PARAMS['THRESHOLD'] = PARAMS_OPT['THRESHOLD']
        try:
            train, train_targets, test, _ = get_train_test(
                train_features_raw, train_targets_raw, test_features_raw, 
                r_comps=PARAMS_OPT['REDUCE_COMPS'],
                q_flag=PARAMS_OPT['QTRANS'], pipe_scaler=PARAMS_OPT['PIPE_SCALER'],
                seed=PARAMS_OPT['SEED']
            )
        finally:
            PARAMS['THRESHOLD'] = saved_threshold
        # The search always uses every column of the rebuilt frame.
        top_feats = list(range(len(train.columns)))
        print('top features:', len(top_feats))
        res = train_targets.copy()
        res.loc[:, train_targets.columns] = 0
        mskf = MultilabelStratifiedKFold(
            n_splits=PARAMS_OPT['FOLDS'],     
            random_state=PARAMS_OPT['SEED'],
            shuffle=True
        ).split(train_targets, train_targets)
        for n, (tr, te) in enumerate(mskf):
            print('=' * 10, f'HYPEROPT | FOLD {n}', '=' * 10)
            model = train_model(
                mparams=PARAMS_OPT, 
                X_trn=train.values[tr][:, top_feats], 
                y_trn=train_targets.values[tr], 
                X_val=train.values[te][:, top_feats], 
                y_val=train_targets.values[te], 
                n_fold=n, 
                cols=top_feats,
                X_tst=test.values[:, top_feats],
                seed=PARAMS_OPT['SEED']
            )
            val_predict = model.predict(train.values[te][:, top_feats])
            # Each row is in exactly one validation fold, so this fills the
            # out-of-fold prediction matrix.
            res.loc[te, train_targets.columns] += val_predict
            print('')
        oof_metric = metric(train_targets, res, smooth=PARAMS_OPT['LBL_SMOOTH'])
        print(f'\nparams: {params} | oof metric: {oof_metric}\n')
        return oof_metric

    best_hopt = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=MAX_EVALS)
    print('best search:', best_hopt, '\nbest params:', space_eval(space, best_hopt))
    # Store the best point on top of the base configuration.
    PARAMS_OPT.update(space_eval(space, best_hopt))
    params_file = f'{MODELS_PATH}/hopt_params.json'
    with open(params_file, 'w') as file:
        json.dump(PARAMS_OPT, file)

elapsed_time = time.time() - start_time
print(f'time elapsed: {elapsed_time // 60:.0f} min {elapsed_time % 60:.0f} sec')

In [None]:
# Final training: SEEDS repetitions of a FOLDS-fold CV. Test predictions are
# averaged over all models, out-of-fold predictions over the repetitions.
if not PARAMS['HOPT']:
    target_cols = train_targets.columns
    res = train_targets.copy()
    ssubm.loc[:, target_cols] = 0
    res.loc[:, target_cols] = 0
    X_all = train.values
    y_all = train_targets.values
    X_test = test.values[:, top_feats]
    for seed in range(PARAMS['SEEDS']):
        # A different split per repetition: the splitter seed is offset by
        # the repetition index.
        splitter = MultilabelStratifiedKFold(
            n_splits=PARAMS['FOLDS'],
            random_state=PARAMS['SEED'] + seed,
            shuffle=True
        )
        for n, (tr, te) in enumerate(splitter.split(train_targets, train_targets)):
            print('=' * 10, f'SEED {seed} | FOLD {n}', '=' * 10)
            X_val = X_all[te][:, top_feats]
            model = train_model(
                mparams=PARAMS,
                X_trn=X_all[tr][:, top_feats],
                y_trn=y_all[tr],
                X_val=X_val,
                y_val=y_all[te],
                n_fold=n,
                cols=top_feats,
                X_tst=X_test,
                seed=seed
            )
            test_predict = model.predict(X_test)
            val_predict = model.predict(X_val)
            ssubm.loc[:, target_cols] += test_predict
            res.loc[te, target_cols] += val_predict
            print('')
    # Every model predicted all test rows; every train row was predicted once
    # per repetition.
    ssubm.loc[:, target_cols] /= (PARAMS['FOLDS'] * PARAMS['SEEDS'])
    res.loc[:, target_cols] /= PARAMS['SEEDS']

elapsed_time = time.time() - start_time
print(f'time elapsed: {elapsed_time // 60:.0f} min {elapsed_time % 60:.0f} sec')

In [None]:
# Report the out-of-fold score, write the submission and append this run to
# the results log.
if not PARAMS['HOPT']:
    print(
        'params:', PARAMS,
        f'\nOOF metric: {metric(train_targets, res, smooth=PARAMS["LBL_SMOOTH"])}'
    )
    # Rows with cp_type == 1 get all-zero predictions. The mask is applied
    # positionally so it does not depend on index alignment between the
    # test frame and the submission frame.
    ssubm.loc[(test['cp_type'] == 1).values, train_targets.columns] = 0
    ssubm.to_csv('submission_.csv', index=False)
    
    # Work on a copy so logging the score does not mutate the global PARAMS.
    save_dict = dict(PARAMS)
    # NOTE(review): the logged score uses metric()'s default clipping
    # (smooth=.001) while the printed one uses LBL_SMOOTH; kept as is so the
    # value stays comparable with existing rows of results.csv. Confirm intent.
    save_dict['OOF metric'] = metric(train_targets, res)
    df_save = pd.DataFrame(save_dict, index=[0])
    if os.path.exists('results.csv'):
        df_old = pd.read_csv('results.csv', sep='\t', index_col=0)
        # pd.concat replaces DataFrame.append, which newer pandas removed.
        df_save = pd.concat([df_old, df_save], ignore_index=True)
    df_save.to_csv('results.csv', sep='\t')

In [None]:
# Display the run log; results.csv is appended to by the previous cell when
# PARAMS['HOPT'] is False.
pd.read_csv('results.csv', sep='\t', index_col=0)