In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import os
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow.keras.backend as K
import tensorflow.keras.layers as L
import tensorflow.keras.models as M
import tensorflow_addons as tfa
from tensorflow.keras import losses, backend
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from iterstrat.ml_stratifiers import MultilabelStratifiedKFold
from sklearn.model_selection import KFold
from sklearn.metrics import log_loss
from typing import Tuple, List, Callable, Any
from sklearn.utils import check_random_state
from tqdm import tqdm
# Report the TensorFlow build in use and every GPU it can see.
print('tensorflow ver:', tf.__version__)
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
# Iterating an empty list prints nothing, so no separate emptiness check is needed.
for gpu_device in gpu_devices:
    print('device available:', gpu_device)

In [None]:
# Runtime switches: KAGGLE selects the Kaggle-kernel directory layout,
# VER tags the local directory that receives model checkpoints.
KAGGLE = False
VER = 'v0'
if KAGGLE:
    DATA_PATH = '../input/lish-moa'
    MODELS_PATH = '.'
else:
    DATA_PATH = './data'
    MODELS_PATH = f'./models_{VER}'
    # exist_ok=True makes re-running the cell a no-op and avoids the
    # check-then-create race of os.path.exists() + os.mkdir().
    os.makedirs(MODELS_PATH, exist_ok=True)
        
def seed_all(seed=2020):
    """Seed every random source this notebook touches.

    Sets the ``PYTHONHASHSEED`` environment variable and seeds Python's
    ``random`` module, NumPy's global generator and TensorFlow's global
    generator with the same value.

    Args:
        seed: Integer seed shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, tf.random.set_seed):
        seeder(seed)

# Experiment configuration read by the model, callback and training cells.
PARAMS = {
    'SEED': 2020,          # global seed; also the CV seed of the feature-importance run
    'SEEDS': 5,            # number of differently seeded CV repetitions
    'FOLDS': 5,            # folds per repetition
    'EPOCHS': 100,         # maximum epochs per fit
    'BATCH_SIZE': 128,
    'DECAY': True,         # True: warmup/decay schedule, False: ReduceLROnPlateau
    'PATIENCE': 40,        # early-stopping patience (also sets the log interval)
    'UNITS': 1048,         # width of the first hidden layer; the second uses half
    'DROPOUT': .5,
    'FEAT_IMP': False,     # True: recompute permutation importances, False: use the stored list
    'PSEUDO_LBL': False,   # True: refit each fold with pseudo-labelled test rows
    'LBL_SMOOTH': .000,    # label smoothing passed to the loss
    'WEIGHT_NORM': True,   # wrap dense layers in tfa WeightNormalization
}

seed_all(PARAMS['SEED'])

In [None]:
# Read the four competition tables from DATA_PATH, in the same order as before.
train_features, train_targets, test_features, ssubm = map(
    pd.read_csv,
    (
        f'{DATA_PATH}/train_features.csv',
        f'{DATA_PATH}/train_targets_scored.csv',
        f'{DATA_PATH}/test_features.csv',
        f'{DATA_PATH}/sample_submission.csv',
    ),
)
# One print call with the default separator keeps the output layout unchanged.
print(
    'train features loaded:', train_features.shape,
    '\ntrain targets loaded:', train_targets.shape,
    '\ntest features loaded:', test_features.shape,
    '\nsubmission loaded:', ssubm.shape,
)

In [None]:
def preprocess(df):
    """Return a numeric copy of a feature table.

    ``cp_type`` is encoded as ``trt_cp -> 0`` / ``ctl_vehicle -> 1``,
    ``cp_dose`` as ``D1 -> 0`` / ``D2 -> 1``, and the ``sig_id`` column is
    removed. Values missing from a mapping become NaN. The input frame is
    left untouched.

    Args:
        df: DataFrame with at least ``sig_id``, ``cp_type`` and ``cp_dose``.

    Returns:
        A new DataFrame without ``sig_id`` and with both categorical
        columns replaced by their integer codes.

    Raises:
        KeyError: If any of the three required columns is missing.
    """
    df = df.copy()
    # Assign whole columns rather than writing through ``df.loc[:, col]``:
    # on pandas >= 2.0 the latter sets values in place and keeps the
    # original object dtype, which turns ``df.values`` into an object array.
    df['cp_type'] = df['cp_type'].map({'trt_cp': 0, 'ctl_vehicle': 1})
    df['cp_dose'] = df['cp_dose'].map({'D1': 0, 'D2': 1})
    return df.drop(columns=['sig_id'])

def get_model(num_columns, units=2048, drop=.4, lbl_smooth=.001,
              num_targets=206, weight_norm=None):
    """Build and compile the feed-forward multi-label classifier.

    Architecture: BatchNorm -> Dense(units, elu) -> BatchNorm -> Dropout
    -> Dense(units // 2, elu) -> BatchNorm -> Dropout
    -> Dense(num_targets, sigmoid). When weight normalization is enabled
    every Dense layer is wrapped in ``tfa.layers.WeightNormalization``.

    Args:
        num_columns: Number of input features.
        units: Width of the first hidden layer; the second uses
            ``int(units / 2)``.
        drop: Dropout rate applied after each hidden block.
        lbl_smooth: Label smoothing of the binary cross-entropy loss.
        num_targets: Number of sigmoid outputs (defaults to the 206
            previously hard-coded).
        weight_norm: Whether to wrap Dense layers in weight normalization.
            ``None`` (default) reads ``PARAMS['WEIGHT_NORM']``, which is
            what the function always did before.

    Returns:
        A compiled ``tf.keras.Sequential`` model (Adam optimizer, binary
        cross-entropy loss).
    """
    if weight_norm is None:
        weight_norm = PARAMS['WEIGHT_NORM']

    def dense(width, activation):
        # Single place that decides whether a Dense layer gets wrapped, so
        # the two variants of the network cannot drift apart.
        layer = tf.keras.layers.Dense(width, activation=activation)
        return tfa.layers.WeightNormalization(layer) if weight_norm else layer

    model = tf.keras.Sequential(
        [
            # Explicit shape tuple: the documented form of the argument.
            tf.keras.layers.Input(shape=(num_columns,)),
            tf.keras.layers.BatchNormalization(),
            dense(units, 'elu'),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dropout(drop),
            dense(int(units / 2), 'elu'),
            tf.keras.layers.BatchNormalization(),
            tf.keras.layers.Dropout(drop),
            dense(num_targets, 'sigmoid'),
        ]
    )
    model.compile(
        optimizer=tf.keras.optimizers.Adam(),
        loss=losses.BinaryCrossentropy(label_smoothing=lbl_smooth)
    )
    return model

def metric(y_true, y_pred):
    """Mean column-wise log loss over the scored targets.

    Iterates over the columns of the module-level ``train_targets`` frame
    and averages the binary log loss of each one.

    Args:
        y_true: DataFrame of 0/1 labels containing every target column.
        y_pred: DataFrame of predicted probabilities with the same columns.

    Returns:
        The unweighted mean of the per-target log losses.
    """
    per_target = [
        log_loss(
            y_true.loc[:, target],
            y_pred.loc[:, target].astype(float),
            labels=[0, 1],  # lets log_loss score columns with a single class
        )
        for target in train_targets.columns
    ]
    return np.mean(per_target)

In [None]:
# Encode both feature tables.
train = preprocess(train_features)
test = preprocess(test_features)
# Build the row mask once from the unfiltered features so that features and
# targets are cut identically: only rows with cp_type == 0 are kept.
treated = train['cp_type'] == 0
train_targets = (
    train_targets
    .drop(columns='sig_id')
    .loc[treated]
    .reset_index(drop=True)
)
train = train.loc[treated].reset_index(drop=True)
# Default feature selection: column positions 1..874 of the encoded frame.
top_feats = list(range(1, 875))
print('total features:', len(top_feats))

In [None]:
class LogPrintingCallback(tf.keras.callbacks.Callback):
    """Compact training logger.

    Records the validation loss of every epoch, prints a progress line every
    ``min(100, PARAMS['PATIENCE'])`` epochs (and on the last configured
    epoch), and reports the best epoch when training ends. Requires the fit
    to be run with validation data, since it reads ``logs['val_loss']``.
    """

    def on_train_begin(self, logs=None):
        """Reset the per-fit validation-loss history."""
        self.val_loss = []

    def on_epoch_end(self, epoch, logs=None):
        """Store this epoch's validation loss and print progress periodically."""
        self.val_loss.append(logs['val_loss'])
        # Clamp to 1 so PATIENCE == 0 cannot cause a modulo-by-zero.
        log_every = max(1, min(100, PARAMS['PATIENCE']))
        if epoch % log_every == 0 or epoch == (PARAMS['EPOCHS'] - 1):
            print(
                f"epoch {epoch + 1} | loss: {logs['loss']:.5f} | val loss: {logs['val_loss']:.5f}"
            )

    def on_train_end(self, logs=None):
        """Print the epoch with the lowest recorded validation loss.

        The signature follows the Keras hook ``on_train_end(logs=None)``;
        the former extra positional parameter only ever received ``logs``.
        """
        if not self.val_loss:
            # No epoch finished, so there is nothing to rank.
            print('no validation loss recorded')
            return
        best_epoch = int(np.argmin(self.val_loss))
        best_score = self.val_loss[best_epoch]
        print(f'best model at epoch {best_epoch + 1} | score: {best_score}')
        
def get_lr_callback(batch_size=10, epochs=100, warmup=.1, plot=False):
    """Create the warmup-then-exponential-decay learning-rate schedule.

    The rate climbs linearly from ``1e-5`` to ``1e-4 * batch_size`` over the
    first ``epochs * warmup`` epochs, then decays by a factor of 0.925 per
    epoch towards ``1e-6``.

    Args:
        batch_size: Scales the peak learning rate.
        epochs: Total epoch count; only used to size the warmup phase.
        warmup: Fraction of ``epochs`` spent ramping up.
        plot: When true, return the bare ``epoch -> lr`` function instead of
            a Keras callback.

    Returns:
        A ``LearningRateScheduler`` callback, or the schedule function
        itself when ``plot`` is true.
    """
    lr_start = 0.00001
    lr_max = 10 * lr_start * batch_size
    lr_min = lr_start / 10
    lr_decay = 0.925
    ramp_epochs = epochs * warmup
    hold_epochs = 0  # epochs held at the peak before decay starts

    def lr_scheduler(epoch):
        # Phase 1: linear warmup.
        if epoch < ramp_epochs:
            return (lr_max - lr_start) / ramp_epochs * epoch + lr_start
        # Phase 2: plateau (empty while hold_epochs is 0).
        if epoch < ramp_epochs + hold_epochs:
            return lr_max
        # Phase 3: exponential decay towards lr_min.
        return (lr_max - lr_min) * lr_decay ** (epoch - ramp_epochs - hold_epochs) + lr_min

    if plot:
        return lr_scheduler
    return tf.keras.callbacks.LearningRateScheduler(lr_scheduler, verbose=False)
    
# Visualise the warmup/decay schedule that training will use.
if PARAMS['DECAY']:
    lr_scheduler_plot = get_lr_callback(
        batch_size=PARAMS['BATCH_SIZE'],
        epochs=PARAMS['EPOCHS'],
        plot=True
    )
    xs = list(range(PARAMS['EPOCHS']))
    y = [lr_scheduler_plot(x) for x in xs]
    fig, ax = plt.subplots()
    ax.plot(xs, y)
    ax.set_xlabel('epoch')
    ax.set_ylabel('learning rate')
    # The starting rate is 1e-5; five decimals are needed for it to show up
    # as anything other than 0.000 in the title.
    ax.set_title(f'lr schedule from {y[0]:.5f} to {max(y):.5f} to {y[-1]:.8f}')
    plt.show()

In [None]:
def iter_shuffled(X, cols_to_shuffle=None, pre_shuffle=False, random_state=None):
    """Yield copies of ``X`` with one column permuted at a time.

    The same working array is yielded on every step and restored right
    after the consumer resumes the generator, so each yielded value must be
    used before asking for the next one.

    Args:
        X: 2-D array of features.
        cols_to_shuffle: Column indices to permute; all columns when None.
        pre_shuffle: If true, shuffle the rows of one copy of ``X`` up front
            and take every permuted column from it; otherwise shuffle each
            column independently as it is visited.
        random_state: Seed or generator passed to ``check_random_state``.

    Yields:
        The working array with exactly one column permuted.
    """
    rng = check_random_state(random_state)
    columns = range(X.shape[1]) if cols_to_shuffle is None else cols_to_shuffle
    donor = None
    if pre_shuffle:
        donor = X.copy()
        rng.shuffle(donor)
    work = X.copy()
    for col in tqdm(columns):
        if donor is not None:
            work[:, col] = donor[:, col]
        else:
            # Shuffles the column view in place.
            rng.shuffle(work[:, col])
        yield work
        # Undo the permutation before moving on to the next column.
        work[:, col] = X[:, col]

def get_score_importances(score_func, X, y, n_iter=5, cols_to_shuffle=None, random_state=None):
    """Run permutation importance ``n_iter`` times.

    Args:
        score_func: Callable ``(X, y) -> float``.
        X: 2-D feature array.
        y: Targets passed through to ``score_func``.
        n_iter: Number of independent shuffling rounds.
        cols_to_shuffle: Column indices to evaluate; all columns when None.
        random_state: Seed or generator; one generator is shared by all rounds.

    Returns:
        ``(base_score, scores_decreases)`` where ``scores_decreases`` holds
        one list per round of ``base_score - shuffled_score`` per column.
    """
    rng = check_random_state(random_state)
    base_score = score_func(X, y)
    scores_decreases = [
        _get_scores_shufled(
            score_func,
            X, y,
            base_score=base_score,
            cols_to_shuffle=cols_to_shuffle,
            random_state=rng,
        )
        for _ in range(n_iter)
    ]
    return base_score, scores_decreases

def _get_scores_shufled(score_func, X, y, base_score, cols_to_shuffle=None, random_state=None):
    """Score ``X`` once per shuffled column.

    Returns:
        A list with ``base_score - score_func(X_shuffled, y)`` for every
        column visited by ``iter_shuffled``.
    """
    # Each score is computed before the generator advances, which matters
    # because iter_shuffled reuses and restores a single working array.
    return [
        -score_func(X_shuffled, y) + base_score
        for X_shuffled in iter_shuffled(X, cols_to_shuffle, random_state=random_state)
    ]

def _metric(y_true, y_pred):
    """Mean column-wise log loss over array inputs.

    Only columns with more than one positive label in ``y_true`` are scored.

    Args:
        y_true: 2-D array of 0/1 labels.
        y_pred: 2-D array of predicted probabilities, same shape.

    Returns:
        The mean log loss over the scored columns, or NaN when no column
        has more than one positive.
    """
    metrics = [
        log_loss(
            y_true[:, i],
            y_pred[:, i].astype(float),
            # Same explicit label set as ``metric``: keeps a column made of
            # a single class from raising inside log_loss.
            labels=[0, 1],
        )
        for i in range(y_pred.shape[1])
        if y_true[:, i].sum() > 1
    ]
    if not metrics:
        # np.mean([]) also yields NaN but emits a RuntimeWarning.
        return np.nan
    return np.mean(metrics)

In [None]:
%%time
# Feature selection: either recompute permutation importances with a K-fold
# run on all columns, or fall back to a stored list of column positions.
if PARAMS['FEAT_IMP']:
    perm_imps = np.zeros(train.shape[1])
    all_res = []
    mskf = MultilabelStratifiedKFold(
        n_splits=PARAMS['FOLDS'],
        random_state=PARAMS['SEED'],
        shuffle=True
    ).split(train_targets, train_targets)

    for n, (tr, te) in enumerate(mskf):
        print('=' * 10, f'feature importances | FOLD {n}', '=' * 10)
        model = get_model(
            len(train.columns),
            units=PARAMS['UNITS'],
            drop=PARAMS['DROPOUT'],
            lbl_smooth=PARAMS['LBL_SMOOTH']
        )
        checkpoint_path = f'{MODELS_PATH}/feat_imp_fold_{n}.hdf5'
        earlystopper = EarlyStopping(
            monitor='val_loss',
            patience=PARAMS['PATIENCE'],
            verbose=0,
            mode='min'
        )
        checkpointer = ModelCheckpoint(
            checkpoint_path,
            monitor='val_loss',
            verbose=0,
            save_best_only=True,
            save_weights_only=True,
            mode='min'
        )
        callbacks = [earlystopper, checkpointer, LogPrintingCallback()]
        if PARAMS['DECAY']:
            # Pass the epoch count explicitly; otherwise the warmup length is
            # sized for the function's default of 100 epochs no matter what
            # PARAMS['EPOCHS'] says.
            callbacks.append(
                get_lr_callback(
                    batch_size=PARAMS['BATCH_SIZE'],
                    epochs=PARAMS['EPOCHS']
                )
            )
            print('lr warmup and decay')
        else:
            lrreducer = ReduceLROnPlateau(
                monitor='val_loss',
                factor=.1,
                patience=int(PARAMS['PATIENCE'] / 2),
                verbose=1,
                min_lr=1e-5,
                mode='min'
            )
            callbacks.append(lrreducer)
            print('lr reduce on plateau')
        model.fit(
            train.values[tr],
            train_targets.values[tr],
            validation_data=(train.values[te], train_targets.values[te]),
            epochs=PARAMS['EPOCHS'],
            batch_size=PARAMS['BATCH_SIZE'],
            callbacks=callbacks,
            verbose=0
        )
        # Score with the best checkpoint, not the last epoch.
        model.load_weights(checkpoint_path)

        def _score(X, y):
            pred = model.predict(X)
            return _metric(y, pred)

        base_score, fold_imp = get_score_importances(
            _score,
            train.values[te], train_targets.values[te],
            n_iter=1,
            random_state=PARAMS['SEED']
        )
        all_res.append(fold_imp)
        perm_imps += np.mean(fold_imp, axis=0)
        print('')
    # Importances are base_loss - shuffled_loss, so a negative value means
    # shuffling the column made the loss worse; those columns are kept.
    top_feats = np.argwhere(perm_imps < 0).flatten()
    print('found features:', len(top_feats))
    print('features:\n', top_feats)
else:
    # Stored selection of column positions in the encoded feature frame.
    top_feats = [
        2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  29,
        30,  31,  32,  33,  35,  36,  37,  38,  39,  40,  41,  42,  43,
        44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,
        58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  71,
        72,  73,  74,  76,  77,  78,  79,  80,  81,  82,  83,  84,  86,
        88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
        114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
        127, 128, 130, 131, 133, 134, 136, 137, 138, 139, 140, 141, 142,
        143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
        156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
        169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181,
        183, 184, 185, 186, 188, 189, 190, 191, 192, 193, 195, 196, 197,
        198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210,
        211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 223, 225,
        226, 227, 228, 230, 231, 232, 233, 234, 236, 237, 238, 239, 240,
        241, 242, 243, 244, 245, 247, 248, 249, 250, 251, 252, 253, 254,
        256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268,
        269, 271, 272, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283,
        284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296,
        297, 299, 300, 301, 302, 303, 305, 306, 307, 308, 309, 310, 311,
        312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
        325, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338,
        339, 340, 341, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352,
        353, 354, 355, 356, 357, 358, 360, 361, 362, 363, 364, 365, 366,
        367, 368, 369, 370, 371, 372, 374, 375, 376, 377, 378, 379, 380,
        381, 382, 384, 385, 386, 387, 388, 390, 391, 392, 393, 394, 395,
        396, 397, 398, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409,
        410, 411, 412, 414, 415, 416, 417, 418, 419, 420, 421, 423, 424,
        425, 426, 427, 430, 432, 433, 434, 435, 436, 437, 438, 439, 440,
        441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 453, 454,
        455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467,
        468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 481,
        482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494,
        495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 507, 508,
        509, 510, 511, 512, 513, 514, 515, 517, 518, 519, 520, 521, 522,
        523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535,
        536, 537, 538, 539, 541, 542, 543, 544, 545, 546, 547, 548, 549,
        550, 551, 552, 553, 554, 555, 556, 557, 559, 560, 561, 562, 563,
        565, 566, 567, 568, 569, 570, 571, 572, 573, 575, 576, 577, 578,
        579, 580, 581, 582, 583, 584, 585, 586, 587, 589, 590, 591, 592,
        593, 594, 596, 597, 598, 599, 600, 601, 602, 603, 604, 606, 607,
        608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620,
        622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634,
        635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 648,
        649, 650, 651, 652, 654, 655, 656, 658, 659, 660, 661, 662, 663,
        664, 666, 667, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678,
        679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 690, 691, 692,
        693, 694, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706,
        707, 708, 709, 710, 711, 713, 714, 715, 716, 717, 718, 720, 721,
        723, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 737,
        738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 750, 751,
        752, 753, 754, 755, 756, 758, 759, 760, 761, 762, 764, 765, 766,
        767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779,
        780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 792, 793,
        794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806,
        807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819,
        820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832,
        833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 845, 846, 847,
        848, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861,
        862, 863, 864, 866, 867, 868, 869, 870, 871, 872, 873, 874
    ]
    print('top features loaded')

In [None]:
%%time
# Multi-seed K-fold training. Every (seed, fold) model contributes test
# predictions to the submission average and validation predictions to the
# out-of-fold frame `res`.
target_cols = list(train_targets.columns)
# Slice the selected feature columns once instead of on every fold.
X_train = train.values[:, top_feats]
y_train = train_targets.values
X_test = test.values[:, top_feats]
# Accumulate in float arrays. Adding float predictions into the integer
# target frame through .loc relied on an implicit upcast that pandas has
# deprecated.
test_pred_sum = np.zeros((X_test.shape[0], len(target_cols)))
oof_pred_sum = np.zeros((X_train.shape[0], len(target_cols)))
for seed in range(PARAMS['SEEDS']):
    mskf = MultilabelStratifiedKFold(
        n_splits=PARAMS['FOLDS'],
        random_state=seed,
        shuffle=True
    ).split(train_targets, train_targets)
    for n, (tr, te) in enumerate(mskf):
        print('=' * 10, f'SEED {seed} | FOLD {n}', '=' * 10)
        model = get_model(
            len(top_feats),
            units=PARAMS['UNITS'],
            drop=PARAMS['DROPOUT'],
            lbl_smooth=PARAMS['LBL_SMOOTH']
        )
        checkpoint_path = f'{MODELS_PATH}/seed_{seed}_fold_{n}.hdf5'
        earlystopper = EarlyStopping(
            monitor='val_loss',
            patience=PARAMS['PATIENCE'],
            verbose=0,
            mode='min'
        )
        checkpointer = ModelCheckpoint(
            checkpoint_path,
            monitor='val_loss',
            verbose=0,
            save_best_only=True,
            save_weights_only=True,
            mode='min'
        )
        callbacks = [earlystopper, checkpointer, LogPrintingCallback()]
        if PARAMS['DECAY']:
            # Size the warmup from the configured epoch count rather than
            # the function's default of 100.
            callbacks.append(
                get_lr_callback(
                    batch_size=PARAMS['BATCH_SIZE'],
                    epochs=PARAMS['EPOCHS']
                )
            )
            print('lr warmup and decay')
        else:
            lrreducer = ReduceLROnPlateau(
                monitor='val_loss',
                factor=.1,
                patience=int(PARAMS['PATIENCE'] / 2),
                verbose=1,
                min_lr=1e-5,
                mode='min'
            )
            callbacks.append(lrreducer)
            print('lr reduce on plateau')
        model.fit(
            X_train[tr],
            y_train[tr],
            validation_data=(X_train[te], y_train[te]),
            epochs=PARAMS['EPOCHS'],
            batch_size=PARAMS['BATCH_SIZE'],
            callbacks=callbacks,
            verbose=0
        )
        if PARAMS['PSEUDO_LBL']:
            print('-' * 5, 'pseudo label training', '-' * 5)
            # Label the test rows with the first-stage model, then refit a
            # fresh model on train + pseudo-labelled test rows.
            test_predict = model.predict(X_test)
            model = get_model(
                len(top_feats),
                units=PARAMS['UNITS'],
                drop=PARAMS['DROPOUT'],
                lbl_smooth=PARAMS['LBL_SMOOTH']
            )
            # NOTE(review): the same callback instances are reused for this
            # second fit; confirm that any best-score state kept by the
            # checkpoint callback is meant to carry over from the first fit.
            model.fit(
                np.vstack([X_train[tr], X_test]),
                np.vstack([y_train[tr], test_predict]),
                validation_data=(X_train[te], y_train[te]),
                epochs=PARAMS['EPOCHS'],
                batch_size=PARAMS['BATCH_SIZE'],
                callbacks=callbacks,
                verbose=0
            )
        # Predict with the best checkpoint, not the last epoch.
        model.load_weights(checkpoint_path)
        test_pred_sum += model.predict(X_test)
        oof_pred_sum[te] += model.predict(X_train[te])
        print('')
# Average over the configured model count instead of the leaked loop index.
n_models = PARAMS['SEEDS'] * PARAMS['FOLDS']
ssubm[target_cols] = test_pred_sum / n_models
# Each training row is validated exactly once per seed.
res = pd.DataFrame(
    oof_pred_sum / PARAMS['SEEDS'],
    index=train_targets.index,
    columns=target_cols
)

In [None]:
# Report the configuration together with the out-of-fold score.
oof_score = metric(train_targets, res)
print(
    'params:', PARAMS,
    f'\nOOF metric: {oof_score}'
)
# Test rows encoded as control vehicle (cp_type == 1) get all-zero predictions.
is_control = test['cp_type'] == 1
ssubm.loc[is_control, train_targets.columns] = 0
ssubm.to_csv('submission.csv', index=False)

params: {'SEED': 2020, 'SEEDS': 5, 'FOLDS': 5, 'EPOCHS': 100, 'BATCH_SIZE': 128, 'DECAY': True, 'PATIENCE': 40, 'UNITS': 1048, 'DROPOUT': 0.5, 'FEAT_IMP': False, 'PSEUDO_LBL': False, 'LBL_SMOOTH': 0.0, 'WEIGHT_NORM': False} 
OOF metric: 0.015605982651243869

params: {'SEED': 2020, 'SEEDS': 5, 'FOLDS': 5, 'EPOCHS': 100, 'BATCH_SIZE': 128, 'DECAY': True, 'PATIENCE': 40, 'UNITS': 1048, 'DROPOUT': 0.5, 'FEAT_IMP': False, 'PSEUDO_LBL': False, 'LBL_SMOOTH': 0.0, 'WEIGHT_NORM': False} 
OOF metric: 0.015545521344440608

params: {'SEED': 2020, 'SEEDS': 5, 'FOLDS': 5, 'EPOCHS': 100, 'BATCH_SIZE': 128, 'DECAY': True, 'PATIENCE': 40, 'UNITS': 1048, 'DROPOUT': 0.5, 'FEAT_IMP': False, 'PSEUDO_LBL': False, 'LBL_SMOOTH': 0.0, 'WEIGHT_NORM': True} 
OOF metric: 0.015582580824242315