In [1]:
import numpy as np
import pandas as pd

import os, gc, random
from sklearn.model_selection import GroupKFold
from sklearn.metrics import f1_score

import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import Callback, LearningRateScheduler
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tensorflow.keras import losses, models, optimizers
import tensorflow_addons as tfa
from tf_nn_utils import *

import warnings
# Silence library warnings so the training logs stay readable.
# (A single catch-all "ignore" filter is enough; the original installed the same filter twice.)
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', 1000)
pd.set_option('display.max_rows', 500)

# Set GPU memory growth on every visible GPU. Iterating (instead of indexing
# gpus[0]) keeps the notebook runnable on CPU-only machines, where the list is
# empty, and applies the same setting to all devices on multi-GPU hosts.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

# seed_everything is not defined in this notebook; presumably it comes from the
# star import of tf_nn_utils -- TODO confirm which RNGs it seeds.
seed_everything(42)

In [2]:
# Configuration and main hyperparameters
nn_epochs = 100  # maximum number of epochs passed to model.fit for each fold
nn_batch_size = 16  # batch size passed to model.fit
class_num = 6  # width of the softmax output and of the one-hot targets
LR = 0.001  # learning rate handed to the Adam optimizer

In [3]:
TARGET = "open_channels"

# Cleaned signal frames produced by an earlier step of the pipeline.
df_train_raw = pd.read_pickle('../features/train_clean.pkl')
df_test_raw = pd.read_pickle('../features/test_clean.pkl')
# The test frame gets a constant 0 in the target column so both frames share it.
df_test_raw[TARGET] = 0

# RFC features: per-row class probabilities stored as .npy arrays
# (presumably from a random-forest classifier -- TODO confirm).
# Rows 3,500,000..3,999,999 are removed from the train array before the columns
# are attached to the train frame.
Y_train_proba = np.delete(
    np.load("../features/Y_train_proba.npy"),
    np.s_[3500000:4000000],
    axis=0,
)
Y_test_proba = np.load("../features/Y_test_proba.npy")

for proba_idx in range(11):
    proba_col = f"proba_{proba_idx}"
    df_train_raw[proba_col] = Y_train_proba[:, proba_idx]
    df_test_raw[proba_col] = Y_test_proba[:, proba_idx]

print(f"train size:{df_train_raw.shape}, test size:{df_test_raw.shape}")

train size:(4500000, 17), test size:(2000000, 18)


In [4]:
# feature engineering here
def fe(df, is_train):
    """Add a group id and lagged/leading copies of the signal to `df`.

    The frame is modified in place and also returned.

    Columns added, in this order:
      * "group": "<batch>_<mini_batch>" as a string.
      * for n in 1..3: "shift+n" (signal n rows earlier) followed by
        "shift_n" (signal n rows later). Positions with no neighbour inside
        the grouping unit are filled with 0.

    Args:
        df: frame with "batch", "mini_batch" and "signal" columns.
        is_train: truthy -> shifts are computed within each "batch";
            falsy -> shifts are computed within each "group".

    Returns:
        The same `df` object with the new columns attached.
    """
    df["group"] = df["batch"].astype("str") + "_" + df["mini_batch"].astype("str")

    # The grouping key does not depend on the shift size, so resolve it once.
    group_on = "batch" if is_train else "group"
    signal_per_unit = df.groupby([group_on])['signal']

    for lag in (1, 2, 3):
        df[f'shift+{lag}'] = signal_per_unit.shift(lag).fillna(0)
        df[f'shift_{lag}'] = signal_per_unit.shift(-lag).fillna(0)

    return df


# Train shifts are computed per batch, test shifts per batch/mini-batch group
# (the flag selects the grouping key inside fe).
df_train_raw = fe(df_train_raw, is_train=True)
df_test_raw = fe(df_test_raw, is_train=False)

In [5]:
# Preview the first rows to sanity-check the probability and shift columns.
df_train_raw.head()

Unnamed: 0,time,signal,open_channels,local_time,batch,mini_batch,proba_0,proba_1,proba_2,proba_3,proba_4,proba_5,proba_6,proba_7,proba_8,proba_9,proba_10,group,shift+1,shift_1,shift+2,shift_2,shift+3,shift_3
0,0.0001,-2.76,0,0.0001,1,1,0.966731,0.028343,0.004812,0.000114,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1_1,0.0,-2.8557,0.0,-2.4074,0.0,-3.1404
1,0.0002,-2.8557,0,0.0002,1,1,0.996045,0.003466,0.000426,6.3e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1_1,-2.76,-2.4074,0.0,-3.1404,0.0,-3.1525
2,0.0003,-2.4074,0,0.0003,1,1,0.976313,0.018989,0.004677,2.1e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1_1,-2.8557,-3.1404,-2.76,-3.1525,0.0,-2.6418
3,0.0004,-3.1404,0,0.0004,1,1,0.996002,0.003625,0.000326,4.6e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1_1,-2.4074,-3.1525,-2.8557,-2.6418,-2.76,-2.6993
4,0.0005,-3.1525,0,0.0005,1,1,0.997465,0.002335,0.000158,4.2e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1_1,-3.1404,-2.6418,-2.4074,-2.6993,-2.8557,-2.5935


In [6]:
# Every column of the train frame is a model input except the time stamps,
# the target and the batch/group identifiers listed here.
_excluded_cols = ("time", "local_time", "open_channels", "batch", "mini_batch", "group")
use_cols = [col for col in df_train_raw.columns if col not in _excluded_cols]
print("Used columns is", use_cols)

Used columns is ['signal', 'proba_0', 'proba_1', 'proba_2', 'proba_3', 'proba_4', 'proba_5', 'proba_6', 'proba_7', 'proba_8', 'proba_9', 'proba_10', 'shift+1', 'shift_1', 'shift+2', 'shift_2', 'shift+3', 'shift_3']


In [7]:
SEQ_LEN = 500
def chop_seq(df_batch_i, is_train):
    """Cut one recording into consecutive, non-overlapping windows of SEQ_LEN rows.

    Reads the module-level names `use_cols` (feature columns), `TARGET`
    (label column) and `SEQ_LEN` (window length).

    Args:
        df_batch_i: frame holding the rows of one batch (train) or one
            batch/mini-batch (test), in time order. Must contain the
            `use_cols` columns, the `TARGET` column and a "group" column.
        is_train: truthy -> 500,000 rows are windowed; falsy -> 100,000 rows.

    Returns:
        Three lists with one entry per window:
          * feature arrays of shape (SEQ_LEN, len(use_cols)),
          * target arrays of shape (SEQ_LEN,),
          * group-id arrays of shape (SEQ_LEN,).
        If the frame is shorter than the expected length, trailing windows
        come back short or empty (same as slicing past the end of the frame).
    """
    # Integer lengths avoid the float -> int round trip of 5e5 / SEQ_LEN.
    whole_len = 500_000 if is_train else 100_000
    n_windows = whole_len // SEQ_LEN

    # Column selection and conversion to numpy do not depend on the window,
    # so do them once instead of once per window.
    feature_values = df_batch_i[use_cols].values
    target_values = df_batch_i[TARGET].values
    group_values = df_batch_i["group"].values

    df_batch_i_features = []
    df_batch_i_y = []
    df_batch_i_group = []

    for i in range(n_windows):
        start, stop = SEQ_LEN * i, SEQ_LEN * (i + 1)
        # (SEQ_LEN, n_features)
        df_batch_i_features.append(feature_values[start:stop])
        df_batch_i_y.append(target_values[start:stop])
        df_batch_i_group.append(group_values[start:stop])

    return df_batch_i_features, df_batch_i_y, df_batch_i_group

In [8]:
# TRAIN
# Window each selected training batch and stack the windows into
# (n_sequences, SEQ_LEN, n_features) / (n_sequences, SEQ_LEN) arrays.
df_train = []
df_train_y = []
groups = []

# Only these batches are used for training (selection rationale is not
# visible in this notebook -- TODO document).
for batch_i in [1, 2, 6, 9]:
    df_batch_i = df_train_raw[df_train_raw.batch == batch_i]
    df_batch_i_features, df_batch_i_y, df_batch_i_group = chop_seq(df_batch_i, is_train=1)
    df_train.append(df_batch_i_features)
    df_train_y.append(df_batch_i_y)
    groups.append(df_batch_i_group)

# Materialise the nested list once; the original converted it to an array a
# second time just to read the feature width.
df_train = np.array(df_train)
df_train = df_train.reshape([-1, SEQ_LEN, df_train.shape[-1]])
df_train_y = np.array(df_train_y).reshape([-1, SEQ_LEN])
# One group id per sequence: take the id of the first row of every window.
groups = np.array(groups).reshape([-1, SEQ_LEN])[:,0]

print("TRAIN:", df_train.shape, df_train_y.shape)

TRAIN: (4000, 500, 18) (4000, 500)


In [9]:
# TEST
# Window the selected test batch/mini-batch pairs the same way as the
# training data.
df_test = []
df_test_y = []
df_test_groups = []

# (batch, mini_batch) pairs to predict with this model.
mini_batch_list = [[3,3]]
for batch_i, mini_batch_i in mini_batch_list:
    df_batch_i = df_test_raw[(df_test_raw.batch == batch_i) & (df_test_raw.mini_batch == mini_batch_i)]
    df_batch_i_features, df_batch_i_y, df_test_batch_i_group = chop_seq(df_batch_i, is_train=0)
    df_test.append(df_batch_i_features)
    df_test_y.append(df_batch_i_y)
    df_test_groups.append(df_test_batch_i_group)

# Materialise the nested list once; the original converted it to an array a
# second time just to read the feature width.
df_test = np.array(df_test)
df_test = df_test.reshape([-1, SEQ_LEN, df_test.shape[-1]])
df_test_y = np.array(df_test_y).reshape([-1, SEQ_LEN])
# One group id per sequence: take the id of the first row of every window.
df_test_groups = np.array(df_test_groups).reshape([-1, SEQ_LEN])[:,0]

print("TEST:", df_test.shape, df_test_y.shape)

TEST: (200, 500, 18) (200, 500)


In [10]:
# model function (very important, you can try different architectures to get a better score. I believe that top public leaderboard is a 1D Conv + RNN style)
def Classifier(shape_):
    """Build and compile the dilated-convolution sequence classifier.

    The network maps a sequence of feature vectors to one softmax
    distribution per time step: a Conv-BN-ReLU stem, three gated
    dilated-convolution ("wave") blocks with 32/64/128 filters, another
    Conv-BN-ReLU layer, dropout, and a Dense softmax head.

    Reads the module-level names `class_num` (softmax width) and `LR`
    (Adam learning rate).

    Args:
        shape_: input shape without the batch axis, i.e.
            (sequence_length or None, n_features).

    Returns:
        A compiled Keras model (categorical cross-entropy loss, accuracy
        metric, Adam wrapped in tensorflow_addons SWA).
    """

    def cbr(x, out_layer, kernel, stride, dilation):
        """Conv1D -> BatchNormalization -> ReLU, with 'same' padding."""
        x = Conv1D(out_layer, kernel_size=kernel, dilation_rate=dilation, strides=stride, padding="same")(x)
        x = BatchNormalization()(x)
        x = Activation("relu")(x)
        return x

    def wave_block(x, filters, kernel_size, n):
        """Stack of `n` gated dilated convolutions with dilation 1, 2, ..., 2**(n-1).

        The input is first projected to `filters` channels with a 1x1
        convolution; the output of every gated layer is added to that
        running residual, which is returned.
        """
        dilation_rates = [2**i for i in range(n)]
        x = Conv1D(filters = filters,
                   kernel_size = 1,
                   padding = 'same')(x)
        res_x = x
        for dilation_rate in dilation_rates:
            # Gated activation: tanh branch multiplied by a sigmoid gate.
            tanh_out = Conv1D(filters = filters,
                              kernel_size = kernel_size,
                              padding = 'same',
                              activation = 'tanh',
                              dilation_rate = dilation_rate)(x)
            sigm_out = Conv1D(filters = filters,
                              kernel_size = kernel_size,
                              padding = 'same',
                              activation = 'sigmoid',
                              dilation_rate = dilation_rate)(x)
            x = Multiply()([tanh_out, sigm_out])
            x = Conv1D(filters = filters,
                       kernel_size = 1,
                       padding = 'same')(x)
            res_x = Add()([res_x, x])
        return res_x

    inp = Input(shape = (shape_))
    x = cbr(inp, 64, 5, 1, 1)
    x = BatchNormalization()(x)
    x = wave_block(x, 32, 3, 8)
    x = BatchNormalization()(x)
    x = wave_block(x, 64, 3, 4)
    x = BatchNormalization()(x)
    x = wave_block(x, 128, 3, 1)
    x = cbr(x, 32, 5, 1, 1)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)
    # Dense acts on the last axis, so this yields class probabilities for every time step.
    out = Dense(class_num, activation = 'softmax', name = 'out')(x)

    model = models.Model(inputs = inp, outputs = out)

    # `learning_rate` is the supported argument name; the old `lr` alias is
    # deprecated and rejected by newer Keras releases.
    opt = Adam(learning_rate = LR)
    # NOTE(review): per the tensorflow_addons docs the SWA wrapper only keeps
    # running averages; they reach the model weights via assign_average_vars
    # (or an averaging checkpoint callback). Nothing visible in this notebook
    # calls either -- confirm whether SWA is meant to affect the predictions.
    opt = tfa.optimizers.SWA(opt)
    model.compile(loss = losses.CategoricalCrossentropy(), optimizer = opt, metrics = ['accuracy'])
    return model

In [11]:
# Reset Keras state, then register a TF1-compat session that uses one thread
# for intra-op and inter-op parallelism and grows GPU memory on demand.
K.clear_session()
tf_v1 = tf.compat.v1
config = tf_v1.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1,
)
config.gpu_options.allow_growth = True
sess = tf_v1.Session(graph=tf_v1.get_default_graph(), config=config)
tf_v1.keras.backend.set_session(sess)

# Buffers filled by the cross-validation loop:
#   oof_   - per-sequence, per-time-step class probabilities for the train set
#   preds_ - per-row class probabilities for the flattened test sequences
n_train_seq, train_seq_len = df_train.shape[0], df_train.shape[1]
n_test_rows = df_test.shape[0] * df_test.shape[1]
oof_ = np.zeros([n_train_seq, train_seq_len, class_num])
preds_ = np.zeros((n_test_rows, class_num))

In [12]:
# One-hot encode the training labels: (n_sequences, SEQ_LEN) -> (n_sequences, SEQ_LEN, class_num).
train_one_hot = pd.get_dummies(df_train_y.reshape([-1]))
# get_dummies creates one column per distinct label it sees. If that is not
# class_num, the reshape below could still succeed and silently scramble the
# labels (this also catches running the cell a second time on already-encoded
# targets), so fail loudly instead.
if train_one_hot.shape[1] != class_num:
    raise ValueError(
        f"expected {class_num} distinct labels in df_train_y, found {train_one_hot.shape[1]}"
    )
df_train_y = train_one_hot.values.reshape([-1, SEQ_LEN, class_num])

# All-zero placeholder for the test targets. Size it from the test sequences;
# the original used df_train_y's shape and produced a train-sized array.
df_test_y = np.zeros([df_test_y.shape[0], df_test_y.shape[1], class_num])

In [13]:
# Grouped K-fold training: each fold trains a fresh model, stores its
# out-of-fold predictions and adds its share of the averaged test prediction.
N_SPLITS = 5
gkf = GroupKFold(n_splits=N_SPLITS)

# Start from an empty test accumulator so re-running this cell does not stack
# a second set of fold predictions on top of the first.
preds_.fill(0)

for index, (tr_idx, val_idx) in enumerate(gkf.split(df_train, df_train_y, groups)):
    train_x, train_y = df_train[tr_idx], df_train_y[tr_idx]
    valid_x, valid_y = df_train[val_idx], df_train_y[val_idx]
    print(f'Our training dataset shape is {train_x.shape}')
    print(f'Our validation dataset shape is {valid_x.shape}')
    print(train_x.shape, train_y.shape)
    print("Evaluate on", np.unique(groups[val_idx]))
    # Sequence length is left as None; only the feature width is fixed.
    shape_ = (None, train_x.shape[2])
    model = Classifier(shape_)
    # lr_schedule is not defined in this notebook; presumably it comes from
    # the star import of tf_nn_utils -- TODO confirm.
    cb_lr_schedule = LearningRateScheduler(lr_schedule)
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1)

    model.fit(train_x,train_y,
              epochs = nn_epochs,
              callbacks = [cb_lr_schedule, early_stop], #MacroF1(model, valid_x, valid_y) 
              batch_size = nn_batch_size,verbose = 2,
              validation_data = (valid_x,valid_y))
    preds_f = model.predict(valid_x)
    f1_score_ = f1_score(np.argmax(valid_y, axis=2).reshape(-1),  np.argmax(preds_f, axis=2).reshape(-1), average = 'macro') 
    print(f'Training fold {index + 1} completed. macro f1 score : {f1_score_ :1.5f}')
    # Every sequence is in exactly one validation fold, so plain assignment is
    # enough and keeps the buffer correct if the cell is executed again.
    oof_[val_idx] = preds_f
    te_preds = model.predict(df_test)
    te_preds = te_preds.reshape(-1, te_preds.shape[-1])           
    # Average the test predictions over the folds (was a hard-coded 5).
    preds_ += te_preds / N_SPLITS

Our training dataset shape is (3200, 500, 18)
Our validation dataset shape is (800, 500, 18)
(3200, 500, 18) (3200, 500, 6)
Evaluate on ['1_5' '2_5' '6_5' '9_5']
Train on 3200 samples, validate on 800 samples
Epoch 1/100
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
3200/3200 - 13s - loss: 0.1395 - accuracy: 0.9658 - val_loss: 0.1029 - val_accuracy: 0.9869
Epoch 2/100
3200/3200 - 4s - loss: 0.0527 - accuracy: 0.9878 - val_loss: 0.0400 - val_accuracy: 0.9887
Epoch 3/100
3200/3200 - 4s - loss: 0.0572 - accuracy: 0.9862 - val_loss: 0.0413 - val_accuracy: 0.9886
Epoch 4/100
3200/3200 - 3s - loss: 0.0440 - accuracy: 0.9880 - val_loss: 0.0369 - val_accuracy: 0.9888
Epoch 5/100
3200/3200 - 3s - loss: 0.0414 - accuracy: 0.9881 - val_loss: 0.0348 - val_accuracy: 0.9889
Epoch 6/100
3200/3200 - 3s - loss: 0.0405 - accuracy: 0.9881 - val_loss: 0.0351 - val_accuracy: 0.9889
Epoch 7/100
3200/3200 - 4s - loss: 0.0398 - accuracy: 0.9882 - val_loss: 0.0339 - val_accur

Epoch 17/100
3200/3200 - 3s - loss: 0.0358 - accuracy: 0.9884 - val_loss: 0.0352 - val_accuracy: 0.9885
Epoch 18/100
3200/3200 - 3s - loss: 0.0357 - accuracy: 0.9885 - val_loss: 0.0329 - val_accuracy: 0.9887
Epoch 19/100
3200/3200 - 3s - loss: 0.0355 - accuracy: 0.9884 - val_loss: 0.0330 - val_accuracy: 0.9888
Epoch 20/100
3200/3200 - 3s - loss: 0.0347 - accuracy: 0.9885 - val_loss: 0.0325 - val_accuracy: 0.9886
Epoch 21/100
3200/3200 - 4s - loss: 0.0341 - accuracy: 0.9885 - val_loss: 0.0325 - val_accuracy: 0.9888
Epoch 22/100
3200/3200 - 3s - loss: 0.0341 - accuracy: 0.9886 - val_loss: 0.0324 - val_accuracy: 0.9886
Epoch 23/100
3200/3200 - 3s - loss: 0.0340 - accuracy: 0.9887 - val_loss: 0.0322 - val_accuracy: 0.9888
Epoch 24/100
3200/3200 - 3s - loss: 0.0336 - accuracy: 0.9886 - val_loss: 0.0324 - val_accuracy: 0.9886
Epoch 25/100
3200/3200 - 3s - loss: 0.0334 - accuracy: 0.9887 - val_loss: 0.0322 - val_accuracy: 0.9887
Epoch 26/100
3200/3200 - 3s - loss: 0.0335 - accuracy: 0.9886 - 

In [14]:
# goal: NA
# for now: 0.9824
# f1_score expects (y_true, y_pred); the arguments were passed the other way
# round. Macro F1 is symmetric under that swap, so the printed value does not
# change, but the call now matches the per-fold score in the training loop.
oof_true_labels = df_train_y.reshape([-1,class_num]).argmax(axis=1)
oof_pred_labels = oof_.reshape([-1,class_num]).argmax(axis=1)
print("oof F1 score is", f1_score(oof_true_labels, oof_pred_labels, average = 'macro'))

oof F1 score is 0.9824311667162924


In [16]:
# Collapse class probabilities to hard labels: one label per train row (OOF)
# and one per test row.
oof_pred = np.argmax(oof_.reshape([-1, class_num]), axis=1)
test_pred = np.argmax(preds_, axis=1)

print(f"oof shape is {oof_pred.shape}, test pred shape is {test_pred.shape}")
# # save oof and prediction
# np.save('oof/oof_model_7.npy', oof_pred)
# np.save('pred/pred_model_7.npy', test_pred)

oof shape is (2000000,), test pred shape is (100000,)
