# In [None]:
import numpy as np
import pandas as pd
from google.colab import drive
#drive.mount('/content/gdrive')

import tensorflow as tf
from keras import layers, models, optimizers, Sequential
from sklearn.metrics import accuracy_score, log_loss, make_scorer
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import OneHotEncoder

# Load the sensor feature tables and the training labels from the mounted
# Google Drive folder.
train = pd.read_csv('/content/gdrive/MyDrive/dacon_exercise/train_features.csv')
test = pd.read_csv('/content/gdrive/MyDrive/dacon_exercise/test_features.csv')
label = pd.read_csv('/content/gdrive/MyDrive/dacon_exercise/train_labels.csv')

# Add a combined accelerometer feature:
#     acc_t = (acc_x**2 + acc_y**2 + acc_z**2) ** (1/3)
# The formula is evaluated column-wise (one vectorized expression per frame)
# instead of calling a Python lambda once per row through DataFrame.apply.
# NOTE(review): the exponent 1/3 is kept from the original; a Euclidean
# magnitude would use 1/2 -- confirm which one is intended.
train['acc_t'] = (train['acc_x'] ** 2 + train['acc_y'] ** 2 + train['acc_z'] ** 2) ** (1 / 3)
test['acc_t'] = (test['acc_x'] ** 2 + test['acc_y'] ** 2 + test['acc_z'] ** 2) ** (1 / 3)

# stack the feature columns of one window into a (1, n_rows, n_channels) array
def convert_1d(data, n_channels=7):
    """Stack the leading columns of ``data`` along a new last (channel) axis.

    Args:
        data: DataFrame whose columns hold the per-timestep features.
        n_channels: Number of leading columns to stack. Defaults to 7, the
            count that used to be hard-coded. ``None`` stacks every column.

    Returns:
        ``numpy.ndarray`` of shape ``(1, len(data), k)``, where ``k`` is the
        number of columns actually stacked (``np.dstack`` promotes each 1-D
        column to shape ``(1, n_rows, 1)`` before concatenating).
    """
    # Slicing the column index (instead of indexing positions 0..6 by hand)
    # keeps the default behaviour and lets callers choose the channel count.
    selected = data.columns[:n_channels]
    return np.dstack([data.loc[:, name] for name in selected])


def data_to_img(data, window=600, n_channels=7, first_feature_col=2):
    """Cut a long table of readings into consecutive fixed-length windows.

    Rows are taken in their existing order, ``window`` rows at a time; any
    trailing rows that do not fill a whole window are dropped.
    NOTE(review): this presumes the rows of each recording are contiguous and
    already ordered -- confirm against the CSV layout.

    Args:
        data: DataFrame whose feature columns start at ``first_feature_col``.
        window: Number of rows per window (600 by default).
        n_channels: Number of feature columns to keep (7 by default).
        first_feature_col: Positional index of the first feature column; the
            default of 2 skips the two leading columns.

    Returns:
        ``numpy.ndarray`` of shape ``(len(data) // window, window, k)``, where
        ``k`` is the number of feature columns selected (``n_channels`` when
        the frame has enough columns).
    """
    n_windows = len(data) // window
    last_feature_col = first_feature_col + n_channels
    # One positional slice + reshape replaces the per-window loop; because the
    # windows are consecutive row ranges, a row-major reshape yields the same
    # grouping.
    values = data.iloc[:n_windows * window, first_feature_col:last_feature_col].to_numpy()
    return values.reshape(n_windows, window, values.shape[1])

# Window the long tables into one array of shape (n_sequences, 600, 7) each.
train_img = data_to_img(train)
test_img = data_to_img(test)

# One-hot encode the class labels for the categorical_crossentropy model.
# `label_onehot` was referenced below without ever being defined.
# OneHotEncoder returns a sparse matrix by default, so densify it to allow
# plain row indexing with the split indices.
# NOTE(review): the encoded width equals the number of distinct labels and
# must match the 61-unit softmax output of the network -- confirm.
label_onehot = OneHotEncoder().fit_transform(label[['label']]).toarray()

# stratified split of train data for class imbalance
strat = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=1004)
# n_splits=1, so the generator yields exactly one (train, valid) index pair.
train_index, valid_index = next(strat.split(train_img, label['label']))
x_train, x_valid = train_img[train_index], train_img[valid_index]
y_train, y_valid = label_onehot[train_index], label_onehot[valid_index]
    
# residual block
def Residual_Block(x, n_ch, kernel_size, leaky_alpha):
    """Run two BatchNorm -> LeakyReLU -> Conv1D stages and add the input back.

    Args:
        x: Input tensor of the block. It is added to the block output, so it
            presumably must already carry ``n_ch`` channels -- verify at the
            call site.
        n_ch: Number of filters of both Conv1D layers.
        kernel_size: Kernel size of both Conv1D layers.
        leaky_alpha: ``alpha`` passed to both LeakyReLU layers.

    Returns:
        The sum of the second convolution's output and the original input.
    """
    shortcut = x
    # Both stages are identical: normalise, activate, then convolve with
    # stride 1 and 'same' padding.
    for _ in range(2):
        x = layers.BatchNormalization()(x)
        x = layers.LeakyReLU(alpha=leaky_alpha)(x)
        x = layers.Conv1D(n_ch, kernel_size=kernel_size, strides=1, padding='same')(x)
    return layers.add([x, shortcut])
# make the hypermodel for 1d-resnet
from kerastuner import HyperModel, Objective
import tensorflow as tf
from kerastuner.tuners import BayesianOptimization
import keras.backend as K

class MyHyperModel(HyperModel):
    """Hypermodel describing a tunable 1D residual CNN over (600, 7) inputs."""

    def build(self, hp):
        """Build and compile one candidate model from the hyperparameters ``hp``.

        Architecture: a stem Conv1D + average pooling, then ``num_layers``
        stages of two residual blocks and a BatchNormalization. Every stage
        except the last is followed by a Conv1D that doubles the channel
        count. The head is global average pooling and a 61-way softmax.

        Tuned hyperparameters: ``num_filters``, ``kernel1``, ``num_layers``,
        ``kernel3``, ``alpha1``, ``kernel4``, ``alpha2``, ``kernel5``, ``lr``.
        The names inside the stage loop are the same on every iteration, so
        presumably all stages share one sampled value per name -- confirm
        against the tuner's behaviour.

        The former ``kernel2`` hyperparameter was removed: its value was never
        used by any layer, so it only added a dead dimension to the search.

        Args:
            hp: Hyperparameter container supplied by the tuner.

        Returns:
            The compiled model (Adam optimizer, categorical cross-entropy
            loss, accuracy metric).
        """
        inputs = layers.Input(shape=(600, 7), name='inputs')
        # `n_filters` replaces the old local named `filter`, which shadowed
        # the builtin.
        n_filters = hp.Int('num_filters', min_value=32, max_value=128, step=32, default=64)
        x = layers.Conv1D(
            filters=n_filters,
            kernel_size=hp.Choice('kernel1', [1, 3, 5, 7]),
            padding='valid',
        )(inputs)
        x = layers.AveragePooling1D(2)(x)

        num_layers = hp.Int('num_layers', 1, 3)
        for i in range(num_layers):
            x = Residual_Block(
                x,
                n_filters,
                kernel_size=hp.Choice('kernel3', [1, 3, 5, 7]),
                leaky_alpha=hp.Float('alpha1', 0.0, 1.0, step=0.01, default=0.03),
            )
            x = Residual_Block(
                x,
                n_filters,
                kernel_size=hp.Choice('kernel4', [1, 3, 5, 7]),
                leaky_alpha=hp.Float('alpha2', 0.0, 1.0, step=0.01, default=0.03),
            )
            x = layers.BatchNormalization()(x)

            # Widen the network between stages only; after the last stage the
            # doubled filter count was never used, so it is not computed.
            if i != num_layers - 1:
                n_filters *= 2
                x = layers.Conv1D(
                    n_filters,
                    kernel_size=hp.Choice('kernel5', [1, 3, 5, 7]),
                    strides=1,
                    padding='same',
                )(x)

        # The layer name is looked up later to extract the pooled features.
        x = layers.GlobalAveragePooling1D(name='GlobalAveragePooling')(x)
        output = layers.Dense(61, activation='softmax')(x)

        model = models.Model([inputs], [output])
        model.compile(
            optimizer=optimizers.Adam(learning_rate=hp.Float('lr', 0.001, 0.01, step=0.001)),
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )

        return model

# Instantiate the search space definition consumed by the tuner below.
hypermodel = MyHyperModel()

from keras.callbacks import EarlyStopping
# Stop a training run once val_loss has gone 4 epochs without improving.
es = EarlyStopping(monitor = 'val_loss', patience = 4)

# Bayesian hyperparameter search minimising validation loss: up to 50 trials,
# each configuration trained twice (executions_per_trial=2), fixed seed.
tuner_BO = BayesianOptimization(
            hypermodel,
            objective='val_loss',
            max_trials=50,
            seed=42,
            executions_per_trial=2,
        )
# Run the search on the stratified train/validation split built earlier.
tuner_BO.search(x_train, y_train, epochs=50, validation_data=(x_valid, y_valid), verbose=1, callbacks = [es], batch_size = 15)
# Keep the single best model found and report its loss/accuracy on the validation split.
best_model = tuner_BO.get_best_models(num_models=1)[0]
best_model.evaluate(x_valid, y_valid)

# Persist the tuned network (HDF5 format) to Google Drive.
best_model.save('/content/gdrive/MyDrive/dacon_exercise/resnet_7channel_BO.h5')

# Build a feature extractor from the saved CNN: same input, but the output is
# taken at the layer named 'GlobalAveragePooling' instead of the softmax head.
# The pooled features of every train/test window are used by the CatBoost
# stage that follows.
# NOTE(review): the original comment mentioned 128 features; the actual width
# depends on the tuned num_filters / num_layers values -- confirm.
best_model = models.load_model('/content/gdrive/MyDrive/dacon_exercise/resnet_7channel_BO.h5')
model_global = models.Model(best_model.input, best_model.get_layer('GlobalAveragePooling').output)
train_global = model_global.predict(train_img)
test_global = model_global.predict(test_img)

# gridsearch for catboost
from catboost import CatBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import PredefinedSplit
from sklearn.metrics import log_loss, make_scorer

clf = CatBoostClassifier(task_type='GPU', border_count=None)
# 5 x 3 x 3 = 45 parameter combinations; every other entry is a fixed setting.
# NOTE(review): l2_leaf_reg spans 1e-20 .. 1e-19, i.e. effectively no L2
# regularisation -- confirm this range is intended.
params = {
    'iterations': [600, 700, 800, 900, 1000],
    'depth': [4, 5, 6],
    'loss_function': ['MultiClass'],
    'l2_leaf_reg': np.logspace(-20, -19, 3),
    'leaf_estimation_iterations': [10],
    'eval_metric': ['Accuracy'],
    'logging_level': ['Silent'],
    'random_seed': [42],
}

# Reuse the CNN's train/validation split as the single CV fold.
# In PredefinedSplit, -1 keeps a sample out of every test fold (always
# training) and 0 assigns it to test fold 0 (validation).
# The array is sized from the data instead of the hard-coded 3125, and the
# training rows are marked with one indexed assignment instead of a
# per-sample `in` scan over train_index.
split_index = np.zeros(len(train_global), dtype=int)
split_index[train_index] = -1
pds = PredefinedSplit(test_fold=split_index)

# Score candidates by negated multi-class log loss on predicted probabilities
# (greater_is_better=False makes lower loss rank higher).
# NOTE(review): newer scikit-learn releases replace `needs_proba` with
# `response_method` -- confirm against the installed version.
LogLoss = make_scorer(log_loss, greater_is_better=False, needs_proba=True)

clf_grid = GridSearchCV(estimator=clf, param_grid=params, scoring=LogLoss, cv=pds, verbose=10)
clf_grid.fit(X=train_global, y=label['label'])

from catboost import CatBoostClassifier

# Final classifier with hand-fixed hyperparameters.
# NOTE(review): these values are typed in rather than read from the grid
# search result -- presumably copied from its best parameters; confirm.
cat = CatBoostClassifier(
    task_type='GPU',
    loss_function='MultiClass',
    depth=4,
    eval_metric='Accuracy',
    iterations=800,
    l2_leaf_reg=1e-20,
    leaf_estimation_iterations=10,
    random_seed=42,
    verbose=10,
)

# Draw the (single) split from the same splitter again, this time slicing the
# CNN-extracted features and the raw integer labels.
train_index, valid_index = next(strat.split(train_img, label['label']))
x_train = train_global[train_index]
x_valid = train_global[valid_index]
y_train = label['label'].loc[train_index]
y_valid = label['label'].loc[valid_index]

# Only the training part is fitted; the validation part is not passed to fit.
cat.fit(x_train, y_train)

# submission file
# One row per test sequence: the sequence id followed by one predicted
# probability column per class (columns 0, 1, 2, ...).
test_proba = cat.predict_proba(test_global)
test_pred = pd.DataFrame(test_proba)
# insert() raises if the number of ids differs from the number of prediction
# rows, instead of silently padding with NaN as an index-aligned concat would.
# It also avoids binding a module-level variable named `id`, which shadowed
# the builtin.
# NOTE(review): assumes the order of first appearance of each id in `test`
# matches the order of the windows in test_global -- confirm.
test_pred.insert(0, 'id', test['id'].unique())
test_pred.to_csv('/content/gdrive/MyDrive/dacon_exercise/resnet_7channel_BO_cat_2.csv', index = False)
