In [None]:
#######################
###  Global paths   ###
#######################
# Google Drive locations (Colab mount) for the project helper modules and the CSV data.
# Plain string literals: there is nothing to interpolate, so no f-prefix is needed.
custom_module_path = '/content/drive/MyDrive/MoA/utilites'
dataset_path = '/content/drive/MyDrive/MoA/dataset'

In [None]:
#######################
### Library imports ###
#######################
# standard library
import os
import sys

# data packages
import numpy as np
import pandas as pd

# tensorflow
import tensorflow as tf

# sklearn 
from sklearn.metrics import f1_score
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split

#custom tooling
sys.path.append(custom_module_path)
import preprocess
import resnet 

In [None]:
###################
###  Utilities  ###
###################

# Reference: implementing PyTorch's BCEWithLogitsLoss in Keras
# https://stackoverflow.com/questions/59669860/implementing-bcewithlogitsloss-from-pytorch-in-keras

def split_data(X, y, size_test=0.1, random_state=19):
    """Split features and targets into train and test partitions.

    Parameters
    ----------
    X : array-like
        Feature rows.
    y : array-like
        Targets aligned row-for-row with ``X``.
    size_test : float, default 0.1
        Forwarded to ``train_test_split`` as ``test_size``.
    random_state : int or None, default 19
        Seed forwarded to ``train_test_split``. The default keeps the split
        this function has always produced; pass another value (or ``None``)
        to draw a different split.

    Returns
    -------
    tuple
        ``(X_tr, X_test, y_tr, y_test)``.
    """
    X_tr, X_test, y_tr, y_test = train_test_split(
        X, y, test_size=size_test, random_state=random_state
    )
    return X_tr, X_test, y_tr, y_test

def predict_proba(preds):
    """Convert raw scores to probabilities with the logistic sigmoid.

    Evaluates ``1 / (1 + exp(-preds))`` element-wise and returns the result
    cast to ``float32``.
    """
    denominator = 1 + np.exp(-preds)
    sigmoid = 1 / denominator
    return sigmoid.astype("float32")

def multi_log_loss(y_pred, y_true):
    """Mean binary log loss over every element of the prediction matrix.

    Note the argument order: predictions first, ground truth second.
    A small epsilon (1e-15) is added inside each logarithm so that
    probabilities of exactly 0 or 1 do not produce ``log(0)``.
    """
    eps = 1e-15
    positive_term = y_true * np.log(y_pred + eps)
    negative_term = (1 - y_true) * np.log(1 - y_pred + eps)
    return np.mean(-positive_term - negative_term)

def preprocess_data(X,y):
    """Fit the project preprocessor on ``X``, transform it, and clean the targets.

    ``X`` is passed through a freshly constructed ``preprocess.Preprocessor``
    (fit, then transform on the same data). ``y`` has its ``sig_id`` column
    dropped and the remaining values cast to ``float32``. Both results are
    wrapped in new DataFrames and returned as ``(features, targets)``.

    NOTE(review): the preprocessor is fitted on every row of ``X`` it is
    given; if the caller splits off a test set afterwards, the held-out rows
    have already influenced the fit - confirm this is acceptable.
    """
    fitted = preprocess.Preprocessor()
    fitted.fit(X)
    features = fitted.transform(X)

    # Drop the identifier column; only the numeric target values are kept.
    targets = y.drop(["sig_id"], axis = 1).values.astype("float32")

    return pd.DataFrame(features), pd.DataFrame(targets)

def reshape_data(data):
    """Append a trailing axis of length 1 to a 2-D array.

    ``data`` must be exactly two-dimensional (unpacking its shape into two
    values fails otherwise); the result has shape ``(rows, cols, 1)``.
    """
    n_rows, n_cols = data.shape
    target_shape = (n_rows, n_cols, 1)
    return data.reshape(target_shape)

def get_f1_score(model, X_val, y_val, type_):
    """Score ``model`` on a validation set with scikit-learn's ``f1_score``.

    Both the model output and ``y_val`` are reduced to one class index per
    row with ``argmax`` over axis 1 before scoring. ``type_`` is forwarded
    as the ``average`` argument of ``f1_score``.

    NOTE(review): argmax keeps a single index per row, so a row with several
    positive targets (or none at all) is scored as if it had exactly one
    label - confirm this is the intended metric for a multi-column target.
    """
    predicted_labels = np.argmax(model.predict(X_val), axis=1)
    true_labels = np.argmax(y_val, axis=1)
    return f1_score(true_labels, predicted_labels, average=type_)

def cross_validation(X, Y, models, n_splits=5, epochs=3, batch_size=32, random_state=19):
    """Run K-fold cross-validation for every model and record per-fold F1 scores.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table; rows are selected positionally with ``.iloc``.
    Y : pandas.DataFrame
        Target table aligned row-for-row with ``X``.
    models : dict
        Maps each Keras model to a list whose first element is the model's
        display name. The weighted F1 score of every fold is appended to
        that list in place, so after the call ``values[1:]`` holds the fold
        scores. Calling this function again on the same dict appends
        further scores rather than replacing the old ones.
    n_splits : int, default 5
        Number of folds.
    epochs : int, default 3
        Training epochs per fold.
    batch_size : int, default 32
        Mini-batch size passed to ``fit``.
    random_state : int or None, default 19
        Seed for the shuffled ``KFold``. With a fixed seed every model is
        evaluated on the same folds; pass ``None`` for a fresh shuffle on
        each call to ``split``.

    Returns
    -------
    None
        Results are communicated through the mutated ``models`` values.
    """
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)

    for m, values in models.items():
        print(f'Cross Validation for model {values[0]}\n')

        # Snapshot the weights the model has on entry. Without restoring them,
        # fold k would start from a model already trained on folds 1..k-1,
        # which includes the rows fold k validates on (leakage between folds).
        initial_weights = m.get_weights()

        for train_index, val_index in kf.split(X):
            # Every fold starts from the same weights.
            m.set_weights(initial_weights)

            X_train, X_val = np.array(X.iloc[train_index]), np.array(X.iloc[val_index])
            y_train = Y.iloc[train_index].values.astype("float32")
            y_val = Y.iloc[val_index].values.astype("float32")

            # A new optimizer per fold, so optimizer state is not carried over either.
            # NOTE(review): `decay` is a legacy optimizer argument; confirm the
            # installed TensorFlow/Keras version still accepts it.
            opt = tf.keras.optimizers.Adam(learning_rate=1e-3, decay=1e-5)
            m.compile(opt, loss='binary_crossentropy', metrics=['accuracy'])
            m.fit(
                X_train,
                y_train,
                validation_data=(X_val, y_val),
                batch_size=batch_size,
                epochs=epochs,
                verbose=1,
            )

            values.append(get_f1_score(m, X_val, y_val, 'weighted'))

        print(f'Done model {values[0]}\n')
    print('Done')

In [None]:
###################
###    Models   ###
###################
length = 877  # Length of each Segment
model_name = ''
model_width = 64  # Width of the Initial Layer, subsequent layers start from here
num_channel = 1   # Number of Input Channels in the Model
problem_type = 'Classification'
output_nums = 206  # presumably the number of target columns - TODO confirm


def _new_resnet_builder():
    """Return a fresh ``resnet.ResNet`` builder configured with the settings above."""
    return resnet.ResNet(
        length,
        num_channel,
        model_width,
        problem_type=problem_type,
        output_nums=output_nums,
        pooling='avg',
        dropout_rate=0.2,
    )


# Keys are the built models; each value is a list that starts with the model's
# display name (later code appends scores to that list).
# Every entry uses its own builder instance.
models = {
    _new_resnet_builder().ResNet18(): ['ResNet_18'],
    _new_resnet_builder().ResNet_18_LSTM(): ['ResNet_18_LSTM'],
    _new_resnet_builder().ResNet34(): ['ResNet_34'],
    _new_resnet_builder().ResNet50(): ['ResNet_50'],
    #_new_resnet_builder().ResNet101(): ['ResNet_101'],
    #_new_resnet_builder().ResNet152(): ['ResNet_152'],
}

In [None]:
# Raw drug table, read twice into two independent DataFrames.
drugs, train_drug = (
    pd.read_csv(f'{dataset_path}/train_drug.csv'),
    pd.read_csv(f'{dataset_path}/train_drug.csv'),
)

# Load features and scored targets and pass them straight through preprocessing.
X, y = preprocess_data(
    pd.read_csv(f'{dataset_path}/train_features.csv'),
    pd.read_csv(f'{dataset_path}/train_targets_scored.csv'),
)

# Hold out a test partition using split_data's default test fraction.
X_train, X_test, y_train, y_test = split_data(X, y)
#X_train, X_val, y_train, y_val = split_data(X_train,y_train,0.2)

In [None]:
# Compare the candidate models with K-fold cross-validation on the training
# partition only; X_test / y_test are not passed in here.
cross_validation(X=X_train, Y=y_train, models=models)

Cross Validation for model ResNet_18

Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Done model ResNet_18

Cross Validation for model ResNet_18_LSTM

Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Done model ResNet_18_LSTM

Cross Validation for model ResNet_34

Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Done model ResNet_34

Cross Validation for model ResNet_50

Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Epoch 1/3
Epoch 2/3
Epoch 3/3
Done model ResNet_50

Done


In [None]:
# Each value is [display name, score, score, ...]; report the mean of the scores.
for label, *fold_scores in models.values():
    print(f' model {label} weighted f1-score mean is {np.mean(fold_scores)}')

 model ResNet_18 weighted f1-score mean is 0.10366114297063249
 model ResNet_18_LSTM weighted f1-score mean is 0.00032719568655407126
 model ResNet_34 weighted f1-score mean is 0.06086424453373086
 model ResNet_50 weighted f1-score mean is 0.02576946283198725
