# Convolutional AE with Keras



In [None]:
from keras.models import Model

In [None]:
# Generic
# numpy and pandas
import numpy as np
import pandas as pd
import math

# Generic
import os
import matplotlib.pyplot as plt
from IPython.display import clear_output

# Images
from PIL import Image
from skimage.transform import resize
import cv2
# import talos as ta

# Sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, confusion_matrix, plot_confusion_matrix, roc_curve


# Tensorflow
import tensorflow as tf
# import tensorflow.compat.v1 as tf

# Keras
from keras.layers import Input, Dense, Dropout, BatchNormalization, Conv2D, MaxPooling2D, UpSampling2D
from keras.utils import print_summary
from keras.models import Model, load_model
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping, ReduceLROnPlateau, Callback
from keras.applications.densenet import DenseNet121
from keras.applications.densenet import preprocess_input
from keras.preprocessing import image
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K
from keras import backend as K

## Load Path

In [None]:
# Filesystem locations used by the notebook: image arrays, the data folder
# and the folder where results (CSV files, models, figures) are written.
path_images, path_data, path_results = (
    '/home/jesusprada/proyecto_python/x-ray/data/covid_images',
    '/home/jesusprada/proyecto_python/x-ray/data',
    '/home/jesusprada/proyecto_python/x-ray/results_mbit',
)

# Load Classes and Functions

### CheXnet

In [3]:
# Path of the weights file loaded into the CheXNet model.
chexnet_weights = '/home/ygala/TFM_UOC/scripts/chexnet/best_weights.h5'

def chexnet_preprocess_input(value):
    """Run ``value`` through the DenseNet ``preprocess_input`` transform.

    Args:
        value: Image data accepted by ``preprocess_input``.

    Returns:
        Whatever ``preprocess_input`` returns for ``value``.
    """
    preprocessed = preprocess_input(value)
    return preprocessed


def get_chexnet_model():
    """Build DenseNet121 with a 14-output sigmoid head and load CheXNet weights.

    Returns:
        A ``(base_model, model)`` tuple. ``base_model`` is the DenseNet121
        backbone (no top, average pooling) and ``model`` is the backbone plus
        the 14-unit ``predictions`` layer, after ``chexnet_weights`` has been
        loaded into it.
    """
    image_shape = (224, 224, 3)
    img_input = Input(shape=image_shape)

    # Backbone: DenseNet121 initialised from ImageNet; the CheXNet weights are
    # loaded on top of the complete model further down.
    base_model = DenseNet121(
        include_top=False,
        input_tensor=img_input,
        input_shape=image_shape,
        weights='imagenet',
        pooling='avg',
    )

    # Head: one sigmoid unit per label (14 outputs).
    head = Dense(14, activation='sigmoid', name='predictions')
    predictions = head(base_model.output)

    model = Model(inputs=img_input, outputs=predictions)
    model.load_weights(chexnet_weights)

    return base_model, model

### Auxiliary functions

In [4]:

def get_class_weight(csv_file_path, target_class):
    """Return the share of 'Y' and 'N' rows in one column of a CSV file.

    Args:
        csv_file_path: Path of a ``;``-separated CSV file.
        target_class: Name of the column holding the 'Y' / 'N' labels.

    Returns:
        ``np.ndarray`` with two floats, ``(share of 'Y', share of 'N')``, both
        relative to the total number of rows in the file.
    """
    table = pd.read_csv(csv_file_path, sep=';')
    n_rows = len(table)
    labels = table[target_class]

    shares = [len(table.loc[labels == flag]) / n_rows for flag in ('Y', 'N')]
    return np.array(shares)

def auc(y_true, y_pred):
    """AUC metric built on ``tf.metrics.auc`` (TF1-style streaming metric).

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted scores.

    Returns:
        The second element of the pair returned by ``tf.metrics.auc``.
    """
    _, update_op = tf.metrics.auc(y_true, y_pred)
    # The metric keeps its counters in local variables, which must be
    # initialised in the backend session before the op can be evaluated.
    K.get_session().run(tf.local_variables_initializer())
    return update_op


def print_confidence_intervals(statistics):
    """Format a sample of AUC values as ``mean (5% quantile-95% quantile)``.

    Args:
        statistics: Array of values exposing ``.mean()`` (e.g. ``np.ndarray``).

    Returns:
        One-row ``pd.DataFrame`` indexed by ``"Exitus"`` whose single column
        ``"Mean AUC (CI 5%-95%)"`` holds the formatted text.
    """
    centre = statistics.mean()
    upper = np.quantile(statistics, .95)
    lower = np.quantile(statistics, .05)
    summary = "%.2f (%.2f-%.2f)" % (centre, lower, upper)

    table = pd.DataFrame(columns=["Mean AUC (CI 5%-95%)"])
    table.loc["Exitus"] = [summary]
    return table


def get_model(learning_rate):
    """Build and compile the binary classifier on top of the CheXNet backbone.

    The DenseNet121 backbone returned by ``get_chexnet_model`` is frozen and
    extended with BatchNorm -> Dense(128, relu, L1/L2) -> BatchNorm ->
    Dense(1, sigmoid).

    Args:
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A ``(base_model, model)`` tuple: the frozen backbone and the compiled
        model, which reports the metrics ``acc`` and ``auc``.
    """
    # `metrics` is not among the file-level imports; import it where it is used.
    from keras import metrics

    # Only the backbone is needed here; the 14-label CheXNet model is discarded.
    base_model, _ = get_chexnet_model()

    x = base_model.output

    # Regularization layer
    x = BatchNormalization()(x)

    # Dense layer
    x = Dense(128,
              activation='relu',
              kernel_regularizer=tf.keras.regularizers.l1_l2(0.5, 0.0001))(x)

    # Regularization layer
    x = BatchNormalization()(x)

    # Single sigmoid unit: binary output
    predictions = Dense(
        1,
        activation='sigmoid')(x)

    # this is the model we will use
    model = Model(
        inputs=base_model.input,
        outputs=predictions,
    )

    # Train only the newly added (randomly initialised) top layers: freeze
    # every backbone layer before compiling.
    for layer in base_model.layers:
        layer.trainable = False

    # initiate an Adam optimizer
    opt = Adam(
        lr=learning_rate,
        beta_1=0.9,
        beta_2=0.999,
        decay=0.0,
        amsgrad=False
    )

    # The metric names below ("acc", "auc") are the keys the training
    # callbacks read from the Keras logs.
    model.compile(
        loss='binary_crossentropy',
        optimizer=opt,
        metrics=[metrics.BinaryAccuracy(name = "acc"),
                metrics.AUC(name = "auc")])

    return base_model, model


class print_learning_rate(Callback):
    """Keras callback that prints the optimizer learning rate at every epoch start."""

    def on_epoch_begin(self, epoch, logs=None):
        current_lr = K.eval(self.model.optimizer.lr)
        print(f'Learning rate = {current_lr:.5f}')


# Ready-to-use instance of the callback.
print_lr = print_learning_rate()

### Print functions

In [5]:
def grafica_entrenamiento(tr_auc, val_auc, tr_loss, val_loss, best_i,
                          figsize=(10,5), path_results = None):
    """Plot training/validation loss and AUC side by side.

    Args:
        tr_auc: Sequence of training AUC values, one per epoch.
        val_auc: Sequence of validation AUC values, one per epoch.
        tr_loss: Sequence of training loss values, one per epoch.
        val_loss: Sequence of validation loss values, one per epoch.
        best_i: Zero-based index of the epoch highlighted with a red dot.
        figsize: Figure size passed to ``plt.figure``.
        path_results: Optional folder; when given, the figure is also saved
            there as ``auc_loss.png``.
    """
    # `MaxNLocator` is not among the file-level imports; import it locally.
    from matplotlib.ticker import MaxNLocator

    plt.figure(figsize=figsize)

    panels = (
        (tr_loss, val_loss, 'loss del modelo', 'loss'),
        (tr_auc, val_auc, 'AUC', 'AUC'),
    )
    for position, (train_values, val_values, title, ylabel) in enumerate(panels, start=1):
        # Keep the axes of *this* panel so the integer tick locator is applied
        # to both subplots, not only to the first one.
        ax = plt.subplot(1, 2, position)
        plt.plot(1+np.arange(len(train_values)), np.array(train_values))
        plt.plot(1+np.arange(len(val_values)), np.array(val_values))
        plt.plot(1+best_i, val_values[best_i], 'or')
        plt.title(title, fontsize=18)
        plt.ylabel(ylabel, fontsize=12)
        plt.xlabel('época', fontsize=18)
        plt.legend(['entrenamiento', 'validación'], loc='upper left')
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))

    if path_results is not None:
        plt.savefig(os.path.join(path_results, 'auc_loss.png'))
    plt.show()
    
class TrainingPlot(Callback):
    """Keras callback that records loss/AUC per epoch and redraws the curves."""

    def on_train_begin(self, logs=None):
        """Reset the per-epoch history lists when training starts."""
        self.losses = []
        self.auc = []
        self.val_losses = []
        self.val_auc = []
        self.logs = []

    def on_epoch_end(self, epoch, logs=None):
        """Store this epoch's metrics and redraw the training curves.

        Args:
            epoch: Index of the epoch that just finished.
            logs: Metric dictionary supplied by Keras; the keys ``loss``,
                ``auc``, ``val_loss`` and ``val_auc`` are read from it.
        """
        # Avoid a shared mutable default: fall back to a fresh dict per call.
        logs = logs or {}

        self.logs.append(logs)
        self.losses.append(logs.get('loss'))
        self.auc.append(logs.get('auc'))
        self.val_losses.append(logs.get('val_loss'))
        self.val_auc.append(logs.get('val_auc'))

        # Plot only once two epochs are available and every validation AUC was
        # actually logged (np.argmax cannot rank missing `None` entries).
        if len(self.val_auc) > 1 and None not in self.val_auc:
            best_i = np.argmax(self.val_auc)
            grafica_entrenamiento(self.auc, self.val_auc, self.losses, self.val_losses, best_i)

plot_losses = TrainingPlot()



### Heatmaps functions

In [7]:
### function for painting heatmaps

def show_heatmap(model, im, es_maligna, predictions):
    """Plot a gradient-weighted activation map for one image.

    Args:
        model: Keras model that contains a layer named ``'bn'``; the score
            being explained is ``model.output[0, 0]``.
        im: One image indexed as ``(height, width, channels)``.
        es_maligna: Truthy when the real class is "No sobrevive"; only used
            for the printed label.
        predictions: Indexable whose first element is printed as the predicted
            probability and caps the overlay transparency.

    Returns:
        ``(heatmap, superimposed_img)``: the colour-mapped heatmap resized to
        the image, and its blend with the image.
    """
    # Add the batch axis expected by the model.
    imag = np.reshape(im, (1, im.shape[0], im.shape[1], im.shape[2]))

    # Score of the single output unit for the first (only) sample.
    output = model.output[0, 0]

    # Feature map the gradients are taken against (layer named 'bn').
    last_conv_layer = model.get_layer('bn')

    # Gradient of the score with respect to that feature map.
    grads = K.gradients(output, last_conv_layer.output)[0]

    # Backend function returning the feature map and the gradients.
    iterate = K.function([model.input], [last_conv_layer.output, grads])

    # Backend functions take their inputs as a list, one entry per placeholder.
    output, grads_val = iterate([imag])
    conv_layer_output_value, pooled_grads_value = output[0, :], grads_val[0, :, :, :]

    # Channel weights = spatial mean of the gradients; the map is the
    # weighted sum of the feature-map channels.
    weights = np.mean(pooled_grads_value, axis=(0, 1))
    cam = np.dot(conv_layer_output_value, weights)
    heatmap = np.maximum(cam, 0)
    # Normalise to [0, 1]; skip when the map is all zeros to avoid 0/0.
    peak = np.max(heatmap)
    if peak > 0:
        heatmap /= peak
    plt.matshow(heatmap)
    plt.show()

    # load the original image
    img = imag[0]

    # Process CAM. The interpolation flag must be passed by keyword: the third
    # positional parameter of cv2.resize is `dst`, not `interpolation`.
    cam = cv2.resize(cam, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_LINEAR)
    cam = np.maximum(cam, 0)
    cam_peak = cam.max()
    if cam_peak > 0:
        cam = cam / cam_peak

    # We resize the heatmap to have the same size as the original image
    heatmap = cv2.resize(heatmap, (img.shape[1], img.shape[0]))

    # Scale to 8-bit so a colour map can be applied
    heatmap = np.uint8(255 * heatmap)

    # NOTE(review): cv2.applyColorMap returns BGR channel order while
    # matplotlib displays RGB -- confirm the colours shown are as intended.
    heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET)

    superimposed_img = heatmap * 0.8 / 255 + 0.8*img

    plt.figure(figsize=(15,5))
    plt.subplot(1,3,1)
    plt.imshow(img, vmin=0, vmax=1)
    plt.subplot(1,3,2)
    plt.imshow(heatmap, vmin=0, vmax=1)
    plt.colorbar()
    plt.subplot(1,3,3)
    plt.imshow(img,
                       cmap='gray')
    # Overlay opacity follows the predicted probability, capped at 0.5.
    plt.imshow(cam, cmap='jet', alpha=min(0.5, predictions[0]))
    plt.colorbar()
    plt.show()
    print("- Probabilidad de Exitus:", predictions[0])
    print("-", "Clase real:", "No sobrevive" if es_maligna else "Sobrevive")
    print("\n\n\n")
    return heatmap, superimposed_img

## Load Data

In [None]:
# Feature arrays of the three splits.
_feature_files = ('X_train_filter.npy', 'X_val_filter.npy', 'X_test_filter.npy')
X_train, X_val, X_test = (
    np.load(os.path.join(path_data, file_name)) for file_name in _feature_files
)

# Label arrays of the three splits.
# NOTE: allow_pickle=True lets np.load unpickle arbitrary objects; only use it
# with files from a trusted source.
_label_files = ('ytrain_filter.npy', 'yval_filter.npy', 'ytest_filter.npy')
ytrain, yval, ytest = (
    np.load(os.path.join(path_data, file_name), allow_pickle=True)
    for file_name in _label_files
)

## Split patient data into death and survival

In [None]:
# Boolean masks over the training labels, then one feature subset per label.
_label_is_1 = ytrain == 1
_label_is_0 = ytrain == 0
X_train_1 = X_train[_label_is_1]
X_train_0 = X_train[_label_is_0]

## Set class weights

### Train

In [None]:
# Share of each label value in the training labels, ordered (label 0, label 1).
# The labels are loaded under the name `ytrain`, so that is the name read here.
# NOTE(review): `ratio_pos` holds the share of label 0 and `ratio_neg` the
# share of label 1 -- confirm the naming is intended.
ratio_pos = np.count_nonzero(ytrain == 0) / len(ytrain)
ratio_neg = np.count_nonzero(ytrain == 1) / len(ytrain)
class_weight_train = np.array((ratio_pos, ratio_neg))
print(class_weight_train)

### Validation

In [None]:
# Share of each label value in the validation labels, ordered (label 0, label 1).
# The labels are loaded under the name `yval`, so that is the name read here.
ratio_pos = np.count_nonzero(yval == 0) / len(yval)
ratio_neg = np.count_nonzero(yval == 1) / len(yval)
class_weight_val = np.array((ratio_pos, ratio_neg))

### Test

In [None]:
# Share of each label value in the test labels, ordered (label 0, label 1).
# The labels are loaded under the name `ytest`, so that is the name read here.
ratio_pos = np.count_nonzero(ytest == 0) / len(ytest)
ratio_neg = np.count_nonzero(ytest == 1) / len(ytest)
class_weight_test = np.array((ratio_pos, ratio_neg))

## Autoencoder Model

In [None]:
# MODEL: convolutional autoencoder for 28x28 single-channel images

# ENCODER: three conv + max-pool stages (28 -> 14 -> 7 -> 4 per side)
input_img = Input(shape=(28, 28, 1))
x = input_img
for n_filters in (16, 8, 8):
    x = Conv2D(n_filters, (3, 3), activation='relu', padding='same')(x)
    x = MaxPooling2D((2, 2), padding='same')(x)
encoded = x

# DECODER: three conv + upsampling stages. The last of them uses the default
# 'valid' padding so that 16 -> 14 -> 28 restores the input size.
x = encoded
for n_filters, padding_mode in ((8, 'same'), (8, 'same'), (16, 'valid')):
    x = Conv2D(n_filters, (3, 3), activation='relu', padding=padding_mode)(x)
    x = UpSampling2D((2, 2))(x)
decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)

autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')

In [0]:
# DATA: MNIST digits, labels discarded
from keras.datasets import mnist
import numpy as np


def _to_unit_images(raw_images):
    """Scale pixel values by 1/255 and reshape to (samples, 28, 28, 1)."""
    scaled = raw_images.astype('float32') / 255.
    return np.reshape(scaled, (len(scaled), 28, 28, 1))


(x_train, _), (x_test, _) = mnist.load_data()

x_train = _to_unit_images(x_train)
x_test = _to_unit_images(x_test)

In [19]:
# TRAINING: the autoencoder learns to reproduce its own input, so the images
# serve as both features and targets (for training and for validation).
fit_options = dict(
    epochs=50,
    batch_size=128,
    shuffle=True,
    validation_data=(x_test, x_test),
)
autoencoder.fit(x_train, x_train, **fit_options)

Train on 60000 samples, validate on 10000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x7f6bea1e7ef0>

In [0]:
# TEST: reconstruct the x_test images with the autoencoder
decoded_imgs = autoencoder.predict(x_test)

In [None]:
import matplotlib.pyplot as plt

# Show the first `n` test digits (top row) above their reconstructions
# (bottom row).
n = 10
plt.figure(figsize=(20, 4))
for i in range(n):
    # row 0: original image, row 1: reconstruction
    for row, images in enumerate((x_test, decoded_imgs)):
        ax = plt.subplot(2, n, i + 1 + row * n)
        plt.imshow(images[i].reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)


## Data augmentation

In [None]:
# Generator options: feature-wise centring and std normalisation plus random
# rotations with rotation_range=90.
augmentation_options = dict(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=90,
)
datagen = ImageDataGenerator(**augmentation_options)

# Fit the generator on the training images.
datagen.fit(X_train)

# Data Modeling

## Define CNN structure

In [None]:
# Build the frozen CheXNet backbone plus the new binary head, compiled with Adam.
# NOTE(review): `learning_rate` is not assigned anywhere in the visible cells;
# it is assumed to be defined beforehand -- confirm.
base_model, model = get_model(learning_rate)

# Show layers
print_summary(model)

### Original Data

In [None]:
# Train on the original images, optionally enlarged with generated
# (autoencoder, "AE_*") images, without on-the-fly augmentation.
#
# NOTE(review): size_ae, path_ae, batch_size, epochs, callbacks_list,
# early_stopping and the acum_* lists are used below but not assigned in this
# cell; they are assumed to be defined beforehand -- confirm.
# NOTE(review): the same `model` object is fitted again on every iteration, so
# training carries over between values of `n` -- confirm this is intended.

import random

previous_val_auc = 0

df = pd.DataFrame()

for n in size_ae:
    print ("Numero imagenes GANs %d" % n)

    # Reload the untouched splits on every iteration so generated samples
    # added in a previous iteration are not carried over.
    X_train = np.load(os.path.join(path_images, 'X_train.npy'))
    X_val = np.load(os.path.join(path_images, 'X_val.npy'))
    X_test = np.load(os.path.join(path_images, 'X_test.npy'))
    y_train = np.load(os.path.join(path_images, 'y_train.npy'))
    y_val = np.load(os.path.join(path_images, 'y_val.npy'))
    y_test = np.load(os.path.join(path_images, 'y_test.npy'))

    if n != 0:
        X_train_0_ae = np.load(os.path.join(path_ae, 'AE_covid_0_num_imag_%d.npy' % n))
        X_train_1_ae = np.load(os.path.join(path_ae, 'AE_covid_1_num_imag_%d.npy'% n))

        # Keep a share of the generated label-1 images equal to the label-1
        # share of the real training labels.
        ratio_neg = np.count_nonzero(y_train == 1) / len(y_train)
        size = round(ratio_neg * X_train_1_ae.shape[0])

        # Sample from every row index; `range(rows - 1)` could never pick the
        # last generated image.
        ind = random.sample(range(X_train_1_ae.shape[0]), size)
        X_train_1_ae = X_train_1_ae[ind,:]

        # create target
        y_train_1_ae = np.ones(X_train_1_ae.shape[0], dtype=int)
        y_train_0_ae = np.zeros(X_train_0_ae.shape[0], dtype=int)

        # join data
        X_train_ae = np.concatenate((X_train_1_ae, X_train_0_ae))
        y_train_ae = np.concatenate((y_train_1_ae, y_train_0_ae))

        X_train = np.concatenate((X_train, X_train_ae))
        y_train = np.concatenate((y_train, y_train_ae))

    # Label shares, ordered (label 0, label 1).
    # train
    ratio_pos = np.count_nonzero(y_train == 0) / len(y_train)
    ratio_neg = np.count_nonzero(y_train == 1) / len(y_train)
    class_weight_train = np.array((ratio_pos, ratio_neg))

    # val
    ratio_pos = np.count_nonzero(y_val == 0) / len(y_val)
    ratio_neg = np.count_nonzero(y_val == 1) / len(y_val)
    class_weight_val = np.array((ratio_pos, ratio_neg))

    # Array inputs are batched with `batch_size`; `steps_per_epoch` must be an
    # integer and is meant for generator/tensor inputs.
    # NOTE(review): Keras documents `class_weight` as a dict mapping class
    # index to weight; an array is passed here -- confirm it has the intended
    # effect.
    out = model.fit(X_train, y_train,
                    validation_data=(X_val, y_val),
                    batch_size=batch_size,
                    epochs=epochs,
                    class_weight=class_weight_train,
                    callbacks = callbacks_list,
                    verbose=1)

    if out.history:
        # Accumulate the first-epoch metrics of this run across iterations.
        acum_tr_auc.append(out.history['auc'][0])
        acum_val_auc.append(out.history['val_auc'][0])
        acum_tr_loss.append(out.history['loss'][0])
        acum_val_loss.append(out.history['val_loss'][0])

        acum = pd.DataFrame([acum_tr_auc, acum_val_auc, acum_tr_loss, acum_val_loss])
        acum.to_csv(os.path.join(path_results,'acum_results_no_augmentation.csv'))

        clear_output()
        best_i = np.argmax(acum_val_auc)
        grafica_entrenamiento(acum_tr_auc, acum_val_auc, acum_tr_loss, acum_val_loss, best_i)

        stopped_epoch = early_stopping.stopped_epoch
        model = out.model

        # AUC of the fitted model on the train and validation splits.
        pred_train = model.predict(X_train)
        pred_val = model.predict(X_val)

        train_auc = roc_auc_score(y_true = y_train, y_score = pred_train)
        val_auc = roc_auc_score(y_true = y_val, y_score = pred_val)

        # One column of results per value of `n`.
        res = pd.DataFrame([n, epochs, batch_size, stopped_epoch, train_auc, val_auc])
        df = pd.concat([df, res], axis=1)

        # Outputs of this loop carry the `no_augmentation` suffix so they do
        # not collide with the files written by the augmented-data loop.
        df.to_csv(os.path.join(path_results,'model_results_no_augmentation.csv'))

        model.save(os.path.join(path_results, 'model_no_augmentation_%d.h5' % n))

        if previous_val_auc < val_auc:
            # Best model so far: keep a copy in its own folder.
            save_dir = os.path.join(
                    os.getcwd(),
                    '../model_results_y')
            os.makedirs(save_dir, exist_ok=True)
            model.save(os.path.join(save_dir, 'model_no_augmentation_%d.h5' % n))

            previous_val_auc = val_auc


# Label the rows only when at least one run produced results; assigning six
# labels to an empty frame would raise.
if not df.empty:
    df.index = ['size_ae', 'epochs', 'batch_size','early_stopping', 'train_auc', 'val_auc']
    df.to_csv(os.path.join(path_results,'model_results_no_augmentation.csv'))

### Augmented dataset

In [None]:
# Train on the original images, optionally enlarged with generated
# (autoencoder, "AE_*") images, using on-the-fly augmentation. A fresh model is
# built for every value of `n`.
#
# NOTE(review): size_ae, path_ae, batch_size, epochs, seed, learning_rate,
# callbacks_list, early_stopping and the acum_* lists are used below but not
# assigned in this cell; they are assumed to be defined beforehand -- confirm.

import math
import random

previous_val_auc = 0

df = pd.DataFrame()

for n in size_ae:
    print ("Numero imagenes autoencoders %d" % n)

    # Reload the untouched splits on every iteration so generated samples
    # added in a previous iteration are not carried over.
    X_train = np.load(os.path.join(path_images, 'X_train.npy'))
    X_val = np.load(os.path.join(path_images, 'X_val.npy'))
    X_test = np.load(os.path.join(path_images, 'X_test.npy'))
    y_train = np.load(os.path.join(path_images, 'y_train.npy'))
    y_val = np.load(os.path.join(path_images, 'y_val.npy'))
    y_test = np.load(os.path.join(path_images, 'y_test.npy'))

    if n != 0:
        X_train_0_ae= np.load(os.path.join(path_ae, 'AE_covid_0_num_imag_%d.npy' % n))
        X_train_1_ae = np.load(os.path.join(path_ae, 'AE_covid_1_num_imag_%d.npy'% n))

        # Keep a share of the generated label-1 images equal to the label-1
        # share of the real training labels.
        ratio_neg = np.count_nonzero(y_train == 1) / len(y_train)
        size = round(ratio_neg * X_train_1_ae.shape[0])

        # Sample from every row index; `range(rows - 1)` could never pick the
        # last generated image.
        ind = random.sample(range(X_train_1_ae.shape[0]), size)
        X_train_1_ae = X_train_1_ae[ind,:]

        # create target
        y_train_1_ae = np.ones(X_train_1_ae.shape[0], dtype=int)
        y_train_0_ae = np.zeros(X_train_0_ae.shape[0], dtype=int)

        # join data
        X_train_ae = np.concatenate((X_train_1_ae, X_train_0_ae))
        y_train_ae = np.concatenate((y_train_1_ae, y_train_0_ae))

        X_train = np.concatenate((X_train, X_train_ae))
        y_train = np.concatenate((y_train, y_train_ae))

    # Label shares, ordered (label 0, label 1).
    # train
    ratio_pos = np.count_nonzero(y_train == 0) / len(y_train)
    ratio_neg = np.count_nonzero(y_train == 1) / len(y_train)
    class_weight_train = np.array((ratio_pos, ratio_neg))

    # val
    ratio_pos = np.count_nonzero(y_val == 0) / len(y_val)
    ratio_neg = np.count_nonzero(y_val == 1) / len(y_val)
    class_weight_val = np.array((ratio_pos, ratio_neg))

    # NOTE(review): the generator standardises the training batches
    # (featurewise_center / featurewise_std_normalization), while X_val and the
    # arrays passed to `predict` below are used as loaded -- confirm whether
    # they should be standardised the same way.
    datagen = ImageDataGenerator(featurewise_center=True,
                             featurewise_std_normalization=True,
                             rotation_range=90,
                             brightness_range = (0.25, 0.75))
    datagen.fit(X_train)

    base_model_augmented, model_augmented = get_model(learning_rate)

    # `steps_per_epoch` must be a whole number of batches; round up so the
    # last, partial batch is still visited.
    # NOTE(review): Keras documents `class_weight` as a dict mapping class
    # index to weight; an array is passed here -- confirm it has the intended
    # effect.
    out = model_augmented.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size, seed=seed),
                     validation_data=(X_val, y_val),
                     steps_per_epoch=math.ceil(len(X_train) / batch_size),
                     epochs=epochs,
                     class_weight=class_weight_train,
                     callbacks = callbacks_list,
                     verbose=1)

    if out.history:
        # Accumulate the first-epoch metrics of this run across iterations.
        acum_tr_auc.append(out.history['auc'][0])
        acum_val_auc.append(out.history['val_auc'][0])
        acum_tr_loss.append(out.history['loss'][0])
        acum_val_loss.append(out.history['val_loss'][0])

        # Outputs of this loop carry the `augmentation` suffix so they do not
        # overwrite the files written by the non-augmented loop.
        acum = pd.DataFrame([acum_tr_auc, acum_val_auc, acum_tr_loss, acum_val_loss])
        acum.to_csv(os.path.join(path_results,'acum_results_augmentation.csv'))

        clear_output()
        best_i = np.argmax(acum_val_auc)
        grafica_entrenamiento(acum_tr_auc, acum_val_auc, acum_tr_loss, acum_val_loss, best_i)

        stopped_epoch = early_stopping.stopped_epoch
        model = out.model

        # AUC of the fitted model on the train and validation splits.
        pred_train = model.predict(X_train)
        pred_val = model.predict(X_val)

        train_auc = roc_auc_score(y_true = y_train, y_score = pred_train)
        val_auc = roc_auc_score(y_true = y_val, y_score = pred_val)

        # One column of results per value of `n`.
        res = pd.DataFrame([n, epochs, batch_size, stopped_epoch, train_auc, val_auc])
        df = pd.concat([df, res], axis=1)

        df.to_csv(os.path.join(path_results,'model_results_augmentation.csv'))

        model.save(os.path.join(path_results, 'model_augmentation_%d.h5' % n))

        if previous_val_auc < val_auc:
            # Best model so far: keep a copy in its own folder.
            save_dir = os.path.join(
                    os.getcwd(),
                    '../model_results_y')
            os.makedirs(save_dir, exist_ok=True)
            model.save(os.path.join(save_dir, 'model_augmentation_%d.h5' % n))

            previous_val_auc = val_auc


# Label the rows only when at least one run produced results; assigning six
# labels to an empty frame would raise.
if not df.empty:
    df.index = ['size_ae', 'epochs', 'batch_size','early_stopping', 'train_auc', 'val_auc']
    df.to_csv(os.path.join(path_results,'model_results_augmentation.csv'))

### Predict

In [None]:
# Model scores for the three splits, computed in train / val / test order.
pred_train, pred_val, pred_test = (
    model.predict(features) for features in (X_train, X_val, X_test)
)

### Metrics

In [None]:
# ROC AUC per split, then a one-line summary.
_labels_and_scores = ((y_train, pred_train), (y_val, pred_val), (y_test, pred_test))
auc_train, auc_val, auc_test = (
    roc_auc_score(y_true=labels, y_score=scores)
    for labels, scores in _labels_and_scores
)
print('AUC train = %s - AUC val = %s - AUC test = %s' % (auc_train, auc_val, auc_test))

In [None]:
# Turn scores into hard labels with a 0.5 cut-off.
y_labels_train, y_labels_val, y_labels_test = (
    (scores >= 0.5).astype(int) for scores in (pred_train, pred_val, pred_test)
)

# Confusion matrix of each split.
cm_train = confusion_matrix(y_true=y_train, y_pred=y_labels_train)
cm_val = confusion_matrix(y_true=y_val, y_pred=y_labels_val)
cm_test = confusion_matrix(y_true=y_test, y_pred=y_labels_test)

for matrix in (cm_train, cm_val, cm_test):
    print(matrix)

In [None]:

# ROC curve points and AUC for each split.
fpr_train, tpr_train, threshold_train = roc_curve(y_train, pred_train)
roc_auc_train = roc_auc_score(y_true = y_train, y_score = pred_train)
fpr_val, tpr_val, threshold_val = roc_curve(y_val, pred_val)
roc_auc_val = roc_auc_score(y_true = y_val, y_score = pred_val)
fpr_test, tpr_test, threshold_test = roc_curve(y_test, pred_test)
roc_auc_test = roc_auc_score(y_true = y_test, y_score = pred_test)

# Figure 1: the three splits together (red = train, green = val, blue = test).
# Each curve is labelled with the AUC of its own split.
plt.title('Receiver Operating Characteristic')
plt.plot(fpr_train, tpr_train, 'r', label = 'AUC = %0.2f' % roc_auc_train)
plt.plot(fpr_val, tpr_val, 'g', label = 'AUC = %0.2f' % roc_auc_val)
plt.plot(fpr_test, tpr_test, 'b', label = 'AUC = %0.2f' % roc_auc_test)
plt.legend(loc = 'lower right')
# Diagonal reference line from (0, 0) to (1, 1)
plt.plot([0, 1], [0, 1],'k--')
plt.xlim([-0.01, 1])
plt.ylim([0, 1.01])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

# Figure 2: training split only.
plt.title('Receiver Operating Characteristic')
plt.plot(fpr_train, tpr_train, 'b', label = 'AUC = %0.2f' % roc_auc_train)
plt.legend(loc = 'lower right')
plt.plot([0, 1], [0, 1],'r--')
plt.xlim([-0.01, 1])
plt.ylim([0, 1.01])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

### Save output

In [None]:
# Write the features, predictions and labels of every split to CSV files.
output_dir = '../predictions_y'
os.makedirs(output_dir, exist_ok=True)

for split_name, features, scores, labels in (
    ('train', X_train, pred_train, y_train),
    ('val', X_val, pred_val, y_val),
    ('test', X_test, pred_test, y_test),
):
    features_path = os.path.join(output_dir, 'X_%s.csv' % split_name)
    if hasattr(features, 'to_csv'):
        features.to_csv(features_path)
    else:
        # Arrays returned by np.load have no `to_csv`; flatten every sample
        # into one row so the array fits a 2-D table.
        flat = np.asarray(features).reshape(len(features), -1)
        pd.DataFrame(flat).to_csv(features_path)

    np.savetxt(os.path.join(output_dir, 'predictions_%s.csv' % split_name), scores, delimiter=";")
    np.savetxt(os.path.join(output_dir, 'y_%s.csv' % split_name), labels, delimiter=";")

# Results

## AUC confidence intervals

In [None]:
# Bootstrap the AUC of every split and tabulate mean and 5%-95% interval.
# NOTE(review): `bootstrap_auc` is not defined in the visible cells; it is
# assumed to be available beforehand -- confirm.
AUC_CI_train = bootstrap_auc(y_train, pred_train, bootstraps = 100, fold_size = 1000)
AUC_CI_val = bootstrap_auc(y_val, pred_val, bootstraps = 100, fold_size = 1000)
AUC_CI_test = bootstrap_auc(y_test, pred_test, bootstraps = 100, fold_size = 1000)

# Stack the three one-row tables with pd.concat (DataFrame.append is not
# available in current pandas releases), then label the rows by split.
AUC_CI = pd.concat(
    [print_confidence_intervals(stats)
     for stats in (AUC_CI_train, AUC_CI_val, AUC_CI_test)],
    ignore_index=True,
)
AUC_CI.index = ['Train', 'Val', 'Test']
AUC_CI

## Plot AUC

In [None]:
# ROC curve points for each split.
fpr_train, tpr_train, threshold_train = roc_curve(y_train, pred_train)
fpr_val, tpr_val, threshold_val = roc_curve(y_val, pred_val)
fpr_test, tpr_test, threshold_test = roc_curve(y_test, pred_test)

# Area under each curve.
roc_auc_train, roc_auc_val, roc_auc_test = (
    roc_auc_score(y_true=labels, y_score=scores)
    for labels, scores in ((y_train, pred_train), (y_val, pred_val), (y_test, pred_test))
)

plt.title('Receiver Operating Characteristic')
# One curve per split: red = train, green = val, blue = test.
for fpr, tpr, colour, area in (
    (fpr_train, tpr_train, 'r', roc_auc_train),
    (fpr_val, tpr_val, 'g', roc_auc_val),
    (fpr_test, tpr_test, 'b', roc_auc_test),
):
    plt.plot(fpr, tpr, colour, label = 'AUC = %0.2f' % area)
plt.legend(loc = 'lower right')
# Diagonal reference line from (0, 0) to (1, 1)
plt.plot([0, 1], [0, 1],'k--')
plt.xlim([0, 1])
plt.ylim([0, 1])
plt.ylabel('True Positive Rate')
plt.xlabel('False Positive Rate')
plt.show()

## Heatmap / Gradcam

In [None]:
# Draw the activation heatmap of every test sample whose label equals 1.
positive_indices = np.where(y_test == 1)[0]
for idx in positive_indices:
    print('index ' + str(idx))
    heat_map, superimposed_img = show_heatmap(
        model, X_test[idx], y_test[idx], pred_test[idx])