In [None]:
# Notebook banner: identifies this experiment variant in the cell output.
print("Alzheimer Detection - 8")

In [None]:
# Mount Google Drive at /content/drive (Colab-only); the dataset path used
# later in this notebook lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import skimage.io
import os 
from os.path import join
import tqdm
from glob import glob
import tensorflow as tf

from tqdm import tqdm
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split

from skimage.io import imread, imshow
from skimage.transform import resize
from skimage.color import grey2rgb

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, BatchNormalization, Dropout, Flatten, Dense, Activation, MaxPool2D, AveragePooling2D, MaxPooling2D, Conv2D, SeparableConv2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical,plot_model
from tensorflow.keras.applications.resnet50 import ResNet50

from sklearn.metrics import accuracy_score, balanced_accuracy_score, roc_auc_score, \
    confusion_matrix, precision_score, recall_score, f1_score

import PIL
import random
import cv2 as cv
import seaborn as sns

In [None]:
# Prefer a TPU when the runtime exposes one; otherwise fall back to the default
# (CPU / single-GPU) distribution strategy.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Device:', tpu.master())
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
except Exception as tpu_error:
    # `Exception` (not a bare `except`) so Ctrl-C / kernel interrupts still
    # propagate; the reason for the fallback is reported instead of being hidden.
    print('TPU not available ({}), using the default strategy.'.format(type(tpu_error).__name__))
    strategy = tf.distribute.get_strategy()
print('Number of replicas:', strategy.num_replicas_in_sync)

print(tf.__version__)

In [None]:
# Sentinel that lets tf.data choose its own tuning parameters.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# Global batch size: 32 samples for every replica of the active strategy.
BATCH_SIZE = strategy.num_replicas_in_sync * 32
# Spatial size every image is resized to before it is fed to the network.
IMAGE_SIZE = [224, 224]
# Upper bound on the number of training epochs.
EPOCHS = 100

In [None]:
data_folder = "/content/drive/MyDrive/MS-Upgrad/alzheimers-1" # including both synthetic and real data

# Fixed seed so that train / validation / test membership is identical on every
# run; without it the held-out test images change each time the cell executes.
SPLIT_SEED = 42
TEST_FRACTION = 0.2   # share of each class held out for testing
TRAIN_FRACTION = 0.8  # share of the remaining (non-test) images used for training


def split_class(root, folder_name, label, class_name, seed=SPLIT_SEED):
    """
    List the images of one class and split them into test / train / validation.

    Parameters
    ----------
    root : str
        Dataset root directory containing one sub-folder per class.
    folder_name : str
        Name of the class sub-folder to scan.
    label : int
        Integer class id stored in the "y" column.
    class_name : str
        Human readable class name stored in the "class" column.
    seed : int
        Seed for the shuffle, making the split reproducible.

    Returns
    -------
    dict of pandas.DataFrame
        Keys: "all" (sorted listing), "shuffled", "test", "trainval",
        "train" and "val". "test" is the first 20% of the shuffled rows;
        the rest is divided 80/20 into "train" and "val".
    """
    frame = pd.DataFrame({
        "X": sorted(glob(join(root, folder_name, "*"))),
        "y": label,
        "class": class_name
    })
    shuffled = frame.sample(frac=1, random_state=seed)
    n_test = int(TEST_FRACTION * len(shuffled))
    trainval = shuffled[n_test:]
    n_train = int(TRAIN_FRACTION * len(trainval))
    return {
        "all": frame,
        "shuffled": shuffled,
        "test": shuffled[:n_test],
        "trainval": trainval,
        "train": trainval[:n_train],
        "val": trainval[n_train:],
    }


## images NonDemented
non_parts = split_class(data_folder, "NonDemented", 0, "NonDementia")
df_NonDemented_imgs = non_parts["all"]
shuffled_non = non_parts["shuffled"]
non_test = non_parts["test"]
non_trainval = non_parts["trainval"]
shuffled_non_train = non_parts["train"]
shuffled_non_val = non_parts["val"]

## images VeryMildDemented
verymild_parts = split_class(data_folder, "VeryMildDemented", 1, "VeryMildDementia")
df_VeryMildDemented_imgs = verymild_parts["all"]
shuffled_verymild = verymild_parts["shuffled"]
verymild_test = verymild_parts["test"]
verymild_trainval = verymild_parts["trainval"]
shuffled_verymild_train = verymild_parts["train"]
shuffled_verymild_val = verymild_parts["val"]

## images MildDemented
mild_parts = split_class(data_folder, "MildDemented", 2, "MildDementia")
df_MildDemented_imgs = mild_parts["all"]
shuffled_mild = mild_parts["shuffled"]
mild_test = mild_parts["test"]
mild_trainval = mild_parts["trainval"]
shuffled_mild_train = mild_parts["train"]
shuffled_mild_val = mild_parts["val"]

## images ModerateDemented
moderate_parts = split_class(data_folder, "ModerateDemented", 3, "ModerateDementia")
df_ModerateDemented_imgs = moderate_parts["all"]
shuffled_moderate = moderate_parts["shuffled"]
moderate_test = moderate_parts["test"]
moderate_trainval = moderate_parts["trainval"]
shuffled_moderate_train = moderate_parts["train"]
shuffled_moderate_val = moderate_parts["val"]


## Number of images
class_titles = ("NonDemented", "VeryMildDemented", "MildDemented", "ModerateDemented")
split_report = [
    ("TOTAL:", (shuffled_non, shuffled_verymild, shuffled_mild, shuffled_moderate)),
    ("\nTest:", (non_test, verymild_test, mild_test, moderate_test)),
    ("\nTraining:", (shuffled_non_train, shuffled_verymild_train,
                     shuffled_mild_train, shuffled_moderate_train)),
    ("\nValidation:", (shuffled_non_val, shuffled_verymild_val,
                       shuffled_mild_val, shuffled_moderate_val)),
]
for position, (heading, frames) in enumerate(split_report):
    if position:
        # separator between consecutive sections
        print("------------")
    print(heading)
    for title, frame in zip(class_titles, frames):
        print("# of images with {} Alzheimer =".format(title), len(frame))


In [None]:
# Bar chart of the number of images per class, to make the class imbalance visible.
heights = [len(frame) for frame in (shuffled_non, shuffled_verymild, shuffled_mild, shuffled_moderate)]
height = heights
bars = ('Non', 'Very Mild', 'Mild', 'Moderate')
y_pos = np.arange(len(bars))

fig, ax = plt.subplots()
ax.bar(y_pos, height)
# one labelled tick under each bar
ax.set_xticks(y_pos)
ax.set_xticklabels(bars)
plt.show()

In [None]:
def load_img(fname):
    """
    Read an image from disk as an RGB array resized to IMAGE_SIZE.

    Parameters
    ----------
    fname : str
        Path of the image file.

    Returns
    -------
    The image returned by OpenCV after BGR->RGB conversion and resizing.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file (missing, unreadable or not an image).
    """
    img = cv.imread(fname)
    if img is None:
        # cv.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        raise FileNotFoundError("Could not read image file: {}".format(fname))
    img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
    target_size = (IMAGE_SIZE[0], IMAGE_SIZE[1])
    return cv.resize(img, target_size)

# one_hot_encoding
def one_hot_encoding(class_number, num_classes=4):
    """
    Return the one-hot list for an integer class id.

    Parameters
    ----------
    class_number : int
        Class id in the range [0, num_classes).
    num_classes : int [default=4]
        Length of the returned vector.

    Returns
    -------
    list of int
        List of `num_classes` zeros with a single 1 at `class_number`.

    Raises
    ------
    IndexError
        If `class_number` is outside [0, num_classes). Negative ids are
        rejected explicitly; plain list indexing would silently wrap them
        around to the last classes.
    """
    if not 0 <= class_number < num_classes:
        raise IndexError(
            "class_number {} is out of range for {} classes".format(class_number, num_classes)
        )
    encoded = [0] * num_classes
    encoded[class_number] = 1
    return encoded

In [None]:
## TRAIN SET
# Merge the per-class training frames, then randomise the row order.
train_parts = [shuffled_mild_train, shuffled_moderate_train, shuffled_non_train, shuffled_verymild_train]
train_concat = pd.concat(train_parts).sample(frac=1)
# Replace every file path with the decoded image.
train_concat["X"] = train_concat["X"].apply(load_img)

# Integer labels are kept aside (before one-hot encoding) for class-weight use.
y_train_lab = np.array(train_concat["y"])

# One-hot encode the labels, then stack images and labels into dense arrays.
train_concat["y"] = train_concat["y"].apply(one_hot_encoding)
xtrain = np.stack(train_concat["X"].to_list())
ytrain = np.stack(train_concat["y"].to_list())


## VALIDATION SET
# Same pipeline as the training set: merge, shuffle, load, encode, stack.
val_parts = [shuffled_mild_val, shuffled_moderate_val, shuffled_non_val, shuffled_verymild_val]
val_concat = pd.concat(val_parts).sample(frac=1)
val_concat["X"] = val_concat["X"].apply(load_img)
val_concat["y"] = val_concat["y"].apply(one_hot_encoding)
xval = np.stack(val_concat["X"].to_list())
yval = np.stack(val_concat["y"].to_list())

In [None]:
# plot some images per class

ncols = 10

# (frame, row label) for each class, in the order the rows are drawn
class_rows = [
    (shuffled_non, "Non Dementia"),
    (shuffled_verymild, "Very Mild Dementia"),
    (shuffled_mild, "Mild Dementia"),
    (shuffled_moderate, "Moderate Dementia"),
]

fig, axs = plt.subplots(nrows=len(class_rows), ncols=ncols, figsize=(20, 10))

for row_axes, (class_frame, row_label) in zip(axs, class_rows):
    # Positional slice: the frames are shuffled, so a label slice such as
    # `.loc[:ncols]` would stop wherever index label `ncols` happens to sit and
    # return an arbitrary number of rows (or fail if that label is absent).
    sample_paths = class_frame["X"].iloc[:ncols]

    # Strip ticks/grid from every cell, including any left empty when a class
    # has fewer than `ncols` images.
    for ax in row_axes:
        ax.set_xticks([])
        ax.set_yticks([])
        ax.grid(False)

    for fname, ax in zip(sample_paths, row_axes):
        ax.imshow(load_img(fname))

    row_axes[0].set_ylabel(row_label)

In [None]:
# Transfer learning backbone: ResNet50 without its classification head,
# initialised with ImageNet weights, for 224x224 RGB input.
base_model = ResNet50(
    weights="imagenet",
    include_top=False,
    input_shape=(224, 224, 3),
)

In [None]:
# Freeze every layer of `base_model` so its weights are not updated by training.
# NOTE(review): build_model() constructs its own ResNet50 instance instead of
# using `base_model`, so this loop does not touch the backbone inside the model
# that is actually trained - confirm whether `base_model` is still needed.
for layer in base_model.layers:
    layer.trainable=False

In [None]:
# Convolutional Block
def conv_block(filters):
    """
    Build a block of two separable 3x3 convolutions, batch norm and 2x2 max pooling.

    Parameters
    ----------
    filters : int
        Number of output channels of both separable convolutions.

    Returns
    -------
    tf.keras.Sequential
        The un-built block; its input shape is inferred on first call.
    """
    block = tf.keras.Sequential([
        tf.keras.layers.SeparableConv2D(filters, 3, activation='elu', padding='same'),
        tf.keras.layers.SeparableConv2D(filters, 3, activation='elu', padding='same'),
        tf.keras.layers.BatchNormalization(),
        # 'same' padding halves the spatial size with rounding up (7 -> 4 -> 2 -> 1 -> 1).
        # With the default 'valid' padding the 7x7 ResNet50 feature map shrinks
        # 7 -> 3 -> 1, and a third 2x2 pool on a 1x1 map would need a negative
        # output size, so the stack of blocks in build_model() could not be built.
        # For even input sizes the result is identical to 'valid' pooling.
        tf.keras.layers.MaxPool2D(padding='same')
    ]
    )

    return block

# Dense Block
def dense_block(units, dropout_rate):
    """
    Build a fully connected block: Dense (ELU) -> BatchNormalization -> Dropout.

    Parameters
    ----------
    units : int
        Width of the dense layer.
    dropout_rate : float
        Rate passed to the trailing Dropout layer.

    Returns
    -------
    tf.keras.Sequential
        The three layers wrapped in a Sequential model.
    """
    stacked_layers = [
        tf.keras.layers.Dense(units, activation='elu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(dropout_rate),
    ]
    return tf.keras.Sequential(stacked_layers)

# Function to build the model
def build_model(freeze_backbone=True):
    """
    Assemble the classifier: ResNet50 backbone + separable-conv blocks + dense head.

    The backbone is created here (not taken from a notebook-level variable) so
    that all of its variables are created inside whatever `strategy.scope()`
    the caller uses.

    Parameters
    ----------
    freeze_backbone : bool [default=True]
        When True the ImageNet-pretrained ResNet50 weights are frozen, which is
        what the notebook's transfer-learning setup intends. Previously the
        freeze was applied to a separate, unused ResNet50 instance, so the
        backbone inside this model was fully trainable. Pass False to
        fine-tune the backbone as well.

    Returns
    -------
    tf.keras.Sequential
        Un-compiled model with a 4-way softmax output.
    """
    backbone = tf.keras.applications.resnet50.ResNet50(
        include_top=False,
        weights='imagenet',
        input_shape=(224,224,3),
    )
    backbone.trainable = not freeze_backbone

    model = tf.keras.Sequential([
        # tf.keras.Input(shape=(*IMAGE_SIZE, 3)),
        backbone,

        tf.keras.layers.Dropout(0.2),

        # NOTE: the pool_size=1 pooling layers below use stride 1 as well, so
        # they pass their input through unchanged; they are kept only to leave
        # the layer layout as it was.
        conv_block(32),
        tf.keras.layers.MaxPool2D(pool_size=1),

        conv_block(64),
        tf.keras.layers.MaxPool2D(pool_size=1),
        tf.keras.layers.Dropout(0.2),

        conv_block(128),
        tf.keras.layers.MaxPool2D(pool_size=1),

        conv_block(256),
        tf.keras.layers.MaxPool2D(pool_size=1),
        tf.keras.layers.Dropout(0.2),

        conv_block(512),
        tf.keras.layers.MaxPool2D(pool_size=1),

        tf.keras.layers.AveragePooling2D(pool_size=1),

        tf.keras.layers.Flatten(),
        dense_block(512, 0.7),
        dense_block(128, 0.5),
        dense_block(64, 0.3),

        # one probability per dementia class
        tf.keras.layers.Dense(4, activation='softmax')
    ])

    return model

In [None]:
# Build and compile inside the strategy scope so the model variables are
# created under the selected distribution strategy.
with strategy.scope():
    model = build_model()

    # AUC is the only metric tracked; it is logged as 'auc' / 'val_auc'.
    METRICS = [tf.keras.metrics.AUC(name='auc')]

    model.compile(
        loss=tf.losses.CategoricalCrossentropy(),
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        metrics=METRICS,
    )

In [None]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

In [None]:
# Keep only the best model seen so far (ModelCheckpoint monitors val_loss by default).
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint("alzheimer_model.h5",
                                                    save_best_only=True)

# Stop when val_loss has not improved for 10 epochs and roll back to the best weights.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                     patience=10,
                                                     restore_best_weights=True)

# Halve the learning rate when the training AUC plateaus.
# mode must be "max": with mode="auto" Keras only picks "max" for monitor names
# containing "acc", so 'auc' was treated as a quantity to minimise and a rising
# AUC counted as "no improvement", triggering a reduction every `patience` epochs.
reduce_lr = ReduceLROnPlateau(monitor='auc', factor=0.5, verbose=1, mode="max",
                              cooldown=5, patience=10, min_lr=0.00001)

# Adding class weights to compensate class imbalance
# class_weights = { 0: 1,
#                   1: 1.5,
#                   2: 3,
#                   3: 20
# }


In [None]:

# Train the model; validation metrics are computed on (xval, yval) after each epoch.
# To use class weights, define `class_weights` and add `class_weight=class_weights` to the call.
training_callbacks = [checkpoint_cb, early_stopping_cb, reduce_lr]
history = model.fit(
    xtrain,
    ytrain,
    validation_data=(xval, yval),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=training_callbacks,
)

In [None]:
# Training curves: AUC, loss (train vs. validation) and the learning-rate schedule.
fig, ax = plt.subplots(3, 1, figsize=(20, 20))
ax = ax.ravel()

for i, met in enumerate(['auc', 'loss']):
    ax[i].plot(history.history[met])
    ax[i].plot(history.history['val_' + met])
    ax[i].set_title('Model {}'.format(met))
    ax[i].set_xlabel('epochs')
    ax[i].set_ylabel(met)
    ax[i].legend(['train', 'val'])

# The learning rate is logged as 'lr' by older Keras versions and as
# 'learning_rate' by newer ones; accept either instead of raising KeyError.
lr_history = history.history.get('lr', history.history.get('learning_rate'))
if lr_history is not None:
    ax[2].plot(lr_history)
ax[2].set_title('Model lr')
ax[2].set_xlabel('epochs')
ax[2].set_ylabel('lr')

In [None]:
## TEST SET
# Merge the held-out per-class test frames, shuffle, load the images and
# one-hot encode the labels.
test_concat = pd.concat([mild_test, moderate_test, non_test, verymild_test])
test_concat = test_concat.sample(frac=1)
test_concat["X"] = test_concat["X"].apply(load_img)
# `one_hot_encoding` is the encoder defined in this notebook; the previous name
# `hot_array` is not defined anywhere and raised NameError.
test_concat["y"] = test_concat["y"].apply(one_hot_encoding)
# Stack the TEST frame: stacking `val_concat` here made the "test" evaluation
# silently run on the validation images.
xtest = np.stack(test_concat["X"])
ytest = np.stack(test_concat["y"])

In [None]:
# Evaluate loss and metrics on the test arrays; the return value is assigned
# to `_` so the cell does not echo it.
_ = model.evaluate(x=xtest, y=ytest)

In [None]:
# Federico Agostini notebook Snippet for metrics

def print_metrics(model, X, y, cm_labels="auto"):
    """
    Print accuracy, balanced accuracy, macro precision / recall / f1 and
    one-vs-rest macro AUC, and plot the row-normalised confusion matrix.

    Parameters
    ----------
    model : object
        Model whose `predict(X)` returns one score per class for every sample
        (e.g. the softmax output of the Keras model built in this notebook).
    X : array like
        Input features.
    y : array like
        Integer target labels (not one-hot).
    cm_labels : list or "auto" [default="auto"]
        Tick labels for the confusion matrix. With "auto" and four classes the
        notebook's class names ('Non', 'Very Mild', 'Mild', 'Moderate') are
        used; an explicit list is passed through to the heatmap unchanged.

    Returns
    -------
    metrics : dict
        The calculated metrics plus the confusion matrix under key "cm".
        "AUC" is NaN when it cannot be computed.

    """
    # Predict once and reuse the scores for both hard labels and AUC.
    y_pred_proba = np.asarray(model.predict(X))
    # Flatten to (n_samples, n_classes). Unlike np.squeeze this keeps the
    # sample axis when there is a single sample.
    y_pred_proba = y_pred_proba.reshape(y_pred_proba.shape[0], -1)
    n_classes = y_pred_proba.shape[1]
    y_pred = np.argmax(y_pred_proba, axis=1).astype(int)

    metrics = {
        "Accuracy"    : accuracy_score(y, y_pred),
        "Bal Accuracy": balanced_accuracy_score(y, y_pred),
        "Precision"   : precision_score(y, y_pred, average="macro"),
        "Recall"      : recall_score(y, y_pred, average="macro"),
        "f1"          : f1_score(y, y_pred, average="macro")
    }
    # AUC is best-effort: it fails e.g. when a class is missing from `y`.
    try:
        metrics["AUC"] = roc_auc_score(y, y_pred_proba, multi_class="ovr", average="macro")
    except Exception as auc_error:
        print("AUC could not be computed: {}".format(auc_error))
        metrics["AUC"] = np.nan

    for k,v in metrics.items():
        print("{:12s} = {}".format(k,v))
    print("\n")

    # confusion matrix; `labels` pins one row/column per model output so the
    # matrix keeps its shape even if a class is absent from `y` and `y_pred`
    cm = confusion_matrix(y, y_pred, labels=np.arange(n_classes), normalize="true")
    fig, ax = plt.subplots(figsize=(6,6))
    sns.heatmap(cm, ax=ax, square=True, vmin=0, vmax=1, annot=True,
                linewidths=.05, fmt=".2f", cbar_kws={"shrink":.8},
                xticklabels=cm_labels, yticklabels=cm_labels)

    default_names = ['Non', 'Very Mild', 'Mild', 'Moderate']
    use_default_names = isinstance(cm_labels, str) and cm_labels == "auto"
    if use_default_names and n_classes == len(default_names):
        # Cell centres sit at 0.5, 1.5, ...; only override the ticks when the
        # caller did not supply labels of their own.
        tick_positions = np.arange(n_classes) + 0.5
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(default_names)
        ax.set_yticks(tick_positions)
        ax.set_yticklabels(default_names)
    ax.set_ylabel("True")
    ax.set_xlabel("Predicted")

    metrics["cm"] = cm

    return metrics

In [None]:
# Convert the one-hot test labels back to integer class ids, then report metrics.
y_test = np.argmax(ytest, axis=1)
met = print_metrics(model, xtest, y_test)