In [None]:
import os
import pickle
import shutil
import warnings
from datetime import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage import io

#import cv2
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc, ConfusionMatrixDisplay
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing import image as Image
from tensorflow.keras.applications import VGG19
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.utils import plot_model
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, CSVLogger
from keras.callbacks import ReduceLROnPlateau
# Silence every warning category for the remainder of the session.
warnings.filterwarnings('ignore')
# Base directory of the dataset; load_from_directory() prepends it to the relative path it receives.
root = "../input/lyme-disease-dataset-cleaned/"

# Data Loading

In [None]:
def load_from_directory(path, color_mode='rgb', normalize=True, augment=False):
    """
    Load every image found under ``root + path`` into memory.

    The folder is expected to contain the sub-folders ``Lyme_Negative`` and
    ``Lyme_Positive``. Each file is resized to 224x224 and converted to an
    array; files that cannot be read are reported and skipped. The first
    loaded image is shown with ``plt.imshow`` as a quick visual check.

        Parameters:
            path (str): Dataset folder, relative to the module-level ``root``
            color_mode (str): Forwarded to ``Image.load_img``
            normalize (bool): Divide the pixel values by 255 when True
            augment (bool): When True, one ``augment_img`` variant is stored
                right after every original image (with the same label)

        Returns:
            tuple: ``(images, labels)`` as numpy arrays; labels are 0 for
            ``Lyme_Negative`` and 1 for ``Lyme_Positive``

        Raises:
            ValueError: If no image could be loaded at all
    """
    target_size = (224, 224)
    # Fixed mapping: the encoding stays 0/1 even if one class folder is empty.
    class_ids = {"Negative": 0, "Positive": 1}
    path_root = root + path
    print(path_root)

    images = []
    labels = []
    for category, class_id in class_ids.items():
        class_dir = os.path.join(path_root, "Lyme_" + category)
        # Sorted so that the sample order does not depend on the file system.
        for file_name in sorted(os.listdir(class_dir)):
            img_path = os.path.join(class_dir, file_name)
            try:
                img = Image.load_img(img_path, target_size=target_size, color_mode=color_mode)
                img_tensor = Image.img_to_array(img)

                images.append(img_tensor)
                labels.append(class_id)

                if augment:
                    # augment_img returns a tensor; store it as a plain array like the originals.
                    images.append(np.asarray(augment_img(img_tensor)))
                    labels.append(class_id)
            except Exception as e:
                # Best effort: report the unreadable file and carry on with the rest.
                print(f"Skipping {img_path}: {e}")

    if not images:
        raise ValueError("No images could be loaded from " + path_root)

    # Every loaded sample is kept (nothing is popped from the lists).
    images = np.array(images)
    labels = np.array(labels, dtype=np.int64)

    # Preview is drawn before normalization, on the raw pixel values.
    plt.imshow(Image.array_to_img(images[0]))
    if normalize:
        images /= 255.
    return (images, labels)

In [None]:
# Print the current working directory; relative output paths used below (e.g. ./History) resolve against it.
!pwd

# Augment and save

In [None]:
def augment_img(image):
    """
    Return a randomly perturbed copy of one image.

    Applies, in order: random brightness shift, random crop and random
    horizontal flip.

        Parameters:
            image: Single image of rank 3 (height, width, channels)

        Returns:
            The augmented image as produced by ``tf.image``
    """
    height, width, channels = image.shape[0], image.shape[1], image.shape[2]
    # NOTE(review): load_from_directory calls this before dividing by 255, so a
    # max_delta of 0.5 is presumably a very small shift on that scale - confirm.
    image_new = tf.image.random_brightness(image, max_delta=0.5)
    # The channel count is taken from the image itself so that single-channel
    # (grayscale) input works as well as RGB.
    # NOTE(review): the crop size equals the input size, so the crop looks like a no-op.
    image_new = tf.image.random_crop(image_new, size=[height, width, channels])
    image_new = tf.image.random_flip_left_right(image_new)
    return image_new

# Functions for Visualizations

# Function to return Plot for Confusion Matrix

In [None]:
def getConfusionMatrix(model, validation, isLabelEncoded=False):
    """
    Build the confusion-matrix display and the classification report of a
    softmax model on a validation set.

        Parameters:
            model: Object whose ``predict`` returns per-class scores
            validation (tuple): ``(inputs, targets)``; targets may be class
                indices or one-hot rows
            isLabelEncoded (bool): Kept for backward compatibility; the target
                format is detected from its shape, so the flag has no effect

        Returns:
            tuple: ``(ConfusionMatrixDisplay, classification report string)``
    """
    inputs = validation[0]
    y_true = np.asarray(validation[1])
    if y_true.ndim > 1:
        # One-hot rows -> class index; a single column is just flattened.
        y_true = np.argmax(y_true, axis=-1) if y_true.shape[-1] > 1 else y_true.ravel()

    Y_pred = model.predict(inputs)  # per-class scores, one row per sample
    y_pred = np.argmax(Y_pred, axis=-1)

    labels = ["Negative", "Positive"]
    # labels=[0, 1] keeps the matrix 2x2 ([[TN, FP], [FN, TP]]) even when one
    # class is missing from both targets and predictions.
    conf_mat = confusion_matrix(y_true, y_pred, labels=[0, 1])
    disp = ConfusionMatrixDisplay(confusion_matrix=conf_mat, display_labels=labels)
    cl_report = classification_report(y_true, y_pred)
    return (disp, cl_report)

# Plot Training and Validation Loss, Accuracy -- For Keras Models

In [None]:
def _plot_history_metric(axis, epochs_range, metrics, key, label):
    """Draw the train and validation curves of one history metric on ``axis``."""
    axis.plot(epochs_range, metrics[key])
    axis.plot(epochs_range, metrics['val_' + key])
    axis.set_title('Model ' + label)
    axis.set_ylabel(label)
    axis.set_xlabel('Epoch')
    axis.legend(['Train ' + label, 'Validation ' + label])


def plot_train_val_accuracy(model, history, train, val, epoch, disp=None, save_name="uknown"):
    """
    Plot accuracy, loss and AUC curves (train vs. validation) in a 2x2 grid,
    optionally with a confusion matrix, and save the figure as
    ``./<save_name>.png``.

        Parameters:
            model: Unused; kept so existing calls keep working
            history: Object whose ``history`` attribute maps metric names to
                per-epoch lists (as returned by ``fit``)
            train: Unused; kept so existing calls keep working
            val: Unused; kept so existing calls keep working
            epoch (int): Number of epochs requested; the x-axis is built from
                the number of epochs actually recorded in ``history``
            disp: Optional ConfusionMatrixDisplay drawn in the top-right panel
            save_name (str): File name (without extension) of the saved figure
    """
    metrics = history.history
    # Recorded length, so a run that stopped early still plots without a shape mismatch.
    epochs_range = range(1, len(metrics['loss']) + 1)

    fig, ax = plt.subplots(2, 2, figsize=(20, 10))

    _plot_history_metric(ax[0][0], epochs_range, metrics, 'accuracy', 'Accuracy')

    if disp is not None:
        disp.plot(ax=ax[0][1])
        disp.ax_.set_title("Confusion Matrix")
    else:
        # Nothing to draw: hide the empty panel.
        ax[0][1].axis('off')

    _plot_history_metric(ax[1][0], epochs_range, metrics, 'loss', 'Loss')

    # The AUC metric is not always called exactly 'auc': Keras may append a
    # counter (auc_1, auc_2, ...) when several models are built in one session.
    auc_key = next(
        (key for key in metrics
         if not key.startswith('val_') and key.lower().startswith('auc')),
        None,
    )
    if auc_key is not None and 'val_' + auc_key in metrics:
        _plot_history_metric(ax[1][1], epochs_range, metrics, auc_key, 'AUC')
    else:
        ax[1][1].axis('off')

    # Save before showing so the file is written regardless of how the backend handles show().
    fig.savefig("./" + save_name + ".png", bbox_inches='tight')
    plt.show()
        

# Plot Classification Reports of Several Models

In [None]:
def plot_model_comparison(cr_list, model_names, labels=("Positive", "Negative")):
    """
    Plot precision, F1 score, recall and support per class for several models
    as grouped bar charts (2x2 grid) and save the figure to ``./comparison.png``.

        Parameters:
            cr_list (array): Classification reports of the models, each a dict
                indexed as ``report[class_label][metric]``
            model_names (array): Model names, in the same order as ``cr_list``
            labels (sequence): Class labels to compare; every report must
                contain each of them as a key
    """
    labels = list(labels)
    # (metric key in the report, panel title, y-axis label) in panel order.
    panels = [
        ("precision", "Precision Comparison", "Precision"),
        ("f1-score", "F1 Score Comparison", "f1 score"),
        ("recall", "Recall Comparison", "recall"),
        ("support", "Support Comparison", "support"),
    ]
    palette = ["crimson", "darkcyan"]
    # Shrink the bars when many classes are compared so groups do not overlap.
    bar_width = min(0.25, 0.8 / max(len(labels), 1))
    X = np.arange(len(model_names))
    # Ticks sit in the middle of each group of bars.
    tick_positions = X + bar_width * (len(labels) - 1) / 2

    fig, axes = plt.subplots(2, 2, figsize=(20, 10))
    for axis, (metric, title, ylabel) in zip(axes.flat, panels):
        for offset, cls in enumerate(labels):
            values = [cr[cls][metric] for cr in cr_list]
            # Beyond the two fixed colors, matplotlib's default cycle is used.
            color = palette[offset] if offset < len(palette) else None
            axis.bar(X + offset * bar_width, values, color=color, width=bar_width)
        axis.set_title(title)
        axis.set_xlabel("Models")
        axis.set_ylabel(ylabel)
        axis.set_xticks(tick_positions)
        axis.set_xticklabels(model_names)
        axis.legend(labels)

    fig.tight_layout(pad=2.0)
    fig.savefig("./comparison.png", bbox_inches="tight")

# Loading the data from directory

In [None]:
# Dataset sub-folders (relative to `root`) with the two-class training and validation images.
train_path, test_path = (
    "RashData/Train/Train_2_Cases",
    "RashData/Validation/Validation_2_Cases",
)

In [None]:
# Read both folders from disk (training folder first, then validation folder).
(train_images, train_labels), (test_images, test_labels) = (
    load_from_directory(train_path),
    load_from_directory(test_path),
)

# Pool both folders into one dataset; the train/test split is redone further down.
images = np.concatenate([train_images, test_images])
labels = np.concatenate([train_labels, test_labels])

In [None]:
# Peek at the first ten encoded training labels.
print(train_labels[:10])

In [None]:
# Array shapes of both folders, then the number of training samples labelled 1.
for split_images in (train_images, test_images):
    print(split_images.shape)
print(int(np.count_nonzero(train_labels == 1)))

In [None]:
# Output folders for the training logs and the figures; created only when missing.
for out_dir in ('./History', './Figures'):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

In [None]:
# Hold out 30% of the pooled data. The split is stratified so both parts keep
# the class ratio, and seeded so a re-run reproduces the same partition.
x_train, x_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.3, shuffle=True, stratify=labels, random_state=42
)
# One-hot targets for the 2-unit softmax head; num_classes is pinned so the
# encoding always has two columns.
y_train_en = keras.utils.to_categorical(y_train, num_classes=2)
y_test_en = keras.utils.to_categorical(y_test, num_classes=2)

print(y_test[0:5])
print(y_test_en[0:5])

In [None]:
# ImageNet-pretrained VGG19 without its classifier head; all of its layers are frozen.
vgg19 = VGG19(input_shape=(224, 224, 3), weights='imagenet', include_top=False)
for layer in vgg19.layers:
    layer.trainable = False

x = vgg19.output

# Two additional conv + batch-norm stages on top of the backbone. Only the
# vgg19 layers were frozen above, so these layers remain trainable.
x = keras.layers.Conv2D(1024, (2, 2), activation='sigmoid', kernel_initializer='he_uniform')(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Conv2D(2048, (1, 1), activation='sigmoid', kernel_initializer='he_uniform')(x)
x = keras.layers.BatchNormalization()(x)

# `model` is the feature extractor (flattened conv output); later cells call
# model.predict() to obtain inputs for the classical classifiers.
x = keras.layers.Flatten()(x)
model = keras.Model(inputs=vgg19.input, outputs=x)
#model.summary()

# Dense classification head: 200 hidden units, then a 2-way softmax.
dense = keras.layers.Dense(200, activation='sigmoid')(model.output)
dense = keras.layers.Dense(2, activation='softmax')(dense)

# Callbacks: learning-rate reduction on plateau and a per-epoch CSV log under ./History.
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=0.5e-6)
logger = CSVLogger('./History/vgg_with_dense.csv', separator=',')
callbacks = [lr_reducer, logger]

# `model_new` = feature extractor + dense head. It is built from model.input /
# model.output, so it reuses the layers of `model`.
model_new = keras.Model(inputs = model.input, outputs=dense)
model_new.summary()
model_new.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'AUC'])
# The hold-out split doubles as validation data during training.
hist = model_new.fit(x_train, y_train_en, epochs=30, validation_data=(x_test, y_test_en), shuffle=True, callbacks=callbacks)
# x_train_rf = vgg19.predict(train_images)
# x_test_rf = vgg19.predict(test_images)

# print(x_train_rf.shape)

In [None]:
def get_table(hist, steps=10):
    """
    Display a condensed view of a training-history table.

    Accuracy columns are shown as percentages, bookkeeping columns are
    removed, then every ``steps``-th row and the final row are displayed.
    The input frame is left unchanged.

        Parameters:
            hist (pd.DataFrame): History with ``accuracy`` and
                ``val_accuracy`` columns (e.g. read from the CSVLogger file)
            steps (int): Row stride of the first displayed table
    """
    hist_new = hist.copy()
    for column in ('accuracy', 'val_accuracy'):
        hist_new[column] = hist[column] * 100
    # The learning-rate / epoch columns are optional in the log; drop whichever
    # are present instead of failing when one is missing.
    hist_new = hist_new.drop(columns=['lr', 'learning_rate', 'epoch'], errors='ignore')
    display(hist_new.iloc[::steps])
    display(hist_new.tail(1))

In [None]:
# Reload the per-epoch log written during training and show every third epoch plus the last one.
hist_vgg_dense = pd.read_csv("./History/vgg_with_dense.csv")
get_table(hist_vgg_dense, steps=3)

In [None]:
# Accuracy / loss / AUC curves of the 30-epoch run of the dense-head model.
plot_train_val_accuracy(
    model_new,
    hist,
    train=(x_train, y_train_en),
    val=(x_test, y_test_en),
    epoch=30,
)

In [None]:
# Render the architecture of the feature extractor (`model`) to vgg-features.png.
plot_model(
    model,
    to_file='vgg-features.png',
    show_shapes=True,
    show_dtype=True,
    show_layer_names=True,
)

In [None]:
# Only the feature extractor (`model`) is written to disk here; saving the
# full classifier (`model_new`) is left disabled below.
# model_new.save("./vgg19_with_conv_dense.h5")
model.save("./vgg19_features.h5")

In [None]:
# Evaluate the dense-head model on the hold-out split (integer labels, not one-hot).
disp, cl_report_dense = getConfusionMatrix(model_new, (x_test, y_test))

# disp.plot() draws on a new figure; that figure is the one written to disk.
disp.plot()
disp.figure_.savefig("./vgg_dense_conf_mat.png", bbox_inches="tight")
# plot_train_val_accuracy(model_new, hist, None, None, 30, disp, "VGG-CONV-DENSE")
print(cl_report_dense)

In [None]:

# Feature vectors produced by the conv feature extractor for the current split.
x_train_rf = model.predict(x_train)
x_test_rf = model.predict(x_test)

rf_vgg = RandomForestClassifier(n_estimators=150, random_state=42)
# x_train / x_test come from train_test_split on the pooled data, so the
# matching targets are y_train / y_test (train_labels / test_labels belong to
# the original folders and do not line up with these rows).
rf_vgg.fit(x_train_rf, y_train)
score = rf_vgg.score(x_test_rf, y_test)
print(score)

In [None]:
# Random-forest evaluation on the hold-out features; y_test is the label
# vector that matches x_test_rf row for row.
y_pred = rf_vgg.predict(x_test_rf)
classes = ['Negative', 'Positive']
conf_mat = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix = conf_mat, display_labels=classes)
cl_report = classification_report(y_test, y_pred, output_dict=True)
disp.plot()
plt.savefig("./rf-cm-vgg.png", bbox_inches="tight")
print(cl_report)
# Write the report in binary *write* mode and close the file afterwards.
with open("rf-cl-report.sav", "wb") as report_file:
    pickle.dump(cl_report, report_file)


In [None]:
import pickle
# pickle.dump(rf_vgg, open("model_rf_vgg19.sav", "wb"))

In [None]:
# Polynomial-kernel SVM trained on the same deep features as the random forest.
model_svm = SVC(C=50, kernel='poly', gamma='auto')
model_svm.fit(X=x_train_rf, y=y_train)

In [None]:
# SVM evaluation on the hold-out features. The model was fitted on y_train,
# so it is scored against y_test, the labels of the same split.
y_pred = model_svm.predict(x_test_rf)
classes = ['Negative', 'Positive']
conf_mat = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix = conf_mat, display_labels=classes)
cl_report_svm = classification_report(y_test, y_pred, output_dict=True)
disp.plot()
plt.savefig("./svm-cm-vgg19.png", bbox_inches="tight")
print(cl_report_svm)

In [None]:
import pickle
# Persist the fitted SVM; the context manager flushes and closes the file.
with open("model_svm_vgg19.sav", "wb") as model_file:
    pickle.dump(model_svm, model_file)

In [None]:
# NOTE(review): `rf_resnet` and `test_resnet` are not defined in the part of
# the notebook visible here, so this cell presumably raises NameError on a
# fresh kernel - confirm whether it is a leftover from a ResNet experiment.
y_pred = rf_resnet.predict(x_test_rf)
classes = ['Negative', 'Positive']
conf_mat = confusion_matrix(test_resnet.classes, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix = conf_mat, display_labels=classes)
cl_report = classification_report(test_resnet.classes, y_pred)
disp.plot()
print(cl_report)

In [None]:
# Pick the entry with the highest 'score'.
# NOTE(review): `result_folds` is not defined in the part of the notebook visible here - confirm its origin.
obj = max(result_folds, key=lambda elem:elem['score'])

In [None]:
# Show the classification report stored with the best-scoring entry selected above.
display(obj['classification_report'])

# Below code is to prevent Kaggle from timing out

In [None]:
from time import sleep
import random

# Write 100000 random integers (one per line) to temp.txt. The context manager
# closes the file, so everything is flushed to disk before it is read back.
with open("temp.txt", "w") as temp:
    for i in range(100000):
        val = random.randint(4, 100000)
        temp.write(str(val) + "\n")

In [None]:
# Read back every line of temp.txt.
with open ("temp.txt") as test:
    lines = test.readlines() # read all lines into list

# Keep-alive: print one line every 10 seconds, forever. The loop has no exit
# condition, so execution does not continue past this cell until it is
# interrupted manually.
while True:
    for line in lines:
        print(line)
        sleep(10)

In [None]:
from yellowbrick.model_selection import learning_curve

In [None]:
Approach 1
Result

Approach 2
Result

Approach 3
Result

# Yellowbrick: plotting learning curves for the classical ML models

In [None]:
# Feature vectors for the learning-curve study, taken from the conv feature extractor.
x_train_rf = model.predict(x_train)
x_test_rf = model.predict(x_test)

rf_vgg = RandomForestClassifier(n_estimators=150, random_state=42)
# y_train is the label vector that matches x_train_rf row for row
# (train_labels belongs to the original training folder, not to this split).
print(learning_curve(rf_vgg, x_train_rf, y_train, cv=5, scoring='accuracy'))
plt.savefig("./rf_vgg.png", bbox_inches="tight")

In [None]:
model_svm = SVC(kernel='poly', gamma='auto', C=50)
# model_svm.fit(x_train_rf, y_train)
# Learning curve on the split's own labels: y_train lines up with x_train_rf,
# train_labels (original training folder) does not.
print(learning_curve(model_svm, x_train_rf, y_train, cv=5, scoring='accuracy'))

# Balanced Dataset Code

In [None]:
def getTrainTest(x, y, size=0.2, random_state=None):
    """
    Split a binary dataset so that the training part is class-balanced.

    Both classes are shuffled and cut to the size of the smaller one; this
    balanced subset is split into train/test. Samples of the larger class
    that did not fit into the balanced subset are appended to the test part,
    so no sample is discarded. The shapes of the intermediate arrays are
    printed.

        Parameters:
            x (array): Samples, first axis indexes the samples
            y (array): Labels, 1 for positive and 0 for negative
            size (float): ``test_size`` used for the balanced subset
            random_state (int): Optional seed for the shuffles and the split;
                None keeps using the global numpy random state

        Returns:
            tuple: ``(x_train, x_test, y_train, y_test)``
    """
    x = np.asarray(x)
    y = np.asarray(y)
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    # Boolean indexing copies, so shuffling does not touch the caller's array.
    x_positive = x[y == 1]
    x_negative = x[y == 0]
    rng.shuffle(x_positive)
    rng.shuffle(x_negative)

    # Balance on the smaller class, whichever it is.
    n_bal = min(x_positive.shape[0], x_negative.shape[0])
    x_pos_bal, x_pos_rest = x_positive[:n_bal], x_positive[n_bal:]
    x_neg_bal, x_neg_rest = x_negative[:n_bal], x_negative[n_bal:]

    x_bal = np.concatenate((x_pos_bal, x_neg_bal), axis=0)
    y_bal = np.concatenate((np.ones(n_bal, dtype='int'), np.zeros(n_bal, dtype='int')))
    print(x_bal.shape)
    print(y_bal.shape)

    x_train, x_test, y_train, y_test = train_test_split(
        x_bal, y_bal, test_size=size, shuffle=True, random_state=random_state
    )
    print(x_train.shape)
    print(y_train.shape)

    # Leftovers of the majority class are only used for testing.
    y_neg_rest = np.zeros(x_neg_rest.shape[0], dtype='int')
    y_pos_rest = np.ones(x_pos_rest.shape[0], dtype='int')
    x_test = np.concatenate((x_test, x_neg_rest, x_pos_rest), axis=0)
    y_test = np.concatenate((y_test, y_neg_rest, y_pos_rest))
    print(x_test.shape)
    print(y_test.shape)
    return (x_train, x_test, y_train, y_test)

    

In [None]:
# Class-balanced training split of the pooled data, followed by one-hot
# targets for the softmax head (train first, then test).
x_train, x_test, y_train, y_test = getTrainTest(images, labels)
y_train_en, y_test_en = (
    keras.utils.to_categorical(targets) for targets in (y_train, y_test)
)
print(y_train[:5])
print(y_train_en[:5])

In [None]:
# Same architecture as the earlier VGG19 training cell, rebuilt from scratch
# and trained on the class-balanced split.
# ImageNet-pretrained VGG19 without its classifier head; all of its layers are frozen.
vgg19 = VGG19(input_shape=(224, 224, 3), weights='imagenet', include_top=False)
for layer in vgg19.layers:
    layer.trainable = False

x = vgg19.output

# Two additional conv + batch-norm stages; only the vgg19 layers were frozen
# above, so these layers remain trainable.
x = keras.layers.Conv2D(1024, (2, 2), activation='sigmoid', kernel_initializer='he_uniform')(x)
x = keras.layers.BatchNormalization()(x)
x = keras.layers.Conv2D(2048, (1, 1), activation='sigmoid', kernel_initializer='he_uniform')(x)
x = keras.layers.BatchNormalization()(x)

# `model` is the feature extractor (flattened conv output) used by the
# classical classifiers in the cells below.
x = keras.layers.Flatten()(x)
model = keras.Model(inputs=vgg19.input, outputs=x)
#model.summary()

# Dense classification head: 200 hidden units, then a 2-way softmax.
dense = keras.layers.Dense(200, activation='sigmoid')(model.output)
dense = keras.layers.Dense(2, activation='softmax')(dense)

lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=0.5e-6)
# NOTE(review): same CSV path as the earlier training cell; the log of that
# run is presumably overwritten here - confirm this is intended.
logger = CSVLogger('./History/vgg_with_dense.csv', separator=',')
callbacks = [lr_reducer, logger]

# `model_new` = feature extractor + dense head, sharing the layers of `model`.
model_new = keras.Model(inputs = model.input, outputs=dense)
model_new.summary()
# NOTE(review): this is the second model compiled with the 'AUC' metric in the
# notebook; verify under which key ('auc' or a suffixed name) it ends up in hist.history.
model_new.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'AUC'])
hist = model_new.fit(x_train, y_train_en, epochs=30, validation_data=(x_test, y_test_en), shuffle=True, callbacks=callbacks)
# x_train_rf = vgg19.predict(train_images)
# x_test_rf = vgg19.predict(test_images)

# print(x_train_rf.shape)

In [None]:
# Evaluate the dense-head model trained on the balanced split (integer test labels).
disp, cl_report_dense = getConfusionMatrix(model_new, (x_test, y_test))

# disp.plot() draws on a new figure; that figure is the one written to disk.
disp.plot()
disp.figure_.savefig("./vgg_dense_conf_mat.png", bbox_inches="tight")
# plot_train_val_accuracy(model_new, hist, None, None, 30, disp, "VGG-CONV-DENSE")
print(cl_report_dense)

In [None]:
# Deep features of the balanced split (train first, then test).
x_train_rf, x_test_rf = model.predict(x_train), model.predict(x_test)

# Random forest on those features, scored by accuracy on the test part.
rf_vgg = RandomForestClassifier(random_state=42, n_estimators=150)
rf_vgg.fit(X=x_train_rf, y=y_train)
score = rf_vgg.score(X=x_test_rf, y=y_test)
print(score)

In [None]:
# Random-forest evaluation on the test part of the balanced split.
y_pred = rf_vgg.predict(x_test_rf)
classes = ['Negative', 'Positive']
conf_mat = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix = conf_mat, display_labels=classes)
cl_report = classification_report(y_test, y_pred, output_dict=True)
disp.plot()
plt.savefig("./rf-cm-vgg.png", bbox_inches="tight")
print(cl_report)
# Write the report in binary *write* mode and close the file afterwards.
with open("rf-cl-report.sav", "wb") as report_file:
    pickle.dump(cl_report, report_file)


In [None]:
# Training curves of the model fitted on the balanced split.
plot_train_val_accuracy(
    model_new,
    hist,
    train=(x_train, y_train_en),
    val=(x_test, y_test_en),
    epoch=30,
)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# 5-nearest-neighbour classifier on the deep features of the balanced split.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X=x_train_rf, y=y_train)

In [None]:
# Confusion matrix of the KNN model on the data it was fitted on.
pred = knn.predict(X=x_train_rf)
cnf = confusion_matrix(y_true=y_train, y_pred=pred)
print(cnf)

In [None]:
# Confusion matrix of the KNN model on the test features.
pred = knn.predict(X=x_test_rf)
cnf = confusion_matrix(y_true=y_test, y_pred=pred)
print(cnf)

In [None]:
# Per-class precision / recall / F1 of the KNN test predictions computed in the previous cell.
clf = classification_report(y_true=y_test, y_pred=pred)
print(clf)