In [2]:
# Import of basic libraries
import os
from builtins import range, input
from glob import glob

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import confusion_matrix, roc_curve
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer
from tensorflow import keras
from tensorflow.keras.applications import DenseNet201
from tensorflow.keras.applications.densenet import preprocess_input
from tensorflow.keras.layers import Input, Lambda, Dense, Flatten, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical

In [None]:
# Report how many GPUs TensorFlow can see on this machine
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

### Classification

In [3]:
# Define paths
# os.path.join picks the separator of the running OS; on Windows this yields the
# same ".\kaggle\..." strings as before, and it also works on Linux/macOS.
covid_path = os.path.join(".", "kaggle", "COVID")
noncovid_path = os.path.join(".", "kaggle", "non-COVID")

# Use 'glob' to retrieve all pathnames
# (joining the wildcard avoids the invalid '\*' escape sequence in a string literal)
covid_files = glob(os.path.join(covid_path, '*'))
noncovid_files = glob(os.path.join(noncovid_path, '*'))

In [4]:
# Define the size to which images are to be resized
IMAGE_SIZE = [224, 224]


def _load_rgb_images(file_paths, size):
    """Read every file in `file_paths` as an RGB image resized to `size`.

    Args:
        file_paths: iterable of image path strings.
        size: two-element sequence handed to cv2.resize as the target size.

    Returns:
        A list with one resized RGB array per input path, in input order.

    Raises:
        ValueError: if OpenCV cannot decode a file. Failing loudly keeps the
            image list the same length as the file list, which later cells
            rely on when they build labels from the file list.
    """
    loaded = []
    for file_path in file_paths:
        bgr_pixels = cv2.imread(file_path)  # None when the file cannot be read
        if bgr_pixels is None:
            raise ValueError(f"Could not read image file: {file_path}")
        rgb_pixels = cv2.cvtColor(bgr_pixels, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
        loaded.append(cv2.resize(rgb_pixels, tuple(size)))
    return loaded


# Fetch images and class labels from files
covid_images = _load_rgb_images(covid_files, IMAGE_SIZE)
noncovid_images = _load_rgb_images(noncovid_files, IMAGE_SIZE)

covid_labels = ['CT_COVID'] * len(covid_images)
noncovid_labels = ['CT_NonCOVID'] * len(noncovid_images)

In [5]:
def _scale_to_unit_interval(images):
    """Return `images` as a float32 array scaled from [0, 255] to [0, 1].

    Only 8-bit input is scaled. Anything else is returned unchanged, so
    re-running this cell does not divide already-normalized data by 255 again.
    Assumes the raw images are uint8, as produced by cv2.imread with default
    flags -- TODO confirm if the loading step changes.
    """
    pixels = np.asarray(images)
    if pixels.dtype != np.uint8:
        return pixels
    # float32 halves the memory of the default float64 result
    return pixels.astype(np.float32) / 255.0


# Normalization process - Convert to array and normalize to interval of [0,1]
covid_images = _scale_to_unit_interval(covid_images)
noncovid_images = _scale_to_unit_interval(noncovid_images)

In [7]:
# Fixed seed so the train/test partition is identical after every kernel restart
SPLIT_SEED = 42

# Split into train and test sets for both types of images
# (splitting each class separately keeps the 80/20 ratio within each class)
covid_x_train, covid_x_test, covid_y_train, covid_y_test = train_test_split(
    covid_images, ['CT_Covid'] * len(covid_images),
    test_size=0.2, random_state=SPLIT_SEED)

noncovid_x_train, noncovid_x_test, noncovid_y_train, noncovid_y_test = train_test_split(
    noncovid_images, ['CT_NonCovid'] * len(noncovid_images),
    test_size=0.2, random_state=SPLIT_SEED)

# Merge sets for both types of images (non-COVID rows first, then COVID rows)
X_train = np.concatenate((noncovid_x_train, covid_x_train), axis=0)
X_test = np.concatenate((noncovid_x_test, covid_x_test), axis=0)

y_train = np.concatenate((noncovid_y_train, covid_y_train), axis=0)
y_test = np.concatenate((noncovid_y_test, covid_y_test), axis=0)

# One-hot encode the labels. A single binarizer is fitted on the training labels
# and reused for the test labels so both sets share one class-to-column mapping.
# LabelBinarizer orders classes alphabetically, so:
#   column 0 -> 'CT_Covid', column 1 -> 'CT_NonCovid'
label_binarizer = LabelBinarizer().fit(y_train)
y_train = to_categorical(label_binarizer.transform(y_train), num_classes=2)
y_test = to_categorical(label_binarizer.transform(y_test), num_classes=2)

In [26]:
# Create a folder with the validation data - This code snippet was used only once
# NOTE(review): the comment says "validation data" but the loop exports X_train;
# confirm which set was intended.
import os
import shutil

VAL_SET_DIR = os.path.join('.', 'kaggle', 'val_set_kaggle')
os.makedirs(VAL_SET_DIR, exist_ok=True)  # copy/write fails if the folder is missing

for index, item in enumerate(X_train):
    if isinstance(item, (str, os.PathLike)):
        # Entry is a file path: copy the file under its original name
        shutil.copyfile(item, os.path.join(VAL_SET_DIR, os.path.basename(item)))
    else:
        # Entry is pixel data (X_train is built with np.concatenate in this
        # notebook), so there is no source file to copy: write the array out.
        pixels = np.asarray(item)
        if pixels.dtype != np.uint8:
            # Undo the [0, 1] normalization before saving as an 8-bit image
            pixels = np.clip(pixels * 255.0, 0, 255).round().astype(np.uint8)
        cv2.imwrite(os.path.join(VAL_SET_DIR, f'{index:05d}.png'),
                    cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR))  # OpenCV writes BGR

In [None]:
# look at random images (covid and non-covid)
def plot_images(images, title):
    """Show the first images of `images` in a 5x8 grid under a common title.

    Args:
        images: indexable collection of images accepted by `imshow`.
        title: figure-level title.

    When fewer than 40 images are supplied the remaining grid cells are left
    blank instead of raising IndexError.
    """
    nrows, ncols = 5, 8
    figsize = [10, 6]

    fig, ax = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, facecolor=(1, 1, 1))

    for i, axi in enumerate(ax.flat):
        axi.set_axis_off()  # hide ticks/frame on every cell, used or not
        if i < len(images):
            axi.imshow(images[i])

    plt.suptitle(title, fontsize=24)
    plt.tight_layout(pad=0.2, rect=[0, 0, 1, 0.9])
    plt.show()
    
# X_train / X_test are concatenated class by class, so their first rows all belong
# to one class. Draw a seeded random sample so each grid mixes COVID and non-COVID
# scans and actually shows the set named in its title.
_preview_rng = np.random.default_rng(0)
_preview_count = 40  # plot_images draws a 5x8 grid
plot_images(X_train[_preview_rng.permutation(len(X_train))[:_preview_count]], 'X_train')
plot_images(X_test[_preview_rng.permutation(len(X_test))[:_preview_count]], 'X_test')

In [None]:
# Count the samples per class in the train and test data.
# y_train / y_test are one-hot matrices, so the class index of each row is
# recovered with argmax first; np.unique on the raw matrix would count the
# 0s and 1s of the encoding (both equal to the number of rows), not the classes.
unq_values_train, counts_train = np.unique(np.argmax(y_train, axis=1),
                                           return_counts=True)
unq_values_test, counts_test = np.unique(np.argmax(y_test, axis=1),
                                         return_counts=True)

fig, axis = plt.subplots(nrows=1, ncols=2,
                         sharey=True,
                         figsize=(20, 7))

# Graph for train data
axis[0].bar(unq_values_train,
            counts_train,
            align="center",
            alpha=0.4
           )
axis[0].set_xlabel("Class")
axis[0].set_ylabel("Frequency")
axis[0].set_title("Train Set")

# Graph for test data
axis[1].bar(unq_values_test, counts_test, align="center", alpha=0.4)
axis[1].set_xlabel("Class")
axis[1].set_title("Test Set")

# Class index -> name: LabelBinarizer sorts the labels, so 0 is COVID, 1 is NonCOVID
for panel in axis:
    panel.set_xticks([0, 1])
    panel.set_xticklabels(["COVID", "NonCOVID"])

plt.show()

In [10]:
# Building Model
# Pre-trained DenseNet201 backbone: ImageNet weights, original classifier removed
densenet201Model = DenseNet201(weights="imagenet", include_top=False,
    input_tensor=Input(shape=(224, 224, 3)))

outputs = densenet201Model.output

# Add the extra layers (new classification head)
outputs = GlobalAveragePooling2D()(outputs)
outputs = Flatten(name="flatten")(outputs)
outputs = Dense(128, activation='relu')(outputs)
outputs = Dropout(0.2)(outputs)
outputs = Dense(64, activation='relu')(outputs)
outputs = Dropout(0.3)(outputs)
# The two units are mutually exclusive classes trained on one-hot targets with
# categorical cross-entropy, so the head uses softmax: the outputs then sum to 1
# and can be read as class probabilities (independent sigmoids do not).
outputs = Dense(2, activation="softmax")(outputs)

model = Model(inputs=densenet201Model.input, outputs=outputs)

# Freeze the backbone so only the new head is trained
for layer in densenet201Model.layers:
    layer.trainable = False

# Define loss function, optimizer and metric
model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
)

In [11]:
# Image Augmentation
# Random rotations, shifts and mirror flips so the network sees each training
# image at varying positions and orientations.
augmentation_options = {
    "rotation_range": 20,
    "width_shift_range": 0.2,
    "height_shift_range": 0.2,
    "horizontal_flip": True,
}
train_aug = ImageDataGenerator(**augmentation_options)

In [None]:
# Training config:
epochs = 500
batch_size = 32

# Keras expects a whole number of batches per epoch; round up so the final,
# partially filled batch is still part of every epoch.
steps_per_epoch = int(np.ceil(len(X_train) / batch_size))

# Train the model
# validation_steps is not passed: the validation data are in-memory arrays,
# which Keras evaluates in full each epoch (the option is documented as relevant
# for dataset/generator validation input -- confirm against the installed version).
history = model.fit(train_aug.flow(X_train, y_train, batch_size=batch_size),
                    validation_data=(X_test, y_test),
                    steps_per_epoch=steps_per_epoch,
                    epochs=epochs)

In [13]:
# Persist the network: the complete model first, then its weights on their own
MODEL_FILE = 'densenet201_ct.h5'
WEIGHTS_FILE = 'densenet201_weights_ct.hdf5'

model.save(MODEL_FILE)
model.save_weights(WEIGHTS_FILE)

In [14]:
# Replace the in-memory network with the one stored on disk
saved_model_path = 'densenet201_ct.h5'
model = load_model(saved_model_path)

In [None]:
# Print the structure of the model currently bound to `model`
model.summary()

In [None]:
# Score every test image; y_pred holds one row of class scores per image
batch_size = 32
y_pred = model.predict(x=X_test, batch_size=batch_size)

### Make some predictions


In [None]:
# Names by output column. The labels were encoded with LabelBinarizer, which
# sorts class names, so column 0 is 'CT_Covid' and column 1 is 'CT_NonCovid'.
CLASS_NAMES = ['COVID', 'NonCOVID']

# Show the first five test images titled with the predicted class and its score
prediction = y_pred[0:5]
for index, probability in enumerate(prediction):
    predicted_class = int(np.argmax(probability))
    plt.title('%.2f' % (probability[predicted_class] * 100)
              + '% ' + CLASS_NAMES[predicted_class])
    plt.imshow(X_test[index])
    plt.show()

In [18]:
# Collapse each row of class scores / one-hot labels to the index of its largest entry
y_pred_bin = y_pred.argmax(axis=1)
y_test_bin = y_test.argmax(axis=1)

In [None]:
# Create ROC curve
# roc_curve sweeps a decision threshold over continuous scores; feeding it the
# hard 0/1 predictions collapses the curve to a single operating point.
# y_test_bin marks class index 1 as the positive class, so the matching score
# is column 1 of y_pred.
fpr, tpr, thresholds = roc_curve(y_test_bin, y_pred[:, 1])

plt.rcParams['font.size'] = 12  # set before drawing so all text uses it
plt.plot(fpr, tpr)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.title('ROC curve for our model')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.grid(True)
plt.show()

In [None]:
# Pair every true class index with the predicted one
df = pd.DataFrame({"Y_Actual": y_test_bin, "Y_Predicted": y_pred_bin})

# Cross-tabulate actual vs predicted classes, including the "All" totals
conf_mat = pd.crosstab(index=df["Y_Actual"],
                       columns=df["Y_Predicted"],
                       rownames=["Actual"],
                       colnames=["Predicted"],
                       margins=True)

print(f"The confusion matrix of the classification is: \n{conf_mat}")
print("-" * 100)

# The totals row/column would dominate the colour scale, so drop them for the plot
conf_mat = conf_mat.drop(index="All", columns="All")

# Draw the matrix as an annotated heatmap (YlGnBu = yellow-green-blue colour map)
f, axis = plt.subplots(nrows=1, ncols=1, figsize=(10, 6))
heatmap_options = {"annot": True, "fmt": "d", "cmap": "YlGnBu", "ax": axis}
sns.heatmap(conf_mat, **heatmap_options)
axis.set_xlabel("\nPredicted Class")
axis.set_ylabel("Actual Class")
axis.set_title("Confusion Matrix of the Classification")

plt.show()

In [None]:
import sklearn.metrics
from sklearn.metrics import classification_report

# Share of test samples whose predicted class index equals the true one
test_accuracy_nn = sklearn.metrics.accuracy_score(y_true=y_test_bin,
                                                  y_pred=y_pred_bin)

print("The Accuracy of the Neural Network on the Test Data is :", test_accuracy_nn)

In [None]:
# Build the per-class metrics report for the test set, then print it
report_text = classification_report(y_true=y_test_bin, y_pred=y_pred_bin)
print(report_text)

In [None]:
fig, axis = plt.subplots(nrows=1, ncols=2, figsize=(20, 6))

# One panel per tracked quantity: (axes, history key, legend entries, panel title).
# Left panel is the cross-entropy loss, right panel the accuracy.
curve_panels = (
    (axis[0], 'loss', ["Loss", "Val_Loss"], "Training Process - Loss"),
    (axis[1], 'accuracy', ["Accuracy", "Val_Accuracy"], "Training Process - Accuracy"),
)

for panel, metric, legend_entries, panel_title in curve_panels:
    panel.plot(history.epoch, history.history[metric])           # training curve
    panel.plot(history.epoch, history.history['val_' + metric])  # validation curve
    panel.set_xlabel("Epochs")
    panel.set_ylabel("Value")
    panel.legend(legend_entries)
    panel.set_title(panel_title)

plt.show()