In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import random
import cv2
import matplotlib.pyplot as plt

# DL framework - TensorFlow and its Keras API (plus the Keras backend module)
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Input, Dense, Flatten, Dropout, BatchNormalization 
from tensorflow.keras.layers import Conv2D, SeparableConv2D, MaxPool2D, LeakyReLU, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

from os import listdir
from os.path import isfile, join
from PIL import Image
import glob
#from helpers import load_folder
#from helpers import gen_data

#data_folder = '/home/wabbas/shared/datasets/covid19/chest_xray'
# data folder should contain 3 more folders named 'train', 'val' and 'test'
# each of these sub-folders should contain two folders, one containing images of control group named "NORMAL" 
# the other containing x-ray images of pneumonia patients, named "PNEUMONIA"

In [None]:
import tensorflow as tf

# Build a TF1-compat session configuration: cap the device counts at one GPU
# and three CPUs, and log which device each op is placed on.
config = tf.compat.v1.ConfigProto(
    device_count={'GPU': 1, 'CPU': 3},
    log_device_placement=True,
)

# Open a session with that configuration.
# NOTE(review): nothing visible in this notebook registers `sess` with Keras -
# confirm whether the settings are actually picked up during training.
sess = tf.compat.v1.Session(config=config)

In [None]:
def crop_and_concat(x1, x2):
    """Concatenate two tensors along axis 3.

    Despite the name, no cropping is performed: the two tensors are joined
    as-is, so they are expected to agree on every axis except axis 3
    (presumably the channel axis of channels-last feature maps - confirm
    against callers).

    Args:
        x1: First tensor; its values come first along axis 3.
        x2: Second tensor; appended after ``x1`` along axis 3.

    Returns:
        The concatenated tensor.
    """
    with tf.name_scope("crop_and_concat"):
        merged = tf.concat(values=[x1, x2], axis=3)
    return merged

In [None]:
# Root of the chest X-ray dataset. It must contain three sub-folders named
# 'train', 'val' and 'test', each holding a 'NORMAL' folder (control group)
# and a 'PNEUMONIA' folder (pneumonia patients).
# Defined in this cell so the paths below resolve on a freshly started kernel.
data_folder = '/home/wabbas/shared/datasets/covid19/chest_xray'

# One folder per data split.
train_folder = os.path.join(data_folder, 'train')
test_folder = os.path.join(data_folder, 'test')
val_folder = os.path.join(data_folder, 'val')

# Per-class folders inside each split.
train_normal_path = os.path.join(train_folder, 'NORMAL')
train_pneumonia_path = os.path.join(train_folder, 'PNEUMONIA')

val_normal_path = os.path.join(val_folder, 'NORMAL')
val_pneumonia_path = os.path.join(val_folder, 'PNEUMONIA')

test_normal_path = os.path.join(test_folder, 'NORMAL')
test_pneumonia_path = os.path.join(test_folder, 'PNEUMONIA')

In [None]:
input_path = "/home/wabbas/shared/datasets/covid19/chest_xray/"

# Expected layout under input_path: 'train', 'val' and 'test' folders, each
# with a "NORMAL" folder (control group X-rays) and a "PNEUMONIA" folder
# (X-rays of pneumonia patients).

# Print how many files each split holds per class.
for _set in ('train', 'test', 'val'):
    split_dir = input_path + _set
    nrml = len(os.listdir(split_dir + '/NORMAL'))
    pnm = len(os.listdir(split_dir + '/PNEUMONIA'))
    print(f'{_set}, Normal images: {nrml}, Pneumonia images: {pnm}')

In [None]:
def process_data(img_dims, batch_size):
    """Build the training/evaluation generators and load the test set into memory.

    Reads the module-level ``train_folder`` and ``test_folder`` paths; the test
    folder must contain a ``NORMAL`` and a ``PNEUMONIA`` sub-folder.

    Args:
        img_dims: Side length in pixels; every image is resized to
            ``(img_dims, img_dims)``.
        batch_size: Number of images per batch yielded by the generators.

    Returns:
        A tuple ``(train_gen, test_gen, test_data, test_labels)``:
            * ``train_gen`` - shuffled, augmented, 1/255-rescaled batches read
              from ``train_folder`` with binary labels.
            * ``test_gen`` - shuffled, 1/255-rescaled batches read from
              ``test_folder`` with binary labels (no augmentation).
            * ``test_data`` - array of all test images, each resized, forced to
              three channels, cast to float32 and divided by 255.
            * ``test_labels`` - matching array of 0 (NORMAL) / 1 (PNEUMONIA).
    """
    # Training-time augmentation: rescaling, zoom, flips, rotation and shifts.
    # featurewise_center / featurewise_std_normalization are deliberately not
    # set: they require calling datagen.fit() on sample data first, and without
    # it Keras only emits a warning and skips the normalisation.
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        zoom_range=0.5,
        vertical_flip=True,
        horizontal_flip=True,
        rotation_range=30,
        width_shift_range=0.2,
        height_shift_range=0.2)

    # Evaluation images get the same rescaling but no random augmentation, so
    # validation metrics are repeatable and match the preprocessing applied to
    # `test_data` below.
    test_datagen = ImageDataGenerator(rescale=1./255)

    # These are fed to the network in the specified batch size and image dimensions.
    train_gen = train_datagen.flow_from_directory(
        directory=train_folder,
        target_size=(img_dims, img_dims),
        batch_size=batch_size,
        class_mode='binary',
        shuffle=True)

    test_gen = test_datagen.flow_from_directory(
        directory=test_folder,
        target_size=(img_dims, img_dims),
        batch_size=batch_size,
        class_mode='binary',
        shuffle=True)

    # Load the whole test set as one array so predictions can be made in a
    # single call (useful for building the confusion matrix).
    test_data = []
    test_labels = []

    for class_name, label in (('NORMAL', 0), ('PNEUMONIA', 1)):
        class_dir = os.path.join(test_folder, class_name)
        for file_name in os.listdir(class_dir):
            img = plt.imread(os.path.join(class_dir, file_name))
            img = cv2.resize(img, (img_dims, img_dims))
            if img.ndim == 2:
                # Single-channel image: replicate it into three channels.
                img = np.dstack([img, img, img])
            else:
                # Already multi-channel: keep the first three channels instead
                # of stacking, which would produce 9+ channels and break the
                # uniform array shape.
                img = img[:, :, :3]
            img = img.astype('float32') / 255
            test_data.append(img)
            test_labels.append(label)

    test_data = np.array(test_data)
    test_labels = np.array(test_labels)

    return train_gen, test_gen, test_data, test_labels

In [None]:
# Run settings: square input side (pixels), number of training epochs and
# mini-batch size.
img_dims, epochs, batch_size = 256, 20, 16

# Build the data generators and load the in-memory test set.
train_gen, test_gen, test_data, test_labels = process_data(
    img_dims=img_dims,
    batch_size=batch_size,
)

In [None]:
def conv_block(inputs, num_filters, bn):
    """Multi-branch separable-convolution block followed by 2x2 max pooling.

    Three feature maps are joined along axis 3 (via ``crop_and_concat``):
      * ``x1`` - ``inputs`` passed through 3x3, 5x5 and 7x7 separable convs;
      * ``x2`` - ``x1`` passed through a 3x3 conv and then a 5x5 conv that
        always has 4 filters;
      * ``x3`` - ``inputs`` passed through a single 3x3 separable conv.

    Args:
        inputs: Input tensor fed to the first and third branches.
        num_filters: Filter count of every convolution except the final one
            of the ``x2`` branch.
        bn: Truthy to apply batch normalisation after pooling.

    Returns:
        The pooled (and optionally batch-normalised) tensor.
    """
    def sep_conv(tensor, size, filters=num_filters):
        # ReLU-activated, same-padded separable convolution with a square kernel.
        layer = SeparableConv2D(filters=filters, kernel_size=(size, size),
                                activation='relu', padding='same')
        return layer(tensor)

    # Branch 1: growing receptive field, 3x3 -> 5x5 -> 7x7.
    x1 = inputs
    for size in (3, 5, 7):
        x1 = sep_conv(x1, size)

    # Branch 2: refines branch 1 and narrows it to 4 feature maps.
    x2 = sep_conv(sep_conv(x1, 3), 5, filters=4)

    # Branch 3: shallow path straight from the block input.
    x3 = sep_conv(inputs, 3)

    merged = crop_and_concat(crop_and_concat(x1, x2), x3)
    pooled = MaxPool2D(pool_size=(2, 2))(merged)
    if not bn:
        return pooled
    return BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, trainable=True)(pooled)

    
    
    
# Network input: square three-channel images of side img_dims.
inputs = Input(shape=(img_dims, img_dims, 3))

# Five stacked convolution blocks (16 -> 32 -> 64 -> 32 -> 16 filters).
# Each block ends in 2x2 max pooling; only the first skips batch normalisation.
x = conv_block(inputs, 16, 0)
x = conv_block(x, 32, 1)
x = conv_block(x, 64, 1)
x = conv_block(x, 32, 1)
x = conv_block(x, 16, 1)

# Fully connected head with dropout after each dense layer.
x = Flatten()(x)
x = Dense(units=128, activation='relu')(x)
x = Dropout(rate=0.5)(x)
x = Dense(units=32, activation='relu')(x)
x = Dropout(rate=0.5)(x)

# Output layer: a single sigmoid unit for the binary label.
output = Dense(units=1, activation='sigmoid')(x)

# Creating model and compiling
model = Model(inputs=inputs, outputs=output)
adamc = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
model.compile(optimizer=adamc, loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks
# Keep only the best weights seen so far (ModelCheckpoint's default monitor).
checkpoint = ModelCheckpoint(filepath='best_weights.hdf5', save_best_only=True, save_weights_only=True)
# val_loss is a quantity to minimise, so the plateau detector must run in
# 'min' mode; in 'max' mode the learning rate would be cut while the loss is
# still improving.
lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, verbose=2, mode='min')
# Defined for convenience; the training cell in this notebook does not pass it
# to the model.
early_stop = EarlyStopping(monitor='val_loss', min_delta=0.1, patience=1, mode='min')

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
# Train the network. Model.fit accepts the directory iterators directly;
# Model.fit_generator is deprecated and no longer available in recent
# TensorFlow/Keras releases.
# Integer division drops the final partial batch from each epoch's step count.
history = model.fit(
    train_gen,
    steps_per_epoch=train_gen.samples // batch_size,
    epochs=epochs,
    validation_data=test_gen,
    validation_steps=test_gen.samples // batch_size,
    callbacks=[checkpoint, lr_reduce])

In [None]:

# Draw one figure per tracked metric, showing the training curve and the
# validation curve (labelled 'Test') against the epoch index.
for metric, title, ylabel in (
        ('accuracy', 'Model accuracy', 'Accuracy'),
        ('loss', 'Model loss', 'Loss')):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix
from mlxtend.plotting import plot_confusion_matrix

# Predict on the in-memory test set and round the sigmoid outputs to 0/1 labels.
preds = model.predict(test_data)
rounded_preds = np.round(preds)

# Accuracy in percent, plus the confusion matrix and its four cells.
accuracy = 100 * accuracy_score(test_labels, rounded_preds)
conf_mat = confusion_matrix(test_labels, rounded_preds)
true_negative, false_postive, false_negative, true_posiitve = conf_mat.ravel()

# Plot the confusion matrix with readable class names on both axes.
class_names = ['Normal', 'Pneumonia']
plot_confusion_matrix(conf_mat, figsize=(12, 8), hide_ticks=True, cmap=plt.cm.Blues)
plt.xticks(range(2), class_names, fontsize=16)
plt.yticks(range(2), class_names, fontsize=16)
plt.show()

In [None]:
# Persist the trained model to disk under the given .h5 file name.
model.save('xray-pneumona-skip-convolution.h5')