In [None]:
import pandas as pd
import matplotlib as mat
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
%matplotlib inline

pd.options.display.max_colwidth = 100

import random
import os

from numpy.random import seed
seed(42)

random.seed(42)
os.environ['PYTHONHASHSEED'] = str(42)
os.environ['TF_DETERMINISTIC_OPS'] = '1'

from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import accuracy_score

import tensorflow as tf
print(tf.test.is_gpu_available())
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print(tf.__version__)
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import callbacks
from tensorflow.keras.models import Modelfrom tensorflow.keras.preprocessing.image import ImageDataGenerator

import glob


from tensorflow.random import set_seed
set_seed(42)

import warnings
warnings.filterwarnings('ignore')

In [None]:
IMG_SIZE = 150
BATCH = 32
SEED = 42
main_path_original = "./resize_xray/chest_xray"
main_path_gan = "./resize_xray/chest_xray_gan"
main_path_diffusion = "./resize_xray/chest_xray_diffusion"

train_path = os.path.join(main_path_original, "train")
val_path = os.path.join(main_path_original, "val")
test_path = os.path.join(main_path_original, "test")


def list_images(folder, pattern):
    """Return the paths matching `folder + "/" + pattern`, sorted by name.

    glob makes no ordering guarantee (it depends on the file system), so the
    result is sorted to keep the seeded shuffles further down reproducible.
    """
    return sorted(glob.glob(folder + "/" + pattern))


def build_class_frame(paths_by_class):
    """Build a frame with a `class` and an `image` column.

    Args:
        paths_by_class: mapping of class label -> list of image paths.

    Returns:
        pd.DataFrame with one row per path, grouped by label in the mapping's
        insertion order. A label with no paths contributes no rows.
    """
    labels = []
    images = []
    for label, paths in paths_by_class.items():
        labels.extend([label] * len(paths))
        images.extend(paths)
    return pd.DataFrame({'class': labels, 'image': images})


# Original data set. The on-disk folder name "TURBERCULOSIS" is kept exactly as
# spelled here because it has to match the directory being globbed.
og_train_normal = list_images(train_path, "NORMAL/*.jpeg")
og_train_pneumonia = list_images(train_path, "PNEUMONIA/*.jpeg")
og_train_covid19 = list_images(train_path, "COVID19/*.jpg")
og_train_tb = list_images(train_path, "TURBERCULOSIS/*.png")

test_normal = list_images(test_path, "NORMAL/*.jpeg")
test_pneumonia = list_images(test_path, "PNEUMONIA/*.jpeg")
test_covid19 = list_images(test_path, "COVID19/*.jpg")
test_tb = list_images(test_path, "TURBERCULOSIS/*.png")

val_normal = list_images(val_path, "NORMAL/*.jpeg")
val_pneumonia = list_images(val_path, "PNEUMONIA/*.jpeg")
val_covid19 = list_images(val_path, "COVID19/*.jpg")
val_tb = list_images(val_path, "TURBERCULOSIS/*.png")

# Synthetic images; neither synthetic set is globbed for a pneumonia folder.
gan_train_normal = list_images(main_path_gan, "NORMAL/*.png")
gan_train_covid = list_images(main_path_gan, "COVID19/*.png")
gan_train_tb = list_images(main_path_gan, "TB/*.png")

diffusion_train_normal = list_images(main_path_diffusion, "NORMAL/*.png")
diffusion_train_covid = list_images(main_path_diffusion, "COVID19/*.png")
diffusion_train_tb = list_images(main_path_diffusion, "TB/*.png")

# One frame per split of the original data; the flat *_list names are kept for
# any later cell that still refers to them.
og_df_train = build_class_frame({
    'Normal': og_train_normal,
    'Pneumonia': og_train_pneumonia,
    'COVID-19': og_train_covid19,
    'Tuberculosis': og_train_tb,
})
og_train_list = og_df_train['image'].tolist()

df_test = build_class_frame({
    'Normal': test_normal,
    'Pneumonia': test_pneumonia,
    'COVID-19': test_covid19,
    'Tuberculosis': test_tb,
})
test_list = df_test['image'].tolist()

df_val = build_class_frame({
    'Normal': val_normal,
    'Pneumonia': val_pneumonia,
    'COVID-19': val_covid19,
    'Tuberculosis': val_tb,
})
val_list = df_val['image'].tolist()

# Number of training images per class in the original data.
og_df_train['class'].value_counts()

class
Pneumonia       3875
Normal          1341
Tuberculosis     650
COVID-19         460
Name: count, dtype: int64

In [None]:
# Number of validation images per class label in df_val.
df_val['class'].value_counts()

class
Tuberculosis    12
COVID-19        10
Normal           8
Pneumonia        8
Name: count, dtype: int64

In [None]:
# Number of test images per class label in df_test.
df_test['class'].value_counts()

class
Pneumonia       390
Normal          234
COVID-19        106
Tuberculosis     41
Name: count, dtype: int64

In [None]:
# Training frame for "original + GAN-generated" images.

# GAN images exist for three classes only; there is no GAN pneumonia folder.
gan_train_list = gan_train_normal + gan_train_covid + gan_train_tb
gan_labels = (
    ['Normal'] * len(gan_train_normal)
    + ['COVID-19'] * len(gan_train_covid)
    + ['Tuberculosis'] * len(gan_train_tb)
)
gan_only_df = pd.DataFrame({'class': gan_labels, 'image': gan_train_list})

# Append the synthetic rows to the original training rows, then shuffle with a
# fixed seed so the row order is repeatable.
gan_df_train = pd.concat([og_df_train, gan_only_df], ignore_index=True)
gan_df_train = gan_df_train.sample(frac=1, random_state=SEED).reset_index(drop=True)

print(gan_df_train['class'].value_counts())
# Show the frame built in this cell. The original line displayed
# diffusion_df_train, which is not defined until the next cell.
display(gan_df_train)

class
Tuberculosis    3875
Normal          3875
COVID-19        3875
Pneumonia       3875
Name: count, dtype: int64


Unnamed: 0,class,image
0,Pneumonia,./resize_xray/chest_xray\train/PNEUMONIA\person826_bacteria_2737.jpeg
1,COVID-19,./resize_xray/chest_xray\train/COVID19\COVID19(327).jpg
2,COVID-19,./resize_xray/chest_xray_gan/COVID19\image_1529.png
3,Normal,./resize_xray/chest_xray\train/NORMAL\NORMAL2-IM-1350-0001.jpeg
4,Normal,./resize_xray/chest_xray\train/NORMAL\IM-0553-0001-0001.jpeg
...,...,...
15495,Tuberculosis,./resize_xray/chest_xray_gan/TB\image_468.png
15496,Pneumonia,./resize_xray/chest_xray\train/PNEUMONIA\person1711_bacteria_4527.jpeg
15497,COVID-19,./resize_xray/chest_xray_gan/COVID19\image_1294.png
15498,Tuberculosis,./resize_xray/chest_xray_gan/TB\image_879.png


In [None]:
# Training frame for "original + diffusion-generated" images.

# Diffusion images exist for three classes only; there is no diffusion pneumonia folder.
diffusion_train_list = diffusion_train_normal + diffusion_train_covid + diffusion_train_tb
diffusion_labels = (
    ['Normal'] * len(diffusion_train_normal)
    + ['COVID-19'] * len(diffusion_train_covid)
    + ['Tuberculosis'] * len(diffusion_train_tb)
)
diffusion_only_df = pd.DataFrame({'class': diffusion_labels, 'image': diffusion_train_list})

# Append the synthetic rows to the original training rows.
diffusion_df_train = pd.concat([og_df_train, diffusion_only_df], ignore_index=True)
# Shuffle the diffusion frame itself. The original line sampled gan_df_train
# here, which replaced the diffusion data with the GAN data.
diffusion_df_train = diffusion_df_train.sample(frac=1, random_state=SEED).reset_index(drop=True)

print(diffusion_df_train['class'].value_counts())
display(diffusion_df_train)

class
Pneumonia       3875
COVID-19        3875
Normal          3875
Tuberculosis    3875
Name: count, dtype: int64


Unnamed: 0,class,image
0,Pneumonia,./resize_xray/chest_xray\train/PNEUMONIA\person826_bacteria_2737.jpeg
1,COVID-19,./resize_xray/chest_xray\train/COVID19\COVID19(327).jpg
2,COVID-19,./resize_xray/chest_xray_gan/COVID19\image_1529.png
3,Normal,./resize_xray/chest_xray\train/NORMAL\NORMAL2-IM-1350-0001.jpeg
4,Normal,./resize_xray/chest_xray\train/NORMAL\IM-0553-0001-0001.jpeg
...,...,...
15495,Tuberculosis,./resize_xray/chest_xray_gan/TB\image_468.png
15496,Pneumonia,./resize_xray/chest_xray\train/PNEUMONIA\person1711_bacteria_4527.jpeg
15497,COVID-19,./resize_xray/chest_xray_gan/COVID19\image_1294.png
15498,Tuberculosis,./resize_xray/chest_xray_gan/TB\image_879.png


In [None]:
# TODO: split the training data into train and validation sets and build the
# ds_train / ds_val inputs that the fit call below expects.

IMG_SIZE, BATCH = 150, 32
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)

# ResNet152V2 with ImageNet weights and without its classification top.
base_model = tf.keras.applications.ResNet152V2(
    include_top=False,
    input_shape=IMG_SHAPE,
    weights='imagenet',
)

# Freeze the backbone so its weights are not updated during training.
base_model.trainable = False

def get_pretrained():
    """Build the classifier: `base_model` followed by a small dense head.

    Returns:
        A keras.Model taking (IMG_SIZE, IMG_SIZE, 3) inputs and producing a
        single sigmoid-activated output.
    """
    # Input shape is (IMG_SIZE, IMG_SIZE, color channels).
    image_in = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))

    features = base_model(image_in)

    # Head: pool the feature maps, one hidden dense layer, light dropout.
    head_layers = (
        layers.GlobalAveragePooling2D(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.1),
    )
    hidden = features
    for head_layer in head_layers:
        hidden = head_layer(hidden)

    # Final layer (output).
    # NOTE(review): a single sigmoid unit fits a two-class target, but the data
    # frames built earlier carry four class labels. Confirm how the labels are
    # encoded before relying on this head.
    prediction = layers.Dense(1, activation='sigmoid')(hidden)

    return keras.Model(inputs=[image_in], outputs=prediction)
keras.backend.clear_session()

model_pretrained = get_pretrained()
model_pretrained.compile(
    loss='binary_crossentropy',
    optimizer=keras.optimizers.Adam(learning_rate=5e-5),
    # Keras documents `metrics` as a list; a bare string is not accepted everywhere.
    metrics=['binary_accuracy'],
)

model_pretrained.summary()

# NOTE(review): ds_train, ds_val, train_df and val_df are not defined in the
# visible part of this notebook. They must exist before this cell can run.
# Step counts must be whole numbers: round up so a final partial batch is included.
train_steps = (len(train_df) + BATCH - 1) // BATCH
val_steps = (len(val_df) + BATCH - 1) // BATCH

# batch_size is deliberately not passed: Keras rejects it for dataset/generator
# inputs, which batch themselves. For array inputs the Keras default (32) equals BATCH.
history = model_pretrained.fit(
    ds_train,
    epochs=50,
    validation_data=ds_val,
    #callbacks=[early_stopping, plateau],
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
)

In [None]:
# Training and validation loss per epoch, drawn on one explicitly passed axes.
fig, ax = plt.subplots(figsize=(20, 8))
for history_key in ('loss', 'val_loss'):
    sns.lineplot(x=history.epoch, y=history.history[history_key], ax=ax)
ax.set_title('Learning Curve (Loss)')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
# Fixed y-range: loss values above 0.5 fall outside the visible area.
ax.set_ylim(0, 0.5)
ax.legend(['train', 'val'], loc='best')
plt.show()

In [None]:
# Training and validation accuracy per epoch, drawn on one explicitly passed axes.
fig, ax = plt.subplots(figsize=(20, 8))
for history_key in ('binary_accuracy', 'val_binary_accuracy'):
    sns.lineplot(x=history.epoch, y=history.history[history_key], ax=ax)
ax.set_title('Learning Curve (Accuracy)')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
# Fixed y-range: accuracy values below 0.80 fall outside the visible area.
ax.set_ylim(0.80, 1.0)
ax.legend(['train', 'val'], loc='best')
plt.show()

In [None]:
# Evaluate on the validation input. `steps` must be a whole number of batches,
# so round up to include a final partial batch.
# NOTE(review): val_df and ds_val are not defined in the visible part of this notebook.
val_steps = (len(val_df) + BATCH - 1) // BATCH
score = model_pretrained.evaluate(ds_val, steps=val_steps, verbose=0)
# score holds the loss first, then the compiled metric.
print('Val loss:', score[0])
print('Val accuracy:', score[1])

In [None]:
# Evaluate on the test input and report the loss followed by the compiled metric.
# NOTE(review): steps=len(df_test) runs one step per test image, which is only
# right if ds_test yields a single image per batch. ds_test is not defined in
# the visible part of this notebook; confirm its batch size.
score = model_pretrained.evaluate(ds_test, verbose=0, steps=len(df_test))
for caption, result in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, result)