In [1]:
import numpy as np 
import os
import pandas as pd
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import auc
import keras
import tensorflow as tf
from keras import backend as K
from keras import metrics
from keras.regularizers import l2
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, InputLayer, Activation
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.metrics import AUC
from keras.optimizers import Adam

In [2]:
seed_value= 42

In [3]:
## Set file paths to image files
#project_path = "C:/Users/Zeel soni/Downloads/archive"
train_path = "C:/Users/Zeel soni/Downloads/archive/chest_xray/train"
val_path =  "C:/Users/Zeel soni/Downloads/archive/chest_xray/val"
test_path = "C:/Users/Zeel soni/Downloads/archive/chest_xray/test"

## Set up hyperparameters that will be used later
hyper_dimension = 64
hyper_batch_size = 128
hyper_epochs = 100
hyper_channels = 1
hyper_mode = 'grayscale'

## Generate batches of image data (train, validation, and test) with data augmentation
train_datagen = ImageDataGenerator(rescale=1.0/255.0, 
                                   shear_range = 0.2,
                                   zoom_range = 0.2, 
                                   horizontal_flip = True)
val_datagen = ImageDataGenerator(rescale=1.0/255.0) 
test_datagen = ImageDataGenerator(rescale=1.0/255.0) 


train_generator = train_datagen.flow_from_directory(directory = train_path, 
                                                    target_size = (hyper_dimension, hyper_dimension),
                                                    batch_size = hyper_batch_size, 
                                                    color_mode = hyper_mode,
                                                    class_mode = 'binary', 
                                                    seed = 42)
val_generator = val_datagen.flow_from_directory(directory = val_path, 
                                                 target_size = (hyper_dimension, hyper_dimension),
                                                 batch_size = hyper_batch_size, 
                                                 class_mode = 'binary',
                                                 color_mode = hyper_mode,
                                                 shuffle=False,
                                                 seed = 42)
test_generator = test_datagen.flow_from_directory(directory = test_path, 
                                                 target_size = (hyper_dimension, hyper_dimension),
                                                 batch_size = hyper_batch_size, 
                                                 class_mode = 'binary',
                                                 color_mode = hyper_mode,
                                                 shuffle=False,
                                                 seed = 42)

test_generator.reset()

Found 5216 images belonging to 2 classes.
Found 16 images belonging to 2 classes.
Found 624 images belonging to 2 classes.


In [4]:
cnn = Sequential()
cnn.add(InputLayer(input_shape=(hyper_dimension, hyper_dimension, hyper_channels)))#Input Layer

cnn.add(Conv2D(filters=32, kernel_size=3, activation='relu'))#Hidden Layers
cnn.add(MaxPooling2D(pool_size=(2,2)))

cnn.add(Conv2D(filters=32, kernel_size=3, activation='relu'))#Hidden Layers
cnn.add(MaxPooling2D(pool_size=(2,2)))

cnn.add(Conv2D(filters=32, kernel_size=3, activation='relu'))#Hidden Layers
cnn.add(MaxPooling2D(pool_size=(2,2)))

cnn.add(Flatten())

cnn.add(Dense(activation='relu', units=128))
cnn.add(Dense(activation='sigmoid', units=1))#Output Layer

cnn.compile(optimizer= 'adam', loss='binary_crossentropy', metrics=[AUC()])
cnn_model = cnn.fit(
    train_generator,
    steps_per_epoch=5,  # Reduce for debugging
    epochs=20,           # Fewer epochs for debugging
    validation_data=val_generator,
    validation_steps=2, # Reduce for debugging
    verbose=1
)




Epoch 1/20


  self._warn_if_super_not_called()


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 3s/step - auc: 0.4986 - loss: 0.6629 - val_auc: 0.3359 - val_loss: 0.9819
Epoch 2/20


  self.gen.throw(typ, value, traceback)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2s/step - auc: 0.4293 - loss: 0.6643 - val_auc: 0.3984 - val_loss: 0.7320
Epoch 3/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2s/step - auc: 0.5163 - loss: 0.5949 - val_auc: 0.3672 - val_loss: 0.8388
Epoch 4/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2s/step - auc: 0.5112 - loss: 0.5849 - val_auc: 0.3828 - val_loss: 0.8328
Epoch 5/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2s/step - auc: 0.5746 - loss: 0.5924 - val_auc: 0.5547 - val_loss: 0.7386
Epoch 6/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2s/step - auc: 0.6518 - loss: 0.5259 - val_auc: 0.5547 - val_loss: 0.8755
Epoch 7/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2s/step - auc: 0.7377 - loss: 0.5428 - val_auc: 0.7734 - val_loss: 0.6971
Epoch 8/20
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 234ms/step - auc: 0.8603 - loss: 0.5009 - val_auc: 0.

In [5]:
# Train & validation loss
train_loss = cnn_model.history['loss']
val_loss = cnn_model.history['val_loss']

# Train & validation AUC
train_auc = cnn_model.history[list(cnn_model.history.keys())[3]]
val_auc = cnn_model.history[list(cnn_model.history.keys())[1]]

# True labels and predictions
y_true = test_generator.classes
Y_pred = cnn.predict(test_generator, steps=len(test_generator))
y_pred = (Y_pred > 0.5).flatten()
y_pred_prob = Y_pred.flatten()

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1s/step


In [7]:
## Summary Statistics
cm = confusion_matrix(y_true, y_pred)
TN, FP, FN, TP = cm.ravel()
accuracy = (TP + TN) / np.sum(cm)
precision = TP / (TP + FP)
recall = TP / (TP + FN)
specificity = TN / (TN + FP)
f1 = 2 * precision * recall / (precision + recall)
# Print Summary
print(f'[Summary Statistics]\n'
      f'Accuracy = {accuracy:.2%} | Precision = {precision:.2%} | '
      f'Recall = {recall:.2%} | Specificity = {specificity:.2%} | '
      f'F1 Score = {f1:.2%}')

[Summary Statistics]
Accuracy = 86.38% | Precision = 86.75% | Recall = 92.31% | Specificity = 76.50% | F1 Score = 89.44%
