In [1]:
import os
import zipfile
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix

In [2]:
# *Unzip the dataset*
zip_file_path = 'X_ray.zip'
extracted_folder_path = 'extracted_data'

with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extracted_folder_path)

# *List the extracted files*
print(f"Extracted files: {os.listdir(extracted_folder_path)}")

Extracted files: ['chest_xray']


In [3]:
# *Define parameters*
IMG_SIZE = (128, 128)
BATCH_SIZE = 32
EPOCHS = 10

In [4]:
# *Preprocess the data*
train_datagen = ImageDataGenerator(rescale=1.0/255, validation_split=0.2)

train_generator = train_datagen.flow_from_directory(
    'extracted_data/chest_xray/train',  # Update this path
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training'
)

validation_generator = train_datagen.flow_from_directory(
    'extracted_data/chest_xray/val',  # Update this path
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation'
)

Found 4173 images belonging to 2 classes.
Found 2 images belonging to 2 classes.


In [5]:
import warnings
warnings.filterwarnings("ignore")

In [6]:
# *Build the ANN Model*
ann_model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')
])

In [7]:
# *Compile the model*
ann_model.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])

# *Train the model*
history = ann_model.fit(
    train_generator,
    epochs=EPOCHS,
    steps_per_epoch=train_generator.n // BATCH_SIZE,
    validation_data=validation_generator,
    validation_steps=validation_generator.n // BATCH_SIZE
)

Epoch 1/10
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m150s[0m 1s/step - accuracy: 0.8027 - loss: 0.5117 - val_accuracy: 1.0000 - val_loss: 0.0910
Epoch 2/10
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 1.0000 - loss: 0.0395 - val_accuracy: 1.0000 - val_loss: 0.0933
Epoch 3/10
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 993ms/step - accuracy: 0.9638 - loss: 0.1030 - val_accuracy: 0.5000 - val_loss: 0.5378
Epoch 4/10
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9688 - loss: 0.0414 - val_accuracy: 1.0000 - val_loss: 0.2384
Epoch 5/10
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 959ms/step - accuracy: 0.9738 - loss: 0.0705 - val_accuracy: 0.5000 - val_loss: 0.5524
Epoch 6/10
[1m130/130[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 1.0000 - loss: 0.0139 - val_accuracy: 0.5000 - val_loss: 0.5530
Epoch 7/10
[1m130/

In [8]:
from keras.layers import Dense, Activation, Dropout

In [9]:
# ------------------ CNN Model ------------------
cnn_model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3)),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

In [10]:
# Compile CNN
cnn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [11]:
# Train CNN
cnn_history = cnn_model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=validation_generator
)

Epoch 1/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 959ms/step - accuracy: 0.7856 - loss: 0.5657 - val_accuracy: 0.5000 - val_loss: 0.5848
Epoch 2/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 972ms/step - accuracy: 0.9430 - loss: 0.1512 - val_accuracy: 1.0000 - val_loss: 0.1942
Epoch 3/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 1s/step - accuracy: 0.9600 - loss: 0.1004 - val_accuracy: 1.0000 - val_loss: 0.1692
Epoch 4/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 1s/step - accuracy: 0.9701 - loss: 0.0904 - val_accuracy: 1.0000 - val_loss: 0.0426
Epoch 5/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 983ms/step - accuracy: 0.9717 - loss: 0.0711 - val_accuracy: 1.0000 - val_loss: 0.0466
Epoch 6/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 967ms/step - accuracy: 0.9716 - loss: 0.0798 - val_accuracy: 1.0000 - val_loss: 0.2832
Epoch 7/10

In [12]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Flatten, Dense

In [13]:
from tensorflow.keras.layers import GlobalAveragePooling2D


In [14]:
# ------------------ Transfer Learning (VGG16) ------------------
vgg_base = VGG16(weights='imagenet', include_top=False, input_shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
vgg_base.trainable = False

vgg_model = Sequential([
    vgg_base,
    GlobalAveragePooling2D(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

In [15]:
# Compile VGG16-based model
vgg_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [16]:
# Train VGG16-based model
vgg_history = vgg_model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=validation_generator
)

Epoch 1/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m321s[0m 2s/step - accuracy: 0.7400 - loss: 0.5349 - val_accuracy: 1.0000 - val_loss: 0.3460
Epoch 2/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m309s[0m 2s/step - accuracy: 0.9102 - loss: 0.2447 - val_accuracy: 0.5000 - val_loss: 0.4663
Epoch 3/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m333s[0m 3s/step - accuracy: 0.9354 - loss: 0.1800 - val_accuracy: 0.5000 - val_loss: 0.4490
Epoch 4/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m323s[0m 2s/step - accuracy: 0.9464 - loss: 0.1596 - val_accuracy: 0.5000 - val_loss: 0.4685
Epoch 5/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m320s[0m 2s/step - accuracy: 0.9508 - loss: 0.1371 - val_accuracy: 0.5000 - val_loss: 0.5100
Epoch 6/10
[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m334s[0m 3s/step - accuracy: 0.9514 - loss: 0.1248 - val_accuracy: 0.5000 - val_loss: 0.3520
Epoch 7/10
[1m131/131

In [21]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Conv2D, MaxPooling2D, Flatten, LSTM, Dense
from tensorflow.keras import mixed_precision
import numpy as np
import gc

# Enable mixed precision (float16)
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

In [22]:
# Assuming temporal sequences of 5 images per patient
SEQUENCE_LENGTH = 5
IMG_SIZE = (64, 64)  # Reduced image size
BATCH_SIZE = 8

In [23]:
# Function to prepare data using a generator
def prepare_lstm_data(generator, sequence_length):
    X, y = [], []
    for batch_images, batch_labels in generator:
        for i in range(len(batch_images) - sequence_length + 1):
            X.append(batch_images[i:i+sequence_length])
            y.append(batch_labels[i+sequence_length-1])
        # Clear memory after processing each batch
        gc.collect()
    return np.array(X, dtype=np.float16), np.array(y, dtype=np.float16)

In [None]:
# Assuming train_generator and validation_generator are already defined
#train_data, train_labels = prepare_lstm_data(train_generator, SEQUENCE_LENGTH)

In [None]:
#val_data, val_labels = prepare_lstm_data(validation_generator, SEQUENCE_LENGTH)

# Define the model
lstm_model = Sequential([
    TimeDistributed(Conv2D(32, (3, 3), activation='relu'), input_shape=(SEQUENCE_LENGTH, IMG_SIZE[0], IMG_SIZE[1], 3)),
    TimeDistributed(MaxPooling2D((2, 2))),
    TimeDistributed(Flatten()),
    LSTM(64),
    Dense(1, activation='sigmoid')
])

# Compile the model
lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
lstm_model.fit(train_data, train_labels, validation_data=(val_data, val_labels), epochs=10, batch_size=BATCH_SIZE)

---Ignore----

# from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# ------------------ LSTM Model ------------------
# Prepare data for LSTM

def prepare_lstm_data(generator, sequence_length):
    data, labels = [], []
    batch_data, batch_labels = [], []
    for batch in generator:
        batch_data.append(batch[0])
        batch_labels.append(batch[1])
        if len(batch_data) >= sequence_length:
            data.append(np.stack(batch_data[-sequence_length:]))
            labels.append(batch_labels[-1])
    return np.array(data), np.array(labels)

to reduce memory usage:

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import TimeDistributed, Conv2D, MaxPooling2D, Flatten, LSTM, Dense
from tensorflow.keras import mixed_precision

# Enable mixed precision (float16)
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

# Assuming temporal sequences of 5 images per patient
SEQUENCE_LENGTH = 5
IMG_SIZE = (64, 64)  # Reduced image size (height, width)
train_data, train_labels = prepare_lstm_data(train_generator, SEQUENCE_LENGTH)
val_data, val_labels = prepare_lstm_data(validation_generator, SEQUENCE_LENGTH)

# Define the model
lstm_model = Sequential([
    TimeDistributed(Conv2D(16, (3, 3), activation='relu'), input_shape=(SEQUENCE_LENGTH, IMG_SIZE[0], IMG_SIZE[1], 3)),  # Fewer filters
    TimeDistributed(MaxPooling2D(pool_size=(2, 2))),
    TimeDistributed(Flatten()),
    LSTM(64, activation='tanh'),  # Reduced LSTM units
    Dense(1, activation='sigmoid')  # For binary classification
])

lstm_model 

# Compile LSTM
lstm_model .compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train LSTM
lstm_history = lstm_model.fit(
    train_data, train_labels,
    epochs=EPOCHS,
    validation_data=(val_data, val_labels)
)

In [31]:
# ------------------ Autoencoder ------------------
# Build autoencoder
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D

IMG_SIZE = (128, 128)  # Example image size (height, width)

# Define the autoencoder
autoencoder_input = Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
encoded = Conv2D(32, (3, 3), activation='relu', padding='same')(autoencoder_input)
encoded = MaxPooling2D((2, 2), padding='same')(encoded)

# Decoding
decoded = Conv2D(32, (3, 3), activation='relu', padding='same')(encoded)
decoded = UpSampling2D((2, 2))(decoded)
decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(decoded)

# Create the autoencoder model
autoencoder = Model(inputs=autoencoder_input, outputs=decoded)

In [32]:
# Compile autoencoder
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

In [33]:
# Model summary
autoencoder.summary()

# Convert a small number of batches to arrays (if needed)
train_data = np.array([batch for batch in train_generator])
val_data = np.array([batch for batch in val_generator])
# Clear memory after processing each batch
gc.collect()

# Ensure the input and target are the same for autoencoder training
autoencoder.fit(train_data, train_data,  # Using input images as both input and target
                validation_data=(val_data, val_data),  # Validation images as both input and target
                epochs=10,
                batch_size=32)

# Use encoder for feature extraction
encoder = Model(autoencoder_input, encoded)
encoded_features = encoder.predict(train_generator)

In [39]:
# ------------------ Evaluation ------------------
def evaluate_model(model, generator):
    generator.reset()
    y_pred = (model.predict(generator) > 0.5).astype("int32")
    y_true = generator.classes
    print("\nClassification Report:")
    print(classification_report(y_true, y_pred, target_names=generator.class_indices.keys()))
    print("\nConfusion Matrix:")
    print(confusion_matrix(y_true, y_pred))

In [40]:
# Evaluate all models
evaluate_model(ann_model, validation_generator)
evaluate_model(cnn_model, validation_generator)
evaluate_model(vgg_model, validation_generator)
#evaluate_model(lstm_model, validation_generator)  # Evaluate LSTM model
#evaluate_model(autoencoder_model, validation_generator)  # Evaluate AutoEncoder model

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step

Classification Report:
              precision    recall  f1-score   support

      NORMAL       1.00      1.00      1.00         1
   PNEUMONIA       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2


Confusion Matrix:
[[1 0]
 [0 1]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 736ms/step

Classification Report:
              precision    recall  f1-score   support

      NORMAL       1.00      1.00      1.00         1
   PNEUMONIA       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2


Confusion Matrix:
[[1 0]
 [0 1]]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step

Classification R