In [None]:
# Core
import os
import cv2
import numpy as np
import pandas as pd
import collections
import time

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Deep Learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping ,ReduceLROnPlateau, EarlyStopping

from tensorflow.keras.applications import VGG16,ResNet50


# Evaluation
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

import warnings
warnings.filterwarnings('ignore')
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [None]:
# List the physical devices TensorFlow can see (e.g. to check whether a GPU is visible);
# the result is displayed as this cell's output.
tf.config.list_physical_devices()

In [None]:
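# Optional one-time dataset download through the Kaggle API (requires Kaggle API credentials).
# Uncomment the lines below to fetch and unzip the dataset.
# import kaggle
# kaggle.api.authenticate()
# kaggle.api.dataset_download_files('pranavraikokte/covid19-image-dataset',unzip=True)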

In [None]:
# Options shared by every split: integer labels inferred from the directory
# layout, batches of 32, and images resized to 128x128.
_loader_options = dict(
    labels='inferred',
    label_mode='int',
    batch_size=32,
    image_size=(128, 128),
)

# The training and validation subsets are both carved out of the train folder,
# so both calls use the same validation fraction and the same seed.
_train_dir = './Covid19-dataset/train'
_split_options = dict(validation_split=0.2, seed=7)

# Load Training Data (80%)
train_ds = keras.utils.image_dataset_from_directory(
    directory=_train_dir,
    subset='training',
    **_split_options,
    **_loader_options,
)

# Load Validation Data (20%)
val_ds = keras.utils.image_dataset_from_directory(
    directory=_train_dir,
    subset='validation',
    **_split_options,
    **_loader_options,
)

# Load Test Data (separate folder); shuffling is disabled for this split.
test_ds = keras.utils.image_dataset_from_directory(
    directory='./Covid19-dataset/test',
    shuffle=False,
    **_loader_options,
)

In [None]:
# Keep the class names exposed by the training dataset; later cells use them
# to label plots and reports.
class_names = train_ds.class_names
print(f"Classes: {class_names}")

In [None]:
# Peek at a single training batch and print the image and label tensor shapes.
for image_batch, label_batch in train_ds.take(1):
    print(image_batch.shape, label_batch.shape, sep="\n")


In [None]:
# Sample data: show the first six images of one training batch in a 2x3 grid,
# each titled with its class name.
fig, axes = plt.subplots(2, 3, figsize=(8, 6))
for images, labels in train_ds.take(1):
    for i, ax in enumerate(axes.flat):
        ax.imshow(images[i].numpy().astype("uint8"))
        ax.set_title(class_names[labels[i]])
        ax.axis("off")
plt.show()

In [None]:
# Class Distribution
# Count how many training images carry each integer label.
label_count = collections.Counter()
for _, labels in train_ds:
    label_count.update(labels.numpy().tolist())

# Build the counts in class-index order so that every bar is paired with its
# own class name explicitly (a class absent from the split shows as 0) instead
# of relabelling the ticks after the fact and relying on the plot's bar order.
class_counts = [label_count.get(idx, 0) for idx in range(len(class_names))]

fig, ax = plt.subplots()
sns.barplot(x=list(class_names), y=class_counts, ax=ax)
ax.set_xlabel("Class")
ax.set_ylabel("Training images")
ax.set_title("Class Distribution")
plt.show()

In [None]:
# Print the per-class training counts, in the order the counter recorded them.
print("Training samples per class:")
for label in label_count:
    print(f"{class_names[label]}: {label_count[label]}")

---

### 🔍 EDA Observations

* The dataset shows class imbalance across categories.
* COVID-19 and Pneumonia chest X-ray images share visual similarities, making classification challenging.
* Normal chest X-ray images generally exhibit clearer lung structures with fewer dense regions.

---

In [None]:
# Normalizing the images

In [None]:
# Multiply every pixel value by 1/255; labels pass through untouched.
# NOTE: this cell rebinds train_ds / val_ds / test_ds, so re-running it without
# reloading the data applies the rescaling a second time.
normalization_layer = keras.layers.Rescaling(1.0 / 255)


def _scale_pixels(images, labels):
    """Apply the shared rescaling layer to a batch of images."""
    return normalization_layer(images), labels


train_ds = train_ds.map(_scale_pixels)
test_ds = test_ds.map(_scale_pixels)
val_ds = val_ds.map(_scale_pixels)

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Training pipeline: cache, then shuffle with a buffer of 1000, then prefetch.
train_ds = (
    train_ds
    .cache()
    .shuffle(1000)
    .prefetch(buffer_size=AUTOTUNE)
)

# Validation and test pipelines are cached and prefetched but not shuffled here.
val_ds, test_ds = (
    split.cache().prefetch(buffer_size=AUTOTUNE) for split in (val_ds, test_ds)
)

In [None]:
# Random augmentation steps, bundled into one layer stack that the models
# below insert right after their input layer.
augmentation_steps = [
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.1),
    layers.RandomZoom(0.1),
    layers.RandomContrast(0.1),
]
data_augmentation = keras.Sequential(augmentation_steps)

In [18]:
def evaluate_model(model, test_ds, class_names):
    """Evaluate a trained classifier on a labelled dataset and print/plot metrics.

    Prints the loss and accuracy reported by ``model.evaluate``, the
    one-vs-rest ROC-AUC score, and the scikit-learn classification report,
    then shows the confusion matrix as a heatmap.

    Args:
        model: Model whose ``evaluate`` yields ``(loss, accuracy)`` and whose
            predictions are per-class scores (one column per class).
        test_ds: Iterable of ``(images, labels)`` batches with integer labels.
        class_names: Sequence of class names, indexed by integer label.

    Returns:
        None. All results are printed or plotted.
    """
    loss, accuracy = model.evaluate(test_ds, verbose=0)
    print(f"Test Loss: {loss:.4f}")
    print(f"Test Accuracy: {accuracy:.4f}")

    y_true = []
    y_pred_probs = []

    for images, labels in test_ds:
        # predict_on_batch scores exactly one batch; model.predict is meant for
        # whole datasets and is not intended to be called inside a loop.
        y_pred_probs.extend(model.predict_on_batch(images))
        y_true.extend(labels.numpy())

    y_true = np.array(y_true)
    y_pred_probs = np.array(y_pred_probs)
    y_pred_classes = np.argmax(y_pred_probs, axis=1)

    # Pin the label set to the known classes so the report and the matrix always
    # cover every class, even one that does not occur in the evaluated data.
    label_ids = list(range(len(class_names)))

    try:
        roc_score = roc_auc_score(
            y_true, y_pred_probs, multi_class='ovr', labels=label_ids
        )
        print(f"\nROC-AUC Score: {roc_score:.4f}")
    except ValueError as err:
        # ROC-AUC is undefined when a class has no true samples; report that
        # and still produce the remaining metrics.
        print(f"\nROC-AUC Score: not available ({err})")

    print("\n--- Classification Report ---")
    print(classification_report(
        y_true,
        y_pred_classes,
        labels=label_ids,
        target_names=class_names,
        zero_division=0,
    ))

    plt.figure(figsize=(8, 6))
    cm = confusion_matrix(y_true, y_pred_classes, labels=label_ids)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names,
                yticklabels=class_names)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.title('Confusion Matrix')
    plt.show()

In [None]:
def plot_history(h):
    """Plot training and validation curves for accuracy and loss side by side.

    Args:
        h: Object with a ``history`` dict containing ``'accuracy'`` and
            ``'loss'`` series; ``'val_accuracy'`` / ``'val_loss'`` are plotted
            when present and drawn as empty lines otherwise.
    """
    fig, axes = plt.subplots(1, 2, figsize=(10,4))

    panels = zip(axes, ('accuracy', 'loss'), ('Accuracy', 'Loss'))
    for panel, metric, heading in panels:
        panel.plot(h.history[metric], label='Train')
        panel.plot(h.history.get('val_' + metric, []), label='Val')
        panel.set_title(heading)
        panel.legend()

    plt.tight_layout()
    plt.show()

# Basic CNN

In [None]:
# Baseline CNN: two conv + max-pool blocks (16 then 32 filters), global average
# pooling, a small dense layer, and a 3-way softmax output.
basic_layers = [layers.Input(shape=(128, 128, 3))]
for n_filters in (16, 32):
    basic_layers.append(
        layers.Conv2D(filters=n_filters, kernel_size=3, padding='same', activation='relu')
    )
    basic_layers.append(layers.MaxPooling2D())
basic_layers.extend([
    layers.GlobalAveragePooling2D(),
    layers.Dense(16, activation='relu'),    # Small Dense Layer
    layers.Dense(3, activation='softmax'),  # Output
])
model_basic = keras.Sequential(basic_layers)

# Integer labels, hence the sparse categorical cross-entropy loss.
model_basic.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model_basic.summary()

# Train for up to 20 epochs; early stopping (patience 5, default monitored
# quantity) restores the best weights it saw.
basic_early_stop = EarlyStopping(patience=5, restore_best_weights=True)
history_basic = model_basic.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,
    callbacks=[basic_early_stop],
)

# Report metrics on the held-out test split.
evaluate_model(model_basic, test_ds, class_names)

In [None]:
# Training vs. validation accuracy and loss curves for the basic CNN.
plot_history(history_basic)

# Tuned CNN

In [None]:
# Tuned CNN: explicit input, the shared augmentation stack, three conv +
# max-pool blocks (32, 64, 128 filters), and a dense head with dropout.
tuned_layers = [
    # Input Layer (Explicit definition helps with debugging)
    layers.Input(shape=(128, 128, 3)),
    # Data Augmentation (Active only during training)
    data_augmentation,
]
for n_filters in (32, 64, 128):
    tuned_layers.append(
        layers.Conv2D(filters=n_filters, kernel_size=3, padding='same', activation='relu')
    )
    tuned_layers.append(layers.MaxPooling2D())

# Classification Head
tuned_layers.extend([
    layers.GlobalAveragePooling2D(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(3, activation='softmax'),
])
model = keras.Sequential(tuned_layers)

# Adam with an explicit learning rate of 1e-4 instead of the optimizer default.
tuned_optimizer = keras.optimizers.Adam(learning_rate=0.0001)
model.compile(
    optimizer=tuned_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Callbacks: multiply the learning rate by 0.2 (never below 1e-5) after
# 3 epochs without val_loss improvement, and stop after 15 such epochs while
# restoring the best weights.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=3,
    min_lr=0.00001
)

early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True
)

# Time only the fit call, using the monotonic perf_counter clock so the
# measured duration is unaffected by system clock adjustments.
start_time = time.perf_counter()

# Train
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=500,
    callbacks=[early_stopping, reduce_lr]
)

end_time = time.perf_counter()
total_time = end_time - start_time

print(f"Training time: {total_time:.2f} seconds")
print(f"Training time: {total_time/60:.2f} minutes")

# Report metrics on the held-out test split.
evaluate_model(model, test_ds, class_names)

In [None]:
# Training vs. validation accuracy and loss curves for the tuned CNN.
plot_history(history)

# PreTrained Model (VGG16)

In [None]:
# 1. Base: VGG16 convolutional stack with ImageNet weights, used as a frozen
#    feature extractor (no classifier top).
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(128, 128, 3)
)
base_model.trainable = False # Freeze weights

# 2. Build Model
model_vgg = models.Sequential([
    # Input Layer
    layers.Input(shape=(128, 128, 3)),
    data_augmentation,

    # The input pipeline scales pixels by 1/255, but the ImageNet VGG16 weights
    # expect vgg16.preprocess_input applied to 0-255 RGB images. Undo the
    # scaling, then apply that preprocessing inside the model so training,
    # validation and test data are all handled the same way.
    layers.Rescaling(255.0),
    layers.Lambda(keras.applications.vgg16.preprocess_input),

    base_model,

    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(3, activation='softmax')
])

# 3. Compile
model_vgg.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# 4. Train (only the new head is trainable; the base is frozen)
history_vgg = model_vgg.fit(
    train_ds,
    validation_data=val_ds,
    epochs=30,
    callbacks=[
        EarlyStopping(patience=5, restore_best_weights=True)
    ]
)

# 5. Evaluate on the held-out test split
evaluate_model(model_vgg,test_ds,class_names)

In [None]:
# Training vs. validation accuracy and loss curves for the VGG16-based model.
plot_history(history_vgg)

# PreTrained Model (ResNet50)

In [None]:
# 1. Base: ResNet50 convolutional stack with ImageNet weights, used as a frozen
#    feature extractor (no classifier top).
base_model_resnet = ResNet50(
    weights='imagenet',
    include_top=False,
    input_shape=(128, 128, 3)
)

base_model_resnet.trainable = False

# 2. Build Model
model_resnet = keras.Sequential([
    layers.Input(shape=(128, 128, 3)),
    data_augmentation,

    # The input pipeline scales pixels by 1/255, but the ImageNet ResNet50
    # weights expect resnet50.preprocess_input applied to 0-255 RGB images.
    # Undo the scaling, then apply that preprocessing inside the model so
    # training, validation and test data are all handled the same way.
    layers.Rescaling(255.0),
    layers.Lambda(keras.applications.resnet50.preprocess_input),

    base_model_resnet,

    layers.GlobalAveragePooling2D(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(3, activation='softmax')
])

# 3. Compile
model_resnet.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# 4. Train (only the new head is trainable; the base is frozen)
history_resnet = model_resnet.fit(
    train_ds,
    validation_data=val_ds,
    epochs=30,
    callbacks=[
        EarlyStopping(patience=5, restore_best_weights=True)
    ]
)

# 5. Evaluate on the held-out test split
evaluate_model(model_resnet, test_ds, class_names)

In [None]:
# Training vs. validation accuracy and loss curves for the ResNet50-based model.
plot_history(history_resnet)