<a href="https://colab.research.google.com/github/vinodr2/LungDisorderDetect/blob/main/Lung_Disorder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**LUNG DISORDER DETECTION SYSTEM**

Before downloading the dataset, open the **Runtime** menu in the Colab notebook and change the runtime type to **T4 GPU**.

**IMPORT DATASET FROM KAGGLE**

1.Import Library: import kagglehub loads the kagglehub library used to download datasets directly from Kaggle.

2.Download Dataset: kagglehub.dataset_download("preetviradiya/covid19-radiography-dataset") fetches the COVID-19 Radiography Dataset from Kaggle and saves it locally.

3.Show Path: print("Path to dataset files:", path) displays the local path where the dataset was downloaded.


To install kagglehub (already pre-installed in Colab):

!pip install kagglehub

In [None]:
import kagglehub

# Kaggle handle ("owner/dataset-slug") of the COVID-19 Radiography dataset
KAGGLE_DATASET = "preetviradiya/covid19-radiography-dataset"

# Fetch the dataset through kagglehub and keep the location it reports
path = kagglehub.dataset_download(KAGGLE_DATASET)

# Echo the download location so it can be checked before the next cell runs
print("Path to dataset files:", path)

**COPY DOWNLOADED DATASET TO NOTEBOOK WORKING DIRECTORY**

In [None]:
import shutil
import os

# Folder in the kagglehub cache that holds this dataset. The original path
# pointed at ".../versions/2", so releases appear to live in numbered
# sub-folders; hard-coding "2" breaks as soon as a newer release is downloaded.
versions_root = "/root/.cache/kagglehub/datasets/preetviradiya/covid19-radiography-dataset/versions"

# Collect the numbered release folders that are actually present in the cache
cached_versions = []
if os.path.isdir(versions_root):
    cached_versions = [
        name for name in os.listdir(versions_root)
        if name.isdigit() and os.path.isdir(os.path.join(versions_root, name))
    ]

# Fail early with an actionable message instead of a copytree traceback
if not cached_versions:
    raise FileNotFoundError(
        f"No downloaded dataset version found under {versions_root}. "
        "Run the kagglehub download cell first."
    )

# Source path where KaggleHub saved the dataset (newest cached release)
source_path = os.path.join(versions_root, max(cached_versions, key=int))

# Destination path in Colab
destination_path = "/content/covid_dataset"

# Create the destination directory if it doesn't exist
os.makedirs(destination_path, exist_ok=True)

# Copy all contents from source to destination; dirs_exist_ok lets the cell be
# re-run without failing on the already-populated destination
shutil.copytree(source_path, destination_path, dirs_exist_ok=True)

print("Dataset moved to:", destination_path)


**DATASET PROPERTIES AND SELECTION OF IMAGES**

1.After downloading the dataset and copying it to the working directory, the available categories and the number of images in each category can be viewed.

2.The number of images used for training and testing is also selected here; adjust it to match the available system resources.

In [None]:
import os
import shutil

# Input dataset path
SOURCE_DIR = "/content/covid_dataset/COVID-19_Radiography_Dataset/COVID-19_Radiography_Dataset"

# Output dataset path
DEST_DIR = "/content/selected_dataset"
os.makedirs(DEST_DIR, exist_ok=True)

# File extensions (compared case-insensitively) that count as images
IMAGE_EXTENSIONS = ('.jpg', '.png', '.jpeg')

# Get category folders (every sub-directory of SOURCE_DIR is one category)
categories = [cat for cat in os.listdir(SOURCE_DIR) if os.path.isdir(os.path.join(SOURCE_DIR, cat))]

print(" Available Categories:\n")
for i, category in enumerate(categories):
    print(f"{i+1}. {category}")

# User selection
selected_counts = {}       # category -> number of images requested
images_by_category = {}    # category -> sorted image file names
total_selected = 0

print("\n Enter number of images to copy from each category:")

for category in categories:
    src_folder = os.path.join(SOURCE_DIR, category)
    # List the folder once and sort it: os.listdir order is unspecified, so
    # sorting makes the "first N images" selection reproducible, and reusing
    # this list guarantees the copy step sees exactly what was counted here.
    all_images = sorted(
        f for f in os.listdir(src_folder) if f.lower().endswith(IMAGE_EXTENSIONS)
    )
    images_by_category[category] = all_images
    max_images = len(all_images)

    # Keep asking until the user enters an integer within [0, max_images]
    while True:
        try:
            count = int(input(f"➡ {category} (max {max_images}): "))
        except ValueError:
            # Only a non-integer entry is retried; KeyboardInterrupt and other
            # errors propagate so the cell can still be interrupted.
            print(" Invalid input, try again.")
            continue

        if 0 <= count <= max_images:
            selected_counts[category] = count
            total_selected += count
            break
        print(f" Please enter between 0 and {max_images}")

# Copy images
# NOTE: files left in DEST_DIR by an earlier run are not removed here.
print("\n Copying selected images...")
for category, count in selected_counts.items():
    src_folder = os.path.join(SOURCE_DIR, category)
    dst_folder = os.path.join(DEST_DIR, category)
    os.makedirs(dst_folder, exist_ok=True)

    for img in images_by_category[category][:count]:
        shutil.copy(os.path.join(src_folder, img), os.path.join(dst_folder, img))

print("\n All selected images copied to:", DEST_DIR)

# Summary
print("\n📝 Summary:")
for cat, count in selected_counts.items():
    print(f" {cat:<20} ➤ {count} images copied")

print(f"\n Total images copied: {total_selected}")


**IMAGE PREPROCESSING**

Libraries needed (already available in Colab; elsewhere run **pip install numpy opencv-python matplotlib**)

1.Load images from 4 folders: COVID, Lung_Opacity, Normal, Viral Pneumonia.

2.Convert to grayscale, enhance contrast (CLAHE), remove noise (median blur), and resize to 150×150.

3.Normalize pixel values to 0–1 scale for better training.

4.Assign labels (0, 1, 2, 3) based on folder order and pair each image with its label.

5.Save the processed data into x.pickle and y.pickle for training.

In [None]:
import os
import cv2
import random
import pickle
import numpy as np
import matplotlib.pyplot as plt

# Dataset path (folder holding one sub-folder of images per category)
DATA_DIR = "/content/selected_dataset"
# Category folder names; a category's position in this list is used as its
# integer label by the loading loop below
CATEGORIES = ['COVID', 'Lung_Opacity', 'Normal', 'Viral Pneumonia']


# Image size: every image is resized to IMG_SIZE x IMG_SIZE pixels
IMG_SIZE = 150

# Data list, filled with (preprocessed_image, label) tuples
data = []

def preprocess_image(img_path):
    """Read an image file and return it together with its preprocessed form.

    Pipeline: grayscale -> CLAHE contrast enhancement -> 3x3 median blur ->
    resize to IMG_SIZE x IMG_SIZE -> divide by 255.0.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A tuple ``(original, processed)`` where ``original`` is the image as
        returned by ``cv2.imread`` and ``processed`` is the resized grayscale
        image divided by 255.0. ``(None, None)`` when ``cv2.imread`` could
        not load the file.
    """
    original = cv2.imread(img_path)
    # cv2.imread reports an unreadable file by returning None, not by raising
    if original is None:
        return None, None

    # Contrast enhancement is applied on the single-channel image
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = equalizer.apply(cv2.cvtColor(original, cv2.COLOR_BGR2GRAY))

    # Noise reduction, then bring the image to the fixed square size
    denoised = cv2.medianBlur(enhanced, 3)
    shrunk = cv2.resize(denoised, (IMG_SIZE, IMG_SIZE))

    return original, shrunk / 255.0

# Load and preprocess: the index of a category in CATEGORIES is its label
for label, category in enumerate(CATEGORIES):
    # A dedicated name avoids rebinding the global `path` set by the download cell
    category_dir = os.path.join(DATA_DIR, category)
    for img_file in os.listdir(category_dir):
        # Only the preprocessed image is stored; the original is not needed here
        _, processed = preprocess_image(os.path.join(category_dir, img_file))
        # Files that cv2 could not read come back as None and are skipped
        if processed is not None:
            data.append((processed, label))

# zip(*data) below would fail with an opaque unpacking error on an empty list
if not data:
    raise ValueError(
        f"No readable images found under {DATA_DIR}. "
        "Run the image selection cell and choose at least one image."
    )

# Shuffle so the pickled arrays are not ordered by class
random.shuffle(data)

# Separate features and labels
X, y = zip(*data)
# Add an explicit single-channel axis: (N, IMG_SIZE, IMG_SIZE, 1)
X = np.array(X).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
y = np.array(y)

# Save
with open("x.pickle", "wb") as f:
    pickle.dump(X, f)

with open("y.pickle", "wb") as f:
    pickle.dump(y, f)

print("✅ Saved enhanced x.pickle and y.pickle")


**SAMPLE IMAGE PREVIEW**

The code below previews a preprocessed image next to its original. It randomly selects an image from the preprocessed data.


In [None]:
# Show a sample comparison: original vs preprocessed
# Draw a random label from the loaded data so classes are previewed in
# proportion to how many images each one contributed.
sample_index = random.randint(0, len(y) - 1)
category = CATEGORIES[y[sample_index]]

# Pick a random file of that class. Always taking the first directory entry
# would show the same single image per class on every run.
category_dir = os.path.join(DATA_DIR, category)
original_sample_path = os.path.join(category_dir, random.choice(os.listdir(category_dir)))

original_img = cv2.imread(original_sample_path)
# cv2.imread returns None for unreadable files; cvtColor would then fail obscurely
if original_img is None:
    raise ValueError(f"Could not read sample image: {original_sample_path}")
# OpenCV loads images as BGR, matplotlib expects RGB
original_img = cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB)

# Resize original image to match preprocessing size
original_img_resized = cv2.resize(original_img, (IMG_SIZE, IMG_SIZE))

# Second element of the returned pair is the preprocessed image
_, preprocessed_img = preprocess_image(original_sample_path)

plt.figure(figsize=(5,4))
plt.subplot(1,2,1)
plt.imshow(original_img_resized)
plt.title("Original Image (Resized)")
plt.axis("off")

plt.subplot(1,2,2)
plt.imshow(preprocessed_img, cmap='gray')
plt.title("Preprocessed Image")
plt.axis("off")

plt.show()

**CNN RESNET 50 TRAINING**

Libraries needed (already available in Colab; elsewhere run **pip install numpy matplotlib seaborn opencv-python scikit-learn tensorflow**)

1.Loads grayscale images and labels, converts images to 3-channel for ResNet50.

2.Applies ResNet50-specific preprocessing for proper input scaling.

3.Splits data into training and testing sets with balanced labels.

4.Uses data augmentation (rotate, zoom, shift, flip) to reduce overfitting.

5.Builds and trains a model using frozen ResNet50 + dense layers, then evaluates and saves it.

6.Key hyperparameters:

Epochs: 30 — number of training cycles

Batch size: 32 — samples per update

Learning rate: 0.0001 — optimizer step size

Dropout: 0.5 — prevents overfitting

Augmentation: rotation ±15°, zoom ±10%, shift ±10%, flip — increases data diversity

In [None]:
import os
import pickle
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import ResNet50, resnet50
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# === Constants ===
IMG_SIZE = 150
CATEGORIES = ['COVID', 'Lung_Opacity', 'Normal', 'Viral Pneumonia']

# Save category order for testing
with open("categories.json", "w") as f:
    json.dump(CATEGORIES, f)

# === Load Data ===
# NOTE: pickle can execute arbitrary code while loading; only load the files
# written by the preprocessing cell of this notebook.
with open("x.pickle", "rb") as f:
    X = pickle.load(f)  # shape (N, 150, 150, 1), values scaled to 0-1
with open("y.pickle", "rb") as f:
    y = pickle.load(f)

# Convert back to 0–255 uint8 BEFORE replicating channels.
# - np.rint: astype(np.uint8) truncates, and the /255 -> *255 float round-trip
#   is not guaranteed to land exactly on the original integer, so rounding
#   keeps training pixels identical to the uint8 pixels used at inference.
# - Converting first avoids building a 3x larger float copy of the dataset.
X_uint8 = np.rint(X * 255).astype(np.uint8)

# Convert grayscale to 3-channel (ResNet50 expects 3 input channels)
X_rgb_uint8 = np.repeat(X_uint8, 3, axis=3)

# Apply the ResNet50-specific input preprocessing
X_preprocessed = resnet50.preprocess_input(X_rgb_uint8)

# Prepare labels (one-hot encoded)
num_classes = len(CATEGORIES)
y_cat = to_categorical(y, num_classes)

# === Train-Test Split ===
# stratify=y keeps the class proportions equal in both splits
X_train, X_test, y_train, y_test = train_test_split(
    X_preprocessed, y_cat, test_size=0.2, random_state=42, stratify=y)

# === Data Augmentation to Reduce Overfitting ===
# Random rotation, zoom, shifts and horizontal flips applied to training batches
augmentation_options = dict(
    rotation_range=15,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)
datagen = ImageDataGenerator(**augmentation_options)
datagen.fit(X_train)

# === Load ResNet50 Base ===
# ImageNet weights without the classification head; all layers stay frozen
resnet_base = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)
resnet_base.trainable = False

# Optional fine-tuning: mark the last 20 layers of resnet_base as trainable.

# === Build Model ===
# Frozen backbone followed by a small dense classification head
model = Sequential()
model.add(resnet_base)
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# === Compile ===
optimizer = Adam(learning_rate=1e-4)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# === Train ===
# Augmented batches for training; the held-out split is used for validation
train_batches = datagen.flow(X_train, y_train, batch_size=32)
history = model.fit(
    train_batches,
    validation_data=(X_test, y_test),
    epochs=30,
)

# === Plot Accuracy and Loss ===
plt.figure(figsize=(12, 5))

# One entry per panel: (history key, validation key, train label, val label,
# panel title, y-axis label)
panels = [
    ('accuracy', 'val_accuracy', 'Train Acc', 'Val Acc', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Train Loss', 'Val Loss', 'Model Loss', 'Loss'),
]

for position, (train_key, val_key, train_label, val_label, title, y_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()

# === Evaluate ===
# Class probabilities for the held-out split, reduced to class indices
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)
# y_test is one-hot encoded, so argmax recovers the integer labels
y_true = y_test.argmax(axis=1)

# Confusion Matrix (rows: actual class, columns: predicted class)
cm = confusion_matrix(y_true, y_pred_classes)
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=CATEGORIES,
    yticklabels=CATEGORIES,
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

# Classification Report
report = classification_report(y_true, y_pred_classes, target_names=CATEGORIES, digits=4)
print("\nClassification Report:")
print(report)

# === Save Model and Category List ===
model.save("resnet50_150_model.h5")
print("✅ Model saved as resnet50_150_model.h5")


**MODEL SUMMARY**

1.ResNet50 base:

Outputs feature maps of shape (5, 5, 2048)

Has ~23.6 million parameters, all frozen (non-trainable)

2.Flatten layer:

Flattens the output into a 1D vector of size 51,200

No trainable parameters here

3.Dense layer (256 units):

Fully connected layer with 256 neurons

Learns ~13.1 million parameters (trainable)

4.Dropout layer (rate 0.5):

Randomly drops 50% of neurons during training to reduce overfitting

No trainable parameters

5.Output Dense layer (4 units):

Final classification layer with 4 neurons (one per class)

Learns 1,028 parameters

6.Total parameters: ~36.7 million

Trainable: ~13.1 million (Dense layers only)

Non-trainable: ~23.6 million (ResNet50 frozen layers)

In [None]:
# Print the layer-by-layer summary of the model built in the training cell
model.summary()

**RESNET TEST CODE**

1.Preprocess input image exactly like training (grayscale → CLAHE → blur → resize → convert to 3-channel) and prepare it for ResNet50.

2.Load saved model and predict class probabilities for the preprocessed image.

3.Display original image, prediction, confidence scores, and predicted classes for clear interpretation.

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.resnet50 import preprocess_input

# === Constants ===
IMG_SIZE = 150
MODEL_PATH = "/content/resnet50_150_model.h5"  # Change if needed

# Class names in the label order used during training. The spelling must
# match the training cell ('Viral Pneumonia', with a space).
CATEGORIES = ['COVID', 'Lung_Opacity', 'Normal', 'Viral Pneumonia']

# The training cell writes its category order to categories.json "for
# testing"; prefer that file when present so that the displayed names always
# follow the order the saved model was trained with.
CATEGORIES_PATH = "categories.json"
if os.path.exists(CATEGORIES_PATH):
    import json  # local import: this cell's import list does not include json

    with open(CATEGORIES_PATH) as f:
        CATEGORIES = json.load(f)

# === Preprocessing Function (MUST match training) ===
def preprocess_image(img_path):
    """Read an image and build the 3-channel array used as model input.

    Pipeline: grayscale -> CLAHE contrast enhancement -> 3x3 median blur ->
    resize to IMG_SIZE x IMG_SIZE -> replicate the gray channel to BGR.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A tuple ``(original, model_input)`` where ``original`` is the image
        as returned by ``cv2.imread`` and ``model_input`` is the processed
        3-channel image. ``(None, None)`` when the file could not be loaded
        (an error line is printed in that case).
    """
    original = cv2.imread(img_path)
    # cv2.imread reports an unreadable file by returning None
    if original is None:
        print(f"❌ Error loading image: {img_path}")
        return None, None

    # Contrast enhancement on the single-channel image
    equalizer = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    enhanced = equalizer.apply(cv2.cvtColor(original, cv2.COLOR_BGR2GRAY))

    # Noise reduction, then resize to the model's input resolution
    denoised = cv2.medianBlur(enhanced, 3)
    shrunk = cv2.resize(denoised, (IMG_SIZE, IMG_SIZE))

    # The network expects three channels, so the gray channel is replicated
    return original, cv2.cvtColor(shrunk, cv2.COLOR_GRAY2BGR)

# === Input image path ===
img_path = input("Enter path to test chest X-ray image: ").strip()

# === Load and preprocess the image ===
original_img, img_for_model = preprocess_image(img_path)
if img_for_model is None:
    # Raise instead of calling exit(): in a notebook kernel `exit` is not the
    # plain `site` builtin and is not guaranteed to stop the cell, which would
    # let execution continue into `.astype` on None below.
    raise SystemExit("⚠️ Preprocessing failed.")

# === Convert to uint8 and preprocess for ResNet50 ===
img_uint8 = img_for_model.astype(np.uint8)  # already in 0–255
processed_input = preprocess_input(img_uint8)
# Add the leading batch axis expected by model.predict
input_img = processed_input.reshape(1, IMG_SIZE, IMG_SIZE, 3)

# === Load trained model ===
model = load_model(MODEL_PATH)

# === Predict ===
# [0] selects the probability vector of the single image in the batch
prediction = model.predict(input_img)[0]
predicted_class = CATEGORIES[np.argmax(prediction)]

# === Display original and prediction ===
plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
plt.imshow(cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB))
plt.title("Original Image")
plt.axis("off")

plt.subplot(1, 2, 2)
plt.imshow(cv2.cvtColor(img_for_model, cv2.COLOR_BGR2RGB))
plt.title(f"Predicted: {predicted_class}")
plt.axis("off")

plt.tight_layout()
plt.show()

# === Show Prediction Scores ===
print("\n Prediction Scores:")
for i, category in enumerate(CATEGORIES):
    print(f"{category}: {prediction[i]:.4f}")

# === Show Top 3 Predictions ===
# argsort is ascending: take the last three indices and reverse them
top_3_indices = prediction.argsort()[-3:][::-1]
print("\n Top 3 Predictions:")
for idx in top_3_indices:
    print(f"{CATEGORIES[idx]}: {prediction[idx]:.4f}")
