In [None]:
from google.colab import drive

# Attach the user's Google Drive to the Colab filesystem at a named mount point
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)


In [None]:
import os
import zipfile
import pandas as pd
from PIL import Image

# Where the archive lives on the mounted Drive, and where it gets unpacked
extract_path = '/content/dataset'
zip_file_path = '/content/drive/MyDrive/archive.zip'

# Unpack every member of the archive into the target directory
with zipfile.ZipFile(zip_file_path, 'r') as archive:
    archive.extractall(extract_path)

# Keep only the entries directly under the target that are directories
extracted_folders = []
for entry in os.listdir(extract_path):
    if os.path.isdir(os.path.join(extract_path, entry)):
        extracted_folders.append(entry)

# Report the folder names, one per line
print("Extracted Folders:")
for folder in extracted_folders:
    print(folder)

In [None]:
import os
import pandas as pd

# Specify the path to the extracted folder
extracted_folder_path = '/content/dataset/chest_xray'

# Create DataFrames for each category (NORMAL and PNEUMONIA)
def create_dataframe_from_category(category_path, label):
    """Build a DataFrame listing the image files found in one category folder.

    Args:
        category_path: Directory whose direct entries are scanned.
        label: Value stored in the ``label`` column for every listed image.

    Returns:
        A DataFrame with an ``image_path`` column (``category_path`` joined
        with each file name ending in ``.jpeg``, ``.jpg`` or ``.png``) and a
        ``label`` column repeating ``label`` once per row.
    """
    image_extensions = ('.jpeg', '.jpg', '.png')
    # Filter and join in a single pass over the directory listing
    paths = [
        os.path.join(category_path, name)
        for name in os.listdir(category_path)
        if name.endswith(image_extensions)
    ]
    return pd.DataFrame({'image_path': paths, 'label': [label] * len(paths)})

# Build one DataFrame per (split, category) pair; the category folder name doubles as the label
def _load_category(split, category):
    """Return the image-listing DataFrame for ``<dataset>/<split>/<category>``."""
    return create_dataframe_from_category(
        os.path.join(extracted_folder_path, split, category), label=category
    )


train_normal_df = _load_category('train', 'NORMAL')
train_pneumonia_df = _load_category('train', 'PNEUMONIA')
test_normal_df = _load_category('test', 'NORMAL')
test_pneumonia_df = _load_category('test', 'PNEUMONIA')
valid_normal_df = _load_category('val', 'NORMAL')
valid_pneumonia_df = _load_category('val', 'PNEUMONIA')

# Preview the first rows of every DataFrame, in the same order they were built
for heading, frame in (
    ("Train Normal DataFrame:", train_normal_df),
    ("\nTrain Pneumonia DataFrame:", train_pneumonia_df),
    ("Test Normal DataFrame:", test_normal_df),
    ("\nTest Pneumonia DataFrame:", test_pneumonia_df),
    ("Valid Normal DataFrame:", valid_normal_df),
    ("\nValid Pneumonia DataFrame:", valid_pneumonia_df),
):
    print(heading)
    print(frame.head())



In [None]:
pip install matplotlib


In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.image import imread



# Function to display images
def display_images(data_frame, num_images=5):
    """Show the first images of ``data_frame``, each in its own 8x8 figure.

    Args:
        data_frame: DataFrame with ``image_path`` and ``label`` columns.
        num_images: Maximum number of images to show. Fewer are shown when
            the frame has fewer rows; nothing is shown for an empty frame.
    """
    # Clamp to the number of rows so short frames do not raise IndexError in iloc
    count = min(num_images, len(data_frame))
    for i in range(count):
        row = data_frame.iloc[i]
        image = imread(row['image_path'])

        plt.figure(figsize=(8, 8))
        plt.imshow(image, cmap='gray')  # Assuming images are grayscale
        plt.title(f"Label: {row['label']}")
        plt.axis('off')
        plt.show()

# Show sample images from both training categories
for heading, frame in (
    ("Displaying Train Normal Images:", train_normal_df),
    ("Displaying Train Pneumonia Images:", train_pneumonia_df),
):
    print(heading)
    display_images(frame)




In [None]:
# Show sample images from the test and validation categories
for heading, frame in (
    ("Displaying Test Normal Images:", test_normal_df),
    ("Displaying Test Pneumonia Images:", test_pneumonia_df),
    ("Displaying Valid Normal Images:", valid_normal_df),
    ("Displaying Valid Pneumonia Images:", valid_pneumonia_df),
):
    print(heading)
    display_images(frame)

In [None]:
def preprocess_image(image_path, target_size=(224, 224)):
    """Load an image, apply one random augmentation, resize and scale it.

    Args:
        image_path: Path of the image file to read.
        target_size: ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        A ``float32`` array resized to ``target_size`` and divided by 255.
        Single-channel inputs are expanded to three identical channels.
        NOTE(review): the division assumes 8-bit pixel values (as returned
        for JPEG files) — confirm if other formats are fed in.
    """
    # Function-scope imports: at this point in the notebook these names have
    # not been imported yet (the cells importing them come later).
    import cv2
    import numpy as np
    from tensorflow.keras.preprocessing.image import ImageDataGenerator

    image = imread(image_path)

    # random_transform works on a single rank-3 (H, W, C) image; grayscale
    # files decode to rank 2, so replicate the channel first.
    if image.ndim == 2:
        image = np.stack([image] * 3, axis=-1)

    # Apply data augmentation
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    image = datagen.random_transform(image)

    # Resize image (cast first so the resize input dtype is always float32)
    image = np.asarray(image, dtype=np.float32)
    image = cv2.resize(image, tuple(target_size))

    # Normalize pixel values
    image = image.astype(np.float32) / 255.0

    return image

In [None]:
# Function to create DataFrames for each category with preprocessing
from tensorflow.keras.preprocessing.image import ImageDataGenerator
def create_preprocessed_dataframe_from_category(category_path, label, target_size=(224, 224), include_images=False):
    """List the image files of one category, optionally with preprocessed pixels.

    Args:
        category_path: Directory whose direct entries are scanned.
        label: Value stored in the ``label`` column for every listed image.
        target_size: Size forwarded to ``preprocess_image``; only used when
            ``include_images`` is true.
        include_images: When true, every file is run through
            ``preprocess_image`` and the resulting arrays are stored in an
            extra ``image`` column. Defaults to false because the arrays are
            large and the returned frame is otherwise only used for its
            paths and labels.

    Returns:
        A DataFrame with ``image_path`` and ``label`` columns (plus ``image``
        when requested), one row per file ending in ``.jpeg``, ``.jpg`` or
        ``.png``.
    """
    image_extensions = ('.jpeg', '.jpg', '.png')
    image_paths = [
        os.path.join(category_path, f)
        for f in os.listdir(category_path)
        if f.endswith(image_extensions)
    ]

    df = pd.DataFrame({'image_path': image_paths, 'label': [label] * len(image_paths)})

    # Only decode the images when the caller actually wants them kept;
    # decoding and then discarding the arrays is pure wasted time and memory.
    if include_images:
        df['image'] = [preprocess_image(path, target_size) for path in image_paths]

    return df

In [None]:
import cv2
import numpy as np

In [None]:
def preprocess_image(image_path, target_size=(224, 224)):
    """Read an image with OpenCV and return it as a resized RGB float array.

    Args:
        image_path: Path of the image file.
        target_size: ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        A ``float32`` array with three channels in RGB order, resized to
        ``target_size`` with bicubic interpolation and divided by 255.

    Raises:
        OSError: If OpenCV cannot read or decode ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, which
    # would otherwise surface as an opaque error inside cvtColor.
    if image is None:
        raise OSError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, target_size, interpolation=cv2.INTER_CUBIC)
    image = image.astype(np.float32) / 255.0
    return image

# Rebuild the per-split, per-category DataFrames through the preprocessing variant
def _load_preprocessed_category(split, category):
    """Return the preprocessed-listing DataFrame for ``<dataset>/<split>/<category>``."""
    return create_preprocessed_dataframe_from_category(
        os.path.join(extracted_folder_path, split, category), label=category
    )


train_normal_df = _load_preprocessed_category('train', 'NORMAL')
train_pneumonia_df = _load_preprocessed_category('train', 'PNEUMONIA')
test_normal_df = _load_preprocessed_category('test', 'NORMAL')
test_pneumonia_df = _load_preprocessed_category('test', 'PNEUMONIA')
valid_normal_df = _load_preprocessed_category('val', 'NORMAL')
valid_pneumonia_df = _load_preprocessed_category('val', 'PNEUMONIA')



In [None]:
   # Function to display images
def display_images(data_frame, num_images=5):
    """Show the first images of ``data_frame`` side by side in one figure.

    Args:
        data_frame: DataFrame with ``image_path`` and ``label`` columns.
        num_images: Maximum number of images to show. Fewer are shown when
            the frame has fewer rows; nothing is drawn for an empty frame.
    """
    # Clamp to the number of rows so short frames do not raise IndexError in iloc
    count = min(num_images, len(data_frame))
    if count <= 0:
        # A subplot grid with zero columns is invalid, so skip drawing entirely
        return

    plt.figure(figsize=(12, 12))
    for i in range(count):
        row = data_frame.iloc[i]
        image = imread(row['image_path'])

        plt.subplot(1, count, i + 1)
        # cmap only applies to single-channel arrays; without it grayscale
        # X-rays would be drawn with matplotlib's default false-colour map.
        plt.imshow(image, cmap='gray')
        plt.title(row['label'])
        plt.axis('off')
    plt.show()

# Show a strip of sample images for every split/category combination
for heading, frame in (
    ("Displaying Train Normal Images:", train_normal_df),
    ("Displaying Train Pneumonia Images:", train_pneumonia_df),
    ("Displaying Test Normal Images:", test_normal_df),
    ("Displaying Test Pneumonia Images:", test_pneumonia_df),
    ("Displaying Valid Normal Images:", valid_normal_df),
    ("Displaying Valid Pneumonia Images:", valid_pneumonia_df),
):
    print(heading)
    display_images(frame)

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder

num_components = 100  # Choose the number of principal components
pca = PCA(n_components=num_components, random_state=42)


def _combine_categories(normal_df, pneumonia_df):
    """Stack the NORMAL and PNEUMONIA frames of one split into a single frame."""
    return pd.concat([normal_df, pneumonia_df], ignore_index=True)


def _load_images(image_paths):
    """Preprocess every path into one pre-allocated float32 array.

    Filling a pre-allocated array avoids holding a full list of images and
    its stacked copy in memory at the same time.
    """
    image_paths = list(image_paths)
    if not image_paths:
        # Shape matches preprocess_image's default 224x224 RGB output
        return np.empty((0, 224, 224, 3), dtype=np.float32)
    first = preprocess_image(image_paths[0])
    images = np.empty((len(image_paths),) + first.shape, dtype=np.float32)
    images[0] = first
    for index, image_path in enumerate(image_paths[1:], start=1):
        images[index] = preprocess_image(image_path)
    return images


# Both classes must be present in every split: using only the NORMAL frames
# would give the classifiers a single label to learn from.
train_df = _combine_categories(train_normal_df, train_pneumonia_df)
valid_df = _combine_categories(valid_normal_df, valid_pneumonia_df)
test_df = _combine_categories(test_normal_df, test_pneumonia_df)

train_images_preprocessed = _load_images(train_df['image_path'])
valid_images_preprocessed = _load_images(valid_df['image_path'])
test_images_preprocessed = _load_images(test_df['image_path'])

# Flatten each image to one row so PCA sees a 2-D (samples, features) matrix
train_images_flattened = train_images_preprocessed.reshape((len(train_images_preprocessed), -1))
valid_images_flattened = valid_images_preprocessed.reshape((len(valid_images_preprocessed), -1))
test_images_flattened = test_images_preprocessed.reshape((len(test_images_preprocessed), -1))

# Fit on the training split only; validation and test are projected with the same components
train_images_pca = pca.fit_transform(train_images_flattened)
valid_images_pca = pca.transform(valid_images_flattened)
test_images_pca = pca.transform(test_images_flattened)

# Display the explained variance ratio
print("Explained variance ratio:", sum(pca.explained_variance_ratio_))

# Convert labels to numeric format (0 or 1). The labels come from the same
# combined frames as the images, so row i of each image array matches label i.
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_df['label'])
valid_labels = label_encoder.transform(valid_df['label'])
test_labels = label_encoder.transform(test_df['label'])

# ConvModel

In [None]:
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from keras.optimizers import Adam
# Imported through tensorflow.keras, like the earlier preprocessing cell.
# NOTE(review): newer standalone `keras` releases may not export
# ImageDataGenerator from keras.preprocessing.image — confirm for your version.
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.regularizers import l2
from sklearn.preprocessing import LabelEncoder
# Import under its real name: the code below instantiates EarlyStopping(...)
from keras.callbacks import EarlyStopping

# Combine train and validation data
X_train_all = np.concatenate((train_images_preprocessed, valid_images_preprocessed), axis=0)
y_train_all = np.concatenate((train_labels, valid_labels), axis=0)

# Split into train and validation sets
X_train, X_valid, y_train, y_valid = train_test_split(X_train_all, y_train_all, test_size=0.2, random_state=42)
learning_rate = 0.001
batch_size = 32

# Augmentation generator for the training batches only. Validation and test
# data are fed as plain arrays so their metrics are measured on unmodified images.
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Build the model: three conv/pool stages followed by a regularized dense head
model = Sequential([
    Conv2D(64, (3, 3), activation='relu', padding='same', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Conv2D(256, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(512, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.5),
    Dense(256, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer=Adam(learning_rate=learning_rate), loss='binary_crossentropy', metrics=['accuracy'])

# Define early stopping (monitors validation loss by default)
early_stopping = EarlyStopping(patience=3, restore_best_weights=True)

# Train the model
history = model.fit(
    datagen.flow(X_train, y_train, batch_size=batch_size),
    steps_per_epoch=len(X_train) // batch_size,
    epochs=10,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping]
)

# Evaluate the model on test set
test_loss, test_acc = model.evaluate(test_images_preprocessed, test_labels, batch_size=batch_size)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)


# RNN MODEL

In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, LSTM, Bidirectional, Reshape
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.layers import Conv1D, MaxPooling1D
from keras.regularizers import l2
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Augmentation generator for the training batches (defined here so this cell
# does not depend on another cell having created it).
datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

model = Sequential([
    Conv2D(64, (3, 3), activation='relu', padding='same', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Conv2D(256, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    # LSTM needs (timesteps, features) per sample. Three 2x2 poolings turn the
    # 224x224 input into a 28x28x256 feature map; each of the 28 rows becomes
    # one timestep carrying 28 * 256 features.
    Reshape((28, 28 * 256)),
    LSTM(64, kernel_regularizer=l2(0.001)),
    Dense(1, activation='sigmoid')
])
# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Define early stopping callback; this single instance is the one passed to fit
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Lower the learning rate when validation loss stops improving
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3)

history = model.fit(datagen.flow(X_train, y_train, batch_size=batch_size),
    steps_per_epoch=len(X_train) // batch_size,
    epochs=20,  # Increase the number of epochs
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping, lr_scheduler])

# Evaluate the model on test set. The model takes full (224, 224, 3) images
# and reshapes internally, so the test images are passed unchanged.
test_loss, test_acc = model.evaluate(test_images_preprocessed, test_labels)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)

# *VGG16*

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from keras.applications import VGG16
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.layers import Dense, Flatten, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from keras.regularizers import l2

# Pool the train and validation images/labels, then carve out a fresh 80/20 split
X_train_all = np.concatenate((train_images_preprocessed, valid_images_preprocessed), axis=0)
y_train_all = np.concatenate((train_labels, valid_labels), axis=0)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_all,
    y_train_all,
    test_size=0.2,
    random_state=42,
)

# ImageNet-pretrained convolutional base, without its original classifier head.
# NOTE(review): presumably the inputs are RGB scaled to [0, 1]; VGG16's ImageNet
# weights are normally paired with keras.applications.vgg16.preprocess_input —
# confirm whether that preprocessing should be applied.
vgg16 = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Keep every layer of the pretrained base fixed during training
for base_layer in vgg16.layers:
    base_layer.trainable = False

# Stack a new classifier head on top of the frozen base, one layer at a time
model = Sequential()
model.add(vgg16)
model.add(Flatten())
for units in (512, 256):
    model.add(Dense(units, activation='relu', kernel_regularizer=l2(0.001)))  # L2 regularization
    model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Binary classification setup: Adam optimizer, cross-entropy loss, accuracy metric
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Stop after 5 epochs without improvement and roll back to the best weights
early_stopping = EarlyStopping(patience=5, restore_best_weights=True)

# Multiply the learning rate by 0.2 after 3 epochs without improvement
reduce_lr = ReduceLROnPlateau(factor=0.2, patience=3)

# Train on the in-memory arrays
history = model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=20,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping, reduce_lr],
)

# Report loss and accuracy on the held-out test set
test_loss, test_acc = model.evaluate(test_images_preprocessed, test_labels)
print("Test Loss:", test_loss)
print("Test Accuracy:", test_acc)
