In [26]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout



In [39]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

image_height, image_width = 224, 224

def preprocess_images(data_dir):
    images = []
    labels = []

    for label in os.listdir(data_dir):
        if label == '.DS_Store':
            continue  
        label_path = os.path.join(data_dir, label)
        for image_file in os.listdir(label_path):
            if image_file == '.DS_Store':
                continue  
            image_path = os.path.join(label_path, image_file)
            image = cv2.imread(image_path)
            image = cv2.resize(image, (image_height, image_width))
            image = image.astype('float32') / 255.0  
            images.append(image)
            labels.append(label)  

    return images, labels

import os

notebook_dir = os.path.dirname(os.path.abspath('__file__'))

data_dir = os.path.join(notebook_dir, '..', 'data', 'lung_xray_dataset')
images, labels = preprocess_images(data_dir)

label_to_index = {'healthy': 0, 'cancer': 1}
labels = [label_to_index[label] for label in labels]

train_images, val_images, train_labels, val_labels = train_test_split(
    images, labels, test_size=0.2, random_state=42
)


In [40]:
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(image_height, image_width, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  
])


In [41]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [12]:
batch_size = 32

In [42]:
from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,  
    rotation_range=20,  
    width_shift_range=0.2,  
    height_shift_range=0.2,  
    shear_range=0.2,  
    zoom_range=0.2,  
    horizontal_flip=True,  
    fill_mode='nearest'  
)

image_height = 224
image_width = 224
batch_size = 32
train_data_dir = '/Users/my/Desktop/mdp/data/lung_xray_dataset/train'

train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(image_height, image_width),
    batch_size=batch_size,
    class_mode='binary'
)

total_train = train_generator.n



Found 561 images belonging to 2 classes.


In [43]:
val_datagen = ImageDataGenerator(rescale=1.0 / 255)
val_data_dir = '/Users/my/Desktop/mdp/data/lung_xray_dataset/val'


val_generator = val_datagen.flow_from_directory(
    val_data_dir,
    target_size=(image_height, image_width),  
    batch_size=batch_size,
    class_mode='binary'
)

total_val = val_generator.n


Found 48 images belonging to 2 classes.


In [44]:
epochs = 10

model.fit(
    train_generator,
    steps_per_epoch=total_train // batch_size,
    epochs=epochs,
    validation_data=val_generator,
    validation_steps=total_val // batch_size
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x13f0d5110>

In [46]:
loss, accuracy = model.evaluate(val_generator)
print("Validation Accuracy: {:.2f}%".format(accuracy * 100))


Validation Accuracy: 45.83%


In [52]:
image_path = '/Users/my/Desktop/mdp/data/lung_xray_dataset/train/healthy/Malignant case (549).jpg'
img = tf.keras.preprocessing.image.load_img(image_path, target_size=(image_height, image_width))
img_array = tf.keras.preprocessing.image.img_to_array(img)
img_array = tf.expand_dims(img_array, 0) 
img_array /= 255.0

prediction = model.predict(img_array)
if prediction >= 0.5:
    print("Prediction: Lung Cancer Detected")
else:
    print("Prediction: No Lung Cancer Detected")



Prediction: No Lung Cancer Detected
