**Data Processing**

In [13]:
# import os
# from PIL import Image
# import numpy as np

# # Set parameters
# data_dir = './data/train/'  # Path to your train folder
# target_size = (224, 224)     # Target size for resizing (width, height)

# def process_images(folder_path):
#     # List all files in the given folder
#     for root, dirs, files in os.walk(folder_path):
#         for file in files:
#             if file.endswith(('.jpg')): 
#                 image_path = os.path.join(root, file)
                
#                 # Open and resize the image
#                 with Image.open(image_path) as img:
#                     img = img.resize(target_size)  # Resize image
                    
#                     # Normalize the image
#                     img_array = np.array(img) / 255.0  # Scale pixel values to [0, 1]

#                     # Optionally, save the processed image
#                     # Save path can be modified as needed
#                     processed_image_path = os.path.join(root, f'processed_{file}')
#                     Image.fromarray((img_array * 255).astype(np.uint8)).save(processed_image_path)

# # Process both Benign and Malignant images
# process_images(os.path.join(data_dir, '0')) 
# process_images(os.path.join(data_dir, '1'))  

# print("Image processing complete!")


In [14]:
# import os

# # Define paths to your benign (0) and malignant (1) image folders
# benign_dir = './data/train/0'
# malignant_dir = './data/train/1'

# # Count the number of images in each folder
# benign_count = len([file for file in os.listdir(benign_dir) if file.endswith(('.jpg', '.png', '.jpeg'))])
# malignant_count = len([file for file in os.listdir(malignant_dir) if file.endswith(('.jpg', '.png', '.jpeg'))])

# # Print the counts
# print(f'Number of benign images: {benign_count}')
# print(f'Number of malignant images: {malignant_count}')

In [15]:
# import os
# import cv2
# import numpy as np
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from imblearn.over_sampling import RandomOverSampler

# # Set the base directory where your images are stored
# base_dir = './data/train/'

# # Initialize lists to hold image paths and labels
# image_paths = []
# labels = []

# # Loop through benign and malignant folders
# for label in ['0', '1']:  # '0' for benign, '1' for malignant
#     folder_path = os.path.join(base_dir, label)
#     for file in os.listdir(folder_path):
#         if file.endswith(('.jpg', '.png', '.jpeg')):
#             image_paths.append(os.path.join(folder_path, file))  # Full image path
#             labels.append(int(label))  # Corresponding label

In [16]:
# # Convert lists to NumPy arrays for further processing
# image_paths = np.array(image_paths)
# labels = np.array(labels)

# # Combine image paths and labels into a DataFrame
# data = pd.DataFrame({'file_path': image_paths, 'label': labels})

# # Split the dataset into training and testing sets (80% train, 20% test)
# X_train, X_test, y_train, y_test = train_test_split(data['file_path'], data['label'], test_size=0.2, random_state=42)

# # Oversampling
# ros = RandomOverSampler(random_state=42)
# X_train_resampled, y_train_resampled = ros.fit_resample(X_train.values.reshape(-1, 1), y_train)


# # Convert resampled data back to arrays
# X_train_resampled = np.array([path[0] for path in X_train_resampled])


In [17]:
# import numpy as np
# from PIL import Image

# def load_image(image_path):
#     img = Image.open(image_path)  # Load image
#     img_array = np.array(img) / 255.0  # Normalize pixel values
#     return img_array

# def load_images_in_batches(image_paths, batch_size=32):
#     for i in range(0, len(image_paths), batch_size):
#         batch_paths = image_paths[i:i + batch_size]
#         batch_images = [load_image(path) for path in batch_paths]
#         yield np.array(batch_images)  # Return the batch as a NumPy array

In [18]:
# import tensorflow as tf
# from tensorflow.keras import layers, models

# def create_cnn_model():
#     model = models.Sequential()
#     model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
#     model.add(layers.MaxPooling2D((2, 2)))
#     model.add(layers.Conv2D(64, (3, 3), activation='relu'))
#     model.add(layers.MaxPooling2D((2, 2)))
#     model.add(layers.Conv2D(128, (3, 3), activation='relu'))
#     model.add(layers.MaxPooling2D((2, 2)))
#     model.add(layers.Flatten())
#     model.add(layers.Dense(128, activation='relu'))
#     model.add(layers.Dense(1, activation='sigmoid'))  # Change to 'softmax' for multiple classes
#     return model


In [19]:
# model = create_cnn_model()
# model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [20]:
# # Evaluate the model on the test dataset
# test_loss, test_accuracy = model.evaluate(X_test_processed, y_test, verbose=1)

# print(f'Test Loss: {test_loss:.4f}')
# print(f'Test Accuracy: {test_accuracy:.4f}')


In [1]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler
import tensorflow as tf
from tensorflow.keras import layers, models

# Set parameters
data_dir = './data/train/'  # Path to your train folder
target_size = (224, 224)     # Target size for resizing (width, height)

# Function to process images
def process_images(folder_path):
    processed_images = []  # To hold paths of processed images
    for root, dirs, files in os.walk(folder_path):
        for file in files:
            if file.endswith(('.jpg')):
                image_path = os.path.join(root, file)
                try:
                    with Image.open(image_path) as img:
                        img = img.resize(target_size)  # Resize image
                        img_array = np.array(img) / 255.0  # Normalize pixel values
                        processed_image_path = os.path.join(root, f'processed_{file}')
                        Image.fromarray((img_array * 255).astype(np.uint8)).save(processed_image_path)
                        processed_images.append(processed_image_path)  # Add processed image path
                except Exception as e:
                    print(f"Error processing image {image_path}: {e}")
    return processed_images  # Return processed image paths


In [2]:
# Process both Benign and Malignant images
benign_images = process_images(os.path.join(data_dir, '0')) 
malignant_images = process_images(os.path.join(data_dir, '1'))

In [3]:
# Initialize lists to hold image paths and labels
image_paths = benign_images + malignant_images
labels = [0] * len(benign_images) + [1] * len(malignant_images)  # 0 for benign, 1 for malignant

# Convert lists to NumPy arrays
image_paths = np.array(image_paths)
labels = np.array(labels)


In [4]:
# Split the dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(image_paths, labels, test_size=0.2, random_state=42)

# Oversampling
ros = RandomOverSampler(random_state=42)
X_train_resampled, y_train_resampled = ros.fit_resample(X_train.reshape(-1, 1), y_train)

# Convert resampled data back to arrays
X_train_resampled = np.array([path[0] for path in X_train_resampled])

In [5]:
# Function to load and preprocess images
def load_image(image_path):
    try:
        img = Image.open(image_path)  # Load image
        img = img.resize(target_size)  # Ensure image is resized
        img_array = np.array(img) / 255.0  # Normalize pixel values
        return img_array
    except Exception as e:
        print(f"Error loading image {image_path}: {e}")
        return None

# Function to load images in batches
def load_images_in_batches(image_paths, batch_size=32):
    for i in range(0, len(image_paths), batch_size):
        batch_paths = image_paths[i:i + batch_size]
        batch_images = []
        for path in batch_paths:
            img = load_image(path)
            if img is not None:  # Ensure that the image is loaded successfully
                batch_images.append(img)
        yield np.array(batch_images)  # Return the batch as a NumPy array

In [6]:
# Create CNN model
def create_cnn_model():
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))  # Binary classification
    return model

# Create and compile model
model = create_cnn_model()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:

# Training with batches
epochs = 2  # Adjust as needed
batch_size = 128

for epoch in range(epochs):
    for X_train_batch in load_images_in_batches(X_train_resampled, batch_size):
        # Create corresponding labels for the current batch
        y_train_batch = y_train_resampled[len(X_train_batch) * epoch: len(X_train_batch) * (epoch + 1)]
        
        # Train your model on the current batch
        model.fit(X_train_batch, y_train_batch, epochs=1, verbose=1)  # Adjust epochs as needed

# Evaluate the model on the test dataset
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_accuracy:.4f}')


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1s/step - accuracy: 0.5688 - loss: 0.9755
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1s/step - accuracy: 0.6708 - loss: 0.6469
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1s/step - accuracy: 0.7365 - loss: 0.6140
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1s/step - accuracy: 0.7208 - loss: 0.6146
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - accuracy: 0.7135 - loss: 0.6073
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - accuracy: 0.7292 - loss: 0.5934
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - accuracy: 0.7563 - loss: 0.5593
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - accuracy: 0.6833 - loss: 0.6381
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1s/step - accuracy: 0.6979 - loss: 0.6093
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s

ValueError: Invalid dtype: str4608

In [9]:
def load_test_images(test_paths):
    return np.array([load_image(path) for path in test_paths if load_image(path) is not None])

X_test_processed = load_test_images(X_test)  # Load images from paths in X_test


In [10]:
y_test = np.array(y_test).astype(np.float32)  # Convert to a float array if necessary


In [11]:
print(X_test_processed.shape)  # Should be (number_of_samples, 224, 224, 3)
print(y_test.shape)  # Should be (number_of_samples,)


(2940, 224, 224, 3)
(2940,)


In [12]:
# Load and preprocess test images
X_test_processed = load_test_images(X_test)  # Ensure this function is defined as above

# Ensure y_test is a NumPy array and has the correct dtype
y_test = np.array(y_test).astype(np.float32)

# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test_processed, y_test, verbose=1)

print(f'Test Loss: {test_loss:.4f}')
print(f'Test Accuracy: {test_accuracy:.4f}')


[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 174ms/step - accuracy: 0.7131 - loss: 0.6016
Test Loss: 0.5856
Test Accuracy: 0.7282


In [13]:
import numpy as np
from PIL import Image

def predict_tumor(image_path, model):
    # Load the image
    img = Image.open(image_path)
    
    # Ensure the image is in the correct size and format
    img = img.resize((224, 224))  # Resize if necessary for your model
    img_array = np.array(img) / 255.0  # Normalize pixel values
    
    # Expand dimensions to match the model input shape
    img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
    
    # Make prediction
    prediction = model.predict(img_array)
    
    # Interpret the prediction
    return "Malignant" if prediction[0] > 0.5 else "Benign"


In [15]:
# Example usage
image_path = './data/test/25_1723979573_png.rf.cb7b0577d1de86f18474daa325bdd714.jpg'  # Replace with the path to your test image
result = predict_tumor(image_path, model)
print(f'The tumor is: {result}')


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 178ms/step
The tumor is: Benign
