In [None]:
# Import Libraries

In [1]:
import os, glob
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import Callback, EarlyStopping
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import BatchNormalization




2024-03-12 22:03:13.576514: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
# Data Preparation

In [None]:
# Root folders (relative to the working directory) for the training and testing images
file_path, file_path1 = "train", "test"

In [None]:
# List the entries found directly under each dataset folder
# (presumably one sub-folder per class -- verify against the dataset layout)
name_class, name_class1 = (os.listdir(folder) for folder in (file_path, file_path1))

In [None]:
# Collect every "<root>/<sub-folder>/<name>.<ext>" path for both datasets.
# glob.glob is called without recursive=True, so '**' matches a single directory level.
file_paths = glob.glob(f"{file_path}/**/*.*")
file_paths1 = glob.glob(f"{file_path1}/**/*.*")

In [None]:
# The label of an image is the name of the folder that directly contains it
labels = [os.path.basename(os.path.dirname(path)) for path in file_paths]
labels1 = [os.path.basename(os.path.dirname(path)) for path in file_paths1]

In [None]:
# Build the training table: one row per image holding its file path and label
filepath = pd.Series(file_paths, name='Filepath').astype(str)
labels = pd.Series(labels, name='Label')
# Shuffle the rows with a fixed seed so a fresh "Restart & Run All" reproduces
# the same row order (and therefore the same previews and downstream splits)
data = (
    pd.concat([filepath, labels], axis=1)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
data.head(5)

In [None]:
# Build the testing table: one row per image holding its file path and label
filepath1 = pd.Series(file_paths1, name='Filepath').astype(str)
labels1 = pd.Series(labels1, name='Label')
# Shuffle the rows with a fixed seed so a fresh "Restart & Run All" reproduces
# the same row order (and therefore the same previews and evaluation order)
data1 = (
    pd.concat([filepath1, labels1], axis=1)
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
)
data1.head(5)

In [None]:
# Bar chart of the number of training images per label
counts = data['Label'].value_counts()
ax = sns.barplot(x=counts.index, y=counts)
ax.set_xlabel('Type')
plt.xticks(rotation=90);

In [None]:
# Bar chart of the number of testing images per label
counts = data1['Label'].value_counts()
ax = sns.barplot(x=counts.index, y=counts)
ax.set_xlabel('Type')
plt.xticks(rotation=90);

In [None]:
# Preview the first 15 rows of the shuffled training table as a 5x3 image grid,
# each panel titled with its label
fig, axes = plt.subplots(
    nrows=5,
    ncols=3,
    figsize=(10,8),
    subplot_kw={'xticks':[],'yticks':[]},
)
for i, ax in enumerate(axes.ravel()):
    row = data.loc[i]
    ax.imshow(plt.imread(row['Filepath']))
    ax.set_title(row['Label'])
plt.tight_layout()
plt.show()

In [None]:
# Preview the first 15 rows of the shuffled testing table as a 5x3 image grid,
# each panel titled with its label
fig, axes = plt.subplots(
    nrows=5,
    ncols=3,
    figsize=(10,8),
    subplot_kw={'xticks':[],'yticks':[]},
)
for i, ax in enumerate(axes.ravel()):
    row = data1.loc[i]
    ax.imshow(plt.imread(row['Filepath']))
    ax.set_title(row['Label'])
plt.tight_layout()
plt.show()

In [None]:
# Data Augmentation

In [None]:
# Alias the prepared tables under the names used by the rest of the notebook
train, test = data, data1
# One ImageDataGenerator per split. No augmentation transforms are configured here;
# each generator only applies preprocess_input to the images it yields.
train_datagen, test_datagen = (
    ImageDataGenerator(preprocessing_function=preprocess_input) for _ in range(2)
)

In [None]:
# Set up flow generators for training, validation, and testing data.
# The validation images are held out from the training table so that the test
# set is never used for early stopping / model selection during training.
train_df, valid_df = train_test_split(
    train,
    test_size=0.2,             # 80 % train / 20 % validation
    stratify=train['Label'],   # keep the class balance in both parts
    random_state=42
)

# Pin one class ordering for every generator so that class indices always agree
class_names = sorted(train['Label'].unique())

# Keyword arguments shared by all three generators
flow_kwargs = dict(
    x_col='Filepath',
    y_col='Label',
    target_size=(100,100),
    class_mode='categorical',
    classes=class_names,
    batch_size=32
)

train_gen = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    shuffle=True,
    seed=42,
    **flow_kwargs
)

# Evaluation-style generators keep the table order (shuffle=False)
valid_gen = test_datagen.flow_from_dataframe(
    dataframe=valid_df,
    shuffle=False,
    **flow_kwargs
)

test_gen = test_datagen.flow_from_dataframe(
    dataframe=test,
    shuffle=False,
    **flow_kwargs
)

In [None]:
# Model Definition

In [None]:
# Small CNN: two Conv -> BatchNorm -> MaxPool stages (128 then 256 filters),
# followed by a dense head that ends in a 2-way softmax
inputs = Input(shape=(100, 100, 3))
x = inputs
for n_filters in (128, 256):
    x = Conv2D(n_filters, (3, 3), activation='relu')(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

# Classification head
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
outputs = Dense(2, activation='softmax')(x)

model = Model(inputs=inputs, outputs=outputs)

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy as the reported metric
compile_options = {
    'optimizer': 'adam',
    'loss': 'categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_options)

In [None]:
# Early stopping: watch validation accuracy and stop after 5 epochs without improvement
early_stop = EarlyStopping(
    monitor='val_accuracy',
    min_delta=0,
    patience=5,
    mode='auto',
)
my_callbacks = [early_stop]

In [None]:
# Model Training

In [None]:

# Fit the model on the training generator for a single epoch, evaluating on the
# validation generator at the end of the epoch; the returned History object is kept
# for the accuracy / loss plots
fit_options = {
    'validation_data': valid_gen,
    'epochs': 1,
    'callbacks': my_callbacks,
}
history = model.fit(train_gen, **fit_options)

In [None]:
# Persist the trained model to 'my_model.keras' (path is relative to the working directory)
model.save('my_model.keras')

In [None]:
# Model Evaluation and Visualization

In [None]:
# Reload the model that was saved to 'my_model.keras'; this reloaded copy is the one
# used for the single-image prediction later in the notebook.
from tensorflow.keras.models import load_model
loaded_model_imageNet = load_model('my_model.keras')
# NOTE(review): preprocess_input and decode_predictions are imported twice in this cell.
# The imagenet_utils imports at the bottom run last, so they are the versions bound to
# these names in every following cell -- confirm that is the intended preprocessing.
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
import matplotlib.pyplot as plt

import cv2 
import numpy as np
from matplotlib.pyplot import imread
from matplotlib.pyplot import imshow
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.imagenet_utils import decode_predictions
from tensorflow.keras.applications.imagenet_utils import preprocess_input

In [None]:
# Draw one figure per metric family (accuracy, then loss), each showing the
# training and validation curves over epochs
history_df = pd.DataFrame(history.history)
curves = (
    ('Accuracy', ['accuracy','val_accuracy']),
    ('Loss', ['loss','val_loss']),
)
for title, columns in curves:
    history_df[columns].plot()
    plt.title(title)
    plt.show()

In [None]:
# Score the model on the test generator; results[0] is the loss and results[1] the accuracy
results = model.evaluate(test_gen, verbose=0)
test_loss, test_accuracy = results[0], results[1]
# Report the loss with 5 decimals and the accuracy as a percentage
print(" Test Loss: {: .5f}".format(test_loss))
print(" Test Accuracy: {: .2f}%".format(test_accuracy*100))

In [None]:
# Make predictions on the test set and print a classification report
pred = model.predict(test_gen)
pred = np.argmax(pred, axis=1)

# Invert the training generator's {class name: index} mapping to decode predictions
labels = {index: name for name, index in train_gen.class_indices.items()}
pred = [labels[k] for k in pred]

# Take the ground truth from the generator itself rather than from the DataFrame:
# flow_from_dataframe can drop rows it considers invalid (e.g. non-image files),
# so only the generator's own `classes` list is guaranteed to line up with `pred`.
test_index_to_name = {index: name for name, index in test_gen.class_indices.items()}
y_test = [test_index_to_name[k] for k in test_gen.classes]
print(classification_report(y_test, pred))


In [None]:
# Show the first 10 test rows in a 5x2 grid, each titled with its true and predicted label
fig, axes = plt.subplots(
    nrows=5,
    ncols=2,
    figsize=(12, 8),
    subplot_kw={"xticks": [], "yticks": []},
)
for i, ax in enumerate(axes.ravel()):
    row = test.iloc[i]
    true_label = row["Label"]
    ax.imshow(plt.imread(row["Filepath"]))
    ax.set_title(f"True: {true_label}\nPredicted: {pred[i]}")
plt.tight_layout()
plt.show()



In [None]:
# Prediction on a Single Image

In [None]:
# Example of predicting on a new image
img_path = "test1.jpg"
img = cv2.imread(img_path)
if img is None:
    # cv2.imread reports a missing / unreadable file by returning None instead of raising
    raise FileNotFoundError(f"Could not read image: {img_path}")
# OpenCV decodes images as BGR, whereas the Keras generators used for training load
# RGB before preprocess_input is applied, and matplotlib also expects RGB.
# Convert once so both the prediction and the displayed image use the right channels.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (100,100))

# Add the batch dimension and apply the same preprocessing function as in training
x = np.expand_dims(img, axis=0)
x = preprocess_input(x)
result = loaded_model_imageNet.predict(x)
# Class scores as truncated integer percentages
print((result*100).astype("int"))
plt.imshow(img)

In [None]:
# Convert the prediction to integer percentages; `p` holds one row per image in the
# batch and `pp` the per-class percentages of the first (only) image
p = [row for row in (result*100).astype('int')]
pp = [score for score in p[0]]
print(pp)
largest = max(pp)
print("Largest element is:", largest)

In [None]:
# Pick the class with the highest predicted probability. The argmax is taken on the raw
# probabilities so that truncation to integer percentages cannot create artificial ties.
index = int(np.argmax(result[0]))
# Class names ordered by the index the training generator assigned to them, so the
# lookup always matches the model's output units (instead of a hand-typed list)
name_class = sorted(train_gen.class_indices, key=train_gen.class_indices.get)
name_class[index]

In [None]:
# Display the image with the predicted class name as the figure title
predicted_name = name_class[index]
plt.title(predicted_name)
plt.imshow(img)