In [None]:
# Mount Google Drive inside the Colab runtime. Later cells read the raw
# dataset from, and write model checkpoints to, paths under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import shutil
import random

# Set seed for reproducibility
random.seed(42)

# Paths
original_dataset_dir = '/content/drive/MyDrive/Dataset'
base_dir = 'dataset'  # relative to the current working directory
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'validation')

# Create new directories
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# Categories
categories = ['benign', 'malignant']

for category in categories:
    os.makedirs(os.path.join(train_dir, category), exist_ok=True)
    os.makedirs(os.path.join(val_dir, category), exist_ok=True)

    category_dir = os.path.join(original_dataset_dir, category)

    # os.listdir() returns entries in arbitrary order, so the seed alone does
    # not fix the split: sort first so every run shuffles the same sequence.
    # Otherwise a re-run can copy an image into the other split while the old
    # copy is still on disk, leaking data between train and validation.
    # Sub-directories are skipped because shutil.copyfile only handles files.
    images = sorted(
        entry for entry in os.listdir(category_dir)
        if os.path.isfile(os.path.join(category_dir, entry))
    )
    random.shuffle(images)

    # Split the images into 80% training and 20% validation
    train_split = int(0.8 * len(images))
    train_images = images[:train_split]
    val_images = images[train_split:]

    # Copy each subset into its <split>/<category> folder
    for split_dir, split_images in ((train_dir, train_images), (val_dir, val_images)):
        for image in split_images:
            src = os.path.join(category_dir, image)
            dst = os.path.join(split_dir, category, image)
            shutil.copyfile(src, dst)

print("Dataset restructured successfully.")

Dataset restructured successfully.


Hybrid Model

In [None]:
!pip install vit-keras

Collecting vit-keras
  Downloading vit_keras-0.1.2-py3-none-any.whl (24 kB)
Collecting validators (from vit-keras)
  Downloading validators-0.31.0-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.5/42.5 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: validators, vit-keras
Successfully installed validators-0.31.0 vit-keras-0.1.2


In [None]:
!pip install tensorflow-addons
!pip install vit-keras

Collecting tensorflow-addons
  Downloading tensorflow_addons-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (611 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m611.8/611.8 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard<3.0.0,>=2.7 (from tensorflow-addons)
  Downloading typeguard-2.13.3-py3-none-any.whl (17 kB)
Installing collected packages: typeguard, tensorflow-addons
Successfully installed tensorflow-addons-0.23.0 typeguard-2.13.3


In [None]:
# Smoke test for the installs: the message below is only printed if both
# imports succeed (a failed import raises before reaching the print).
import tensorflow_addons as tfa
from vit_keras import vit

print("tensorflow-addons and vit-keras installed successfully.")


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 



tensorflow-addons and vit-keras installed successfully.


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from vit_keras import vit
import tensorflow as tf
import tensorflow_addons as tfa

# Training pipeline: scale pixel values by 1/255 and apply random
# shear / zoom / horizontal-flip augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True,
)

# Validation pipeline: rescaling only, no augmentation
val_datagen = ImageDataGenerator(rescale=1./255)

# Arguments shared by both directory iterators.
# color_mode='rgb' makes every image load with three channels.
_flow_kwargs = dict(
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    color_mode='rgb',
)

# Generators: training batches are shuffled, validation order stays fixed
train_generator = train_datagen.flow_from_directory(
    '/content/dataset/train', shuffle=True, **_flow_kwargs
)
validation_generator = val_datagen.flow_from_directory(
    '/content/dataset/validation', shuffle=False, **_flow_kwargs
)

# CNN Model for local feature extraction
def create_cnn_model(input_shape):
    """Build the convolutional branch of the hybrid network.

    Stacks three stages of 3x3 ReLU convolution (32, 64, then 128 filters),
    each followed by 2x2 max pooling, and flattens the final feature map.

    Args:
        input_shape: Shape of one input image, passed straight to `Input`.

    Returns:
        A Keras `Model` mapping the image tensor to the flattened features.
    """
    image_in = Input(shape=input_shape)
    features = image_in
    for n_filters in (32, 64, 128):
        features = Conv2D(n_filters, (3, 3), activation='relu')(features)
        features = MaxPooling2D((2, 2))(features)
    return Model(image_in, Flatten()(features))

# Instantiate the CNN branch for 224x224 three-channel inputs
cnn_model = create_cnn_model((224, 224, 3))

# Vision Transformer (ViT) model: ViT-B/16 loaded with pretrained weights and
# built without its classification head (include_top=False).
# NOTE(review): vit-keras provides `vit.preprocess_inputs` for its pretrained
# weights, while the generators feeding this model only rescale by 1/255.
# Confirm that this input range is what the pretrained backbone expects.
vit_model = vit.vit_b16(
    image_size=224,
    pretrained=True,
    include_top=False,
    pretrained_top=False
)

# Combine CNN and ViT: the CNN's input tensor is reused as the single model
# input, so both branches see the same image batch.
combined_input = cnn_model.input
combined_output = cnn_model.output
vit_output = vit_model(combined_input)

# Concatenate the outputs of CNN and ViT into one feature vector, then map it
# to a single sigmoid unit for the binary decision.
x = tf.keras.layers.Concatenate()([combined_output, vit_output])
x = Dropout(0.5)(x)  # Dropout for regularization
x = Dense(1, activation='sigmoid')(x)

# Create the hybrid model
model = Model(inputs=combined_input, outputs=x)

# Compile the model: binary cross-entropy matches the single sigmoid output
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks
checkpoint_path = "/content/drive/MyDrive/Project/best_hybrid_model.h5"
# Create the checkpoint folder up front so the first save cannot fail on a
# missing directory after a full epoch of training.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Stop after 5 epochs without val_loss improvement and roll back to the best weights
early_stopping_cb = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Cut the learning rate by 10x after 3 epochs without val_loss improvement
reduce_lr_cb = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)
# Keep only the checkpoint with the highest val_accuracy. The HDF5 format is
# selected by the ".h5" suffix of `filepath`; `save_format` is not a
# ModelCheckpoint argument, so it is no longer passed.
checkpoint_cb = ModelCheckpoint(
    filepath=checkpoint_path,
    save_best_only=True,
    monitor='val_accuracy',
    mode='max',
    verbose=1,
)

# Train the model.
# len(generator) is the number of batches per pass, including the final
# partial batch. The previous `samples // batch_size` rounded down, which left
# the last few validation images out of every val_loss / val_accuracy value
# that the callbacks monitor.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=20,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    callbacks=[checkpoint_cb, early_stopping_cb, reduce_lr_cb]
)

# Evaluate the model.
# With one compiled metric, evaluate() returns [loss, accuracy]; unpack it so
# the printed value is the accuracy alone rather than the whole list.
eval_loss, accuracy = model.evaluate(validation_generator)
print(f"Validation accuracy: {accuracy:.4f}")

# Reload the checkpoint written by ModelCheckpoint during training
best_model = tf.keras.models.load_model(checkpoint_path)

# Write a second copy of that model under a separate file name
final_model_path = '/content/drive/MyDrive/Project/best_hybrid_model_final.h5'
best_model.save(final_model_path)

# Predict on the validation set (the generator was built with shuffle=False,
# so prediction order lines up with `validation_generator.classes`)
validation_generator.reset()
y_prob = best_model.predict(validation_generator).ravel()
# The sigmoid output is the score for class index 1; threshold it at 0.5
y_pred = (y_prob > 0.5).astype(int)
y_true = validation_generator.classes
assert len(y_pred) == len(y_true), "prediction count does not match label count"

# Take the label names from the generator, ordered by their encoded index, so
# the axis labels and report rows cannot drift from the actual class encoding.
class_indices = validation_generator.class_indices
cm_plot_labels = sorted(class_indices, key=class_indices.get)
label_ids = list(range(len(cm_plot_labels)))

# Confusion matrix (explicit `labels` keeps one row/column per class even if
# a class is never predicted)
cm = confusion_matrix(y_true, y_pred, labels=label_ids)

fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=cm_plot_labels, yticklabels=cm_plot_labels, ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion Matrix')
plt.show()

# Classification report
print(classification_report(y_true, y_pred, labels=label_ids, target_names=cm_plot_labels))

# Plot training history: per-epoch curves recorded by model.fit
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(len(acc))

# Two panels side by side: accuracy on the left, loss on the right
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 6))

acc_ax.plot(epochs_range, acc, label='Training Accuracy')
acc_ax.plot(epochs_range, val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_title('Training and Validation Accuracy')

loss_ax.plot(epochs_range, loss, label='Training Loss')
loss_ax.plot(epochs_range, val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_title('Training and Validation Loss')

plt.show()


Found 8000 images belonging to 2 classes.
Found 1999 images belonging to 2 classes.
Downloading data from https://github.com/faustomorales/vit-keras/releases/download/dl/ViT-B_16_imagenet21k+imagenet2012.npz




Epoch 1/20
Epoch 1: val_accuracy improved from -inf to 0.72278, saving model to /content/drive/MyDrive/Project/best_hybrid_model.h5


  saving_api.save_model(


Epoch 2/20
Epoch 2: val_accuracy improved from 0.72278 to 0.74748, saving model to /content/drive/MyDrive/Project/best_hybrid_model.h5
Epoch 3/20
Epoch 3: val_accuracy did not improve from 0.74748
Epoch 4/20
Epoch 4: val_accuracy improved from 0.74748 to 0.76109, saving model to /content/drive/MyDrive/Project/best_hybrid_model.h5
Epoch 5/20
Epoch 5: val_accuracy improved from 0.76109 to 0.76865, saving model to /content/drive/MyDrive/Project/best_hybrid_model.h5
Epoch 6/20
Epoch 6: val_accuracy did not improve from 0.76865
Epoch 7/20
Epoch 7: val_accuracy improved from 0.76865 to 0.80393, saving model to /content/drive/MyDrive/Project/best_hybrid_model.h5
Epoch 8/20
Epoch 8: val_accuracy improved from 0.80393 to 0.81250, saving model to /content/drive/MyDrive/Project/best_hybrid_model.h5
Epoch 9/20