# **Introduction**

# Breast Cancer Detection Using Advanced Deep Learning

## Personal Motivation
This project represents my exploration into combining attention mechanisms with U-Net architecture 
for improved medical image segmentation.

## My Technical Approach
- Implementation of a novel Attention-Enhanced U-Net architecture
- Custom preprocessing pipeline for medical ultrasound data
- Advanced visualization techniques for model interpretability
- Comprehensive evaluation framework with multiple metrics


# **Imports**

In [None]:
from IPython.display import clear_output
!pip install tf_explain
clear_output()

In [None]:
# common
import os
import keras
import numpy as np
import pandas as pd
from glob import glob
import tensorflow as tf
import tensorflow.image as tfi

# Data
from keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import to_categorical

# Data Viz
import matplotlib.pyplot as plt

# Model 
from keras.models import Model
from keras.layers import Layer
from keras.layers import Conv2D
from keras.layers import Dropout
from keras.layers import UpSampling2D
from keras.layers import concatenate
from keras.layers import Add
from keras.layers import Multiply
from keras.layers import Input
from keras.layers import MaxPool2D
from keras.layers import BatchNormalization

# Callbacks 
from keras.callbacks import Callback
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint
from tf_explain.core.grad_cam import GradCAM

# Metrics
from keras.metrics import MeanIoU

# **Data**

In [None]:
def load_image(image_path, size):
    """
    Read one image file and return it as a square, rescaled array.

    Args:
        image_path (str): Location of the image file on disk.
        size (int): Side length, in pixels, of the square output.

    Returns:
        np.ndarray: The image resized to (size, size), with pixel values
        divided by 255 and rounded to 4 decimal places.
    """
    pixels = img_to_array(load_img(image_path))
    scaled = pixels / 255.0
    resized = tfi.resize(scaled, (size, size))
    return np.round(resized, 4)

def load_images(image_paths, size, mask=False, limit=None):
    """
    Load a list of image files into one pre-allocated batch array.

    Args:
        image_paths (list): Paths of the files to load.
        size (int): Side length every image is resized to.
        mask (bool): If True, keep only the first channel of each image and
            return a single-channel batch; otherwise return 3 channels.
        limit (int): If given, only the first `limit` paths are loaded
            (handy for quick experiments).

    Returns:
        np.ndarray: Array of shape (n, size, size, 1) when `mask` is True,
        else (n, size, size, 3), where n is the number of loaded paths.
    """
    selected = image_paths if limit is None else image_paths[:limit]
    channels = 1 if mask else 3
    batch = np.zeros((len(selected), size, size, channels))
    for index, image_path in enumerate(selected):
        loaded = load_image(image_path, size)
        if mask:
            # Masks are read with several channels; the first one is enough.
            loaded = loaded[..., :1]
        batch[index] = loaded
    return batch

def show_image(image, title=None, cmap=None, alpha=1.0):
    """
    Draw one image with matplotlib and hide the axes.

    Args:
        image: Image array to draw.
        title (str): Optional plot title; skipped when empty or None.
        cmap: Matplotlib color map forwarded to `plt.imshow`.
        alpha (float): Transparency forwarded to `plt.imshow`.
    """
    render_options = {"cmap": cmap, "alpha": alpha}
    plt.imshow(image, **render_options)
    if title:
        plt.title(title)
    plt.axis("off")

def show_mask(image, mask, cmap=None, alpha=0.4):
    """
    Draw an image and lay its segmentation mask on top of it.

    Args:
        image: Base image, drawn first.
        mask: Segmentation mask; size-1 dimensions are squeezed away
            before it is drawn.
        cmap: Matplotlib color map used for the mask layer.
        alpha: Opacity of the mask layer.
    """
    plt.imshow(image)
    overlay = tf.squeeze(mask)
    plt.imshow(overlay, cmap=cmap, alpha=alpha)
    plt.axis("off")


In [None]:
# Side length, in pixels, passed as the `size` argument whenever images and
# masks are loaded in this notebook.
SIZE = 256

In [None]:
# Dataset root; every entry found directly under it is used as a class name.
root_path = '../input/breast-ultrasound-images-dataset/Dataset_BUSI_with_GT/'
classes = os.listdir(root_path)
classes.sort()
classes

In [None]:
# One sorted list of mask files per class folder:
#   "*mask.png"   -> files whose name ends in "mask.png"
#   "*mask_1.png" -> files whose name ends in "mask_1.png"
single_mask_paths = sorted(
    sorted(glob(root_path + name + "/*mask.png")) for name in classes
)
double_mask_paths = sorted(
    sorted(glob(root_path + name + "/*mask_1.png")) for name in classes
)

In [None]:
# Flatten the per-class mask lists, then derive each image path by removing
# the '_mask' marker from the matching mask path (same order in both lists).
mask_paths = [
    mask_file for class_masks in single_mask_paths for mask_file in class_masks
]
image_paths = [mask_file.replace('_mask', '') for mask_file in mask_paths]

In [None]:
# Preview the first image at the working resolution.
first_image = load_image(image_paths[0], SIZE)
show_image(first_image)

In [None]:
# Preview the first image with its mask overlaid.
first_image = load_image(image_paths[0], SIZE)
first_mask = load_image(mask_paths[0], SIZE)[:, :, 0]  # first channel only
show_mask(first_image, first_mask, alpha=0.6)

## **Approach**

Below I explain my strategy for tackling images that have multiple masks.

In [None]:
# Example image that comes with two separate mask files. The path is built
# from root_path so it follows the dataset location configured above.
show_image(load_image(root_path + 'benign/benign (100).png', SIZE))

In [None]:
# Second mask file ("_mask_1") of the example image; path built from root_path.
show_image(load_image(root_path + 'benign/benign (100)_mask_1.png', SIZE))

In [None]:
# First mask file ("_mask") of the example image; path built from root_path.
show_image(load_image(root_path + 'benign/benign (100)_mask.png', SIZE))

I don't want the data in this form, since both masks belong to the same class. A better idea is to merge the two masks into a single one.

In [None]:
# Load both mask files of the example image (paths built from root_path).
mask1 = load_image(root_path + 'benign/benign (100)_mask_1.png', SIZE)
mask2 = load_image(root_path + 'benign/benign (100)_mask.png', SIZE)

# Merge the masks as a union: the element-wise maximum never exceeds the
# larger of the two inputs, whereas adding them could give values above 1.0
# wherever the masks overlap. Only the first channel is kept.
img = np.maximum(mask1, mask2)[:, :, 0]
show_image(img, cmap='gray')

We first merged them and then simply kept the first channel, because that is enough.

In [None]:
# Overlay the merged mask on the example image with the 'binary' colormap.
# The image path is built from root_path instead of being hard-coded again.
show_image(load_image(root_path + 'benign/benign (100).png', SIZE))
plt.imshow(img, cmap='binary', alpha=0.4)
plt.axis('off')
plt.show()

In [None]:
# Overlay the merged mask on the example image with the 'gray' colormap.
# The image path is built from root_path instead of being hard-coded again.
show_image(load_image(root_path + 'benign/benign (100).png', SIZE))
plt.imshow(img, cmap='gray', alpha=0.4)
plt.axis('off')
plt.show()

In [None]:
# Overlay the merged mask on the example image with matplotlib's default
# colormap. The image path is built from root_path instead of hard-coded.
show_image(load_image(root_path + 'benign/benign (100).png', SIZE))
plt.imshow(img, alpha=0.4)
plt.axis('off')
plt.show()

This is how the overlay looks with different colormaps. Alternatively, you can simply drop the multi-mask samples: there are very few of them (16), so leaving them out will not affect training much.

## **Data Work**

In [None]:
# Load the whole dataset: 3-channel images and their single-channel masks,
# both resized to SIZE x SIZE.
images = load_images(image_paths, size=SIZE)
masks = load_images(mask_paths, size=SIZE, mask=True)

In [None]:
# 3x5 grid of randomly chosen samples with masks overlaid ('jet' colormap).
plt.figure(figsize=(13, 8))
for i in range(15):
    plt.subplot(3, 5, i + 1)
    # Named `idx` so the builtin id() is not shadowed.
    idx = np.random.randint(len(images))
    show_mask(images[idx], masks[idx], cmap='jet')
plt.tight_layout()
plt.show()

In [None]:
# 3x5 grid of randomly chosen samples with masks overlaid ('binary' colormap).
plt.figure(figsize=(13, 8))
for i in range(15):
    plt.subplot(3, 5, i + 1)
    # Named `idx` so the builtin id() is not shadowed.
    idx = np.random.randint(len(images))
    show_mask(images[idx], masks[idx], cmap='binary')
plt.tight_layout()
plt.show()

In [None]:
# 3x5 grid of randomly chosen samples with masks overlaid ('afmhot' colormap).
plt.figure(figsize=(13, 8))
for i in range(15):
    plt.subplot(3, 5, i + 1)
    # Named `idx` so the builtin id() is not shadowed.
    idx = np.random.randint(len(images))
    show_mask(images[idx], masks[idx], cmap='afmhot')
plt.tight_layout()
plt.show()

In [None]:
# 3x5 grid of randomly chosen samples with masks overlaid ('copper' colormap).
plt.figure(figsize=(13, 8))
for i in range(15):
    plt.subplot(3, 5, i + 1)
    # Named `idx` so the builtin id() is not shadowed.
    idx = np.random.randint(len(images))
    show_mask(images[idx], masks[idx], cmap='copper')
plt.tight_layout()
plt.show()

# **Encoder**

In [None]:
class EncoderBlock(Layer):
    """Encoder stage: 3x3 conv -> dropout -> 3x3 conv, then optional max pooling.

    When ``pooling`` is True, calling the block returns the tuple
    ``(pooled, features)``; otherwise it returns ``features`` alone.
    """

    def __init__(self, filters, dropout_rate, pooling=True, **kwargs):
        super().__init__(**kwargs)
        self.filters = filters
        self.dropout_rate = dropout_rate
        self.pooling = pooling

        # Both convolutions share the same settings.
        conv_options = dict(
            padding="same", activation="relu", kernel_initializer="he_normal"
        )
        self.c1 = Conv2D(filters, 3, **conv_options)
        self.drop = Dropout(dropout_rate)
        self.c2 = Conv2D(filters, 3, **conv_options)
        self.pool = MaxPool2D()

    def call(self, x):
        features = self.c2(self.drop(self.c1(x)))
        if not self.pooling:
            return features
        # The un-pooled features are returned too, for use as a skip input.
        return self.pool(features), features

    def get_config(self):
        # Expose the constructor arguments alongside the base layer config.
        return {
            **super().get_config(),
            "filters": self.filters,
            "dropout_rate": self.dropout_rate,
            "pooling": self.pooling,
        }


# **Decoder**

In [None]:
class DecoderBlock(Layer):
    """Decoder stage: upsample, concatenate with the skip tensor, then convolve.

    Called with ``[x, skip]``; the convolution part is an ``EncoderBlock``
    created with ``pooling=False``.
    """

    def __init__(self, filters, dropout_rate, **kwargs):
        super().__init__(**kwargs)
        self.filters = filters
        self.dropout_rate = dropout_rate
        self.up = UpSampling2D()
        self.encoder_block = EncoderBlock(filters, dropout_rate, pooling=False)

    def call(self, inputs):
        low_res, skip = inputs
        merged = concatenate([self.up(low_res), skip])
        return self.encoder_block(merged)

    def get_config(self):
        # Expose the constructor arguments alongside the base layer config.
        return {
            **super().get_config(),
            "filters": self.filters,
            "dropout_rate": self.dropout_rate,
        }


# **Attention Gate**

In [None]:
class AttentionGate(Layer):
    """
    Attention gate for UNet: re-weights a skip connection before it is
    concatenated in the decoder.

    Called with ``[x, skip]``. ``skip`` is reduced by a stride-2 convolution
    and added to a convolution of ``x``, so ``x`` is expected to have half the
    spatial size of ``skip``. A 1x1 sigmoid convolution turns the sum into a
    single-channel attention map, which is upsampled and multiplied with
    ``skip``.

    Args:
        filters (int): Number of filters of the two 3x3 convolutions.
        use_bn (bool): If True, batch-normalize the gated skip tensor.
        **kwargs: Forwarded to ``Layer`` (e.g. ``name``).
    """
    def __init__(self, filters, use_bn, **kwargs):
        super().__init__(**kwargs)
        self.filters = filters
        self.use_bn = use_bn
        self.norm = Conv2D(filters, 3, padding="same", activation="relu", kernel_initializer="he_normal")
        self.down = Conv2D(filters, 3, strides=2, padding="same", activation="relu", kernel_initializer="he_normal")
        self.attn = Conv2D(1, 1, activation="sigmoid")
        self.up = UpSampling2D()
        self.bn = BatchNormalization()
        # Created once here instead of instantiating a new layer on every call.
        self.gate = Multiply()

    def call(self, inputs):
        x, skip = inputs
        gating = self.norm(x)
        skip_proj = self.down(skip)          # bring skip down to x's resolution
        attn_coeff = self.attn(gating + skip_proj)
        attn_coeff = self.up(attn_coeff)     # back to skip's resolution
        weighted_skip = self.gate([attn_coeff, skip])
        return self.bn(weighted_skip) if self.use_bn else weighted_skip

    def get_config(self):
        """Return the layer config including the constructor arguments."""
        config = super().get_config()
        config.update({"filters": self.filters, "use_bn": self.use_bn})
        return config


# **Visual Callbacks and Training**

In [None]:
class ShowProgress(Callback):
    """
    Keras callback that plots a qualitative check at the end of every epoch.

    A random sample is drawn from the module-level `images` / `masks` arrays
    and shown in three panels: ground-truth mask, predicted mask, and a
    GradCAM map computed on the 'Attention4' layer.
    """
    def on_epoch_end(self, epoch, logs=None):
        sample = np.random.randint(0, len(images))
        explainer = GradCAM()
        image, mask = images[sample], masks[sample]
        batch = image[np.newaxis, ...]  # add the batch axis for the model

        pred_mask = self.model.predict(batch)
        cam = explainer.explain(
            validation_data=(batch, mask),
            class_index=1,
            layer_name='Attention4',
            model=self.model)

        plt.figure(figsize=(10, 5))
        panels = [("Original Mask", mask), ("Predicted Mask", pred_mask)]
        for position, (panel_title, panel_mask) in enumerate(panels, start=1):
            plt.subplot(1, 3, position)
            plt.title(panel_title)
            show_mask(image, panel_mask, cmap='copper')
        plt.subplot(1, 3, 3)
        show_image(cam, title="GradCAM")
        plt.tight_layout()
        plt.show()

# **Attention UNet**

In [None]:
# Inputs: one sample has the shape of a single entry of `images`.
input_layer = Input(shape=images.shape[-3:])

# Encoder: each block returns (pooled output, un-pooled features for the skip).
p1, c1 = EncoderBlock(32, 0.1, name="Encoder1")(input_layer)
p2, c2 = EncoderBlock(64, 0.1, name="Encoder2")(p1)
p3, c3 = EncoderBlock(128, 0.2, name="Encoder3")(p2)
p4, c4 = EncoderBlock(256, 0.2, name="Encoder4")(p3)

# Encoding (bottleneck, no pooling)
encoding = EncoderBlock(512, 0.3, pooling=False, name="Encoding")(p4)

# Attention + Decoder
# AttentionGate's parameter is named `use_bn`; passing `bn=True` left the
# required argument unset and forwarded an unknown keyword to Layer.__init__.
a1 = AttentionGate(256, use_bn=True, name="Attention1")([encoding, c4])
d1 = DecoderBlock(256, 0.2, name="Decoder1")([encoding, a1])

a2 = AttentionGate(128, use_bn=True, name="Attention2")([d1, c3])
d2 = DecoderBlock(128, 0.2, name="Decoder2")([d1, a2])

a3 = AttentionGate(64, use_bn=True, name="Attention3")([d2, c2])
d3 = DecoderBlock(64, 0.1, name="Decoder3")([d2, a3])

a4 = AttentionGate(32, use_bn=True, name="Attention4")([d3, c1])
d4 = DecoderBlock(32, 0.1, name="Decoder4")([d3, a4])

# Output: one sigmoid channel per pixel.
output_layer = Conv2D(1, kernel_size=1, activation='sigmoid', padding='same')(d4)

# Model
model = Model(
    inputs=[input_layer],
    outputs=[output_layer]
)

# Compile
# NOTE(review): MeanIoU is documented for class-label predictions; with a
# sigmoid output the probabilities are presumably not thresholded at 0.5,
# which may distort the reported IoU - confirm against the Keras metric docs.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy', MeanIoU(num_classes=2, name='IoU')]
)

# Callbacks
cb = [
    # EarlyStopping(patience=3, restore_best_weights=True), # With Segmentation I trust on eyes rather than on metrics
    ModelCheckpoint("AttentionCustomUNet.h5", save_best_only=True),
    ShowProgress()
]

# **Training**

In [None]:
# Config Training
BATCH_SIZE = 8
VALIDATION_SPLIT = 0.2

# Steps per epoch for the training portion only. It is kept for reference but
# deliberately NOT passed to fit(): the previous value was derived from all
# samples although validation_split holds part of them out, so each epoch
# asked for more batches than the training data provides and Keras
# interrupted training early ("input ran out of data").
SPE = int(len(images) * (1 - VALIDATION_SPLIT)) // BATCH_SIZE

# Training: Keras derives the number of steps from batch_size itself.
results = model.fit(
    images, masks,
    validation_split=VALIDATION_SPLIT,
    epochs=20,  # 15 is enough for a good model; use 20+ for a better one
    batch_size=BATCH_SIZE,
    callbacks=cb
)

Observations:

Around the 12th epoch, the model started outputting meaningful segmentation results.

The model is particularly good at detecting well-defined, black round spots. However, as is typical with many segmentation models, it still struggles when the shapes become irregular—although this is improved in the current run due to the use of a high steps-per-epoch (SPE) setting.

Confusions often arise in dark, ambiguous image regions, which is understandable both biologically and technically since areas of similar intensity can fool even human observers.

Suggestions:

Chunking training into intervals of 20 epochs (e.g., train in blocks of 20) gives you better control over progress and helps to optimize model performance over time.

In this experiment, the model was trained for three blocks of 17 epochs (totaling 51 epochs), a strategy that proved effective.

Even when inspecting outputs on tougher images, most failures (about 9 out of 10) occur on cases that would challenge expert human readers as well—because in those images, key features are genuinely ambiguous.


# **Evaluation**

In [None]:
# Read each curve by key rather than by position, so the result does not
# depend on the order or number of entries in the history dict. The keys
# follow from compile(): loss, 'accuracy', the metric named 'IoU', plus the
# 'val_' variants produced by validation_split.
history = results.history
loss, accuracy, iou = history['loss'], history['accuracy'], history['IoU']
val_loss = history['val_loss']
val_accuracy = history['val_accuracy']
val_iou = history['val_IoU']

In [None]:
# One panel per metric: (panel title, training curve, validation curve).
curves = [
    ("Model Loss", loss, val_loss),
    ("Model Accuracy", accuracy, val_accuracy),
    ("Model IoU", iou, val_iou),
]

plt.figure(figsize=(20, 8))
for position, (panel_title, train_values, val_values) in enumerate(curves, start=1):
    plt.subplot(1, 3, position)
    plt.title(panel_title)
    plt.plot(train_values, label="Training")
    plt.plot(val_values, label="Validation")  # legend label typo fixed
    plt.legend()
    plt.grid()

plt.show()

The evaluation has revealed an unusual trend: the IoU (Intersection over Union) score on the validation data is significantly higher than on the training data. Generally, we'd expect the model to fit the training set better (or even overfit), so this reversal is intriguing.

Possible explanations include:

The validation set could be inadvertently “easier” or better aligned to the model’s learned features.

Some regularization or randomness may help the model generalize on the validation split more effectively.

There may have been fluctuations in training, which could be tied to factors like learning rate or split randomness.

In [None]:
# 5 rows x 3 columns: for each randomly drawn sample show the ground-truth
# mask, the raw predicted mask, and the prediction thresholded at 0.5.
plt.figure(figsize=(20, 25))
for row in range(5):
    first_cell = 3 * row + 1

    plt.subplot(5, 3, first_cell)
    # Named `idx` so the builtin id() is not shadowed.
    idx = np.random.randint(len(images))
    image = images[idx]
    mask = masks[idx]
    pred_mask = model.predict(image[np.newaxis, ...])
    plt.title("Original Mask")
    show_mask(image, mask)

    plt.subplot(5, 3, first_cell + 1)
    plt.title("Predicted Mask")
    show_mask(image, pred_mask)

    plt.subplot(5, 3, first_cell + 2)
    pred_mask = (pred_mask > 0.5).astype('float')
    plt.title("Processed Mask")
    show_mask(image, pred_mask)
plt.tight_layout()
plt.show()

The results are promising.

Going forward, possible improvements could include:

Exploring more advanced augmentation or processing strategies to maximize generalization.

Experimenting with other attention mechanisms or hybrid architectures for further accuracy.

Using more robust cross-validation and model interpretability techniques for greater transparency.