# Setup to train full model with findinng proper weights to train each head (Classification + Segmentation)
## Breast-Ultrasound-Segmentation

## About Dataset
Breast cancer is one of the most common causes of death among women worldwide. Early detection helps in reducing the number of early deaths. The data reviews the medical images of breast cancer using ultrasound scan. Breast Ultrasound Dataset is categorized into three classes: normal, benign, and malignant images. Breast ultrasound images can produce great results in classification, detection, and segmentation of breast cancer when combined with machine learning.

### Data
The data collected at baseline include breast ultrasound images among women in ages between 25 and 75 years old. This data was collected in 2018. The number of patients is 600 female patients. The dataset consists of 780 images with an average image size of 500*500 pixels. The images are in PNG format. The ground truth images are presented with original images. The images are categorized into three classes, which are normal, benign, and malignant.

If you use this dataset, please cite:
Al-Dhabyani W, Gomaa M, Khaled H, Fahmy A. Dataset of breast ultrasound images. Data in Brief. 2020 Feb;28:104863. DOI: 10.1016/j.dib.2019.104863.

## Imports

In [1]:
import os

import pyrootutils

root = pyrootutils.setup_root(
    search_from=os.path.dirname(os.getcwd()),
    indicator=[".git", "pyproject.toml"],
    pythonpath=True,
    dotenv=True,
)

if os.getenv("DATA_ROOT") is None:
    os.environ["DATA_ROOT"] = f"{root}/data"

In [2]:
import torch
import torch.nn as nn

# Setup device-agnostic code
if torch.cuda.is_available():
    DEVICE = "cuda"  # NVIDIA GPU
    print("GPU Found!!")
else:
    raise Exception("No GPU Found!!")

GPU Found!!


In [3]:
import logging

import hydra
from hydra import compose, initialize

log = logging.getLogger(__name__)

In [4]:
# # auto reload dotenv
%load_ext dotenv
%dotenv

# auto reload libs
%load_ext autoreload
%autoreload 2

## Paths setup

In [5]:
from omegaconf import DictConfig, OmegaConf

# Register a resolver for torch dtypes
OmegaConf.register_new_resolver("torch_dtype", lambda name: getattr(torch, name))

In [6]:
with initialize(config_path="../configs", job_name="training_setup", version_base=None):
    cfg: DictConfig = compose(config_name="train.yaml")
    # print(OmegaConf.to_yaml(cfg))
    print(cfg)



In [7]:
os.chdir(root)

## Loading Dataset

In [8]:
data_module = hydra.utils.instantiate(cfg.datamodule)

class_weights = data_module.class_weights
class_names = data_module.classes
num_classes = len(class_names)
class_names, num_classes, class_weights

(['normal', 'malignant', 'benign'], 3, tensor([1.9774, 1.2494, 0.5903]))

In [9]:
next(iter(data_module.train_dataloader()))

[tensor([[[[ 1.2214,  1.2899,  1.3242,  ..., -0.8849, -0.3369, -0.3883],
           [ 1.2043,  1.2385,  1.2728,  ..., -1.6898, -0.9192, -0.0972],
           [ 1.1015,  1.1187,  1.1529,  ..., -1.4843, -0.5767, -0.1314],
           ...,
           [-2.1179, -2.1179, -1.9809,  ..., -1.7583, -1.7412, -2.0665],
           [-2.1179, -2.1179, -2.0837,  ..., -1.7240, -1.6898, -2.1008],
           [-2.1179, -2.1179, -2.1179,  ..., -1.8610, -1.8097, -2.1008]],
 
          [[ 1.3782,  1.4482,  1.4832,  ..., -0.7752, -0.2150, -0.2675],
           [ 1.3606,  1.3957,  1.4307,  ..., -1.5980, -0.8102,  0.0301],
           [ 1.2556,  1.2731,  1.3081,  ..., -1.3880, -0.4601, -0.0049],
           ...,
           [-2.0357, -2.0357, -1.8957,  ..., -1.6681, -1.6506, -1.9832],
           [-2.0357, -2.0357, -2.0007,  ..., -1.6331, -1.5980, -2.0182],
           [-2.0357, -2.0357, -2.0357,  ..., -1.7731, -1.7206, -2.0182]],
 
          [[ 1.5942,  1.6640,  1.6988,  ..., -0.5495,  0.0082, -0.0441],
           [ 

In [10]:
next(iter(data_module.val_dataloader()))

Traceback (most recent call last):
  File "/opt/conda/lib/python3.11/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/opt/conda/lib/python3.11/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/conda/lib/python3.11/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/opt/conda/lib/python3.11/shutil.py", line 763, in rmtree
    onerror(os.rmdir, path, sys.exc_info())
  File "/opt/conda/lib/python3.11/shutil.py", line 761, in rmtree
    os.rmdir(path, dir_fd=dir_fd)
OSError: [Errno 39] Directory not empty: '/tmp/pymp-3ipvhimu'


[tensor([[[[-2.0323, -0.6794,  0.0056,  ...,  0.2282,  0.1939,  0.1939],
           [-1.7754, -0.8678, -0.7822,  ..., -0.7308, -0.7822, -0.8335],
           [-1.1589, -0.7137, -0.6623,  ..., -0.8164, -0.8164, -0.7650],
           ...,
           [-1.6042, -1.5528, -1.5357,  ..., -1.0219, -1.0733, -1.0904],
           [-1.5185, -1.4843, -1.4843,  ..., -1.1075, -1.1075, -1.0904],
           [-1.5014, -1.4500, -1.4672,  ..., -1.0562, -0.9877, -0.9534]],
 
          [[-1.9482, -0.5651,  0.1352,  ...,  0.3627,  0.3277,  0.3277],
           [-1.6856, -0.7577, -0.6702,  ..., -0.6176, -0.6702, -0.7227],
           [-1.0553, -0.6001, -0.5476,  ..., -0.7052, -0.7052, -0.6527],
           ...,
           [-1.5105, -1.4580, -1.4405,  ..., -0.9153, -0.9678, -0.9853],
           [-1.4230, -1.3880, -1.3880,  ..., -1.0028, -1.0028, -0.9853],
           [-1.4055, -1.3529, -1.3704,  ..., -0.9503, -0.8803, -0.8452]],
 
          [[-1.7173, -0.3404,  0.3568,  ...,  0.5834,  0.5485,  0.5485],
           [-

In [11]:
next(iter(data_module.test_dataloader()))

[tensor([[[[-1.9980, -0.0116,  1.0502,  ...,  0.8961,  0.8618,  0.8447],
           [-1.8268, -0.1828,  0.0398,  ..., -0.5767, -0.4739, -0.3712],
           [-1.7240, -0.9877, -0.9363,  ..., -0.7650, -0.7137, -0.6623],
           ...,
           [-1.6213, -1.6898, -1.7069,  ..., -1.2274, -1.1247, -1.0048],
           [-1.6727, -1.6727, -1.6727,  ..., -0.8507, -0.7137, -0.6281],
           [-1.7925, -1.7069, -1.6898,  ..., -0.4911, -0.5253, -0.4739]],
 
          [[-1.9132,  0.1176,  1.2031,  ...,  1.0455,  1.0105,  0.9930],
           [-1.7381, -0.0574,  0.1702,  ..., -0.4601, -0.3550, -0.2500],
           [-1.6331, -0.8803, -0.8277,  ..., -0.6527, -0.6001, -0.5476],
           ...,
           [-1.5280, -1.5980, -1.6155,  ..., -1.1253, -1.0203, -0.8978],
           [-1.5805, -1.5805, -1.5805,  ..., -0.7402, -0.6001, -0.5126],
           [-1.7031, -1.6155, -1.5980,  ..., -0.3725, -0.4076, -0.3550]],
 
          [[-1.6824,  0.3393,  1.4200,  ...,  1.2631,  1.2282,  1.2108],
           [-

In [12]:
train_dl, val_dl = data_module.get_sampled_dataloader()

Sampled dataset size: 315


In [13]:
images, targets = next(iter(train_dl))
print(images.shape, targets["masks"].shape, targets["labels"].shape)

print(f"images:{images.dtype}, {images[0].min()}, {images[0].max()}")
print(f'masks {targets["masks"].dtype}, {targets["masks"][0].min()}, {targets["masks"][0].max()}')
print(f'labels {targets["labels"].dtype}, {targets["labels"].min()}, {targets["labels"].max()}')

torch.Size([32, 3, 224, 224]) torch.Size([32, 1, 224, 224]) torch.Size([32])
images:torch.float32, -2.1179039478302, 2.640000104904175
masks torch.uint8, 0, 1
labels torch.int64, 0, 2


In [14]:
_images, _targets = next(iter(val_dl))

print(_images.shape, _targets["masks"].shape, _targets["labels"].shape)

print(f"images:{_images[0].dtype}, {_images[0].min()}, {_images[0].max()}")
print(f'masks {_targets["masks"].dtype}, {_targets["masks"].min()}, {_targets["masks"].max()}')
print(f'labels {_targets["labels"].dtype}, {_targets["labels"].min()}, {_targets["labels"].max()}')

torch.Size([32, 3, 224, 224]) torch.Size([32, 1, 224, 224]) torch.Size([32])
images:torch.float32, -2.1179039478302, 2.640000104904175
masks torch.uint8, 0, 1
labels torch.int64, 0, 2


## Loading and training the FCN8 model 

In [15]:
segmentation_criterion = hydra.utils.instantiate(cfg.losses.segmentation_criterion)
classification_criterion = hydra.utils.instantiate(
    cfg.losses.classification_criterion, weight=class_weights
)
classification_criterion.weight

tensor([1.9774, 1.2494, 0.5903])

In [16]:
import mlflow
import mlflow.pytorch

from src.utils.gpu_utils import DeviceDataLoader, get_default_device, to_device

torch.cuda.empty_cache()
device = get_default_device()

gpu_weights = to_device(class_weights, device)

In [17]:
cfg.models

{'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 0.003}, 'scheduler': {'func': None}, 'model': {'_target_': 'src.models.vggnet_fcn_segmentation_model.VGGNetFCNSegmentationModel', 'segmentation_criterion': None, 'classification_criterion': None, 'seg_num_classes': 1, 'cls_num_classes': 3, 'seg_weight': 0.95, 'cls_weight': 0.05, 'vggnet_type': 'vgg16', 'fcn_type': 'fcn8'}}

In [18]:
model = hydra.utils.instantiate(
    cfg.models.model,
    segmentation_criterion=segmentation_criterion,
    classification_criterion=classification_criterion,
)

In [19]:
model = torch.compile(model)
model

OptimizedModule(
  (_orig_mod): VGGNetFCNSegmentationModel(
    (segmentation_criterion): SoftDiceLoss()
    (classification_criterion): CrossEntropyLoss()
    (cls_auroc): MulticlassAUROC()
    (encoder): VGGNetEncoder(
      (vgg): VGG(
        (features): Sequential(
          (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): ReLU(inplace=True)
          (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (3): ReLU(inplace=True)
          (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
          (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (6): ReLU(inplace=True)
          (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (8): ReLU(inplace=True)
          (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
          (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   

In [20]:
from mlflow.models import infer_signature

task_name = cfg.task_name
mlflow.set_experiment(f"overfitting-{task_name}")
run = mlflow.start_run()
model.eval()  # This ensures layers like Dropout and BatchNorm behave correctly for inference and saves computation.
with torch.no_grad():
    images, labels = next(iter(train_dl))
    # print(images.shape, labels)
    out = model(images)
    # l = labels['labels'][0]
    # print(l , torch.argmax(l))
    print(out["labels"][0], out["labels"].shape)
    print(out["masks"][0])
    print(out["masks"].shape)
    signature = infer_signature(
        model_input={"image_input": images.numpy()},
        model_output={"output": {"masks": out["masks"].numpy(), "labels": out["labels"].numpy()}},
    )
signature

tensor([ 0.0630, -0.0712,  0.0072]) torch.Size([32, 3])
tensor([[[0.0894, 0.0939, 0.0902,  ..., 0.1390, 0.0692, 0.0477],
         [0.0688, 0.0756, 0.1259,  ..., 0.0213, 0.0272, 0.1111],
         [0.0793, 0.1435, 0.0985,  ..., 0.0938, 0.1093, 0.0683],
         ...,
         [0.1298, 0.0341, 0.0715,  ..., 0.0301, 0.0859, 0.0583],
         [0.1053, 0.1780, 0.0771,  ..., 0.0854, 0.1089, 0.0921],
         [0.1160, 0.1196, 0.0966,  ..., 0.0816, 0.1164, 0.0548]]])
torch.Size([32, 1, 224, 224])


inputs: 
  ['image_input': Tensor('float32', (-1, 3, 224, 224))]
outputs: 
  ['output': {labels: Array(Array(float)) (required), masks: Array(Array(Array(Array(float)))) (required)} (required)]
params: 
  None

## GPU Training Setup

## Moving data and model into memory

In [21]:
train_dl = DeviceDataLoader(train_dl, device)
val_dl = DeviceDataLoader(val_dl, device)
to_device(model, device)
# train_dl.device

OptimizedModule(
  (_orig_mod): VGGNetFCNSegmentationModel(
    (segmentation_criterion): SoftDiceLoss()
    (classification_criterion): CrossEntropyLoss()
    (cls_auroc): MulticlassAUROC()
    (encoder): VGGNetEncoder(
      (vgg): VGG(
        (features): Sequential(
          (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (1): ReLU(inplace=True)
          (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (3): ReLU(inplace=True)
          (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
          (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (6): ReLU(inplace=True)
          (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
          (8): ReLU(inplace=True)
          (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
          (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
   

## Overfiting the model

In [22]:
optimizer = hydra.utils.instantiate(cfg.models.optimizer, params=model.parameters(), lr=1e-4)

In [25]:
cfg.paths.output_dir

InterpolationResolutionError: ValueError raised while resolving interpolation: HydraConfig was not set
    full_key: paths.output_dir
    object_type=dict

In [None]:
from torchinfo import summary

from src.utils.train_utils import fit

EPOCHS = cfg.trainer.max_epochs
mlflow.log_params({"epochs": EPOCHS})
mlflow.log_params({"batch_size": cfg.datamodule.batch_size})
mlflow.log_params({"optimizer": cfg.models.optimizer.values()})
# Log model summary.
with open("model_summary.txt", "w") as f:
    f.write(str(summary(model)))
mlflow.log_artifact("model_summary.txt")
history = fit(
    model=model,
    train_dataloader=train_dl,
    validation_dataloader=val_dl,
    epochs=EPOCHS,
    optimizer=optimizer,
    device_type=device.type,
    dtype=torch.float16,
    reduce_lr_on_plateau=torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, factor=0.1, patience=5
    ),
)
# saving the trained model
mlflow.pytorch.log_model(model, "model", signature=signature)
mlflow.log_metrics(history[0])

In [None]:
mlflow.end_run()

In [None]:
import matplotlib.pyplot as plt

seg_losses = [x["seg_loss"] for x in history]
seg_dice = [x["seg_dice"] for x in history]

plt.plot(seg_losses, "-bx")
plt.plot(seg_dice, "-rx")

plt.xlabel("epoch")
plt.ylabel("loss")
plt.grid()
plt.legend(["seg_loss", "seg_dice"])
plt.title("Loss vs. NO. of epochs")