<a href="https://colab.research.google.com/github/xolotl18/Master_Thesis/blob/main/training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Semantic Segmentation with FastSCNN

The purpose of this notebook is to train the network on the training dataset, export the trained model in ONNX format, and save the state_dict of the PyTorch model for later inference.

## Installation of libraries for Google  Colab

**Do not run this cell outside of Google Colaboratory**

In [None]:
!pip3 install -q -U albumentations
!echo "$(pip freeze | grep albumentations) is successfully installed"
!pip uninstall opencv-python-headless==4.5.5.62
!pip install opencv-python-headless==4.5.2.52
!pip install torchmetrics
!pip install torch==1.8.0+cu111 torchvision==0.9.0+cu111 torchaudio==0.8.0 -f https://download.pytorch.org/whl/torch_stable.html
!git clone https://github.com/xolotl18/Master_Thesis
!pip install onnx
!pip install onnxruntime

## Imports

In [2]:
import copy
import random
import os
import sys
import shutil
from urllib.request import urlretrieve

import albumentations as A
import albumentations.augmentations.functional as F
from albumentations.pytorch import ToTensorV2
import cv2
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim
from torch.utils.data import Dataset, DataLoader
from torchmetrics import JaccardIndex
from statistics import mean
import torchvision.transforms as T
import torch.onnx
import onnx
import onnxruntime
from torchsummary import summary


from utils.lr_scheduler import PolynomialLRDecay
from models.fast_scnn import FastSCNN
from models.small_scnn import SmallSCNN
from models.super_small_scnn import SuperSmallSCNN

from models.experiments.fast_scnn_mod import FastSCNN as fastscnn_mod

from models.bisenetv2 import BiSeNetV2
from utils.dataset import PackagesDataset, PackagesInferenceDataset
from utils.evaluation import Evaluate
# Turn on cuDNN's benchmark mode (torch.backends.cudnn.benchmark), which lets cuDNN
# try several convolution algorithms and keep the fastest one.
cudnn.benchmark = True

## Load image and label files into Dataset objects

The dataset has already been divided into train, validation and test folders in the notebook **Desktop/Master_Thesis/preparation/dataset_traintest_split.ipynb**

In [3]:
# All dataset paths are resolved relative to the directory the notebook is run from.
c_dir = os.getcwd()
dataset_directory = os.path.join(c_dir, "full_dataset")

train_images_directory = os.path.join(dataset_directory, "train/images")
train_masks_directory = os.path.join(dataset_directory, "train/labels")
val_images_directory = os.path.join(dataset_directory, "val/images")
val_masks_directory = os.path.join(dataset_directory, "val/labels")
test_images_directory = os.path.join(dataset_directory, "test/images")
test_masks_directory = os.path.join(dataset_directory, "test/labels")


def _list_png_files(directory):
    """Return the names of the ``.png`` files found in ``directory``, sorted alphabetically.

    ``os.listdir`` returns entries in arbitrary order, so the names are sorted to keep
    the sample order identical from one run (or machine) to the next.
    """
    return sorted(item for item in os.listdir(directory) if item.endswith(".png"))


#make sure that image_filenames only contains png files
train_images_filenames = _list_png_files(train_images_directory)
val_images_filenames = _list_png_files(val_images_directory)
test_images_filenames = _list_png_files(test_images_directory)

# report how many images ended up in each split
for names_list, split in zip((train_images_filenames, val_images_filenames, test_images_filenames), ('train', 'validation', 'test')):
  print(f"The size of the {split} set is : {len(names_list)}")
  print()
  print()

The size of the train set is : 300

The size of the validation set is : 40

The size of the test set is : 60



Select the transformations and create the Dataset objects. 

The test dataset is different from train and validation because it does not crop the image but applies a resize that will be reversed after the inference.

In [4]:
INPUT_SIZE = 512  # side length, in pixels, produced by every pipeline below
NORMALIZE_MEAN = (0.485, 0.456, 0.406)
NORMALIZE_STD = (0.229, 0.224, 0.225)


def _normalize_and_tensorize():
    """Return fresh instances of the two steps that close every pipeline: normalisation, then tensor conversion."""
    return [A.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD), ToTensorV2()]


# Training: pad + random crop to INPUT_SIZE, then geometric and colour augmentations.
train_transform = A.Compose(
    [
        A.PadIfNeeded(min_height=INPUT_SIZE, min_width=INPUT_SIZE),
        A.RandomCrop(INPUT_SIZE, INPUT_SIZE),
        A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=15, p=0.5),
        A.RGBShift(r_shift_limit=15, g_shift_limit=15, b_shift_limit=15, p=0.3),
        A.RandomBrightnessContrast(p=0.3),
        *_normalize_and_tensorize(),
    ]
)

# Validation: pad + centre crop only, no random augmentation.
val_transform = A.Compose(
    [
        A.PadIfNeeded(min_height=INPUT_SIZE, min_width=INPUT_SIZE),
        A.CenterCrop(INPUT_SIZE, INPUT_SIZE),
        *_normalize_and_tensorize(),
    ]
)

# Test: the image is resized instead of cropped.
test_transform = A.Compose(
    [
        A.Resize(INPUT_SIZE, INPUT_SIZE),
        *_normalize_and_tensorize(),
    ]
)

train_dataset = PackagesDataset(
    train_images_filenames, train_images_directory, train_masks_directory, transform=train_transform
)
val_dataset = PackagesDataset(
    val_images_filenames, val_images_directory, val_masks_directory, transform=val_transform
)
test_dataset = PackagesInferenceDataset(
    test_images_filenames, test_images_directory, test_masks_directory, transform=test_transform
)

## Training functions

In [5]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Settings read by the training, validation and inference cells.
params = dict(
    device=device,
    lr=0.01,
    batch_size=8,
    num_workers=4,
    epochs=40,
)
print(f"The device is : {device}")

The device is : cuda


In [6]:
#these functions are modified to show less information 
#the output of 400 epochs of training takes up too much space
def train(train_loader, model, criterion, optimizer, scheduler, epoch, params):
    """Run one optimisation pass over ``train_loader``.

    Args:
        train_loader: iterable yielding ``(images, targets)`` batches.
        model: network to optimise; it is switched to training mode here.
        criterion: loss, called as ``criterion(outputs, targets)``.
        optimizer: optimiser that updates the parameters of ``model``.
        scheduler: learning-rate scheduler; ``scheduler.step(epoch)`` is called after every batch.
        epoch: current epoch number, forwarded to the scheduler.
        params: settings dict; only ``params["device"]`` is read.

    Returns:
        float: the sum over all batches of ``loss * batch_size``, i.e. the same
        accumulation that ``validate`` returns. Previously this value was computed
        and then discarded; callers that ignore the return value are unaffected.
    """
    model.train()
    running_loss = 0.0
    for images, targets in train_loader:
        optimizer.zero_grad()

        images = images.to(params["device"], non_blocking=True)
        targets = targets.to(params["device"], non_blocking=True)

        outputs = model(images)
        # add an axis at position 1 to the targets before computing the loss
        # (assumes the model output carries a singleton channel axis - TODO confirm)
        targets = torch.unsqueeze(targets, 1)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # NOTE(review): the scheduler is stepped once per batch with the same epoch value;
        # whether repeated calls are idempotent depends on PolynomialLRDecay - confirm.
        scheduler.step(epoch)
        # weight the batch loss by the batch size
        running_loss += loss.item()*images.size(0)
    return running_loss

def validate(val_loader, model, criterion, epoch, params):
    """Accumulate the loss of ``model`` over ``val_loader`` without tracking gradients.

    Args:
        val_loader: iterable yielding ``(images, targets)`` batches.
        model: network to evaluate; it is switched to evaluation mode here.
        criterion: loss, called as ``criterion(output, targets)``.
        epoch: accepted for signature symmetry with ``train``; not used.
        params: settings dict; only ``params["device"]`` is read.

    Returns:
        float: the sum over all batches of ``loss * batch_size`` (not averaged).
    """
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for batch_images, batch_targets in val_loader:
            batch_images = batch_images.to(params["device"], non_blocking=True)
            batch_targets = batch_targets.to(params["device"], non_blocking=True)
            # remove dimension 1 from the prediction before comparing it with the targets
            logits = model(batch_images).squeeze(1)
            batch_loss = criterion(logits, batch_targets)
            # weight the batch loss by the batch size
            total_loss += batch_loss.item()*batch_images.size(0)
    return total_loss

In [7]:
def predict(model, params, test_dataset, threshold=0.5):
    """Run ``model`` over ``test_dataset`` and return binarised masks.

    Args:
        model: network to run; it is switched to evaluation mode here.
        params: settings dict; ``"batch_size"``, ``"num_workers"`` and ``"device"`` are read.
        test_dataset: dataset whose items unpack as
            ``(image, mask, (original_height, original_width))``.
        threshold: probability at or above which a pixel is marked 1.0 in the
            predicted mask. Defaults to 0.5, the value that used to be hard-coded.

    Returns:
        list: one ``(predicted_mask, gt, original_height, original_width)`` tuple per
        sample, in dataset order; ``predicted_mask`` is a float NumPy array of 0.0/1.0.
    """
    test_loader = DataLoader(
        test_dataset, batch_size=params["batch_size"], shuffle=False, num_workers=params["num_workers"], pin_memory=True,
    )
    model.eval()
    predictions = []
    with torch.no_grad():
        for images, masks, (original_heights, original_widths) in test_loader:
            images = images.to(params["device"], non_blocking=True)
            output = model(images)
            # the model emits logits; sigmoid turns them into per-pixel probabilities
            probabilities = torch.sigmoid(output.squeeze(1))
            predicted_masks = (probabilities >= threshold).float().cpu().numpy()
            # split the batch back into one tuple per sample
            predictions.extend(
                zip(predicted_masks, masks.numpy(), original_heights.numpy(), original_widths.numpy())
            )
    return predictions

In [None]:
# Instantiate the stock FastSCNN with one output class on the selected device.
# NOTE(review): no later cell in this notebook references `fastscnn`; the models
# trained below are built from `fastscnn_mod` instead.
fastscnn = FastSCNN(in_channels=3, num_classes=1).to(params["device"])

## Model selection

In this section we will define a sequence of hyperparameters that will define the structure of the network. By iterating over these combinations of hyperparameters we will train different models one after the other and save the torch state_dict and the onnx model.

In [9]:
# Each tuple is (a, t, r, pp), unpacked in that order by the training loop:
#   a  - width multiplier
#   t  - bottleneck expansion rate
#   r  - bottleneck block repetition
#   pp - whether the Pyramid Pooling module is present
# These parameters make up the first 2 stages of simplification.
# After the models are trained and evaluated, a combination of the results from
# these 2 stages will make up the third stage of simplification.
# NOTE(review): (0.125, 2, 3, True) appears twice in a row, so that configuration
# is trained twice - confirm whether a different combination was intended.
parameters =[
    (1.0, 6, 3, True),    #baseline
    (0.75, 6, 3, True),
    (0.5, 6, 3, True),
    (0.25, 6, 3, True),
    (0.125, 6, 3, True),
    (0.5, 4, 3, True),
    (0.25, 4, 3, True),
    (0.125, 4, 3, True),
    (0.25, 2, 3, True),
    (0.125, 2, 3, True),
    (0.125, 2, 3, True),
    (1.0, 6, 3, False),
    (1.0, 6, 2, True),
    (1.0, 6, 1, True),
    (1.0, 6, 1, False),   
]

In [10]:
# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    train_dataset,
    batch_size=params["batch_size"],
    shuffle=True,
    num_workers=params["num_workers"],
    pin_memory=False,
)

# Validation only accumulates a loss over the whole split, so sample order is
# irrelevant; iterating in a fixed order keeps the evaluation repeatable.
val_loader = DataLoader(
    val_dataset,
    batch_size=params["batch_size"],
    shuffle=False,
    num_workers=params["num_workers"],
    pin_memory=False,
)

In [12]:
for a, t, r, pp in parameters:
    # Build a fresh network, loss, optimiser and learning-rate schedule for this configuration.
    model = fastscnn_mod(in_channels=3, num_classes=1, a=a, t=t, r=r, pp=pp)
    model = model.to(params["device"])
    criterion = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=params["lr"])
    scheduler = PolynomialLRDecay(
        optimizer,
        max_decay_steps=params["epochs"],
        end_learning_rate=0.0001,
        power=0.9,
    )

    # Snapshot the initial weights so there is always a checkpoint to restore.
    model_ckpt = copy.deepcopy(model.state_dict())
    best_loss = sys.float_info.max

    # Train the current model, one epoch per iteration.
    for epoch in tqdm(range(1, params["epochs"]+1)):
        train(train_loader, model, criterion, optimizer, scheduler, epoch, params)
        epoch_loss = validate(val_loader, model, criterion, epoch, params)
        # Only a strictly lower validation loss replaces the stored checkpoint.
        if not epoch_loss < best_loss:
            continue
        best_loss = epoch_loss
        model_ckpt = copy.deepcopy(model.state_dict())

    # Restore the weights with the lowest validation loss and run them on the test set.
    model.load_state_dict(model_ckpt)
    predictions = predict(model, params, test_dataset)

    # Display the intersection over union and the dice score for the current model.
    evaluator = Evaluate(predictions)
    iou, dice = evaluator.get_metrics().values()
    print(f"The Intersection over Union score is : {iou:.4f}")
    print(f"The Dice Coefficient is : {dice:.4f}")
    

The model has been initialized with parameters:
	 Width multiplier a =  1.0
	 Bottleneck expansion rate t =  6
	 Bottleneck block repetition r =  3
	 Presence of Pyramid Pooling module pp =  True


  0%|          | 0/40 [00:00<?, ?it/s]

The Intersection over Union score is : 0.9674
The Dice Coefficient is : 0.9834
The model has been initialized with parameters:
	 Width multiplier a =  0.75
	 Bottleneck expansion rate t =  6
	 Bottleneck block repetition r =  3
	 Presence of Pyramid Pooling module pp =  True


  0%|          | 0/40 [00:00<?, ?it/s]

The Intersection over Union score is : 0.9688
The Dice Coefficient is : 0.9841
The model has been initialized with parameters:
	 Width multiplier a =  0.5
	 Bottleneck expansion rate t =  6
	 Bottleneck block repetition r =  3
	 Presence of Pyramid Pooling module pp =  True


  0%|          | 0/40 [00:00<?, ?it/s]

The Intersection over Union score is : 0.9646
The Dice Coefficient is : 0.9819
The model has been initialized with parameters:
	 Width multiplier a =  0.25
	 Bottleneck expansion rate t =  6
	 Bottleneck block repetition r =  3
	 Presence of Pyramid Pooling module pp =  True


  0%|          | 0/40 [00:00<?, ?it/s]

The Intersection over Union score is : 0.9627
The Dice Coefficient is : 0.9810
The model has been initialized with parameters:
	 Width multiplier a =  0.125
	 Bottleneck expansion rate t =  6
	 Bottleneck block repetition r =  3
	 Presence of Pyramid Pooling module pp =  True


  0%|          | 0/40 [00:00<?, ?it/s]

The Intersection over Union score is : 0.9327
The Dice Coefficient is : 0.9650
The model has been initialized with parameters:
	 Width multiplier a =  0.5
	 Bottleneck expansion rate t =  4
	 Bottleneck block repetition r =  3
	 Presence of Pyramid Pooling module pp =  True


  0%|          | 0/40 [00:00<?, ?it/s]

The Intersection over Union score is : 0.9692
The Dice Coefficient is : 0.9843
The model has been initialized with parameters:
	 Width multiplier a =  0.25
	 Bottleneck expansion rate t =  4
	 Bottleneck block repetition r =  3
	 Presence of Pyramid Pooling module pp =  True


  0%|          | 0/40 [00:00<?, ?it/s]

The Intersection over Union score is : 0.9557
The Dice Coefficient is : 0.9773
The model has been initialized with parameters:
	 Width multiplier a =  0.125
	 Bottleneck expansion rate t =  4
	 Bottleneck block repetition r =  3
	 Presence of Pyramid Pooling module pp =  True


  0%|          | 0/40 [00:00<?, ?it/s]

The Intersection over Union score is : 0.9458
The Dice Coefficient is : 0.9720
The model has been initialized with parameters:
	 Width multiplier a =  0.25
	 Bottleneck expansion rate t =  2
	 Bottleneck block repetition r =  3
	 Presence of Pyramid Pooling module pp =  True


  0%|          | 0/40 [00:00<?, ?it/s]

The Intersection over Union score is : 0.9554
The Dice Coefficient is : 0.9771
The model has been initialized with parameters:
	 Width multiplier a =  0.125
	 Bottleneck expansion rate t =  2
	 Bottleneck block repetition r =  3
	 Presence of Pyramid Pooling module pp =  True


  0%|          | 0/40 [00:00<?, ?it/s]

The Intersection over Union score is : 0.9374
The Dice Coefficient is : 0.9675
The model has been initialized with parameters:
	 Width multiplier a =  0.125
	 Bottleneck expansion rate t =  2
	 Bottleneck block repetition r =  3
	 Presence of Pyramid Pooling module pp =  True


  0%|          | 0/40 [00:00<?, ?it/s]

The Intersection over Union score is : 0.9349
The Dice Coefficient is : 0.9662
The model has been initialized with parameters:
	 Width multiplier a =  1.0
	 Bottleneck expansion rate t =  6
	 Bottleneck block repetition r =  3
	 Presence of Pyramid Pooling module pp =  False


  0%|          | 0/40 [00:00<?, ?it/s]

The Intersection over Union score is : 0.9717
The Dice Coefficient is : 0.9856
The model has been initialized with parameters:
	 Width multiplier a =  1.0
	 Bottleneck expansion rate t =  6
	 Bottleneck block repetition r =  2
	 Presence of Pyramid Pooling module pp =  True


  0%|          | 0/40 [00:00<?, ?it/s]

The Intersection over Union score is : 0.9655
The Dice Coefficient is : 0.9824
The model has been initialized with parameters:
	 Width multiplier a =  1.0
	 Bottleneck expansion rate t =  6
	 Bottleneck block repetition r =  1
	 Presence of Pyramid Pooling module pp =  True


  0%|          | 0/40 [00:00<?, ?it/s]

The Intersection over Union score is : 0.9668
The Dice Coefficient is : 0.9830
The model has been initialized with parameters:
	 Width multiplier a =  1.0
	 Bottleneck expansion rate t =  6
	 Bottleneck block repetition r =  1
	 Presence of Pyramid Pooling module pp =  False


  0%|          | 0/40 [00:00<?, ?it/s]

The Intersection over Union score is : 0.9698
The Dice Coefficient is : 0.9846


# Model evaluation on test set


In [9]:
def predict(model, params, test_dataset, threshold=0.5):
    """Run ``model`` over ``test_dataset`` and return binarised masks.

    NOTE(review): a function named ``predict`` is already defined in the
    "Training functions" section; running this cell shadows that definition.

    Args:
        model: network to run; it is switched to evaluation mode here.
        params: settings dict; ``"batch_size"``, ``"num_workers"`` and ``"device"`` are read.
        test_dataset: dataset whose items unpack as
            ``(image, mask, (original_height, original_width))``.
        threshold: probability at or above which a pixel is marked 1.0 in the
            predicted mask. Defaults to 0.5, the value that used to be hard-coded.

    Returns:
        list: one ``(predicted_mask, gt, original_height, original_width)`` tuple per
        sample, in dataset order; ``predicted_mask`` is a float NumPy array of 0.0/1.0.
    """
    test_loader = DataLoader(
        test_dataset, batch_size=params["batch_size"], shuffle=False, num_workers=params["num_workers"], pin_memory=True,
    )
    model.eval()
    predictions = []
    with torch.no_grad():
        for images, masks, (original_heights, original_widths) in test_loader:
            images = images.to(params["device"], non_blocking=True)
            output = model(images)
            # the model emits logits; sigmoid turns them into per-pixel probabilities
            probabilities = torch.sigmoid(output.squeeze(1))
            predicted_masks = (probabilities >= threshold).float().cpu().numpy()
            # split the batch back into one tuple per sample
            predictions.extend(
                zip(predicted_masks, masks.numpy(), original_heights.numpy(), original_widths.numpy())
            )
    return predictions

In [10]:
# Score whichever network is currently bound to `model` on the test set.
predictions = predict(model, params, test_dataset)

# The metrics mapping is unpacked positionally: IoU first, Dice second.
evaluator = Evaluate(predictions)
metrics = evaluator.get_metrics()
iou, dice = metrics.values()
print(f"The Intersection over Union score is : {iou:.4f}")
print(f"The Dice Coefficient is : {dice:.4f}")

The Intersection over Union score is : 0.9666
The Dice Coefficient is : 0.9830


## Visualize some of the predictions alongside the ground truth masks

In [None]:
def _restore_size(mask, height, width):
    """Resize ``mask`` to ``height`` x ``width`` using nearest-neighbour interpolation."""
    return A.resize(mask, height=height, width=width, interpolation=cv2.INTER_NEAREST)


# Each entry of `predictions` is (predicted mask, ground-truth mask, original height, original width);
# both masks are brought back to the original image size.
predicted_masks = [
    _restore_size(network_mask, height, width) for network_mask, _, height, width in predictions
]
gt_masks = [
    _restore_size(ground_truth, height, width) for _, ground_truth, height, width in predictions
]


In [None]:
# One row per sample; the columns are: input image, ground-truth mask, predicted mask.
rows, cols = 4, 3
figure, ax = plt.subplots(nrows=rows, ncols=cols, figsize=(12, 10))
for i, image_filename in enumerate(test_images_filenames[:rows]):
    # the image is read with OpenCV and converted from BGR to RGB before plotting
    image = cv2.cvtColor(
        cv2.imread(os.path.join(test_images_directory, image_filename)),
        cv2.COLOR_BGR2RGB,
    )

    ax[i, 0].imshow(image)
    ax[i, 1].imshow(gt_masks[i], interpolation="nearest")
    ax[i, 2].imshow(predicted_masks[i], interpolation="nearest")

    # label every panel of the row and hide its axes
    for column, title in enumerate(("Image", "Ground truth mask", "Predicted mask")):
        ax[i, column].set_title(title)
        ax[i, column].set_axis_off()
plt.tight_layout()
plt.show()

## Export the model as torch state dict and ONNX graph

In [None]:
#select the name that the model will be saved with
model_name = "small400e"
model_path = os.path.join(c_dir, "model_checkpoints", model_name)
print(model_path)

Save the model as a torch state dict. When doing inference, the parameters in the state dict will be used to restore the model to this state.

In [None]:
# Write only the model's state_dict to "<model_path>.pt".
torch.save(model.state_dict(), model_path+".pt")

Export the model in onnx format and then check for errors with the built in onnx check_model function.

In order to export the model a dummy input needs to be given as input

In [None]:
#put the model in inference mode
model.eval()
#generate dummy input for onnx export
x = torch.randn(1, 3, 512, 512, requires_grad=True).cuda()
torch_out = model(x)

# Export the model
torch.onnx.export(model,                     # model being run
                  x,                         # model input (or a tuple for multiple inputs)
                  model_path+".onnx",       # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=11,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names = ['input'],   # the model's input names
                  output_names = ['output'], # the model's output names
                  operator_export_type=torch.onnx.OperatorExportTypes.ONNX_ATEN_FALLBACK,
                  dynamic_axes={'input' : {0 : 'batch_size'},    # variable length axes
                                'output' : {0 : 'batch_size'}})