# SegNet implementation in PyTorch


Move the frame to colab [**Done**]

Mount data [**Done**]

Fix the training function [**Done**]

Report training accuracy [**Done**]

Learning Rate decay and cooldown [**Done**]

Save checkpoints [**Done**]

Visualize an output [**On**]

Calculate class weights [**Pending**]

Try different loss functions (Focal, boundary) [**Pending**]

Quantitative evaluation (IoU, accuracy, etc.) [**Pending**]

Video visualization [**Pending**]



In [8]:
# import necessary libraries
import torch
from torchvision import models
import torchsummary
import torch.nn as nn
import torch.nn.functional as F
import cv2
import numpy as np
from torch.utils.data import DataLoader
import time
import os


In [9]:
# Attach Google Drive to the Colab runtime at a named mount point
from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

# !ls gdrive/MyDrive

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [10]:
# Root folder of the CamVid dataset on the mounted Google Drive
camvid_path = r'/content/gdrive/MyDrive/CamVid'
# IPython shell escape: list the dataset folder to confirm it is visible
!ls '/content/gdrive/MyDrive/CamVid'

# List the project scripts; this path is relative to the current working directory
!ls gdrive/MyDrive/Segmentation/code

# Import the project's model class and data helpers straight from the Drive folder.
# NOTE(review): these package-style imports presumably rely on the working
# directory being the parent of `gdrive` (e.g. /content) — confirm.
from gdrive.MyDrive.Segmentation.code.model import SegNet
import gdrive.MyDrive.Segmentation.code.data as data

class_dict.csv	test  test_labels  train  train_labels	val  val_labels
data.py  eval.py  loss.py  model.py  __pycache__  train.py  train_resnet.py


## Reporting/Evaluation Functions

In [12]:
def avg_pixelwise_accuracy(model, dataset):
  """Return the fraction of label elements in `dataset` that `model` predicts correctly.

  Args:
    model: network called as `model(imgs)`; its output is arg-maxed over
      dim 1 to obtain the predicted class index per element.
    dataset: dataset whose collated batches are dicts with an 'X' entry
      (inputs) and a 'y' entry (labels). Only index 1 along dim 1 of 'y'
      is compared with the prediction [Need to fix y].

  Returns:
    `correct / total` over all compared label elements, or -1 when
    `dataset` is falsy (None or empty).

  Note:
    The model is switched to eval mode and left there; a caller that keeps
    training has to call `model.train()` again. Batches are moved to the
    module-level DEVICE.
  """
  if not dataset:
    return -1

  # change to eval mode
  model.eval()

  # get the dataloader using large batch since only forward pass required
  loader = DataLoader(dataset=dataset, batch_size=16)

  correct, total = 0, 0
  # Inference only: without no_grad() every forward pass would build an
  # autograd graph, costing memory and time for gradients nobody uses.
  with torch.no_grad():
    for batch in loader:
      imgs = batch['X'].to(DEVICE)
      labels = batch['y'][:, 1, :, :].to(DEVICE)

      # argmax over dim 1 gives the predicted class index
      predicted = torch.argmax(model(imgs), dim=1)
      total += labels.nelement()
      correct += predicted.eq(labels).sum().item()

  return correct / total


## Save/Load checkpoints

In [13]:
def save_checkpoints(model, filename, epoch, optimizer, loss, train_acc, valid_acc):
  """Write a training checkpoint into the Drive checkpoint folder.

  The checkpoint is a dict with the keys 'epoch', 'model_state_dict',
  'optimizer_state_dict', 'loss', 'train_acc' and 'valid_acc'.

  Args:
    model: module whose `state_dict()` is stored.
    filename: file name (not a full path) inside the checkpoint folder.
    epoch: epoch number recorded in the checkpoint.
    optimizer: optimizer whose `state_dict()` is stored.
    loss: loss value to record; a one-element tensor is stored as a plain
      Python number.
    train_acc: training accuracy to record, stored as given.
    valid_acc: validation accuracy to record, stored as given.
  """
  dirname = "/content/gdrive/MyDrive/Segmentation/check_points"
  # torch.save does not create missing parent folders, so make sure it exists
  os.makedirs(dirname, exist_ok=True)

  # Keep the metadata free of device-bound / graph-attached tensors so the
  # checkpoint can be opened on any machine.
  if torch.is_tensor(loss) and loss.numel() == 1:
    loss = loss.item()

  save_path = os.path.join(dirname, filename)
  print(save_path)

  checkpoint = {
      'epoch': epoch,
      'model_state_dict': model.state_dict(),
      'optimizer_state_dict': optimizer.state_dict(),
      'loss': loss,
      'train_acc': train_acc,
      'valid_acc': valid_acc,
  }
  torch.save(checkpoint, save_path)


def load_checkpoints(filename, transfer_learning=True):
  """Rebuild a SegNet from a checkpoint file and print the stored metrics.

  Args:
    filename: checkpoint file name (not a full path) inside the Drive
      checkpoint folder.
    transfer_learning: forwarded to the SegNet constructor.

  Returns:
    A SegNet built with the module-level IN_CHANNELS / NUM_CLASS and the
    checkpoint's 'model_state_dict' loaded. The model is not moved to any
    device here; call `.to(...)` on the result if needed.
  """
  dirname = "/content/gdrive/MyDrive/Segmentation/check_points"
  file_path = os.path.join(dirname, filename)

  # Map every stored tensor to the CPU: without this, a checkpoint written
  # from a CUDA run cannot be opened on a CPU-only runtime.
  checkpoint = torch.load(file_path, map_location="cpu")

  model = SegNet(IN_CHANNELS, NUM_CLASS, transfer_learning=transfer_learning)
  model.load_state_dict(checkpoint['model_state_dict'])

  print("Model Loaded: Epoch#{}\tLoss:{:.4f}\tTrainAcc:{:.4f}\tValidAcc:{:.4f}".format(
      checkpoint['epoch'],
      checkpoint['loss'],
      checkpoint['train_acc'],
      checkpoint['valid_acc'],
  ))
  return model


# model2 = load_checkpoints("Epoch1_loss59.1495_trainacc31.217_valacc28.866.pth")
# model2

## Train

In [14]:
# train function
def train(model, optimizer, loss_fn, X_train, y_train, X_valid=None, y_valid=None):
  """Train `model` for NUM_EPOCHS epochs, logging metrics and saving checkpoints.

  Reads the module-level BATCH_SIZE, NUM_WORKERS, NUM_EPOCHS and DEVICE.

  Args:
    model: network to optimise; called as `model(imgs)`.
    optimizer: optimizer stepped once per batch; the learning rate of its
      first param group is divided by 3 every 20th epoch.
    loss_fn: callable `loss_fn(out, labels)` returning a scalar tensor.
    X_train, y_train: training inputs/labels passed to data.CamVidDataset.
    X_valid, y_valid: optional validation inputs/labels. Without them the
      reported validation accuracy is the -1 sentinel of
      avg_pixelwise_accuracy scaled by 100.

  Returns:
    None. Progress is printed once per epoch; a checkpoint is written
    whenever training accuracy (in percent) beats the previous best by more
    than `save_gap`.
  """
  # get the dataloader
  train_dataset = data.CamVidDataset(X_train, y_train)
  train_loader = DataLoader(dataset=train_dataset,
                            batch_size=BATCH_SIZE,
                            num_workers=NUM_WORKERS,
                            pin_memory=False,
                            shuffle=True,
                            drop_last=True,
                            worker_init_fn=None
                            )

  valid_dataset = data.CamVidDataset(X_valid, y_valid) if X_valid is not None else None

  # for checkpoints: only save once train accuracy exceeds acc_recoder + save_gap
  checkpoint_path_template = "Epoch{}_loss{:.4f}_trainacc{:.3f}_valacc{:.3f}.pth"
  acc_recoder, save_gap = 97.7, 0.15

  # start training
  for epoch in range(NUM_EPOCHS):

    # TODO: visualize a segmentation

    # Learning Rate Decay [Optional]
    if (epoch + 1) % 20 == 0:
      old_lr = optimizer.param_groups[0]['lr']
      optimizer.param_groups[0]['lr'] = old_lr / 3
      print("<=============== Learning Rate {} -> {}===== ==========>".format(old_lr, optimizer.param_groups[0]['lr']))

    # avg_pixelwise_accuracy() leaves the model in eval mode at the end of
    # every epoch, so switch back to training mode once per epoch.
    model.train()

    epoch_loss = 0.0
    t_start = time.time()
    for batch in train_loader:

      # mount to GPU if available [Need to fix y]
      imgs, labels = batch['X'].to(DEVICE), batch['y'][:, 1, :, :].to(DEVICE)

      out = model(imgs)
      loss = loss_fn(out, labels)   # note: soft-max should not be used here since it's included in nn.CrossEntropyLoss
      loss.backward()
      optimizer.step()
      optimizer.zero_grad()

      # Accumulate a plain float: summing the loss tensor itself would keep
      # every batch's autograd history (and device memory) alive.
      epoch_loss += loss.item()

    delta = time.time() - t_start

    # accuracies are reported in percent
    train_acc = avg_pixelwise_accuracy(model, train_dataset) * 100
    valid_acc = avg_pixelwise_accuracy(model, valid_dataset) * 100
    print("Epoch #{}\tLoss: {:.8f}\tTrain Acc: {:.6f}%\tValid Acc: {:.6f}%\tTime: {:.2f}s".format(epoch+1, epoch_loss, train_acc, valid_acc, delta))

    # Save checkpoints
    if train_acc > acc_recoder + save_gap:
      acc_recoder = train_acc
      checkpoint_name = checkpoint_path_template.format(epoch+1, epoch_loss, train_acc, valid_acc)
      save_checkpoints(model, checkpoint_name, epoch+1, optimizer, epoch_loss, train_acc, valid_acc)



In [15]:
# Read the three CamVid splits from their sub-folders of camvid_path
(X_train, y_train), (X_valid, y_valid), (X_test, y_test) = (
    data.load_data("{}/{}".format(camvid_path, split))
    for split in ("train", "val", "test")
)

split_sizes = [len(X_train), len(X_valid), len(X_test)]
print("Loaded {} training samples, {} validation samples, {} testing samples".format(*split_sizes))

Loaded 367 training samples, 101 validation samples, 233 testing samples


In [16]:
# for reproduction
torch.manual_seed(123)

# Pre config: channels of the input images and number of output classes
IN_CHANNELS = 3
NUM_CLASS = 2

# training parameters
NUM_EPOCHS = 200
LEARNING_RATE = 0.1   # will decrease with epoch growing larger
BATCH_SIZE = 8
# is_available must be *called*: the bare function object is always truthy,
# which selected "cuda" even on runtimes without a GPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# per-class weights handed to the cross-entropy loss below
LABEL_WEIGHTS = torch.FloatTensor([0.5, 2.3])

# NOTE: WEIGHT_DECAY and PIN_MEMORY are defined here but not passed to the
# optimizer created in this cell.
WEIGHT_DECAY = 0.0005
NUM_WORKERS = 16
PIN_MEMORY = False

# model, optimizer and loss; built from the constants above so they cannot
# drift apart from what load_checkpoints() uses
model = SegNet(IN_CHANNELS, NUM_CLASS, transfer_learning=True).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
loss = nn.CrossEntropyLoss(weight=LABEL_WEIGHTS).to(DEVICE)


In [17]:
# Release unused cached GPU memory, then start the training run
torch.cuda.empty_cache()
train(
    model=model,
    optimizer=optimizer,
    loss_fn=loss,
    X_train=X_train,
    y_train=y_train,
    X_valid=X_valid,
    y_valid=y_valid,
)



Epoch #1	Loss: 48.54034805	Train Acc: 31.215397%	Valid Acc: 28.865068%	Time: 8.57s
Epoch #2	Loss: 22.10857201	Train Acc: 58.036225%	Valid Acc: 62.433048%	Time: 8.51s
Epoch #3	Loss: 10.51165962	Train Acc: 86.396935%	Valid Acc: 90.604676%	Time: 8.55s
Epoch #4	Loss: 7.55886269	Train Acc: 91.002559%	Valid Acc: 87.954992%	Time: 8.56s
Epoch #5	Loss: 6.73101377	Train Acc: 94.106141%	Valid Acc: 91.291722%	Time: 8.54s
Epoch #6	Loss: 5.90004253	Train Acc: 92.814763%	Valid Acc: 88.709525%	Time: 8.56s
Epoch #7	Loss: 5.52414322	Train Acc: 95.314037%	Valid Acc: 95.957477%	Time: 8.54s
Epoch #8	Loss: 5.15556908	Train Acc: 91.782772%	Valid Acc: 87.428371%	Time: 8.55s
Epoch #9	Loss: 5.19527054	Train Acc: 95.640169%	Valid Acc: 95.926221%	Time: 8.59s
Epoch #10	Loss: 5.59613466	Train Acc: 95.733177%	Valid Acc: 95.739610%	Time: 8.63s
Epoch #11	Loss: 4.94937801	Train Acc: 94.066700%	Valid Acc: 92.559833%	Time: 8.63s
Epoch #12	Loss: 4.87129259	Train Acc: 95.818430%	Valid Acc: 95.964364%	Time: 8.64s
Epoch #13	