3b

Dataset and DataLoader

In [None]:
# Mount Google Drive inside the Colab runtime so that files kept under
# MyDrive can be accessed through the local filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader
import numpy as np
import time
import requests
from torchvision.datasets import ImageFolder
from torchvision import models
from PIL import Image
from shutil import copyfile
import os

In [None]:
# Make the project folder on Drive the working directory before importing the
# project's dataset classes (presumably so the local `datasets` package is
# importable -- verify against the runtime's sys.path).
PROJECT_ROOT = '/content/drive/MyDrive/MLDL_project'
os.chdir(PROJECT_ROOT)

from datasets.cityscapes import CityscapesDataset
from datasets.gta5 import GTA5

In [None]:
import torchvision.transforms as transforms

# Shared settings for the pipelines below.
TRAIN_SIZE = (720, 1280)  # (height, width) used for every training resize/crop
NORM_MEAN = [0.485, 0.456, 0.406]  # per-channel mean passed to Normalize
NORM_STD = [0.229, 0.224, 0.225]   # per-channel std passed to Normalize

# Plain (non-augmented) pipeline for training images: resize, convert to a
# tensor, then normalize each channel.
transform_train = transforms.Compose([
    transforms.Resize(TRAIN_SIZE, interpolation=transforms.InterpolationMode.NEAREST),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# Plain pipeline for training label maps: nearest-neighbour resize only, so
# that no new (interpolated) label values are created.
target_transform_train = transforms.Compose([
    transforms.Resize(TRAIN_SIZE, interpolation=transforms.InterpolationMode.NEAREST),
])

# Augmented pipeline for training images. The base pipeline is appended at
# the end so the augmented sample is resized/tensorized/normalized the same way.
# (The double underscore in the name is kept because the dataset cell uses it.)
#
# NOTE(review): this pipeline and `target_transform_train_aug` are two separate
# Compose objects, so each one draws its own random flip/rotation/crop
# parameters. Unless the GTA5 dataset class synchronizes the random state
# between the two calls, image and label end up geometrically misaligned --
# TODO confirm in datasets/gta5.py.
transform__train_aug = transforms.Compose([
    # flip / rotation / crop
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=30),
    transforms.RandomResizedCrop(size=TRAIN_SIZE, interpolation=transforms.InterpolationMode.NEAREST),

    # photometric jitter (image only)
    transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.1),

    # gaussian blur (image only)
    transforms.GaussianBlur(kernel_size=5, sigma=(0.1, 2.0)),

    # Fixed: previously referenced the undefined name `transform`.
    *transform_train.transforms,
])

# Augmented pipeline for training label maps: geometric transforms only.
#
# NOTE(review): RandomRotation fills the uncovered corners with 0 by default;
# for a label map that would be a regular class id rather than the ignore
# index used by the loss -- TODO confirm and consider passing `fill`.
target_transform_train_aug = transforms.Compose([
    # flip / rotation / crop
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomRotation(degrees=30),
    transforms.RandomResizedCrop(size=TRAIN_SIZE, interpolation=transforms.InterpolationMode.NEAREST),

    # Fixed: previously referenced the undefined name `target_transform`.
    *target_transform_train.transforms,
])

# Validation images are only converted to tensors and normalized (no resize,
# no augmentation).
transform_val = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
])

# you need to use other data augmentation techniques
# ........

In [None]:
# GTA5 training set, using the augmented image and label pipelines.
train_dataset = GTA5(
    '/content/drive/MyDrive/MLDL_project/datasets/GTA5',
    target_transform=target_transform_train_aug,
    transform=transform__train_aug,
)

# Cityscapes 'val' split, using the non-augmented image pipeline.
val_dataset = CityscapesDataset(
    '/content/drive/MyDrive/MLDL_project/datasets/Cityspaces',
    split='val',
    transform=transform_val,
)

In [None]:
# Wrap both datasets in DataLoaders; only the training loader is shuffled.
from torch.utils.data import DataLoader

BATCH_SIZE = 4
NUM_WORKERS = 8

dataloader_train = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
)
dataloader_val = DataLoader(
    val_dataset,
    shuffle=False,
    batch_size=BATCH_SIZE,
    num_workers=NUM_WORKERS,
)



In [None]:
# Quick sanity check: report how many samples each split contains.
num_train_samples, num_val_samples = len(train_dataset), len(val_dataset)

print(
    f'Number of training samples: {num_train_samples}',
    f'Number of validation samples: {num_val_samples}',
    sep='\n',
)

Number of training samples: 2500
Number of validation samples: 1572


Build model

In [None]:
# Build BiSeNet with a ResNet-18 context path and move it to the GPU.
from models.bisenet.build_bisenet import BiSeNet

NUM_CLASSES = 19

model = BiSeNet(context_path='resnet18', num_classes=NUM_CLASSES)
model = model.cuda()

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 149MB/s]
Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:01<00:00, 149MB/s]


Training and validation process

In [None]:
# This is just used to test if you can train your model with this dataloader
# from utils import poly_lr_scheduler
# from utils import fast_hist
# from utils import per_class_iou
# from utils import total_hist

# def total_hist(outputs, labels, num_classes):
#     hist = 0
#     for i in range(len(outputs)):
#         output, label = outputs[i].cpu().detach().numpy().reshape(-1,), labels[i].cpu().detach().numpy().reshape(-1,)
#         hist += fast_hist(label, output, num_classes)
#     return hist
#
# model.train()
# running_loss = 0.0
# hist = 0
# criterion = nn.CrossEntropyLoss(ignore_index=255)
# optimizer = optim.Adam(model.parameters(), lr=0.0001)  # The batch size is small (4), so we choose a small initial learning rate

# for i, (inputs, labels) in enumerate(dataloader_train, 0):
#     inputs, labels = inputs.cuda(), labels.cuda()
#     optimizer.zero_grad()

#     outputs, _, _ = model(inputs)
#     print(outputs.shape, labels.shape)
#     loss = criterion(outputs, labels)
#     loss.backward()
#     optimizer.step()

#     running_loss += loss.item()
#     outputs = torch.argmax(outputs, dim=1)
#     hist += total_hist(outputs, labels, 19)
#     break
# avg_loss = running_loss
# miou = np.mean(per_class_iou(hist))
# print(f"Avg. Training Loss: {avg_loss}, mIoU: {miou}")

torch.Size([4, 19, 720, 1280]) torch.Size([4, 720, 1280])
Avg. Training Loss: 3.0851752758026123, mIoU: 0.015068737094135707


In [None]:
# This is just used to test if you can train your model with this dataloader
# model.eval()
# running_loss = 0.0
# hist = 0
# with torch.no_grad():
#   for i, (inputs, labels) in enumerate(dataloader_val, 0):
#       inputs, labels = inputs.cuda(), labels.cuda()

#       outputs = model(inputs)
#       print(outputs.shape, labels.shape)
#       loss = criterion(outputs, labels)

#       running_loss += loss.item()
#       outputs = torch.argmax(outputs, dim=1)
#       hist += total_hist(outputs, labels, 19)
#       break
#   avg_loss = running_loss
#   miou = np.mean(per_class_iou(hist))
#   print(f"Avg. Training Loss: {avg_loss}, mIoU: {miou}")

torch.Size([4, 19, 512, 1024]) torch.Size([4, 512, 1024])
Avg. Training Loss: 3.211564540863037, mIoU: 0.006056246590488598


In [None]:
from train import train
from validate import validation

In [None]:
import torch.optim as optim
from utils import poly_lr_scheduler

# Loss and optimizer. Pixels labelled 255 are excluded from the loss.
criterion = nn.CrossEntropyLoss(ignore_index=255)
# Fixed: the original `lr=lr=(2.5e-2)/4` was a SyntaxError.
optimizer = optim.SGD(model.parameters(), lr=(2.5e-2) / 4, momentum=0.9, weight_decay=1e-4)

# training model
epochs = 5
separator = '--------------------------------------------------------------------------------'

# Metrics and weight snapshots, one entry per validated epoch.
miou_train_list = []
miou_val_list = []
pmiou_val_list = []
# NOTE: this list shadows the `models` module imported from torchvision at the
# top of the notebook; the name is kept because the checkpoint-saving cell
# indexes into it.
models = []
for epoch in range(epochs):
    print(separator)
    _, miou_train, miou_per_class_train = train(epoch, model, dataloader_train, criterion, optimizer)
    # curr_lr = poly_lr_scheduler(optimizer = optimizer, init_lr = 0.0001, iter = epoch, lr_decay_iter=1, max_iter=epochs, power=0.9)

    # Validate on epochs 0, 5, 10, ... and record metrics for those epochs
    # only (the plotting cell assumes the same stride of 5). With epochs = 5
    # this means only epoch 0 is validated.
    if epoch % 5 == 0:
        _, miou_val, miou_per_class_val = validation(model, dataloader_val, criterion)
        print(separator)
        miou_train_list.append(miou_train)
        miou_val_list.append(miou_val)
        pmiou_val_list.append(miou_per_class_val)
        # state_dict() returns references to the live parameters, so storing
        # it directly would make every snapshot equal to the final weights.
        # Keep an independent copy on the CPU instead (also saves GPU memory).
        models.append({
            name: tensor.detach().cpu().clone()
            for name, tensor in model.state_dict().items()
        })


print('Finished Training')

--------------------------------------------------------------------------------




In [None]:
# Save the snapshot with the highest validation mIoU.
import os

DIR = '/content/drive/MyDrive/MLDL_project/models/bisenet/trained_models/'
# exist_ok avoids the separate exists() check.
os.makedirs(DIR, exist_ok=True)
PATH = os.path.join(DIR, f'biseNet_gta5_aug_epoch{epochs}.pth')

# Index of the recorded snapshot with the best validation mIoU.
best_idx = int(np.argmax(np.array(miou_val_list)))

# Rebuild the network and load the best weights into it. From here on `model`
# refers to this best-performing network.
model = BiSeNet(num_classes = 19, context_path='resnet18').cuda()
model.load_state_dict(models[best_idx])

# torch.save overwrites an existing file, so the previous checkpoint is no
# longer deleted up front; it survives if building/loading the model fails.
torch.save(model.state_dict(), PATH)

In [None]:
# Plot training vs. validation mIoU for the epochs at which validation ran.
import matplotlib.pyplot as plt

# x positions: 1-based epoch numbers with the same stride of 5 that the
# training loop uses when recording metrics.
epochs_list = np.arange(1, epochs+1, 5)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs_list, miou_train_list, color='r', linestyle='-', marker='o', label='Training mIOU')
ax.plot(epochs_list, miou_val_list, color='b', linestyle='--', marker='o', label='Val mIOU')

ax.set(
    title='Training and validation mIOU over Epochs',
    xlabel='Epochs',
    ylabel='mIOU',
)
ax.legend()
ax.grid(True)

plt.show()

Flops and Number of parameters

In [None]:
# Install (or upgrade to the latest) fvcore, which the next cell imports for
# FLOP counting.
!pip install -U fvcore

In [None]:
from fvcore.nn import FlopCountAnalysis, flop_count_table

# Take the spatial size from one training sample. The image pipeline ends with
# ToTensor, so the sample is channel-first and height/width are the LAST two
# dimensions (the original read shape[0]/shape[1], i.e. channels/height).
sample_image = train_dataset[0][0]
height, width = sample_image.shape[-2:]
image = torch.zeros((1, 3, height, width)).cuda()

# Analyse the network in inference mode.
# NOTE(review): the commented-out smoke tests suggest BiSeNet returns extra
# outputs in train mode; eval mode avoids counting them -- confirm.
model.eval()
flops = FlopCountAnalysis(model, image)
# Print the actual count rather than the analysis object itself.
print(f'Total FLOPs: {flops.total()}')

total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters: {total_params}')

In [None]:
# Detailed breakdown of parameter and FLOP counts.
flop_table = flop_count_table(flops)
print(flop_table)

Latency and FPS

In [None]:
# Latency and FPS of a single-image forward pass on the GPU.
import time

# The sample is channel-first, so height/width are the last two dimensions
# (the original read shape[0]/shape[1], i.e. channels/height).
sample_image = train_dataset[0][0]
height, width = sample_image.shape[-2:]

# Random H x W x 3 image in [0, 1], pushed through the validation pipeline
# (ToTensor + Normalize). The original called an undefined `transform`.
image = np.random.randint(0, 256, (height, width, 3)) / 255.
image = transform_val(image)
image = torch.unsqueeze(image, dim=0).float().cuda()

iterations = 1000
warmup_iterations = 10
latency = np.zeros(iterations)
fps = np.zeros(iterations)

# Measure in inference mode and without autograd bookkeeping.
model.eval()
with torch.no_grad():
    # Warm-up passes so one-off start-up costs do not skew the first samples.
    for _ in range(warmup_iterations):
        model(image)
    torch.cuda.synchronize()

    for i in range(iterations):
        start = time.perf_counter()
        output = model(image)
        # CUDA kernels run asynchronously; wait for them to finish so the
        # timer covers the actual computation, not just the kernel launch.
        torch.cuda.synchronize()
        end = time.perf_counter()
        time_diff_seconds = end - start
        latency[i] = time_diff_seconds
        fps[i] = 1/time_diff_seconds

# Latency is reported in milliseconds.
meanLatency = np.mean(latency)*1000
stdLatency = np.std(latency)*1000
meanFPS = np.mean(fps)
stdFPS = np.std(fps)

print(f"Mean Latency: {meanLatency} ms")
print(f"Std Latency: {stdLatency} ms")
print(f"Mean FPS: {meanFPS}")
print(f"Std FPS: {stdFPS}")