3a

Dataset and DataLoader

In [3]:
# Mount Google Drive inside the Colab VM so the project files stored on Drive
# are reachable through the local filesystem.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import itertools
import os
import time
from shutil import copyfile

import numpy as np
import requests
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import Image
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torchvision import models
from torchvision.datasets import ImageFolder

In [5]:
# Switch the working directory to the project root on Drive BEFORE importing:
# the project-local `datasets` package below is presumably resolved relative to
# the current working directory, so the order of these statements matters.
os.chdir('/content/drive/MyDrive/MLDL_project')
from datasets.cityscapes import CityscapesDataset
from datasets.gta5 import GTA5

In [6]:
import torchvision.transforms as transforms

# Channel-wise mean/std used to normalise the RGB input (the standard ImageNet
# statistics). Shared by the training and validation pipelines.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

# Resolution the source (GTA5) samples are resized to, as (height, width).
TRAIN_SIZE = (720, 1280)

# Image pipeline for the source domain. Colour images are resized with BILINEAR
# interpolation: NEAREST (used previously) produces aliased images and is only
# appropriate for label maps, where class ids must not be blended.
transform_train = transforms.Compose([
    transforms.Resize(TRAIN_SIZE, interpolation=transforms.InterpolationMode.BILINEAR),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Label pipeline for the source domain: same output size as the image, but
# NEAREST interpolation so that no new (invalid) class ids are created.
target_transform_train = transforms.Compose([
    transforms.Resize(TRAIN_SIZE, interpolation=transforms.InterpolationMode.NEAREST),
])

# Image pipeline for the target-domain datasets: tensor conversion and
# normalisation only, no resizing is applied here.
transform_val = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

In [7]:
# Source domain: GTA5 images with their resized label maps.
train_dataset_source = GTA5(
    '/content/drive/MyDrive/MLDL_project/datasets/GTA5',
    transform=transform_train,
    target_transform=target_transform_train,
)

# Target domain: both Cityscapes splits are read from the same root folder and
# share the same image transform; only the split differs.
CITYSCAPES_ROOT = '/content/drive/MyDrive/MLDL_project/datasets/Cityspaces'
train_dataset_target = CityscapesDataset(CITYSCAPES_ROOT, transform=transform_val, split='train')
val_dataset = CityscapesDataset(CITYSCAPES_ROOT, transform=transform_val, split='val')

In [8]:
# Wrap every dataset in a DataLoader. All loaders share batch size and worker
# count; only the two training loaders shuffle their samples.
from torch.utils.data import DataLoader

_loader_kwargs = dict(batch_size=4, num_workers=8)

dataloader_train_source = DataLoader(train_dataset_source, shuffle=True, **_loader_kwargs)
dataloader_train_target = DataLoader(train_dataset_target, shuffle=True, **_loader_kwargs)
dataloader_val = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)



In [9]:
# Sanity check: report how many samples each dataset exposes.
num_train_samples_source, num_train_samples_target, num_val_samples = (
    len(train_dataset_source),
    len(train_dataset_target),
    len(val_dataset),
)

print(
    f'Number of source training samples: {num_train_samples_source}',
    f'Number of target training samples: {num_train_samples_target}',
    f'Number of validation samples: {num_val_samples}',
    sep='\n',
)

Number of source training samples: 2500
Number of target training samples: 1572
Number of validation samples: 500


Build model

In [10]:
# Build the segmentation network: BiSeNet with a ResNet-18 context path and
# 19 output classes, moved to the GPU.
# NOTE(review): whether the backbone weights are pre-trained is decided inside
# BiSeNet and is not visible from this notebook.

from models.bisenet.build_bisenet import BiSeNet

model = BiSeNet(num_classes=19, context_path='resnet18')
model = model.cuda()

In [11]:
# Build the domain discriminator. It receives the 19-channel output of the
# segmentation network, hence num_classes=19, and also lives on the GPU.
from models.discriminator.discriminator import FCDiscriminator

model_D = FCDiscriminator(num_classes=19)
model_D = model_D.cuda()

Training process

In [None]:
def train(epoch, model, model_D, dataloader_train_source, dataloader_train_target, criterion, criterion_D, optimizer, optimizer_D):
  """Run one epoch of adversarial domain-adaptation training.

  For every pair of (source batch, target batch) the function
    1. trains the segmentation network `model` with the supervised loss on the
       source batch and, when a target batch is available, with an adversarial
       loss that pushes the discriminator to label target predictions as
       "source";
    2. trains the discriminator `model_D` to tell source predictions from
       target predictions (on detached predictions, so no gradient reaches
       `model`).

  Reads the module-level globals `source_label`, `target_label` and
  `lambda_adv_target`, which must be defined before this function is called.
  NOTE(review): `total_hist` and `per_class_iou` are not defined or imported
  anywhere in this notebook - they must be made available before calling.

  Args:
    epoch: zero-based epoch index, used only for the progress message.
    model: segmentation network; in training mode it must return three
      outputs, of which only the first one is used here.
    model_D: discriminator applied to the softmax of the segmentation output.
    dataloader_train_source: loader yielding (image, label) source batches.
    dataloader_train_target: loader yielding target batches; only the image
      (first element) is used.
    criterion: segmentation loss, called as criterion(prediction, label).
    criterion_D: discriminator loss, called as criterion_D(output, target).
    optimizer: optimizer of `model`.
    optimizer_D: optimizer of `model_D`.

  Returns:
    (loss_seg, loss_adv_target, loss_D, miou, miou_per_class), where the three
    losses are sums over the epoch divided by the loader lengths.
  """
  model.train()
  model_D.train()
  loss_seg_value = 0.0
  loss_adv_target_value = 0.0
  loss_D_value = 0.0
  hist = 0

  paired_batches = itertools.zip_longest(dataloader_train_source, dataloader_train_target, fillvalue=None)
  for data_source, data_target in paired_batches:
    if data_source is None:
      # The source loader ran out first: without labelled data there is no
      # supervised step, so the epoch ends here instead of crashing.
      break

    inputs_s, labels_s = data_source[0].cuda(), data_source[1].cuda()
    has_target = data_target is not None
    if has_target:
      inputs_t = data_target[0].cuda()  # target labels are never used

    optimizer.zero_grad()
    optimizer_D.zero_grad()

    # ---- train G (segmentation network) ----

    # don't accumulate grads in D
    for param in model_D.parameters():
      param.requires_grad = False

    # train with source
    pre_s, _, _ = model(inputs_s)
    loss_seg = criterion(pre_s, labels_s)
    loss_seg.backward()
    loss_seg_value += loss_seg.item()

    # train with target: fool D into predicting "source" for target outputs
    if has_target:
      pre_t, _, _ = model(inputs_t)
      D_out = model_D(F.softmax(pre_t, dim=1))  # softmax over the class channel
      loss_adv_target = criterion_D(D_out, torch.full_like(D_out, source_label))
      loss_adv = lambda_adv_target * loss_adv_target
      loss_adv.backward()
      loss_adv_target_value += loss_adv_target.item()

    # ---- train D (discriminator) ----

    # bring back requires_grad
    for param in model_D.parameters():
      param.requires_grad = True

    # train with source (detached: gradients must not flow back into G)
    pre_s = pre_s.detach()
    D_out = model_D(F.softmax(pre_s, dim=1))
    loss_D = criterion_D(D_out, torch.full_like(D_out, source_label))
    loss_D.backward()
    loss_D_value += loss_D.item()

    # train with target
    if has_target:
      pre_t = pre_t.detach()
      D_out = model_D(F.softmax(pre_t, dim=1))
      loss_D = criterion_D(D_out, torch.full_like(D_out, target_label))
      loss_D.backward()
      loss_D_value += loss_D.item()

    optimizer.step()
    optimizer_D.step()

    # accumulate the statistics for the training mIoU on the source batch
    _, predicted = pre_s.max(1)
    hist += total_hist(predicted, labels_s, 19)

  miou_per_class = per_class_iou(hist)
  miou = np.mean(miou_per_class)

  # pay attention, this is different with AdaptSegNet
  loss_seg_value = loss_seg_value / len(dataloader_train_source)
  loss_adv_target_value = loss_adv_target_value / len(dataloader_train_target)
  loss_D_value = loss_D_value / (len(dataloader_train_source) + len(dataloader_train_target))
  print(f"Epoch{epoch+1}, Loss_seg: {loss_seg_value}, Loss_adv: {loss_adv_target_value}, Loss_D: {loss_D_value}, mIOU: {miou}")

  return loss_seg_value, loss_adv_target_value, loss_D_value, miou, miou_per_class

In [None]:
def validation(model, model_D, dataloader_train_source, dataloader_train_target, criterion, criterion_D, optimizer, optimizer_D):
  """Placeholder for the validation step - not implemented yet.

  The parameter list mirrors `train` (minus `epoch`); the last parameter is
  named `optimizer_D` because a duplicated `optimizer` parameter is a
  SyntaxError.
  NOTE(review): the parameters were copied from `train`; a validation routine
  most likely needs a validation loader instead of the two training loaders
  and no optimizers - confirm the intended interface before implementing.

  Raises:
    NotImplementedError: always, so that an accidental call fails loudly
      instead of silently returning nothing.
  """
  # implement here
  raise NotImplementedError("validation() has not been implemented yet")

In [None]:
# Segmentation network: cross-entropy that skips pixels labelled 255, trained
# with SGD.
criterion = nn.CrossEntropyLoss(ignore_index=255)
optimizer = optim.SGD(model.parameters(), lr=(2.5e-2)/4, momentum=0.9, weight_decay=1e-4)

# Discriminator: binary cross-entropy on raw logits, trained with Adam.
criterion_D = nn.BCEWithLogitsLoss()
optimizer_D = optim.Adam(model_D.parameters(), lr=1e-4, betas=(0.9, 0.99))

# labels for adversarial training (read as globals inside train())
source_label = 0
target_label = 1
# hyper parameter: weight of the adversarial loss in the segmentation update
lambda_adv_target = 0.001

epochs = 1

# Per-epoch history consumed by the saving / plotting cells further down.
miou_train_list = []
miou_val_list = []

for epoch in range(epochs):
  _, _, _, miou_train, _ = train(
      epoch, model, model_D,
      dataloader_train_source, dataloader_train_target,
      criterion, criterion_D, optimizer, optimizer_D,
  )
  miou_train_list.append(miou_train)
  # TODO: validation() is still a stub. Once it is implemented, call it here,
  # append its mIoU to `miou_val_list` and keep a copy of model.state_dict()
  # for this epoch so that the best checkpoint can be selected afterwards.

In [None]:
# save best model
import os

DIR = '/content/drive/MyDrive/MLDL_project/models/bisenet/trained_models/'
os.makedirs(DIR, exist_ok=True)  # no separate existence check needed
PATH = DIR + f'biseNet_UDA_epoch{epochs}.pth'

# `models` must be the list of per-epoch state dicts collected during training,
# indexed like `miou_val_list`. The top import cell binds the same name to the
# torchvision `models` module, so fail with a clear message when the training
# loop has not replaced it (indexing a module gives an opaque TypeError).
if not isinstance(models, (list, tuple)):
    raise RuntimeError("`models` is not a list of state dicts; run the training loop that records per-epoch checkpoints first")
if len(miou_val_list) == 0:
    raise RuntimeError("`miou_val_list` is empty; no validation result to select the best model from")

best_index = int(np.argmax(np.array(miou_val_list)))
if best_index >= len(models):
    raise RuntimeError("`models` holds fewer checkpoints than `miou_val_list` has entries")

# Rebuild the network with the best weights; `model` is rebound on purpose so
# the cells below measure the best checkpoint.
model = BiSeNet(num_classes = 19, context_path='resnet18').cuda()
model.load_state_dict(models[best_index])

# Saving to an existing path replaces the old file, so it is not deleted first.
torch.save(model.state_dict(), PATH)

In [None]:
# visualization: training vs. validation mIoU
import matplotlib.pyplot as plt

# NOTE(review): the x axis uses a step of 5, so it only matches the two lists
# if they hold one value every 5 epochs (or if epochs <= 5) - confirm.
epochs_list = np.arange(1, epochs+1, 5)

fig, ax = plt.subplots(figsize=(10, 6))

curves = (
    (miou_train_list, '-', 'r', 'Training mIOU'),
    (miou_val_list, '--', 'b', 'Val mIOU'),
)
for values, line_style, colour, legend_label in curves:
    ax.plot(epochs_list, values, marker='o', linestyle=line_style, color=colour, label=legend_label)

ax.set_title('Training and validation mIOU over Epochs')
ax.set_xlabel('Epochs')
ax.set_ylabel('mIOU')

ax.legend()
ax.grid(True)
plt.show()

Flops and Number of parameters

In [None]:
# Install (or upgrade to the latest) fvcore, which provides the FLOP counter
# used in the next cells. NOTE(review): the version is not pinned, so results
# may differ between runs of this notebook.
!pip install -U fvcore

In [None]:
from fvcore.nn import FlopCountAnalysis, flop_count_table

# Use the spatial size of a real sample as the input resolution.
# NOTE(review): the original referenced `train_dataset`, which does not exist
# in this notebook; the target-domain training set is used here - confirm this
# is the resolution the numbers should be reported for.
# The sample is expected to be a (C, H, W) tensor (the transforms end with
# ToTensor), so height and width are the LAST two dimensions, not the first two.
height, width = train_dataset_target[0][0].shape[-2:]
image = torch.zeros((1, 3, height, width)).cuda()

# Analyse the inference graph: in eval mode the dummy forward pass does not
# update batch-norm statistics.
model.eval()
flops = FlopCountAnalysis(model, image)
# Printing the analysis object only shows its repr; total() is the actual count.
print(f'Total FLOPs: {flops.total()}')

total_params = sum(p.numel() for p in model.parameters())
print(f'Total number of parameters: {total_params}')

In [None]:
# Detailed breakdown of parameters and FLOPs, rendered as a text table from
# the analysis object created in the previous cell.
flops_table = flop_count_table(flops)
print(flops_table)

Latency and FPS

In [None]:
# latency and FPS
import time

# Input resolution taken from a real sample; the sample is expected to be a
# (C, H, W) tensor, so height/width are the last two dimensions.
# NOTE(review): the original referenced the undefined names `train_dataset`
# and `transform`; the target-domain training set and `transform_val` are used
# instead - confirm this is the intended configuration.
height, width = train_dataset_target[0][0].shape[-2:]
image = np.random.randint(0,256,(height, width, 3)) / 255.
image = transform_val(image)
image = torch.unsqueeze(image, dim=0).float().cuda()

iterations = 1000
warmup_iterations = 10  # untimed passes so one-off start-up cost is not measured
latency = np.zeros(iterations)
fps = np.zeros(iterations)

model.eval()  # measure inference behaviour, not the training graph
with torch.no_grad():
  for _ in range(warmup_iterations):
    model(image)
  torch.cuda.synchronize()

  for i in range(iterations):
    start = time.perf_counter()
    output = model(image)
    # CUDA kernels are launched asynchronously: wait for the GPU to finish,
    # otherwise only the launch overhead would be timed.
    torch.cuda.synchronize()
    end = time.perf_counter()
    time_diff_seconds = end - start
    latency[i] = time_diff_seconds
    fps[i] = 1/time_diff_seconds

meanLatency = np.mean(latency)*1000
stdLatency = np.std(latency)*1000
meanFPS = np.mean(fps)
stdFPS = np.std(fps)

print(f"Mean Latency: {meanLatency} ms")
print(f"Std Latency: {stdLatency} ms")
print(f"Mean FPS: {meanFPS}")
print(f"Std FPS: {stdFPS}")