### Train coupling localization - training notebook
Run every cell once.


In [None]:
import cv2
import json
import numpy as np
import matplotlib.pyplot as plt
import random, os, time, copy, glob
import imgaug as ia
import imgaug.augmenters as iaa
from imgaug.augmentables import Keypoint, KeypointsOnImage

#from __future__ import print_function
#from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

from dataset import TrainCouplingDataset


## ACTION REQUIRED

In the cell below, set the variables to match your setup, in particular the locations of the training / validation images and of their JSON annotation files!

The training set has been split into train and validation by file suffix (`.jpg` / `.jpeg`), because all `.jpeg` files seem to come from a single camera that is not represented among the `.jpg` images. This should help generalization by guarding against overfitting to the limited set of cameras available: the model will overfit to the cameras in the training set but should not overfit to the validation camera, so the validation set gives a somewhat reasonable estimate of generalization performance on other, unseen cameras.

In [None]:
# Training hyper-parameters.
batch_size = 8
num_epochs = 200

# Replace with folders containing all of the training / validation images.
# glob returns matches in arbitrary, filesystem-dependent order, so the lists
# are sorted: sample indices then stay the same between runs and machines and
# the fixed seeds below act on an identical sample order.
train_filelist = sorted(glob.glob('../car_coupling_train/*.jpg'))
val_filelist = sorted(glob.glob('../car_coupling_train/*.jpeg'))
# Replace with directory containing jsons for training / validation images.
# Expecting same filename as image, with extension replaced with .json
train_root_dir = '../car_coupling_train/'
val_root_dir = '../car_coupling_train/'

# Fixed seeds for the imgaug and PyTorch random number generators.
ia.seed(42)
torch.manual_seed(42)

# Quick check that both globs actually matched something.
print(f'got {len(train_filelist)} training images and {len(val_filelist)} validation images')

In [None]:
# Build one dataset and one DataLoader per phase ('train' and 'val').

# The file list, root directory and mode string are handed to
# TrainCouplingDataset unchanged; only the mode differs in kind between phases.
train_dset = TrainCouplingDataset(
    filename_list=train_filelist,
    root_dir=train_root_dir,
    mode='train',
)
val_dset = TrainCouplingDataset(
    filename_list=val_filelist,
    root_dir=val_root_dir,
    mode='val',
)
image_datasets = {'train': train_dset, 'val': val_dset}

# Both loaders share the same settings: shuffled batches, loaded in the main
# process (num_workers=0).
dataloaders_dict = {}
for phase_name in ('train', 'val'):
    dataloaders_dict[phase_name] = torch.utils.data.DataLoader(
        image_datasets[phase_name],
        batch_size=batch_size,
        shuffle=True,
        num_workers=0,
    )

In [None]:
# Use the first CUDA device when one is available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f'Torch device is {device}')

#### Data sanity check
The cell below shows an example image from the training dataset, including augmentations. This is the time to check that it looks OK. You can rerun it multiple times to see different augmentations of the same image.

In [None]:
# Fetch the first training sample and display its image.
sample = train_dset[0]
# Move axis 0 to the end (presumably channels-first -> height x width x channels,
# which is what imshow expects) and cast to 8-bit for display.
preview = sample['image'].permute(1, 2, 0).numpy().astype(np.uint8)
plt.figure(figsize=(20, 20))
plt.imshow(preview)

In [None]:
# model setup

# Pretrained ResNet-18 whose final fully connected layer is replaced by a
# fresh linear layer with 2 outputs.
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=2)
model_ft.to(device)

# Print the name of every parameter that will receive gradients.
for name, param in model_ft.named_parameters():
    if not param.requires_grad:
        continue
    print("\t", name)

# Every parameter of the network is handed to the optimizer; the loss is the
# mean squared error between the 2 outputs and the target.
params_to_update = model_ft.parameters()
optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
criterion = nn.MSELoss()

In [None]:
# This is the training loop itself. A lot of this code is courtesy of the Pytorch classification finetuning tutorial adapted for our purposes

def train_model(model, dataloaders, criterion, optimizer, num_epochs=300):
    """Train ``model`` and return it with the best validation weights loaded.

    Each epoch runs a ``'train'`` phase (gradients enabled, optimizer stepped
    after every batch) followed by a ``'val'`` phase (gradients disabled).
    The weights from the epoch with the lowest validation loss are kept and
    loaded back into ``model`` before returning.

    Args:
        model: ``torch.nn.Module`` to train; it is modified in place.
        dataloaders: Mapping with ``'train'`` and ``'val'`` entries. Each value
            is iterated to obtain batches and must expose ``.dataset`` (its
            ``len`` normalises the epoch loss). Every batch is a mapping with
            an ``'image'`` tensor (model input) and a ``'bbox'`` tensor
            (target passed to ``criterion``).
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        optimizer: Optimizer that updates the parameters of ``model``.
        num_epochs: Number of train + validation epochs to run.

    Returns:
        Tuple ``(model, val_loss_history)`` where ``val_loss_history`` holds
        the mean validation loss of every epoch, in order.
    """
    since = time.time()

    # Batches are moved to the device the model already lives on, so the
    # function does not rely on a notebook-level global.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        target_device = first_param.device
    else:
        target_device = torch.device('cpu')

    val_loss_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    # Start from +inf so the first validation epoch always becomes the
    # baseline; a finite sentinel would silently keep the initial weights
    # whenever the loss never drops below it.
    best_loss = float('inf')

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_training = phase == 'train'
            if is_training:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0

            for batch in dataloaders[phase]:
                inputs = batch['image'].to(target_device)
                labels = batch['bbox'].to(target_device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in the training phase
                with torch.set_grad_enabled(is_training):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # Weight the batch loss by the batch size so the epoch value
                # is a per-sample mean even when the last batch is smaller.
                running_loss += loss.item() * inputs.size(0)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f}'.format(phase, epoch_loss))

            if phase == 'val':
                val_loss_history.append(epoch_loss)
                # Snapshot the weights whenever validation loss improves.
                if epoch_loss < best_loss:
                    best_loss = epoch_loss
                    best_model_wts = copy.deepcopy(model.state_dict())

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_loss_history

### Cell below runs the actual training
At the end model is saved to `$(pwd)/model_tmp.pth`.
Also, despite the augmentation, the model still seems to overfit, as training data is limited. Playing around with the learning rate has helped slightly, but validation set accuracy does vary between training runs (with different random seeds).


Below that cell are some visualisations of outputs

In [None]:
# Run the training loop; train_model returns the model together with the
# per-epoch validation history.
model_ft, hist = train_model(
    model=model_ft,
    dataloaders=dataloaders_dict,
    criterion=criterion,
    optimizer=optimizer_ft,
    num_epochs=num_epochs,
)
# Persist the returned weights in the current working directory.
torch.save(model_ft.state_dict(), 'model_tmp.pth')


In [None]:
# Save the current weights under the file name the model-loading cell reads.
torch.save(model_ft.state_dict(), 'model_x2.pth')

# Print prediction and target for every validation sample and accumulate the
# squared error per output.
se = 0.
ctr = 0
model_ft.eval()  # make inference mode explicit instead of relying on cell order
with torch.no_grad():  # no gradients are needed for evaluation
    for idx in range(len(val_dset)):
        # Fetch the sample once so prediction and target come from the same item.
        sample = val_dset[idx]

        res = model_ft(sample['image'].unsqueeze(0).to(device))[0].cpu().numpy()
        print(res)
        gold = sample['bbox'].numpy()
        print(gold)

        # This accumulation was commented out before, which made the summary
        # below always print 0.0.
        se += (res - gold) ** 2
        ctr += 1
        print()

# Mean squared error per output; NaN instead of a ZeroDivisionError when the
# validation set is empty.
print(se / ctr if ctr else float('nan'))
print(ctr)

In [None]:
# Rebuild the architecture and restore the fine-tuned weights from disk.
# pretrained=False: load_state_dict below overwrites every parameter and
# buffer, so fetching the pretrained weights first would be wasted work.
model_loaded = models.resnet18(pretrained=False)
num_ftrs = model_loaded.fc.in_features
model_loaded.fc = nn.Linear(num_ftrs, 2)
# map_location=device loads the checkpoint directly onto the active device.
# This replaces a `device == 'cpu'` test that compared a torch.device with a
# str and whose branches applied the CPU mapping in the non-CPU case.
model_loaded.load_state_dict(torch.load('model_x2.pth', map_location=device))
model_loaded.eval()
model_loaded.to(device)

# Draw predicted and annotated x-positions on every validation image.
with torch.no_grad():  # inference only
    for idx in range(len(val_dset)):
        # Fetch once so prediction, target and displayed image belong together.
        sample = val_dset[idx]

        res = model_loaded(sample['image'].unsqueeze(0).to(device))[0].cpu().numpy()
        print(res)
        gold = sample['bbox'].numpy()
        print(gold)

        img = sample['image'].permute(1, 2, 0)
        # order='C' forces a fresh C-contiguous copy; a permuted view may not be
        # contiguous and OpenCV drawing functions need a contiguous buffer.
        npi = np.array(img.numpy(), dtype=np.uint8, order='C')
        # Outputs and targets are multiplied by the size of axis 1 of the
        # permuted image (presumably its width, i.e. values are width fractions).
        width = img.shape[1]

        # Same drawing order and colours as before: the two predictions first,
        # then the two targets.
        markers = (
            (res[0], (0, 0, 255)),
            (res[1], (0, 0, 128)),
            (gold[0], (0, 255, 0)),
            (gold[1], (0, 128, 0)),
        )
        for value, colour in markers:
            x_pos = int(value * width)
            cv2.line(npi, (x_pos, 20), (x_pos, 420), colour, 2)

        plt.figure(figsize=(20, 20))
        plt.imshow(npi)
    
    
    
    
    

In [None]:

# Draw predicted and annotated x-positions on the first training samples.
# The previous counter-based loop stopped after the 11th image.
max_previews = 11

model_ft.eval()  # make inference mode explicit instead of relying on cell order
with torch.no_grad():  # inference only
    for idx in range(min(len(train_dset), max_previews)):
        # Fetch the sample exactly once: every access to the training dataset
        # may apply a new random augmentation, so separate fetches would pair
        # the prediction, the target and the displayed image from different
        # versions of the picture.
        sample = train_dset[idx]

        res = model_ft(sample['image'].unsqueeze(0).to(device))[0].cpu().numpy()
        print(res)
        gold = sample['bbox'].numpy()
        print(gold)

        img = sample['image'].permute(1, 2, 0)
        # order='C' forces a fresh C-contiguous copy; a permuted view may not be
        # contiguous and OpenCV drawing functions need a contiguous buffer.
        npi = np.array(img.numpy(), dtype=np.uint8, order='C')
        # Outputs and targets are multiplied by the size of axis 1 of the
        # permuted image (presumably its width, i.e. values are width fractions).
        width = img.shape[1]

        # Same drawing order and colours as before: the two predictions first,
        # then the two targets.
        markers = (
            (res[0], (0, 0, 255)),
            (res[1], (0, 0, 128)),
            (gold[0], (0, 255, 0)),
            (gold[1], (0, 128, 0)),
        )
        for value, colour in markers:
            x_pos = int(value * width)
            cv2.line(npi, (x_pos, 20), (x_pos, 420), colour, 2)

        plt.figure(figsize=(20, 20))
        plt.imshow(npi)


In [None]:
}