In [2]:
import os
import random

import numpy as np
import pandas as pd
import pdb
from collections import OrderedDict
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import collections  as mc
matplotlib.rcParams['figure.figsize'] = [5, 5]
matplotlib.rcParams['figure.dpi'] = 200

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms

from data_helper import UnlabeledDataset, LabeledDataset
from helper import collate_fn, draw_box

# Seed every RNG source used in this notebook (Python, NumPy, PyTorch).
random.seed(0)
np.random.seed(0)
torch.manual_seed(0);

# All the images are saved under image_folder.
# All the labels are saved in the annotation_csv file.
image_folder = '/scratch/vr1059/self-driving-data/data'
annotation_csv = '/scratch/vr1059/self-driving-data/data/annotation.csv'

# You shouldn't change unlabeled_scene_index.
# The first 106 scenes (indices 0-105) are unlabeled.
unlabeled_scene_index = np.arange(106)
# Scenes 106-133 are labeled.
# The labeled scenes are divided into training (106-127), validation (128-131)
# and test (132-133) subsets.
train_labeled_scene_index = np.arange(106, 128)
val_labeled_scene_index = np.arange(128, 132)
test_labeled_scene_index = np.arange(132, 134)

In [3]:
!nvidia-smi

Thu Apr 23 19:19:05 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.39       Driver Version: 418.39       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P40           On   | 00000000:04:00.0 Off |                    0 |
| N/A   27C    P0    50W / 250W |  12735MiB / 22919MiB |      0%   E. Process |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [3]:
def unit_vector(vector):
    """Scale ``vector`` to unit Euclidean length and return the result."""
    length = np.linalg.norm(vector)
    return vector / length

def angle_between(v1, v2):
    """Return the unsigned angle between vectors ``v1`` and ``v2`` in DEGREES.

    The arccos result is converted with ``np.degrees``, so the value lies in
    ``[0, 180]`` (not radians). Examples:

        angle_between((1, 0, 0), (0, 1, 0))   ->  90.0
        angle_between((1, 0, 0), (1, 0, 0))   ->   0.0
        angle_between((1, 0, 0), (-1, 0, 0))  -> 180.0

    A zero-length input is divided by a zero norm during normalization, so the
    result is ``nan`` in that case.
    """
    v1_u = unit_vector(v1)
    v2_u = unit_vector(v2)
    # Clip guards against |cos| drifting slightly above 1 through rounding,
    # which would otherwise make arccos return nan.
    return np.degrees(np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0)))

In [14]:
# The labeled dataset can only be retrieved sample by sample.
# All returned data are tuples of tensors, because the number of bounding boxes differs per sample.
# extra_info controls whether the dataset also returns the optional extra info; it does not have to be used.

# Validation images are only converted to tensors (no augmentation).
val_transform = transforms.ToTensor()
# Training augmentation: RandomApply gates the whole list, so a training image
# gets both the color jitter and the 3-channel grayscale conversion, or neither.
train_transform = transforms.Compose([
    transforms.RandomApply([
        transforms.ColorJitter(brightness = 0.5, contrast = 0.3, saturation = 0.2, hue = (-0.3, 0.3)),
        transforms.Grayscale(3)
    ]),
    transforms.ToTensor(),
])
# Training split: scenes in train_labeled_scene_index, with augmentation.
labeled_trainset = LabeledDataset(image_folder=image_folder,
                                  annotation_file=annotation_csv,
                                  scene_index=train_labeled_scene_index,
                                  transform=train_transform,
                                  extra_info=True
                                 )
# Validation split: scenes in val_labeled_scene_index, without augmentation.
labeled_valset = LabeledDataset(image_folder=image_folder,
                                  annotation_file=annotation_csv,
                                  scene_index=val_labeled_scene_index,
                                  transform=val_transform,
                                  extra_info=True
                                 )

def front_collate_fn(batch):
    """Collate labeled samples into stacked camera tensors plus one target per sample.

    Each sample ``x`` in ``batch`` is read as:
      * ``x[0][0]``, ``x[0][1]``, ``x[0][2]`` -- the images used here as
        front-left, front and front-right camera views;
      * ``x[1]`` -- dict with ``'bounding_box'`` (iterated per object, each
        entry indexed as ``corners[:, k]``) and ``'category'``;
      * ``x[2]`` -- road image.

    Per-sample target: among the objects whose reference point lies within a
    35 degree cone around the +x direction from the map point (400, 400) and
    whose category is not excluded, the one with the smallest map x coordinate
    is chosen and encoded as ``(x, y, 1)``. If no object qualifies the
    sentinel ``(-500, -500, 0)`` is used.

    Returns:
        Tuple ``(front_imgs, target, road_imgs, bbs, front_right_imgs,
        front_left_imgs)``. ``target`` is always a float32 tensor with one
        3-element row per sample; ``bbs`` is a plain list holding each
        sample's untouched ``'bounding_box'`` entry.
    """
    no_object = (-500, -500, 0)          # sentinel target when nothing qualifies
    # NOTE(review): category ids that are never selected as a target --
    # presumably non-vehicle classes; confirm against the dataset's category map.
    excluded_categories = [1, 3, 6, 8]
    max_angle_deg = 35                   # half-angle of the forward cone
    forward = np.array([2, 0])           # +x direction in map coordinates

    front_imgs, front_right_imgs, front_left_imgs = [], [], []
    road_imgs, bbs, target = [], [], []

    for x in batch:
        # input -- as_tensor avoids the copy-construct warning that
        # torch.tensor(existing_tensor) emits; torch.stack below copies anyway.
        front_left_imgs.append(torch.as_tensor(x[0][0]))
        front_imgs.append(torch.as_tensor(x[0][1]))
        front_right_imgs.append(torch.as_tensor(x[0][2]))
        road_imgs.append(torch.as_tensor(x[2]))

        # target
        bb_tens = x[1]['bounding_box']
        bbs.append(bb_tens)
        x_min = 800
        bb_cand = no_object

        for i, corners in enumerate(bb_tens):
            # Get bird's eye view coordinates (corner order 0, 1, 3, 2).
            point_sequence = torch.stack([corners[:, 0], corners[:, 1], corners[:, 3], corners[:, 2]])
            xs = point_sequence.T[0] * 10 + 400
            ys = -point_sequence.T[1] * 10 + 400
            # Pick which edge's midpoint is used as the object's reference point.
            if xs[2] - xs[0] > 5:
                top_center_x, top_center_y = 0.5 * (xs[2] + xs[3]), 0.5 * (ys[2] + ys[3])
            else:
                top_center_x, top_center_y = 0.5 * (xs[0] + xs[1]), 0.5 * (ys[0] + ys[1])

            # Vector from (400, 400) to the reference point, with y flipped.
            v1 = np.array([top_center_x - 400, 800 - top_center_y - 400])

            in_cone = abs(angle_between(v1, forward)) <= max_angle_deg
            if in_cone and x[1]['category'][i] not in excluded_categories:
                if top_center_x < x_min:
                    x_min = top_center_x
                    bb_cand = (top_center_x.item(), top_center_y.item(), 1)

        target.append(bb_cand)

    # Pin the dtype: without it a batch made only of integer sentinels would be
    # inferred as int64 and break the float scaling / MSE loss downstream.
    target_tensor = torch.tensor(target, dtype=torch.float32)

    return (
        torch.stack(front_imgs),
        target_tensor,
        torch.stack(road_imgs),
        bbs,
        torch.stack(front_right_imgs),
        torch.stack(front_left_imgs),
    )

# Both loaders batch 256 samples through front_collate_fn; only the training
# loader shuffles, so validation batches are produced in a fixed order.
train_loader = torch.utils.data.DataLoader(labeled_trainset, batch_size=256, shuffle=True, collate_fn=front_collate_fn)
val_loader = torch.utils.data.DataLoader(labeled_valset, batch_size=256, shuffle=False, collate_fn=front_collate_fn)

In [15]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class SimpleModel(nn.Module):
    """ResNet-18 encoder followed by a single linear layer with two outputs.

    Submodules:
        encoder:    torchvision ResNet-18 whose ``fc`` layer is replaced by
                    ``nn.Identity`` so it emits the features fed to the head.
        regression: ``nn.Sequential`` holding one layer, ``linear1``, mapping
                    512 features to 2 values.
    """

    def __init__(self):
        super().__init__()

        backbone = torchvision.models.resnet18()
        backbone.fc = nn.Identity()
        self.encoder = backbone

        head_layers = OrderedDict()
        head_layers['linear1'] = nn.Linear(512, 2)
        self.regression = nn.Sequential(head_layers)

    def forward(self, x):
        """Encode ``x`` and return the output of the regression head."""
        features = self.encoder(x)
        return self.regression(features)
    
# Instantiate the network and move it to the selected device.
model = SimpleModel().to(device)

In [16]:
# Warm-start from an earlier checkpoint: copy every checkpoint entry whose key
# also exists in this model and keep the fresh initialization for the rest.
model_dict = model.state_dict()
# map_location remaps the stored tensors onto the active device, so a
# checkpoint written on a GPU still loads when this notebook runs on the CPU.
pretrained_dict = torch.load('best_val_loss_simple.pt', map_location=device)
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)

# Freeze every encoder parameter whose name does not contain "bn".
# NOTE(review): this is a name match only -- normalization layers registered
# under other names (e.g. inside downsample branches) are frozen too; confirm
# that is intended.
for name, param in model.encoder.named_parameters():
    if "bn" not in name:
        param.requires_grad = False

# Re-enable training for all parameters of the last two residual stages.
unfreeze_layers = [model.encoder.layer3, model.encoder.layer4]
for layer in unfreeze_layers:
    for param in layer.parameters():
        param.requires_grad = True

In [17]:
def train(model, epoch):
    """Run one training epoch over ``train_loader`` and print the mean loss.

    Uses the module-level ``train_loader``, ``optimizer``, ``criterion`` and
    ``device``.

    Args:
        model: network to optimize; switched to training mode here.
        epoch: epoch number, used only in the progress message.
    """
    model.train()
    train_losses = []
    num_batches = len(train_loader)
    for i, (sample, target, *_) in enumerate(train_loader):

        optimizer.zero_grad()

        sample = sample.to(device)
        # Keep only the first two target columns (the regression values) and
        # scale them by 1/100. Done out of place and as float so an
        # integer-typed target batch cannot break the division or the loss.
        target = target[:, :2].to(device).float() / 100.

        y_hat = model(sample)
        loss = criterion(y_hat, target)

        train_losses.append(loss.item())

        loss.backward()
        optimizer.step()

        if i % 10 == 0:
            # Percentage of batches processed so far (factor 100, not 10).
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, i * len(sample), len(train_loader.dataset),
                100. * i / num_batches, loss.item()))

    print("\nAverage Train Epoch Loss: ", np.mean(train_losses))
            
def val(model):
    """Evaluate ``model`` on ``val_loader`` and checkpoint it on improvement.

    Uses the module-level ``val_loader``, ``criterion`` and ``device``. When
    the mean validation loss drops below the module-level ``best_val_loss``,
    that global is updated and the model's state dict is saved to
    ``best_val_loss_simple_class_then_reg.pt``.

    Args:
        model: network to evaluate; switched to evaluation mode here.
    """
    global best_val_loss

    model.eval()
    val_losses = []
    with torch.no_grad():
        for sample, target, *_ in val_loader:
            sample = sample.to(device)
            # Same target preparation as in training: first two columns,
            # scaled by 1/100, out of place and as float.
            target = target[:, :2].to(device).float() / 100.

            y_hat = model(sample)
            loss = criterion(y_hat, target)
            val_losses.append(loss.item())

    mean_val_loss = np.mean(val_losses)
    print("Average Validation Epoch Loss: ", mean_val_loss)
    if mean_val_loss < best_val_loss:
        best_val_loss = mean_val_loss
        torch.save(model.state_dict(), 'best_val_loss_simple_class_then_reg.pt')

In [18]:
!nvidia-smi

Tue Apr 21 23:56:50 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 418.39       Driver Version: 418.39       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  On   | 00000000:84:00.0 Off |                    0 |
| N/A   24C    P0    30W / 250W |   6787MiB / 16280MiB |      0%   E. Process |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [19]:
# Training setup: mean-squared-error loss on the scaled regression targets,
# optimized with Adam at a learning rate of 1e-4.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
epochs = 20
# Starting threshold for checkpointing: val() saves the model whenever the
# mean validation loss falls below this value and then lowers it.
best_val_loss = 100

# One training pass followed by one validation pass per epoch.
for epoch in range(epochs):
    train(model, epoch)
    val(model)
    




Average Train Epoch Loss:  17.39893765883012
Average Validation Epoch Loss:  3.6459970474243164

Average Train Epoch Loss:  5.189363241195679
Average Validation Epoch Loss:  3.268145352602005

Average Train Epoch Loss:  2.2272907278754492
Average Validation Epoch Loss:  2.842980146408081

Average Train Epoch Loss:  1.5778050260110335
Average Validation Epoch Loss:  2.8899271935224533

Average Train Epoch Loss:  1.2118195403705945
Average Validation Epoch Loss:  2.810337096452713

Average Train Epoch Loss:  1.1061693971807307
Average Validation Epoch Loss:  2.9526683688163757

Average Train Epoch Loss:  0.862889441576871
Average Validation Epoch Loss:  3.0667253136634827

Average Train Epoch Loss:  0.9017108624631708
Average Validation Epoch Loss:  2.935458689928055

Average Train Epoch Loss:  0.7100047956813466
Average Validation Epoch Loss:  3.1708260774612427

Average Train Epoch Loss:  0.750885776498101
Average Validation Epoch Loss:  2.896598070859909

Average Train Epoch Loss:  0

KeyboardInterrupt: 

In [9]:
# Best validation regression loss per configuration (all runs start from the classification-pretrained weights):
# 3.07 <-- grayscale only.
# 2.71 (epoch 3) <-- grayscale and color jitter, batch size 128.
#      Interestingly, the same setup with batch size 160 performs worse: roughly 5.0 average val loss.
# 2.80, but far more consistent (stays around 2.80-2.85) <-- grayscale and color jitter, everything frozen except layers [3, 4], batch size 128.