In [1]:
import os
import random
import math

import numpy as np
import pandas as pd
import pdb
from collections import OrderedDict
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import collections  as mc
matplotlib.rcParams['figure.figsize'] = [5, 5]
matplotlib.rcParams['figure.dpi'] = 200

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms

from data_helper import UnlabeledDataset, LabeledDataset
from helper import collate_fn, draw_box

# Seed every RNG the notebook touches (Python, NumPy, PyTorch).
random.seed(0)
np.random.seed(0)
torch.manual_seed(0);

# All the images are saved in image_folder
# All the labels are saved in the annotation_csv file
image_folder = '/scratch/vr1059/self-driving-data/data'
annotation_csv = '/scratch/vr1059/self-driving-data/data/annotation.csv'

# You shouldn't change the unlabeled_scene_index
# The first 106 scenes (0-105) are unlabeled
unlabeled_scene_index = np.arange(106)
# The scenes from 106 to 133 are labeled.
# They are divided here into three subsets: training (106-127),
# validation (128-131) and test (132-133).
train_labeled_scene_index = np.arange(106, 128)
val_labeled_scene_index = np.arange(128, 132)
test_labeled_scene_index = np.arange(132, 134)

In [2]:
def unit_vector(vector):
    """Scale ``vector`` by the inverse of its Euclidean norm and return the result."""
    magnitude = np.linalg.norm(vector)
    return vector / magnitude

def angle_between(v1, v2):
    """ Returns the angle in degrees between vectors 'v1' and 'v2'::

            >>> angle_between((1, 0, 0), (0, 1, 0))
            90.0
            >>> angle_between((1, 0, 0), (1, 0, 0))
            0.0
            >>> angle_between((1, 0, 0), (-1, 0, 0))
            180.0

        The result is unsigned: it always lies in [0, 180].
    """
    v1_u = unit_vector(v1)
    v2_u = unit_vector(v2)
    # Clip so that rounding error in the dot product cannot push the
    # argument of arccos outside its valid domain [-1, 1].
    cosine = np.clip(np.dot(v1_u, v2_u), -1.0, 1.0)
    return np.degrees(np.arccos(cosine))

In [3]:
# Lookup tables between 50x50 grid cells and class labels.
# reverse_class_dict[label] -> cell corner (i, j); class_dict[(i, j)] -> label.
# Label 0 is reserved for the "no object" sentinel cell (-100, -100).
reverse_class_dict = [(-100, -100)]
class_dict = {}
for i in range(400, 800, 50):
    for j in range(100, 600, 50):
        # The next free label is simply the current length of the reverse table.
        class_dict[(i, j)] = len(reverse_class_dict)
        reverse_class_dict.append((i, j))

class_dict[(-100, -100)] = 0
# One past the last label handed out (i.e. the total number of classes).
class_label = len(reverse_class_dict)

In [4]:
def round_up(x):
    """Return the smallest multiple of 50 that is greater than or equal to ``x``."""
    steps = int(math.ceil(x / 50.0))
    return 50 * steps

def round_down(x):
    """Return the largest multiple of 50 that is less than or equal to ``x``.

    Uses a true floor so that exact multiples map to themselves
    (``round_down(450) == 450``). Computing ``round_up(x) - 50`` instead
    would send such values one cell too low (450 -> 400).
    """
    return int(math.floor(x / 50.0)) * 50

In [23]:
def front_collate_fn(batch):
    """Collate dataset samples into image batches plus one target row per sample.

    Each sample ``x`` in ``batch`` is read as follows:
      * ``x[0][0]``, ``x[0][1]``, ``x[0][2]`` are collected as the front-left,
        front and front-right images respectively (camera order presumed from
        the variable names -- TODO confirm against LabeledDataset).
      * ``x[1]`` is a dict providing ``'bounding_box'`` and ``'category'``.
      * ``x[2]`` is collected as the road image.

    Target construction: every bounding box is reduced to one reference point
    in a pixel frame (``coord * 10 + 400``, y negated). A box is a candidate
    when that point lies within 35 degrees of the +x direction as seen from
    (400, 400) and its category is not in ``[1, 3, 6, 8]``. Among candidates
    the one with the smallest x is kept. The target row is
    ``(class label, x / 100, y / 100)``; with no candidate it is
    ``(0, -1.0, -1.0)``.

    Returns:
        tuple: ``(front_imgs, target, road_imgs, bbs, front_right_imgs,
        front_left_imgs)`` where the image entries are stacked tensors,
        ``target`` is a tensor with one row per sample and ``bbs`` is the
        plain list of per-sample bounding-box tensors.

    Raises:
        KeyError: if the selected point falls in a grid cell that is missing
            from ``class_dict`` (the point is printed first).
    """
    front_imgs = []
    front_right_imgs = []
    front_left_imgs = []
    target = []
    road_imgs = []
    bbs = []
    for x in batch:
        # input
        front_left_imgs.append(torch.tensor(x[0][0]))
        front_imgs.append(torch.tensor(x[0][1]))
        front_right_imgs.append(torch.tensor(x[0][2]))
        road_imgs.append(torch.tensor(x[2]))
        
        # target
        bb_tens = x[1]['bounding_box']
        bbs.append(bb_tens)
        # Running minimum of candidate x; 800 acts as the upper bound, so
        # points with x >= 800 are never selected.
        x_min = 800
        # Sentinel meaning "no qualifying box in this sample".
        bb_cand = (-100., -100.)
        
        for i, corners in enumerate(bb_tens):
            # Get bird's eye view coordinates. 
            # Corner columns are reordered 0, 1, 3, 2; each row of the stack is one (x, y) point.
            point_squence = torch.stack([corners[:, 0], corners[:, 1], corners[:, 3], corners[:, 2]])
            xs = point_squence.T[0] * 10 + 400
            ys = -point_squence.T[1] * 10 + 400
            # Choose which corner pair to average based on the x-extent between
            # points 0 and 2 (presumably this separates box orientations -- TODO confirm).
            if xs[2] - xs[0] > 5:
                top_center_x, top_center_y = 0.5*(xs[2] + xs[3]), 0.5*(ys[2] + ys[3])
            else:
                top_center_x, top_center_y = 0.5*(xs[0] + xs[1]), 0.5*(ys[0] + ys[1])
                
            # We do (800 - top_center_y) because matplotlib y-axis starts from the top. 
            # v1 points from (400, 400) to the reference point; v2 is the +x direction.
            v1 = np.array([top_center_x - 400, 800 - top_center_y - 400])
            v2 = np.array([2, 0])
            
            # Keep boxes inside the 35-degree cone whose category is not excluded
            # (meaning of category ids 1, 3, 6, 8 is not visible here -- TODO confirm).
            if abs(angle_between(v1, v2)) <= 35 and x[1]['category'][i] not in [1, 3, 6, 8]:
                if top_center_x < x_min:
                    x_min = top_center_x
                    bb_cand = (top_center_x.item(), top_center_y.item())
         
        # No candidate found: class 0 with the scaled sentinel coordinates.
        if int(bb_cand[0]) == -100:
            target.append((0, bb_cand[0]/100., bb_cand[1]/100.))
        else:
            # Snap the point to its 50x50 grid cell to obtain the class label.
            key = (round_down(bb_cand[0]), round_down(bb_cand[1]))
            # NOTE(review): this only logs the offending point; the lookup on
            # the next line still raises KeyError for an unknown cell.
            if key not in class_dict:
                print(bb_cand)
            label = class_dict[key]
            target.append((label, bb_cand[0]/100., bb_cand[1]/100.))
                
    boom = torch.stack(front_imgs), torch.tensor(target), torch.stack(road_imgs), bbs, torch.stack(front_right_imgs), torch.stack(front_left_imgs)
    return boom

### Dataset


In [24]:
# LabeledDataset is indexed one sample at a time and hands back tuples of
# tensors, because the number of bounding boxes differs between samples.
# extra_info is optional; it is requested here for both splits.

# Validation images are only converted to tensors.
val_transform = transforms.ToTensor()

# Training images additionally go through a colour jitter followed by a
# 3-channel grayscale conversion; RandomApply switches the pair on or off
# together with its default probability.
_train_augmentations = [
    transforms.ColorJitter(brightness=0.5, contrast=0.3, saturation=0.2, hue=(-0.3, 0.3)),
    transforms.Grayscale(3),
    # transforms.RandomAffine(10),
]
train_transform = transforms.Compose([
    transforms.RandomApply(_train_augmentations),
    transforms.ToTensor(),
])

# Arguments shared by the training and validation datasets.
_shared_dataset_args = dict(
    image_folder=image_folder,
    annotation_file=annotation_csv,
    extra_info=True,
)
labeled_trainset = LabeledDataset(
    scene_index=train_labeled_scene_index,
    transform=train_transform,
    **_shared_dataset_args
)
labeled_valset = LabeledDataset(
    scene_index=val_labeled_scene_index,
    transform=val_transform,
    **_shared_dataset_args
)

# Only the training loader shuffles; both use the custom collate function.
train_loader = torch.utils.data.DataLoader(
    labeled_trainset, batch_size=256, shuffle=True, collate_fn=front_collate_fn)
val_loader = torch.utils.data.DataLoader(
    labeled_valset, batch_size=256, shuffle=False, collate_fn=front_collate_fn)

In [25]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


class SimpleModel(nn.Module):
    """ResNet-18 encoder shared by a classification head and a regression head.

    ``forward`` returns a pair ``(class_logits, coords)``: 81 classification
    outputs and 2 regression outputs, both computed from the same encoder
    features.
    """

    def __init__(self):
        super().__init__()

        # The backbone's final fc layer is replaced by a pass-through so the
        # encoder emits the features that the two 512-input heads consume.
        backbone = torchvision.models.resnet18()
        backbone.fc = nn.Identity()
        self.encoder = backbone

        # Layer names are kept stable because they are part of the
        # state_dict keys used when saving/loading checkpoints.
        self.classification = nn.Sequential(OrderedDict(linear1=nn.Linear(512, 81)))
        self.regression = nn.Sequential(OrderedDict(linear_reg=nn.Linear(512, 2)))

    def forward(self, x):
        """Encode ``x`` once and run both heads on the shared features."""
        features = self.encoder(x)
        class_logits = self.classification(features)
        coords = self.regression(features)
        return class_logits, coords
    


In [35]:
# Build a fresh model, the two loss functions and the optimiser.
# Re-running this cell restarts training from newly initialised weights.
model = SimpleModel().to(device)
class_criterion = nn.CrossEntropyLoss()
reg_criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# Start from +inf so the first validation pass always produces a checkpoint,
# regardless of the scale of the combined loss.
best_val_loss = float('inf')

In [38]:
def train():
    """Run one training pass over ``train_loader`` and print running losses.

    Works entirely on notebook-level globals: ``model``, ``optimizer``,
    ``train_loader``, ``device``, ``class_criterion``, ``reg_criterion`` and
    ``epoch`` (the loop variable of the training cell, used only in the
    progress line). The optimised loss is ``class_loss + 0.5 * reg_loss``.
    """
    model.train()
    train_losses = []
    class_losses = []
    reg_losses = []
    num_batches = len(train_loader)
    # Only the front image and the target are used; the remaining collated
    # fields are ignored here.
    for i, (sample, target, _road_img, _bbs, _front_right, _front_left) in enumerate(train_loader):

        optimizer.zero_grad()

        sample = sample.to(device)
        target = target.to(device)

        y_hat_class, y_hat_reg = model(sample)
        # Column 0 holds the class label, the remaining columns the regression targets.
        target_class = target[:, 0]
        target_reg = target[:, 1:]

        class_loss = class_criterion(y_hat_class, target_class.long())
        reg_loss = reg_criterion(y_hat_reg, target_reg)
        loss = class_loss + 0.5 * reg_loss

        train_losses.append(loss.item())
        class_losses.append(class_loss.item())
        reg_losses.append(reg_loss.item())

        loss.backward()
        optimizer.step()

        if i % 10 == 0:
            # Progress is the fraction of batches processed, expressed as a percentage.
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, i * len(sample), len(train_loader.dataset),
                100. * i / num_batches, loss.item()))
            print('Classify Loss: {}'.format(np.mean(class_losses)))
            print('Regression Loss: {}'.format(np.mean(reg_losses)))

    print("\nAverage Train Epoch Loss: ", np.mean(train_losses))
            
def val():
    """Evaluate the model on ``val_loader`` and checkpoint on improvement.

    Uses the notebook-level ``model``, ``val_loader``, ``device`` and loss
    criteria. Prints the per-batch averages of the combined, classification
    and regression losses. When the combined average beats the global
    ``best_val_loss``, that global is updated and the model's state_dict is
    saved to ``best_val_loss_simple_class_plus_reg.pt``.
    """
    global best_val_loss

    model.eval()
    combined_hist, class_hist, reg_hist = [], [], []

    # No gradients are needed anywhere during evaluation.
    with torch.no_grad():
        for sample, target, _road_img, _bbs, _front_right, _front_left in val_loader:
            sample = sample.to(device)
            target = target.to(device)

            y_hat_class, y_hat_reg = model(sample)
            # Column 0 is the class label; the rest are the regression targets.
            labels = target[:, 0].long()
            coords = target[:, 1:]

            class_loss = class_criterion(y_hat_class, labels)
            reg_loss = reg_criterion(y_hat_reg, coords)
            loss = class_loss + 0.5 * reg_loss

            combined_hist.append(loss.item())
            class_hist.append(class_loss.item())
            reg_hist.append(reg_loss.item())

    mean_val_loss = np.mean(combined_hist)
    print("Average Validation Epoch Loss: ", mean_val_loss)
    print("Average Validation Classify Loss: ", np.mean(class_hist))
    print("Average Validation Regression Loss: ", np.mean(reg_hist))
    print("\n")

    # Keep only the weights of the best epoch seen so far.
    if mean_val_loss < best_val_loss:
        best_val_loss = mean_val_loss
        torch.save(model.state_dict(), 'best_val_loss_simple_class_plus_reg.pt')

In [None]:
# Alternate one training pass and one validation pass per epoch.
epochs = 40
# NOTE: `epoch` must remain a module-level name; train() reads it when
# formatting its progress line.
for epoch in range(epochs):
    train()
    # val() also saves the model whenever the mean validation loss improves.
    val()

  # Remove the CWD from sys.path while we load stuff.
  # This is added back by InteractiveShellApp.init_path()
  if sys.path[0] == '':
  del sys.path[0]


Classify Loss: 4.451223850250244
Regression Loss: 17.31562614440918
Classify Loss: 4.082628141749989
Regression Loss: 11.446815664117986

Average Train Epoch Loss:  9.806036082181064
Average Validation Epoch Loss:  7.805248260498047
Average Validation Classify Loss:  4.101257085800171
Average Validation Regression Loss:  7.407982349395752


Classify Loss: 3.685173749923706
Regression Loss: 6.364619731903076
Classify Loss: 3.399478717283769
Regression Loss: 4.8500120423056865

Average Train Epoch Loss:  5.824484738436612
Average Validation Epoch Loss:  4.593433380126953
Average Validation Classify Loss:  3.7844713926315308
Average Validation Regression Loss:  1.6179240942001343


Classify Loss: 2.9510140419006348
Regression Loss: 4.0531744956970215
Classify Loss: 2.8834752386266533
Regression Loss: 3.25788950920105

Average Train Epoch Loss:  4.512420047413219
Average Validation Epoch Loss:  4.524097561836243
Average Validation Classify Loss:  3.6997084617614746
Average Validation Regre

In [None]:
# 2.28 lowest val loss so far, cross entropy
# I want to try learning classification and regression simultaneously
# Actually, I want to try a counting network. 
# Count how many cars it can see. Classification. 

# 2.23 (epoch 14) Trying to add RandomAffine, see if lowest classify loss goes down. 


In [None]:
# Classification + Regression
# Best combined loss: 3.72 (total), 3.04 classify, 1.36 regress