In [3]:
import os
import random
import math

import numpy as np
import pandas as pd
import pdb
from collections import OrderedDict
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import collections  as mc
matplotlib.rcParams['figure.figsize'] = [5, 5]
matplotlib.rcParams['figure.dpi'] = 200

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms

from data_helper import UnlabeledDataset, LabeledDataset
from helper import collate_fn, draw_box

random.seed(0)
np.random.seed(0)
torch.manual_seed(0);

# All the images are saved in image_folder
# All the labels are saved in the annotation_csv file
image_folder = '/scratch/vr1059/self-driving-data/data'
annotation_csv = '/scratch/vr1059/self-driving-data/data/annotation.csv'

# You shouldn't change the unlabeled_scene_index
# The first 106 scenes are unlabeled
unlabeled_scene_index = np.arange(106)
# The scenes from 106 - 133 are labeled
# You should devide the labeled_scene_index into two subsets (training and validation)
train_labeled_scene_index = np.arange(106, 128)
val_labeled_scene_index = np.arange(128, 132)
test_labeled_scene_index = np.arange(132, 134)

In [4]:
def unit_vector(vector):
    """ Returns the unit vector of the vector.  """
    return vector / np.linalg.norm(vector)

def angle_between(v1, v2):
    """ Returns the angle in radians between vectors 'v1' and 'v2'::

            >>> angle_between((1, 0, 0), (0, 1, 0))
            1.5707963267948966
            >>> angle_between((1, 0, 0), (1, 0, 0))
            0.0
            >>> angle_between((1, 0, 0), (-1, 0, 0))
            3.141592653589793
    """
    v1_u = unit_vector(v1)
    v2_u = unit_vector(v2)
    return np.degrees(np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0)))

In [5]:
class_label = 1
class_dict = dict()
reverse_class_dict = []
reverse_class_dict.append((-100, -100))
for i in range(400, 800, 50):
    for j in range(100, 600, 50):
        class_dict[(i, j)] = class_label
        class_label += 1
        reverse_class_dict.append((i, j))
        
class_dict[(-100, -100)] = 0

In [6]:
class_dict

{(400, 100): 1,
 (400, 150): 2,
 (400, 200): 3,
 (400, 250): 4,
 (400, 300): 5,
 (400, 350): 6,
 (400, 400): 7,
 (400, 450): 8,
 (400, 500): 9,
 (400, 550): 10,
 (450, 100): 11,
 (450, 150): 12,
 (450, 200): 13,
 (450, 250): 14,
 (450, 300): 15,
 (450, 350): 16,
 (450, 400): 17,
 (450, 450): 18,
 (450, 500): 19,
 (450, 550): 20,
 (500, 100): 21,
 (500, 150): 22,
 (500, 200): 23,
 (500, 250): 24,
 (500, 300): 25,
 (500, 350): 26,
 (500, 400): 27,
 (500, 450): 28,
 (500, 500): 29,
 (500, 550): 30,
 (550, 100): 31,
 (550, 150): 32,
 (550, 200): 33,
 (550, 250): 34,
 (550, 300): 35,
 (550, 350): 36,
 (550, 400): 37,
 (550, 450): 38,
 (550, 500): 39,
 (550, 550): 40,
 (600, 100): 41,
 (600, 150): 42,
 (600, 200): 43,
 (600, 250): 44,
 (600, 300): 45,
 (600, 350): 46,
 (600, 400): 47,
 (600, 450): 48,
 (600, 500): 49,
 (600, 550): 50,
 (650, 100): 51,
 (650, 150): 52,
 (650, 200): 53,
 (650, 250): 54,
 (650, 300): 55,
 (650, 350): 56,
 (650, 400): 57,
 (650, 450): 58,
 (650, 500): 59,
 (650,

In [7]:
def round_up(x):
    return int(math.ceil(x / 50.0)) * 50

def round_down(x):
    return round_up(x) - 50

In [8]:
def front_collate_fn(batch):
    front_imgs = []
    front_right_imgs = []
    front_left_imgs = []
    target = []
    road_imgs = []
    bbs = []
    for x in batch:
        # input
        front_left_imgs.append(torch.tensor(x[0][0]))
        front_imgs.append(torch.tensor(x[0][1]))
        front_right_imgs.append(torch.tensor(x[0][2]))
        road_imgs.append(torch.tensor(x[2]))
        
        # target
        bb_tens = x[1]['bounding_box']
        bbs.append(bb_tens)
        x_min = 800
        bb_cand = (-100, -100)
        
        for i, corners in enumerate(bb_tens):
            # Get bird's eye view coordinates. 
            point_squence = torch.stack([corners[:, 0], corners[:, 1], corners[:, 3], corners[:, 2]])
            xs = point_squence.T[0] * 10 + 400
            ys = -point_squence.T[1] * 10 + 400
            if xs[2] - xs[0] > 5:
                top_center_x, top_center_y = 0.5*(xs[2] + xs[3]), 0.5*(ys[2] + ys[3])
            else:
                top_center_x, top_center_y = 0.5*(xs[0] + xs[1]), 0.5*(ys[0] + ys[1])
                
            # We do (800 - top_center_y) because matplotlib y-axis starts from the top. 
            v1 = np.array([top_center_x - 400, 800 - top_center_y - 400])
            v2 = np.array([2, 0])
            
            if abs(angle_between(v1, v2)) <= 35 and x[1]['category'][i] not in [1, 3, 6, 8]:
                if top_center_x < x_min:
                    x_min = top_center_x
                    bb_cand = (top_center_x.item(), top_center_y.item())
                    
#         target.append(bb_cand)
#         classification logic
        if int(bb_cand[0]) == -100:
            target.append((0, bb_cand[0], bb_cand[1]))
        else:
            key = (round_down(bb_cand[0]), round_down(bb_cand[1]))
            if key not in class_dict:
                print(bb_cand)
            label = class_dict[key]
            target.append((label, bb_cand[0], bb_cand[1]))
                
    boom = torch.stack(front_imgs), torch.tensor(target), torch.stack(road_imgs), bbs, torch.stack(front_right_imgs), torch.stack(front_left_imgs)
    return boom

### Dataset

In [9]:
# The labeled dataset can only be retrieved by sample.
# And all the returned data are tuple of tensors, since bounding boxes may have different size
# You can choose whether the loader returns the extra_info. It is optional. You don't have to use it.
val_transform = transforms.ToTensor()
train_transform = transforms.Compose([
    transforms.RandomApply([
        transforms.ColorJitter(brightness = 0.5, contrast = 0.3, saturation = 0.2, hue = (-0.3, 0.3)),
        transforms.Grayscale(3)
    ]),
    transforms.ToTensor(),
])
labeled_trainset = LabeledDataset(image_folder=image_folder,
                                  annotation_file=annotation_csv,
                                  scene_index=train_labeled_scene_index,
                                  transform=train_transform,
                                  extra_info=True
                                 )
labeled_valset = LabeledDataset(image_folder=image_folder,
                                  annotation_file=annotation_csv,
                                  scene_index=val_labeled_scene_index,
                                  transform=val_transform,
                                  extra_info=True
                                 )

train_loader = torch.utils.data.DataLoader(labeled_trainset, batch_size=128, shuffle=True, collate_fn=front_collate_fn)
val_loader = torch.utils.data.DataLoader(labeled_valset, batch_size=128, shuffle=False, collate_fn=front_collate_fn)


In [10]:
# sample, target, road_img, bbs, front_right, front_left = iter(train_loader).next()
# idx = 0
# target
# preds = model(sample.to(device))
# preds
# idx += 1
# plt.imshow(sample[idx].cpu().detach().numpy().transpose(1, 2, 0))
# fig, ax = plt.subplots()
# ax.imshow(road_img[idx], cmap ='binary');
# ax.plot(400, 400, 'x', color="red")
# ax.plot(target[idx][0], target[idx][1], 'x', color="purple")
# # ax.plot(model_preds[idx][0].cpu().detach().numpy()*100, model_preds[idx][1].cpu().detach().numpy()*100, 'x', color="green")


In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class SimpleModel(nn.Module):
    def __init__(self):
        super(SimpleModel, self).__init__()
        
        self.encoder = torchvision.models.resnet18()
        self.encoder.fc = nn.Identity()
        
        self.classification = nn.Sequential(OrderedDict([
            ('linear1', nn.Linear(512, 81)),
        ]))
        
    def forward(self, x):
        x = self.encoder(x)
        return self.classification(x)
    
model = SimpleModel().to(device)

### Train/Val

In [12]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
best_val_loss = 100

In [13]:
def train():
    model.train()
    train_losses = []
    for i, (sample, target, road_img, bbs, front_right, front_left) in enumerate(train_loader):

        optimizer.zero_grad()

        sample = sample.to(device)
        target = target.to(device)
        target = target[:, 0]
        
        y_hat = model(sample)
#         target /= 100.
        
        loss = criterion(y_hat, target.long())
        
        train_losses.append(loss.item())

        loss.backward()
        optimizer.step()

        if i % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, i * len(sample), len(train_loader.dataset),
                10. * i / len(train_loader), loss.item()))
            
    print("\nAverage Train Epoch Loss: ", np.mean(train_losses))
            
def val():
    model.eval()
    val_losses = []
    for i, (sample, target, road_img, bbs, front_right, front_left) in enumerate(val_loader):

        model.eval()

        sample = sample.to(device)
        target = target.to(device)

        with torch.no_grad():
            target = target[:, 0]
            y_hat = model(sample)
            loss = criterion(y_hat, target.long())

            val_losses.append(loss.item())
            
    print("Average Validation Epoch Loss: ", np.mean(val_losses))
    global best_val_loss
    if np.mean(val_losses) < best_val_loss:
        best_val_loss = np.mean(val_losses)
        torch.save(model.state_dict(), 'best_val_loss_simple_classify.pt')

In [14]:
model = SimpleModel().to(device)
# model_dict = model.state_dict()
# pretrained_dict = torch.load('best_val_loss_simple.pt')
# pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
# model_dict.update(pretrained_dict) 
# model.load_state_dict(model_dict)

# for name, param in model.encoder.named_parameters():
#     if("bn" not in name):
#         param.requires_grad = False
        
# unfreeze_layers = [model.encoder.layer3, model.encoder.layer4]
# for layer in unfreeze_layers:
#     for param in layer.parameters():
#         param.requires_grad = True

In [15]:
epochs = 40
for epoch in range(epochs):
    train()
    val()

  # Remove the CWD from sys.path while we load stuff.
  # This is added back by InteractiveShellApp.init_path()
  if sys.path[0] == '':
  del sys.path[0]



Average Train Epoch Loss:  4.550322749397972
Average Validation Epoch Loss:  4.530222654342651

Average Train Epoch Loss:  4.549232721328735
Average Validation Epoch Loss:  4.538056015968323

Average Train Epoch Loss:  4.552551226182417
Average Validation Epoch Loss:  4.578994631767273

Average Train Epoch Loss:  4.5513082634318955
Average Validation Epoch Loss:  4.652528882026672

Average Train Epoch Loss:  4.549565141851252
Average Validation Epoch Loss:  4.665585994720459

Average Train Epoch Loss:  4.551037701693448
Average Validation Epoch Loss:  4.66670298576355

Average Train Epoch Loss:  4.551454955881292
Average Validation Epoch Loss:  4.66685152053833


KeyboardInterrupt: 

In [None]:
model.load_state_dict(torch.load('best_val_loss_simple.pt', map_location=device))

val_losses = []
for i, (sample, target, road_img, bbs, front_right, front_left) in enumerate(val_loader):
    
    model.eval()
    
    sample = sample.to(device)
    target = target.to(device)
    
    with torch.no_grad():
        target = target/100.

        y_hat = model(sample)
        target = target[:, :2]
        loss = criterion(y_hat, target)
        
        val_losses.append(loss.item())
    
    if i % 5 == 0:
        print('Val Epoch: {} [{}/{} ({:.0f}%)]\tAverage Loss So Far: {:.6f}'.format(
            0, i * len(sample), len(val_loader.dataset),
            5. * i / len(val_loader), np.mean(val_losses)))

In [None]:
labeled_testset = LabeledDataset(image_folder=image_folder,
                                  annotation_file=annotation_csv,
                                  scene_index=test_labeled_scene_index,
                                  transform=val_transform,
                                  extra_info=True
                                 )

test_loader = torch.utils.data.DataLoader(labeled_testset, batch_size=32, shuffle=False, collate_fn=front_collate_fn)

In [None]:
model.load_state_dict(torch.load('best_val_loss_simple.pt'))

test_losses = []
for i, (sample, target, road_img, bbs, front_right, front_left) in enumerate(test_loader):
    
    model.eval()
    
    sample = sample.to(device)
    target = target.to(device)
    
    with torch.no_grad():
        target = target/100.

        y_hat = model(sample)
        target = target[:, :2]
        loss = criterion(y_hat, target)
        
        test_losses.append(loss.item())
    
print("Average Test Loss: ", np.mean(test_losses))

In [None]:
test_loader = torch.utils.data.DataLoader(labeled_testset, batch_size=32, shuffle=True, collate_fn=front_collate_fn)

In [None]:
sample, target, road_img, bbs, front_right, front_left = iter(test_loader).next()
sample = sample.to(device)
model_preds = model(sample)

In [None]:
model_preds

In [None]:
idx = 5

In [None]:
idx += 1

In [None]:
plt.imshow(sample[idx].cpu().detach().numpy().transpose(1, 2, 0))

In [None]:
fig, ax = plt.subplots()
ax.imshow(road_img[idx], cmap ='binary');
ax.plot(400, 400, 'x', color="red")
ax.plot(target[idx][0], target[idx][1], 'x', color="blue")
ax.plot(model_preds[idx][0].cpu().detach().numpy()*100, model_preds[idx][1].cpu().detach().numpy()*100, 'x', color="green")


In [None]:
val_loader = torch.utils.data.DataLoader(labeled_valset, batch_size=20, shuffle=True, collate_fn=front_collate_fn)
sample, target, road_img, bbs, front_right, front_left = iter(val_loader).next()
sample = sample.to(device)
target = target.to(device)
model.eval()
target/100.

In [None]:
model(sample)

In [None]:
sample, target, road_img, bbs, front_right, front_left = iter(train_loader).next()

In [None]:
sample.shape

In [None]:
target

In [None]:
idx = 0
plt.imshow(sample[idx][:, :, :].numpy().transpose(1, 2, 0))
# plt.imshow(front_right[idx][:, :, :].numpy().transpose(1, 2, 0))

In [None]:
fig, ax = plt.subplots()
ax.imshow(road_img[idx], cmap ='binary');
ax.plot(400, 400, 'x', color="red")
ax.plot(target[idx][0], target[idx][1], 'x', color="blue")