In [1]:
import os
import sys
from tqdm import tqdm_notebook
from datetime import datetime
import pandas as pd
import numpy as np
from torchvision import models
import torch.nn as nn
import torch
import torch.optim as optim
## It's better to ensure the current path
os.chdir('../doc')
sys.path.append('../lib')

# Getting Started:
## A simple driving model training and evaluation pipeline using the Drive360 dataset and PyTorch.

## Loading data from Drive360 dataset.

The **dataset.py** file contains the 3 classes necessary for creating a Drive360Loader. Using the **config.json** file to specify the location of the csv and data directory, we can generate phase (train, validation, test) specific data loaders that can output samples from each set. Adjust the **dataset.py** to your preferred training framework.

In [2]:
import json
from dataset import Drive360Loader

# load the config.json file that specifies data 
# location parameters and other hyperparameters 
# required.
config = json.load(open('./config.json'))

normalize_targets = config['target']['normalize']
target_mean = config['target']['mean']
target_std = config['target']['std']

# create a train, validation and test data loader
train_loader = Drive360Loader(config, 'train')
validation_loader = Drive360Loader(config, 'validation')
test_loader = Drive360Loader(config, 'test')

# print the data (keys) available for use. See full 
# description of each data type in the documents.
print('Loaded train loader with the following data available as a dict.')
print(train_loader.drive360.dataframe.keys())

Phase: train # of data: 157125
Phase: validation # of data: 10338
Phase: test # of data: 28018
Loaded train loader with the following data available as a dict.
Index(['cameraRight', 'cameraFront', 'cameraRear', 'cameraLeft', 'canSteering',
       'canSpeed', 'chapter'],
      dtype='object')


In [None]:
os.getcwd()

## Training a basic driving model

Create your driving model. This is specific to your learning framework. 

Below we give a very basic dummy model that uses the front facing camera and a resnet34 + LSTM architecture to predict canSteering and canSpeed. 

In [3]:
from Non_local_pytorch.lib.non_local_embedded_gaussian import NONLocalBlock2D

In [4]:
class NonLocalModel(nn.Module):
    def __init__(self):
        super(NonLocalModel, self).__init__()
        final_concat_size = 0
        
        # Main CNN
        cnn = models.resnet34(pretrained=True)
        for i, layer in enumerate(cnn.children()):
            if i <= 6:
                for param in layer.parameters():
                    param.requires_grad = False
        self.features = nn.Sequential(
            *list(cnn.children())[0:4],
            NONLocalBlock2D(64),
            list(cnn.children())[4],
            NONLocalBlock2D(64),
            list(cnn.children())[5],
            NONLocalBlock2D(128),
            list(cnn.children())[6],
            NONLocalBlock2D(256),
            *list(cnn.children())[7:9]
        )
        # self.resnet_output = nn.Sequential(*list(cnn.children())[:-2])
        self.intermediate = nn.Sequential(nn.Linear(
                          cnn.fc.in_features, 128),
                          nn.ReLU())
        final_concat_size += 128

        # Main LSTM
        self.gru = nn.GRU(input_size=128,
                            hidden_size=64,
                            num_layers=3,
                            batch_first=False)
        final_concat_size += 64
        
        # Angle Regressor
        self.control_angle = nn.Sequential(
            nn.Linear(final_concat_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
        # Speed Regressor
        self.control_speed = nn.Sequential(
            nn.Linear(final_concat_size, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )
    
    def forward(self, data):
        module_outputs = []
        gru_i = []
        # Loop through temporal sequence of
        # front facing camera images and pass 
        # through the cnn.
        for k, v in data['cameraFront'].items():
            if torch.cuda.is_available():
                v = v.cuda()
            x = self.features(v)
            x = x.view(x.size(0), -1)
            x = self.intermediate(x)
            gru_i.append(x)
            # feed the current front facing camera
            # output directly into the 
            # regression networks.
            if k == 0:
                module_outputs.append(x)

        # Feed temporal outputs of CNN into LSTM
        self.gru.flatten_parameters()
        i_gru, _ = self.gru(torch.stack(gru_i))
        module_outputs.append(i_gru[-1])
        
        # Concatenate current image CNN output 
        # and LSTM output.
        x_cat = torch.cat(module_outputs, dim=-1)
        
        # Feed concatenated outputs into the 
        # regession networks.
        prediction = {'canSteering': torch.squeeze(self.control_angle(x_cat)),
                      'canSpeed': torch.squeeze(self.control_speed(x_cat))}
        return prediction

### Training and validation process

In [None]:
def train_nn(train_loader, validation_loader, model, optimizer, 
             criterion, epochs=5, validation=True, load_path='', save_path='', print_freq = 200):
    '''Training the model
    Args:
        validation: boolean, whether process validation
        load_path: string, the model weights file to load
        save_path: string, the model weights file to save
    Returns:
    '''
    
    if torch.cuda.is_available():
        model = model.cuda()
        # model = nn.DataParallel(model)
        
    if os.path.exists(load_path):
        print('='*10 + 'loading weights from ' + load_path + '='*10)
        checkpoint = torch.load(load_path)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        print('='*10 + 'loading finished' + '='*10)
    else:
        print('load_path does not exist!')
        
    
    train_metrics = {
        'angle_loss': {},
        'speed_loss': {}
    }
    validation_metrics = {
        'angle_loss': {},
        'speed_loss': {}
    }
    for epoch in range(epochs):
        print('='*10 + 'start ' + str(epoch+1) + ' epoch' + '='*10)
        model.train()
        angle_loss = 0.0
        speed_loss = 0.0
        since = datetime.now()
        cnt = 0
        for batch_idx, (data, target) in enumerate(tqdm_notebook(train_loader)):
            optimizer.zero_grad()
            prediction = model(data)
            loss1 = criterion(prediction['canSteering'], target['canSteering'].cuda())
            loss2 = criterion(prediction['canSpeed'], target['canSpeed'].cuda())
            loss = loss1 + loss2
            loss.backward()
            optimizer.step()
            
            # print statistics
            angle_loss += loss1.item()
            speed_loss += loss2.item()
            cnt += 1
#             if batch_idx > 5:
#                 break
            if (batch_idx+1) % print_freq == 0:
                if normalize_targets:
                    angle_loss = (angle_loss * target_std['canSteering']**2) / cnt
                    speed_loss = (speed_loss * target_std['canSpeed']**2) / cnt
                else:
                    angle_loss /= cnt
                    speed_loss /= cnt
                train_metrics['angle_loss'].setdefault(str(epoch), []).append(angle_loss)
                train_metrics['speed_loss'].setdefault(str(epoch), []).append(speed_loss)
                print('[epoch: %d, batch: %5d] time: %.2f angle_loss: %.2f speed_loss: %.2f' %
                      (epoch + 1, batch_idx + 1, (datetime.now() - since).total_seconds(), angle_loss, speed_loss))
                angle_loss = 0.0
                speed_loss = 0.0
                since = datetime.now()
                cnt = 0
        print('='*10 + 'saving the model to' + save_path + '='*10)
        torch.save({
            "model_state_dict":model.state_dict(),
            "angle_loss": angle_loss,
            "speed_loss": speed_loss,
            "optimizer_state_dict":optimizer.state_dict(),
            "epoch":epoch
            }, save_path)
        print('saving success!')
        if validation:
            print('='*10 + 'starting validation' + '='*10)
            model.eval()
            with torch.no_grad():
                for batch_idx, (data, target) in enumerate(tqdm_notebook(validation_loader)):
                    if torch.cuda.is_available():
                        for w in ['canSteering', 'canSpeed']:
                            target[w].cuda()
                    prediction = model(data)
                    mse1 = (np.square(prediction['canSteering'].cpu() - target['canSteering'].cpu())).mean()
                    mse2 = (np.square(prediction['canSpeed'].cpu() - target['canSpeed'].cpu())).mean()
                    if normalize_targets:
                        mse1 = mse1 * target_std['canSteering'] ** 2
                        mse2 = mse2 * target_std['canSpeed'] ** 2
                    validation_metrics['angle_loss'].setdefault(str(epoch), []).append(mse1)
                    validation_metrics['speed_loss'].setdefault(str(epoch), []).append(mse2)
            print('angle_loss: %.2f speed_loss: %.2f' % (np.mean(validation_metrics['angle_loss'][str(epoch)]), 
                  np.mean(validation_metrics['speed_loss'][str(epoch)])))
            print('='*10 + 'validation finished' + '='*10)
    return train_metrics, validation_metrics

In [None]:
NOW = datetime.now().strftime("%m-%d-%H_%M")
MODEL_NAME = 'non_local'

if not os.path.isdir(os.path.join('../output', MODEL_NAME)):
    os.mkdir(os.path.join('../output', MODEL_NAME))

LOAD_PATH = os.path.join('../output', MODEL_NAME, '10-08-18:11_non_local.pth')
SAVE_PATH = os.path.join('../output', MODEL_NAME, NOW + '_' + MODEL_NAME + '.pth')

model = NonLocalModel()
criterion =nn.MSELoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.0001)

result = train_nn(train_loader, validation_loader, model, optimizer, criterion, epochs=5, 
                  validation=True, load_path=LOAD_PATH, save_path=SAVE_PATH, print_freq=200)


## Creating a submission file.

In [5]:
def add_results(results, output):
    steering = np.squeeze(output['canSteering'].cpu().data.numpy())
    speed = np.squeeze(output['canSpeed'].cpu().data.numpy())
    if normalize_targets:
        steering = (steering*target_std['canSteering'])+target_mean['canSteering']
        speed = (speed*target_std['canSpeed'])+target_mean['canSpeed']
    if np.isscalar(steering):
        steering = [steering]
    if np.isscalar(speed):
        speed = [speed]
    results['canSteering'].extend(steering)
    results['canSpeed'].extend(speed)

We use pandas to create a submission file which is simply a 2-column csv with a canSteering and canSpeed prediction for each row in the **drive360_test.csv** a total of 305437 rows/predictions not including the header. See the **sample_submission.csv** file as an example.

IMPORTANT: for the test phase indices will start 10s (100 samples) into each chapter this is to allow challenge participants to experiment with different temporal settings of data input. If challenge participants have a greater temporal length than 10s for each training sample, then they must write a custom function here. Please check out the **dataset.py** file for additional explanation.

In [6]:
results = {'canSteering': [],
           'canSpeed': []}

model = NonLocalModel()
model.cuda()
MODEL_NAME = 'non_local'
LOAD_PATH = os.path.join('../output', MODEL_NAME, '10-08-20_07_non_local.pth')
checkpoint = torch.load(LOAD_PATH)
model.load_state_dict(checkpoint['model_state_dict'])

with torch.no_grad():
    for batch_idx, (data, target) in enumerate(tqdm_notebook(test_loader)):
        prediction = model(data)
        add_results(results, prediction)

df = pd.DataFrame.from_dict(results)

HBox(children=(IntProgress(value=0, max=1752), HTML(value='')))




In [8]:
## linear interpolation
from scipy.interpolate import interp1d
file = os.path.join('../output', MODEL_NAME, 'submission.csv')
output = {
    'canSteering': [],
    'canSpeed': []
}
test_sample = pd.read_csv('../data/test_sample1.csv')
curr_list = test_sample['chapter'].value_counts()-9
test_full = pd.read_csv('../data/test_full.csv')
target_list = test_full['chapter'].value_counts()-100
k = 0
for ch in test_sample['chapter'].unique():
    curr_num = curr_list[ch]
    target_num = target_list[ch]
    x = list(range(100, 100+10*curr_num, 10))
    # x.insert(0, 0)
    x.append(target_num+100)

    newx = list(range(101, target_num+101))
    y1, y2 = list(df.iloc[k:(k+curr_num),0]), list(df.iloc[k:(k+curr_num),1])
    #y1.insert(0, y1[0])
    #y1.insert(0, y1[0])
    y1.append(y1[-1])
    f1 = interp1d(x, y1, kind='linear')
    output['canSteering'].extend(f1(newx))
    
    #y2.insert(0, y2[0])
    #y2.insert(0, y2[0])
    y2.append(y2[-1])
    f2 = interp1d(x, y2, kind='linear')
    output['canSpeed'].extend(f2(newx))
    k += curr_num
    
output_df = pd.DataFrame(output)
print(len(output_df))

279863


In [12]:
output_df.to_csv(file, index=False)

In [19]:
df.head()

Unnamed: 0,canSteering,canSpeed
0,-13.520702,10.831070
1,-5.570440,12.634859
2,-7.053128,9.654808
3,-7.493575,11.772408
4,-7.230843,11.571272
...,...,...
28013,-7.505049,13.847966
28014,-3.342599,11.367834
28015,-2.805348,13.475492
28016,-5.925328,11.921484


In [14]:
a = pd.read_csv('../output/non_local/submission2.csv')
b = pd.read_csv('../output/non_local/submission3.csv')

In [20]:
a.head(40)

Unnamed: 0,canSteering,canSpeed
0,-9.247024,13.016008
1,-8.991435,13.033178
2,-8.735847,13.050349
3,-8.480258,13.067519
4,-8.22467,13.08469
5,-7.969081,13.10186
6,-7.713493,13.11903
7,-7.457904,13.136201
8,-7.202316,13.153371
9,-6.946728,13.170542


In [17]:
b.head(20)

Unnamed: 0,canSteering,canSpeed
0,-12.725676,11.011449
1,-11.93065,11.191828
2,-11.135624,11.372207
3,-10.340598,11.552586
4,-9.545571,11.732965
5,-8.750545,11.913343
6,-7.955519,12.093722
7,-7.160493,12.274101
8,-6.365466,12.45448
9,-5.57044,12.634859


In [None]:
!pip3 freeze > requirements.txt