## Install the package dependencies before running this notebook

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import numpy 
import pickle
from glob import glob

"""
    number of trajectories in each city
    # austin --  train: 43041 test: 6325 
    # miami -- train: 55029 test:7971
    # pittsburgh -- train: 43544 test: 6361
    # dearborn -- train: 24465 test: 3671
    # washington-dc -- train: 25744 test: 3829
    # palo-alto -- train:  11993 test:1686

    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds
    
"""

'\n    number of trajectories in each city\n    # austin --  train: 43041 test: 6325 \n    # miami -- train: 55029 test:7971\n    # pittsburgh -- train: 43544 test: 6361\n    # dearborn -- train: 24465 test: 3671\n    # washington-dc -- train: 25744 test: 3829\n    # palo-alto -- train:  11993 test:1686\n\n    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds\n    \n'

## Create a Torch.Dataset class for the training dataset

In [2]:
from glob import glob
import pickle
import numpy as np

# Base directory of the dataset. get_city_trajectories reads the pickled files
# "<ROOT_PATH><split>/<city>_inputs" and "<ROOT_PATH><split>/<city>_outputs".
ROOT_PATH = "./Data/"

# City names used to build the pickle file names; the training and test cells
# iterate over this list in order.
cities = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"]
# Split names used as sub-directory names under ROOT_PATH. get_city_trajectories
# additionally accepts 'val', which it takes from the tail of the 'train' files.
splits = ["train", "test"]

def _load_pickle(path):
    """Load one pickled object from ``path`` and close the file afterwards."""
    # NOTE(security): pickle can execute arbitrary code while loading.
    # Only use this on dataset files that come from a trusted source.
    with open(path, "rb") as fh:
        return pickle.load(fh)


def get_city_trajectories(city="palo-alto", split="train", normalized=False,
                          root_path=None):
    """Load the trajectories of one city for the requested split.

    Files are read from ``<root>/<split>/<city>_inputs`` and
    ``<root>/<split>/<city>_outputs``.

    Args:
        city: City name used as the file-name prefix.
        split: ``"train"`` returns the first 80% of the training files,
            ``"val"`` returns the remaining 20% of the *training* files, and
            any other value (e.g. ``"test"``) loads ``<root>/<split>/`` in
            full, inputs only.
        normalized: Currently unused; kept so existing callers keep working.
        root_path: Optional dataset directory. Defaults to the module-level
            ``ROOT_PATH`` when ``None``.

    Returns:
        ``(inputs, outputs)`` as NumPy arrays. ``outputs`` is ``None`` for
        splits other than ``"train"`` / ``"val"``.
    """
    root = ROOT_PATH if root_path is None else root_path
    train_fraction = 0.8  # share of the training files used for 'train'

    # The validation split is carved out of the training files.
    source_split = "train" if split == "val" else split

    inputs = np.asarray(_load_pickle(os.path.join(root, source_split, city + "_inputs")))
    if split not in ("train", "val"):
        # No ground-truth outputs are loaded for the test split.
        return inputs, None

    outputs = np.asarray(_load_pickle(os.path.join(root, source_split, city + "_outputs")))

    # Both arrays are cut at the same index, derived from the number of inputs.
    cut = int(len(inputs) * train_fraction)
    if split == "train":
        return inputs[:cut], outputs[:cut]
    return inputs[cut:], outputs[cut:]

class ArgoverseDataset(Dataset):
    """Dataset of per-city Argoverse trajectories.

    Items are ``(input, output)`` pairs when ground-truth outputs were loaded
    (the 'train' and 'val' splits). For splits without outputs (e.g. 'test'),
    an item is the input trajectory alone.
    """

    def __init__(self, city: str, split: str, transform=None):
        """Load the trajectories of ``city`` for ``split``.

        Args:
            city: City name passed to ``get_city_trajectories``.
            split: Split name passed to ``get_city_trajectories``.
            transform: Optional callable applied to every item before it is
                returned from ``__getitem__``.
        """
        super().__init__()
        self.transform = transform

        self.inputs, self.outputs = get_city_trajectories(city=city, split=split, normalized=False)

    def __len__(self):
        """Return the number of trajectories in the split."""
        return len(self.inputs)

    def __getitem__(self, idx):
        """Return item ``idx``, after applying ``transform`` if one was given."""
        if self.outputs is None:
            # No targets for this split: indexing None would raise TypeError,
            # and a (input, None) pair cannot be batched by the default
            # DataLoader collate function, so return the input on its own.
            data = self.inputs[idx]
        else:
            data = (self.inputs[idx], self.outputs[idx])

        if self.transform:
            data = self.transform(data)

        return data

## Define the prediction model (MLP encoder-decoder)

In [3]:
from torch import nn, optim

class Pred(nn.Module):
    """Fully connected encoder-decoder that maps a flattened input trajectory
    to a flattened output trajectory.

    With the default arguments the network is identical to the original
    hard-coded one: 100 input features (50 steps x 2 coordinates) and
    120 output features (60 steps x 2 coordinates).

    Args:
        input_steps: Number of time steps in one input trajectory.
        output_steps: Number of time steps in one predicted trajectory.
        coord_dim: Number of coordinates per time step.
    """

    def __init__(self, input_steps=50, output_steps=60, coord_dim=2):
        super().__init__()

        self.input_steps = input_steps
        self.output_steps = output_steps
        self.coord_dim = coord_dim

        in_features = input_steps * coord_dim
        out_features = output_steps * coord_dim

        # Attribute names and layer order are unchanged so that existing
        # state_dict checkpoints keep loading.
        self.encoder = nn.Sequential(
            nn.Linear(in_features, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 32)
        )

        self.decoder = nn.Sequential(
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, out_features),
            nn.ReLU(),
            nn.Linear(out_features, out_features)
        )

    def forward(self, x):
        """Predict output trajectories.

        Args:
            x: Tensor whose elements can be reshaped to
                ``(-1, input_steps * coord_dim)``; it is cast to float32.

        Returns:
            Tensor of shape ``(batch, output_steps, coord_dim)``.
        """
        # Flatten every trajectory into one feature vector per sample.
        x = x.reshape(-1, self.input_steps * self.coord_dim).float()
        x = self.encoder(x)
        x = self.decoder(x)
        x = x.reshape(-1, self.output_steps, self.coord_dim)
        return x

In [4]:
# Build the model once and hand its parameters to an Adam optimizer;
# both objects are reused by the training, validation and test cells.
pred = Pred()
opt = optim.Adam(params=pred.parameters(), lr=0.001)

## Training

In [5]:
batch_sz = 4  # batch size
n_epochs = 10  # passes over each city's training data

pred.train()  # make sure the model is in training mode
# The same model and optimizer are trained on one city after another.
for city in cities:
    print(city)
    train_dataset = ArgoverseDataset(city = city, split = 'train')
    # Shuffle so every epoch visits the trajectories in a new order instead
    # of repeating the exact same sequence of mini-batches.
    train_loader = DataLoader(train_dataset, batch_size=batch_sz, shuffle=True)

    for epoch in range(n_epochs):

        total_loss = 0.0
        for inp, out in train_loader:
            preds = pred(inp)
            # Sum of squared errors over every coordinate in the batch.
            loss = ((preds - out) ** 2).sum()

            opt.zero_grad()
            loss.backward()
            opt.step()

            total_loss += loss.item()

        # Report the summed squared error per training trajectory.
        print('epoch {} loss: {}'.format(epoch, total_loss / len(train_dataset)))

austin
epoch 0 loss: 1634624.9248961676
epoch 1 loss: 509429.6373264348
epoch 2 loss: 372467.9686743659
epoch 3 loss: 309067.2268192622
epoch 4 loss: 284505.80188814073
epoch 5 loss: 246720.6271205819
epoch 6 loss: 215982.2516756155
epoch 7 loss: 196358.7343233841
epoch 8 loss: 176074.1210879532
epoch 9 loss: 164735.27213289405
miami
epoch 0 loss: 747228.862616736
epoch 1 loss: 595068.3758197145
epoch 2 loss: 548281.5615821584
epoch 3 loss: 419461.60409196414
epoch 4 loss: 406702.9160870471
epoch 5 loss: 504897.36161355523
epoch 6 loss: 352801.7256975416
epoch 7 loss: 340679.6941178063
epoch 8 loss: 339171.13813718245
epoch 9 loss: 379063.1786120027
pittsburgh
epoch 0 loss: 113867.86084644332
epoch 1 loss: 94283.5504309847
epoch 2 loss: 84175.75139716851
epoch 3 loss: 78688.81342648862
epoch 4 loss: 76707.91519718764
epoch 5 loss: 75735.75101396455
epoch 6 loss: 75769.27801170785
epoch 7 loss: 81864.08904278431
epoch 8 loss: 76945.29813044026
epoch 9 loss: 73318.09725041011
dearborn
ep

## Validation

In [18]:
val_dataset = ArgoverseDataset(city = 'austin', split = 'val')
val_loader = DataLoader(val_dataset, batch_size=batch_sz)

# Evaluate in eval mode and without autograd: no gradients are needed here,
# so tracking them would only waste memory and time.
was_training = pred.training
pred.eval()

val_loss = 0
with torch.no_grad():
    for inp, out in val_loader:
        preds = pred(inp)
        # Same sum-of-squared-errors metric as in the training loop.
        val_loss += ((preds - out) ** 2).sum().item()

# Restore the previous mode so a later training run is not affected.
pred.train(was_training)

# Summed squared error per validation trajectory.
print('loss: {}'.format(val_loss / len(val_dataset)))

loss: 209867.5162729818


## Test

In [38]:
import pandas as pd

# Load the submission template and make its integer columns float so that
# the float predictions can be written into them.
df = pd.read_csv(ROOT_PATH + 'submission.csv')
int_col = df.select_dtypes(include=['int'])
for col in int_col.columns.values:
    df[col] = df[col].astype('float32')
row = 0  # next row of the template to fill

# Inference only: switch to eval mode and disable autograd.
was_training = pred.training
pred.eval()
with torch.no_grad():
    # Rows are filled city by city, in the order of `cities`.
    for city in cities:
        print(city)
        test_dataset = ArgoverseDataset(city = city, split = 'test')

        for i in range(len(test_dataset.inputs)):
            preds = pred(torch.from_numpy(test_dataset.inputs[i]))
            # Positional columns 1..120 hold the flattened prediction.
            df.iloc[row, 1:121] = preds.numpy().ravel()
            row += 1
pred.train(was_training)

# index=False: writing the DataFrame index would prepend an extra unnamed
# column. Since this overwrites the file that is read above, a second run
# would then find the columns shifted by one.
df.to_csv(ROOT_PATH + 'submission.csv', index=False)

austin
miami
pittsburgh
dearborn
washington-dc
palo-alto


In [25]:
# Show positional columns 1..120 of the first row of the submission frame.
print(df.iloc[0, 1:121])

v0      0
v1      0
v2      0
v3      0
v4      0
       ..
v115    0
v116    0
v117    0
v118    0
v119    0
Name: 0, Length: 120, dtype: object


In [39]:
# Flatten the most recent prediction once, then print the whole vector
# followed by its first three entries.
flat_preds = preds.detach().numpy().ravel()
print(flat_preds)
for k in range(3):
    print(flat_preds[k])

[-13.622161  405.25217   -13.65951   405.29318   -13.73504   405.29575
 -13.8152275 405.3567    -13.901553  405.45712   -14.004041  405.52954
 -14.096269  405.60107   -14.116809  405.67938   -14.175305  405.77536
 -14.335941  405.90155   -14.325156  405.95773   -14.337722  405.95886
 -14.290741  405.98138   -14.521394  406.05032   -14.562526  406.08014
 -14.500876  406.12805   -14.506674  406.18036   -14.482896  406.2714
 -14.52232   406.27576   -14.611872  406.40244   -14.595396  406.41562
 -14.749883  406.4368    -14.769161  406.58658   -14.776707  406.57822
 -14.821599  406.62808   -14.867535  406.65683   -14.8936405 406.725
 -15.023832  406.79147   -14.873926  406.80676   -14.986387  406.95242
 -14.921481  406.97784   -15.079258  407.01758   -15.026589  407.03906
 -15.174707  407.0621    -15.126724  407.09937   -15.221514  407.14175
 -15.187416  407.13995   -15.3077345 407.336     -15.255352  407.24374
 -15.253522  407.3635    -15.387627  407.4743    -15.38082   407.32138
 -15.4574

## Sample a batch of data and visualize 

In [None]:
import matplotlib.pyplot as plt
import random


def show_sample_batch(sample_batch):
    """Visualize the trajectories of one batch, one subplot per sample.

    Args:
        sample_batch: ``(inp, out)`` pair of tensors indexed as
            ``[sample, step, feature]``; features 0 and 1 are plotted as the
            (x, y) position. Input and output points are drawn as two
            separate scatter series on the same axes.
    """
    inp, out = sample_batch
    batch_sz = inp.size(0)

    # squeeze=False keeps `axs` an array even for a single-sample batch;
    # otherwise plt.subplots returns a bare Axes and .ravel() fails.
    fig, axs = plt.subplots(1, batch_sz, figsize=(15, 3), facecolor='w',
                            edgecolor='k', squeeze=False)
    fig.subplots_adjust(hspace = .5, wspace=.001)
    axs = axs.ravel()
    for i in range(batch_sz):
        axs[i].xaxis.set_ticks([])
        axs[i].yaxis.set_ticks([])

        # first two feature dimensions are (x,y) positions
        axs[i].scatter(inp[i,:,0], inp[i,:,1])
        axs[i].scatter(out[i,:,0], out[i,:,1])

        
# Take the first batch of the current train_loader, report its shapes and
# plot it. The original cell had an extra `break` before the plotting call,
# which made the visualization unreachable.
for i_batch, sample_batch in enumerate(train_loader):
    inp, out = sample_batch
    print(inp.shape, out.shape)
    show_sample_batch(sample_batch)
    break