## Install the package dependencies before running this notebook

In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import numpy 
import pickle
from glob import glob

"""
    number of trajectories in each city
    # austin --  train: 43041 test: 6325 
    # miami -- train: 55029 test:7971
    # pittsburgh -- train: 43544 test: 6361
    # dearborn -- train: 24465 test: 3671
    # washington-dc -- train: 25744 test: 3829
    # palo-alto -- train:  11993 test:1686

    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds
    
"""

'\n    number of trajectories in each city\n    # austin --  train: 43041 test: 6325 \n    # miami -- train: 55029 test:7971\n    # pittsburgh -- train: 43544 test: 6361\n    # dearborn -- train: 24465 test: 3671\n    # washington-dc -- train: 25744 test: 3829\n    # palo-alto -- train:  11993 test:1686\n\n    trajectories sampled at 10HZ rate, input 5 seconds, output 6 seconds\n    \n'

## Create a `torch.utils.data.Dataset` class for the trajectory data

In [2]:
from glob import glob
import pickle
import numpy as np

# Directory that holds the pickled trajectory files and submission.csv.
# Paths are built by plain string concatenation, so the trailing slash is required.
ROOT_PATH = "./Data/"

# City names; each is used as the file-name prefix of its "<city>_inputs" / "<city>_outputs" pickles.
cities = ["austin", "miami", "pittsburgh", "dearborn", "washington-dc", "palo-alto"]
# Names of the split sub-directories under ROOT_PATH (not referenced elsewhere in the visible notebook).
splits = ["train", "test"]

def get_city_trajectories(city="palo-alto", split="train", normalized=False, root_path=None):
    """Load the pickled trajectories of one city for one data split.

    Files are looked up as ``<root><folder>/<city>_inputs`` and
    ``<root><folder>/<city>_outputs``.

    Args:
        city: File-name prefix of the city to load (e.g. ``"austin"``).
        split: Which subset to return.
            * ``"train"`` -- the complete contents of the ``train`` folder.
            * ``"val"``   -- the last 20% of the ``train`` folder. Because
              ``"train"`` returns the whole file, these samples are also part
              of the training data, so this is not a held-out set.
            * anything else -- treated as a folder name that only contains an
              ``_inputs`` file (this is how ``"test"`` is loaded).
        normalized: Accepted for interface compatibility; currently ignored.
        root_path: Directory prefix to read from. It is concatenated as-is, so
            it must end with a path separator. Defaults to the module-level
            ``ROOT_PATH``.

    Returns:
        ``(inputs, outputs)`` as NumPy arrays. ``outputs`` is ``None`` for
        splits that have no targets. Presumably inputs are (N, 50, 2) and
        outputs (N, 60, 2) -- TODO confirm against the data files.

    Raises:
        FileNotFoundError: If an expected pickle file does not exist.
    """
    # Resolve the default lazily so an explicit root never touches the global.
    root = ROOT_PATH if root_path is None else root_path

    def _load(folder, suffix):
        # NOTE(security): pickle.load can execute arbitrary code embedded in
        # the file -- only point this at trusted data.
        # The context manager closes the handle even if unpickling fails.
        with open(root + folder + "/" + city + suffix, "rb") as fh:
            return np.asarray(pickle.load(fh))

    if split == "train":
        return _load("train", "_inputs"), _load("train", "_outputs")

    if split == "val":
        inputs = _load("train", "_inputs")
        outputs = _load("train", "_outputs")
        cut = int(len(inputs) * 0.8)  # first index of the trailing 20%
        return inputs[cut:], outputs[cut:]

    # Any other split (e.g. "test") ships inputs only.
    return _load(split, "_inputs"), None

class ArgoverseDataset(Dataset):
    """Map-style dataset over the trajectories of one city and split.

    Each item is ``(input, output)`` when the split has targets. For splits
    without targets (``get_city_trajectories`` returns ``outputs=None``, e.g.
    ``"test"``) each item is the input alone, so the dataset can still be
    indexed or wrapped in a ``DataLoader`` instead of raising ``TypeError``.

    Args:
        city: City name forwarded to ``get_city_trajectories``.
        split: Split name forwarded to ``get_city_trajectories``.
        transform: Optional callable applied to every item before it is
            returned. It receives exactly what ``__getitem__`` would return
            (a tuple for labelled splits, a single array otherwise).
    """
    def __init__(self, city: str, split:str, transform=None):
        super().__init__()
        self.transform = transform

        self.inputs, self.outputs = get_city_trajectories(city=city, split=split, normalized=False)

    def __len__(self):
        """Return the number of trajectories in the split."""
        return len(self.inputs)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` (transformed if a transform is set)."""
        if self.outputs is None:
            # Unlabelled split: there is no target to pair with the input.
            data = self.inputs[idx]
        else:
            data = (self.inputs[idx], self.outputs[idx])

        if self.transform:
            data = self.transform(data)

        return data

## Define the prediction model

In [3]:
from torch import nn, optim

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class Pred(nn.Module):
    """Fully connected encoder/decoder that maps a flattened trajectory to a forecast.

    The input is flattened to 100 values per sample, squeezed through a
    32-wide bottleneck, expanded to 120 values and returned as (N, 60, 2).
    """

    def __init__(self):
        super().__init__()
        # Encoder is built before the decoder so parameters are created in
        # the same order as the layer listing reads.
        self.encoder = self._mlp(100, 64, 64, 32, 32)
        self.decoder = self._mlp(32, 64, 64, 120, 120)

    @staticmethod
    def _mlp(*widths):
        """Stack Linear layers of the given widths with ReLU between them."""
        layers = []
        for n_in, n_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(n_in, n_out))
            layers.append(nn.ReLU())
        layers.pop()  # the last Linear is left without an activation
        return nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` to (N, 100) float32, run both stacks, return (N, 60, 2)."""
        flat = x.reshape(-1, 100).float()
        latent = self.encoder(flat)
        decoded = self.decoder(latent)
        return decoded.reshape(-1, 60, 2)

## Training/Testing

In [4]:
import pandas as pd

# Load the submission template; predictions are written into columns 1..120
# of consecutive rows, one city after another in the order of `cities`.
df = pd.read_csv(ROOT_PATH + 'submission.csv')
# Integer columns are cast to float32 so they can hold the float predictions.
int_col = df.select_dtypes(include=['int'])
for col in int_col.columns.values:
    df[col] = df[col].astype('float32')
row = 0  # next row of `df` to fill

batch_sz = 4  # batch size

for city in cities:
    print(city)

    # A fresh model and optimizer are trained for every city.
    pred = Pred()
    pred.to(device)
    opt = optim.Adam(pred.parameters(), lr=1e-3)
    train_dataset = ArgoverseDataset(city = city, split = 'train')
    # NOTE(review): batches are drawn in dataset order (no shuffle) -- confirm this is intended.
    train_loader = DataLoader(train_dataset, batch_size=batch_sz)

    pred.train()
    for epoch in range(20):

        total_loss = 0
        for batch_in, batch_out in train_loader:
            inp, out = batch_in.to(device), batch_out.to(device)
            preds = pred(inp)
            # Summed (not averaged) squared error over the batch.
            loss = ((preds - out) ** 2).sum()

            opt.zero_grad()
            loss.backward()
            opt.step()

            total_loss += loss.item()

        # Per-sample average of the summed squared error.
        print('epoch {} loss: {}'.format(epoch, total_loss / len(train_dataset)))

    print("TESTING " + city)
    test_dataset = ArgoverseDataset(city = city, split = 'test')

    # Inference only: switch to eval mode and skip autograd bookkeeping.
    pred.eval()
    with torch.no_grad():
        for sample_idx in range(len(test_dataset.inputs)):
            data = torch.from_numpy(test_dataset.inputs[sample_idx]).to(device)
            preds = pred(data)
            df.iloc[row, 1:121] = preds.cpu().numpy().ravel()
            row += 1

# index=False keeps the column layout of the template. The default would
# prepend the pandas index as an extra unnamed column, which breaks the
# submission format and shifts the 1:121 slice on the next run, because this
# overwrites the same file that is read above.
df.to_csv(ROOT_PATH + 'submission.csv', index=False)

austin
epoch 0 loss: 2524674.485589119
epoch 1 loss: 652546.2615164025
epoch 2 loss: 475704.8591288926
epoch 3 loss: 378950.5892921918
epoch 4 loss: 337259.00061049405
epoch 5 loss: 291565.58649857226
epoch 6 loss: 257968.50025270178
epoch 7 loss: 224989.3794912811
epoch 8 loss: 200301.48239147424
epoch 9 loss: 181224.5072772007
epoch 10 loss: 191084.2549121803
epoch 11 loss: 157415.52109909555
epoch 12 loss: 142209.89131933902
epoch 13 loss: 127455.50145216139
epoch 14 loss: 121927.51034248645
epoch 15 loss: 121855.51588447123
epoch 16 loss: 116049.30737673808
epoch 17 loss: 116463.90131671335
epoch 18 loss: 107066.73324713753
epoch 19 loss: 111174.58448914564
TESTING austin
miami
epoch 0 loss: 10091000.252191778
epoch 1 loss: 3110372.753052425
epoch 2 loss: 1877646.230336158
epoch 3 loss: 1864544.397161922
epoch 4 loss: 1117196.4919949116
epoch 5 loss: 935269.0361674565
epoch 6 loss: 1069368.7747359131
epoch 7 loss: 800309.9550166713
epoch 8 loss: 681561.9268989341
epoch 9 loss: 7323

## Validation

In [5]:
# val_dataset = ArgoverseDataset(city = 'austin', split = 'val')
# val_loader = DataLoader(val_dataset, batch_size=batch_sz)

# val_loss = 0
# for i_batch, sample_batch in enumerate(val_loader):
#     i, o = sample_batch
#     inp, out = i.to(device), o.to(device)
#     preds = pred(inp)
#     loss = ((preds - out) ** 2).sum()

#     val_loss += loss.item()
# print('loss: {}'.format(val_loss / len(val_dataset)))

## Sample a batch of data and visualize 

In [6]:
# import matplotlib.pyplot as plt
# import random


# def show_sample_batch(sample_batch):
#     """visualize the trajectory for a batch of samples"""
#     inp, out = sample_batch
#     batch_sz = inp.size(0)
#     agent_sz = inp.size(1)
    
#     fig, axs = plt.subplots(1,batch_sz, figsize=(15, 3), facecolor='w', edgecolor='k')
#     fig.subplots_adjust(hspace = .5, wspace=.001)
#     axs = axs.ravel()   
#     for i in range(batch_sz):
#         axs[i].xaxis.set_ticks([])
#         axs[i].yaxis.set_ticks([])
        
#         # first two feature dimensions are (x,y) positions
#         axs[i].scatter(inp[i,:,0], inp[i,:,1])
#         axs[i].scatter(out[i,:,0], out[i,:,1])

        
# for i_batch, sample_batch in enumerate(train_loader):
#     inp, out = sample_batch
#     print(inp.shape, out.shape)
#     break
#     """
#     TODO:
#       implement your Deep learning model
#       implement training routine
#     """
#     show_sample_batch(sample_batch)
#     break

In [7]:
# Prints a marker so a finished run is easy to spot in the output.
print("DONE")

DONE
