In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
import os, os.path 
import numpy
import pickle
from glob import glob

"""Change to the data folder"""
new_path = "./new_train/new_train"
test_path = './new_val_in/new_val_in'
# number of sequences in each dataset
# train:205942  val:3200 test: 36272 
# sequences sampled at 10HZ rate

### Create a dataset class 

In [2]:
class ArgoverseDataset(Dataset):
    """Dataset class for Argoverse"""
    def __init__(self, data_path: str, transform=None):
        super(ArgoverseDataset, self).__init__()
        self.data_path = data_path
        self.transform = transform

        self.pkl_list = glob(os.path.join(self.data_path, '*'))
        self.pkl_list.sort()
        
    def __len__(self):
        return len(self.pkl_list)

    def __getitem__(self, idx):

        pkl_path = self.pkl_list[idx]
        with open(pkl_path, 'rb') as f:
            data = pickle.load(f)
            
        if self.transform:
            data = self.transform(data)

        return data


# intialize a dataset
val_dataset  = ArgoverseDataset(data_path=new_path)
test_dataset = ArgoverseDataset(data_path=test_path)

### Create a loader to enable batch processing

In [3]:
batch_sz = 4

def train_collate(batch):
    """ collate lists of samples into batches, create [ batch_sz x agent_sz x seq_len x feature] """
# #     inp = [numpy.dstack([scene['p_in'], scene['v_in']]) for scene in batch]
#     inp = [numpy.dstack([scene['p_in']]) for scene in batch]
# #     out = [numpy.dstack([scene['p_out'], scene['v_out']]) for scene in batch]
#     out = [numpy.dstack([scene['p_out']]) for scene in batch]
    inp = [numpy.dstack([scene['p_in'][scene['track_id'][:,0,0]==scene['agent_id'],:,:], scene['v_in'][scene['track_id'][:,0,0]==scene['agent_id'],:,:]]) for scene in batch]
#     out = [numpy.dstack([scene['p_out'], scene['v_out']]) for scene in batch]
    out = [numpy.dstack([scene['p_out'][scene['track_id'][:,0,0]==scene['agent_id'],:,:], scene['v_out'][scene['track_id'][:,0,0]==scene['agent_id'],:,:]]) for scene in batch]
    inp = torch.Tensor(inp)
    out = torch.Tensor(out)
    return [inp, out]

def test_collate(batch):
    """ collate lists of samples into batches, create [ batch_sz x agent_sz x seq_len x feature] """
    inp = [numpy.dstack([scene['p_in'][scene['track_id'][:,0,0]==scene['agent_id'],:,:], scene['v_in'][scene['track_id'][:,0,0]==scene['agent_id'],:,:]]) for scene in batch]
    inp = torch.Tensor(inp)
    idx = [numpy.dstack([scene['scene_idx']]) for scene in batch]
    return inp, idx
    
val_loader = DataLoader(val_dataset,batch_size=batch_sz, shuffle = False, collate_fn=train_collate, num_workers=0)

test_loader = DataLoader(test_dataset,batch_size=batch_sz, shuffle = False, collate_fn=test_collate, num_workers=0)

### Visualize the batch of sequences

In [4]:
import matplotlib.pyplot as plt
import random

agent_id = 0

def show_sample_batch(sample_batch, agent_id):
    """visualize the trajectory for a batch of samples with a randon agent"""
    inp, out = sample_batch
    batch_sz = inp.size(0)
    agent_sz = inp.size(1)
    
    fig, axs = plt.subplots(1,batch_sz, figsize=(15, 3), facecolor='w', edgecolor='k')
    fig.subplots_adjust(hspace = .5, wspace=.001)
    axs = axs.ravel()   
    for i in range(batch_sz):
        axs[i].xaxis.set_ticks([])
        axs[i].yaxis.set_ticks([])
        
        # first two feature dimensions are (x,y) positions
        axs[i].scatter(inp[i, agent_id,:,0], inp[i, agent_id,:,1])
        axs[i].scatter(out[i, agent_id,:,0], out[i, agent_id,:,1])

In [29]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class NN(nn.Module):
    """This class defines your deep learning model that extends a Module class
      The constructor of your class defines the layers of the model. 
      The forward() function defines how to forward propagate 
      input through the defined layers of the model.

    """
    def __init__(self):
        super(NN, self).__init__()

        self.fc1 = nn.Linear(128 * 4 * 4, 120)  # 4*4 from image dimension
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.flatten = nn.Flatten()
        

    def forward(self, x):
        x = self.flatten(x)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.softmax(self.fc3(x))
        return x

# class NN(nn.Module):
#     """This class defines your deep learning model that extends a Module class
#       The constructor of your class defines the layers of the model. 
#       The forward() function defines how to forward propagate 
#       input through the defined layers of the model.

#     """
#     def __init__(self):
#         super(NN, self).__init__()
# #         self.flatten = nn.Flatten()
#         self.LSTM_stack = nn.Sequential(
#             nn.LSTM(input_size=4, hidden_size=4, num_layers=1, batch_first=True),
#         )
#         self.LSTM_many = nn.LSTM(input_size=4, hidden_size=4, num_layers=1, batch_first=True)

#     def forward(self, x):
# #         print(x.shape)
#         x = self.LSTM_stack(x)
# #         x = torch.reshape(x, (4,60,76))
# #         x = x.float()
# #         print(x[0][:,-1,:])
#         output = x[0][:,-1,:].reshape((-1,1,4))
# #         print(output.shape)
# #         print(x[0][:,-1,:].reshape((4,1,2)))
#         for i in range(29):
#             x = self.LSTM_many(x[0][:,-1,:].reshape((-1,1,4),x[1]))
#             output = torch.cat((output, x[0]), 1)
# #             print(output.shape)
# #         print(output.shape)
#         return output

In [30]:
print(NN())

NN(
  (fc1): Linear(in_features=2048, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
  (flatten): Flatten(start_dim=1, end_dim=-1)
)


In [31]:
from tqdm import tqdm_notebook as tqdm

def train(model, device, train_loader, optimizer, epoch, log_interval=10000):
    model.train()
    iterator = tqdm(train_loader, total=int(len(train_loader)))
    counter = 0
    MSELoss = nn.MSELoss()
    for i_batch, sample_batch in enumerate(train_loader):
        inp, out = sample_batch
#         print("myinp", inp.shape)
#         print("mytryinp", inp[:,0,:,:].shape)
#         print("myout", out.shape)
#         print("mytryout", out[:,0,:,:].shape)
        inp = inp[:,0,:,:]
        out = out[:,0,:,:]
        """TODO:
          Deep learning model
          training routine
        """
        data, target = inp.to(device), out.to(device)
        optimizer.zero_grad()
        output = model(data)
        #print(output[0])
#         print("OURPUT", output[0].shape)
#         print("TARGET", target.shape)
        #target = torch.argmax(target, 1)
        loss = MSELoss(output, target)
        loss.backward()
        optimizer.step()
        counter += 1
        iterator.set_postfix(loss=(loss.item()*data.size(0) / (counter * train_loader.batch_size)))
#         print(out.shape)
#         show_sample_batch(sample_batch, agent_id)
#         break

In [32]:
def predict(model, device, test_loader):
    model.eval()
    counter = 0
    all_outs = numpy.zeros((1,60))
    idx_out = []
#     print(all_outs.shape)
    with torch.no_grad():
        for data, idx in test_loader:
            data= data[:,0,:,:]
            data= data.to(device)
            output = model(data)
            output = output[:,:,0:2].reshape(-1,60)
            arr = output.data.cpu().numpy()
#             print(arr.shape)
            all_outs = numpy.append(all_outs, arr, axis=0)
#             print(all_outs)
#             print(idx)
#             print('')
            idx_out.append(int(idx[0][0][0][0]))
            idx_out.append(int(idx[1][0][0][0]))
            idx_out.append(int(idx[2][0][0][0]))
            idx_out.append(int(idx[3][0][0][0]))
#     print(all_outs.shape)
#     print(idx_out[0])
#     print(len(idx_out))
    all_outs = all_outs[1:,:]
#     all_outs = numpy.insert(all_outs, 0, idx_out, axis=1)
#     for i in range(len(all_outs)):
#         print(all_outs[i][0])
#         all_outs[i][0] = idx_out[i]
#     print(all_outs.shape)\
    print(all_outs)
    return all_outs, idx_out

In [33]:
device = "cuda"
model = NN().to(device) #using gpu here
optimizer = optim.Adagrad(model.parameters(), lr=0.01, lr_decay=0, weight_decay=0, initial_accumulator_value=0)
num_epoch = 1

for epoch in range(1, num_epoch + 1):
        train(model, device, val_loader, optimizer, epoch)
#         predict(model, device, test_loader)

HBox(children=(IntProgress(value=0, max=51486), HTML(value='')))

RuntimeError: mat1 dim 1 must match mat2 dim 0

In [None]:
output, idx_out = predict(model, device, test_loader)
print(output)

In [None]:
HEAD_STR = "v1,v2,v3,v4,v5,v6,v7,v8,v9,v10,v11,v12,v13,v14,v15,v16,v17,v18,v19,v20,v21,v22,v23,v24,v25,v26,v27,v28,v29,v30,v31,v32,v33,v34,v35,v36,v37,v38,v39,v40,v41,v42,v43,v44,v45,v46,v47,v48,v49,v50,v51,v52,v53,v54,v55,v56,v57,v58,v59,v60"

numpy.savetxt("outputs.csv", output, delimiter=",", header = HEAD_STR)

In [None]:
output.shape

In [None]:
import pandas as pd
df = pd.read_csv("outputs.csv")
df = df.rename(columns={'# v1': 'v1'})
df

In [None]:
df.insert(0, 'ID', idx_out)
df.to_csv("outputs.csv", index=False)

In [None]:
df

In [None]:
# numpy.loadtxt("oututs.csv", delimiter=',').shape

## TRYING SKLEARN.ENSEMBLE