In [1]:
import torch
import torchvision
import torchvision.transforms.functional as TF
from sklearn.model_selection import train_test_split
import pandas as pd
from ast import literal_eval
import glob
import os
import numpy as np
from PIL import Image
import cv2
import matplotlib.pyplot as plt
import time
import import_ipynb
import utils

importing Jupyter notebook from utils.ipynb


In [2]:
class myJAAD(torch.utils.data.Dataset):
    """Dataset of fixed-length bounding-box sequences paired with the following sequence.

    Each item of ``self.data`` holds one observed sequence (``bounding_box``)
    and the sequence that follows it for the same ID (``future_bounding_box``).

    Sequences are either loaded from a pre-built CSV (``args.from_file``) or
    assembled from per-video annotation CSVs found under
    ``os.path.join(args.jaad_dataset, args.dtype)``.

    Attributes read from ``args``:
        from_file, file, citywalks      -- pre-built CSV path and layout switch
        jaad_dataset, dtype, seq_len    -- raw annotation location and sequence length
        sample, trainOrVal,
        n_train_sequences,
        n_val_sequences                 -- optional train/val sub-sampling
    """

    # Positions sampled from every sequence: every second entry of the first 17.
    # A sequence therefore needs at least 17 entries.
    _SAMPLED_FRAMES = list(range(0, 17, 2))

    # Annotation columns that are not used by this dataset.
    _UNUSED_COLUMNS = ['type', 'occlusion', 'nod', 'slow_down', 'speed_up', 'WALKING', 'walking',
                       'standing', 'looking', 'handwave', 'clear_path', 'CLEAR_PATH', 'STANDING',
                       'standing_pred', 'looking_pred', 'walking_pred', 'keypoints', 'crossing_pred']

    def __init__(self, args):
        if args.from_file:
            sequence_centric = self._load_sequences(args)
        else:
            sequence_centric = self._build_sequences(args)

        if args.sample:
            # Positional, half-open slices: exactly n_train / n_val rows and no
            # row shared between the two splits.
            n_train = args.n_train_sequences
            if args.trainOrVal == 'train':
                selected = sequence_centric.iloc[:n_train]
            elif args.trainOrVal == 'val':
                selected = sequence_centric.iloc[n_train:n_train + args.n_val_sequences]
            else:
                raise ValueError(
                    "args.trainOrVal must be 'train' or 'val' when args.sample is set, got %r"
                    % (args.trainOrVal,))
        else:
            selected = sequence_centric

        self.data = selected.copy().reset_index(drop=True)
        self.args = args
        self.dtype = args.dtype
        print(self.dtype, " loaded")

    @staticmethod
    def _load_sequences(args):
        """Read a pre-built sequence CSV and parse its stringified list columns."""
        sequence_centric = pd.read_csv(args.file)
        columns = sequence_centric.columns
        if not args.citywalks:
            # 'ID' is left as read from the CSV; KeyError if the column is missing.
            columns = columns.drop('ID')
        for column in columns:
            # Whole-column assignment so the parsed Python objects replace the
            # text column regardless of its original dtype.
            sequence_centric[column] = sequence_centric[column].apply(literal_eval)
        return sequence_centric

    def _build_sequences(self, args):
        """Assemble the sequence-centric frame from raw per-file annotation CSVs.

        Also stores the frame-level table on ``self.df``.
        """
        # read data
        frames = []
        next_id = 0
        for file in glob.glob(os.path.join(args.jaad_dataset, args.dtype, "*")):
            temp = pd.read_csv(file)
            if temp.empty:
                continue
            temp = temp.drop(columns=self._UNUSED_COLUMNS)
            temp['file'] = file

            # Assign a globally unique ID to each per-file ID. The mapping is
            # applied in one step so a freshly assigned ID can never be mistaken
            # for a not-yet-processed original ID.
            id_map = {}
            for old_id in temp.ID.unique():
                next_id += 1
                id_map[old_id] = next_id
            temp['ID'] = temp['ID'].map(id_map)

            # sort rows by ID and frames
            frames.append(temp.sort_values(['ID', 'frame'], axis=0))

        if not frames:
            raise ValueError("no non-empty annotation files found under %r"
                             % os.path.join(args.jaad_dataset, args.dtype))
        df = pd.concat(frames, ignore_index=True)
        print('reading files complete')

        # create sequence column (initially a copy of the ID)
        df.insert(0, 'sequence', df.ID)

        # NOTE(review): utils.compute_center is defined outside this file; it is
        # assumed to return the row with its ID/sequence/x/y/w/h fields intact.
        df = df.apply(lambda row: utils.compute_center(row), axis=1)
        df = df.reset_index(drop=True)

        # Drop the trailing rows of every track that do not fill a whole sequence.
        per_track = df.groupby('sequence', sort=False)
        position = per_track.cumcount()
        track_size = per_track['sequence'].transform('size')
        df = df.loc[position < track_size - track_size % args.seq_len].copy()
        print('frame drop complete')

        # Renumber the surviving IDs 0..k-1 in order of first appearance.
        df['ID'] = pd.factorize(df['ID'])[0]
        print('reindexing complete')

        df = df.reset_index(drop=True)
        self.df = df

        # Consecutive blocks of seq_len rows form one sequence.
        df['sequence'] = np.arange(len(df)) // args.seq_len
        print('sequence assignment complete')

        df['bounding_box'] = [list(box) for box in zip(df.x, df.y, df.w, df.h)]
        df['im_size'] = [list(size) for size in zip(df.im_w, df.im_h)]
        df = df.drop(columns=['x', 'y', 'w', 'h', 'im_w', 'im_h'])

        # create sequence-centric dataframe: one row per sequence, every column a list
        sequence_centric = df.groupby('sequence').agg(lambda column: column.tolist())
        sequence_centric['ID'] = sequence_centric['ID'].apply(lambda ids: ids[0])
        print('sequence centric complete')

        # Pair each sequence with the next sequence of the same ID: the targets
        # are all rows except the first of each ID, the inputs all rows except
        # the last of each ID.
        ids = sequence_centric['ID']
        future_boxes = sequence_centric.loc[ids.duplicated(keep='first'),
                                            'bounding_box'].reset_index(drop=True)
        sequence_centric = sequence_centric[ids.duplicated(keep='last')].reset_index(drop=True)
        sequence_centric['future_bounding_box'] = future_boxes
        return sequence_centric

    def __len__(self):
        """Number of (observed, future) sequence pairs."""
        return len(self.data)

    def __getitem__(self, index):
        """Return ``(obs_speed, true_speed, obs, true)`` for the pair at ``index``.

        ``obs`` / ``true`` are the 9 sampled entries of the observed / future
        sequence; ``obs_speed`` / ``true_speed`` are the 8 differences between
        consecutive sampled entries. Raises IndexError if a sequence has fewer
        than 17 entries.
        """
        seq = self.data.iloc[index]

        observed = torch.tensor(seq.bounding_box)
        future = torch.tensor(seq.future_bounding_box)

        obs = observed[self._SAMPLED_FRAMES]
        true = future[self._SAMPLED_FRAMES]

        obs_speed = obs[1:] - obs[:-1]
        true_speed = true[1:] - true[:-1]

        return obs_speed, true_speed, obs, true
    

In [None]:
def data_loader(args):
    """Build the train, validation and test DataLoaders.

    The train loader is built from ``args`` as given. The validation loader
    uses the same settings with ``trainOrVal='val'``. The test loader reads
    ``args.val_file`` with ``dtype='val'``, ``trainOrVal='test'`` and sampling
    disabled.

    ``args`` itself is not modified: every variant is configured on a shallow
    copy, so calling this function repeatedly with the same ``args`` yields the
    same three loaders.

    Returns:
        (train_loader, val_loader, test_loader)
    """
    import copy

    def _make_loader(cfg):
        # All three loaders share batching options; incomplete final batches
        # are dropped for every split (drop_last=True).
        return torch.utils.data.DataLoader(
            myJAAD(cfg), batch_size=cfg.batch_size, shuffle=cfg.loader_shuffle,
            pin_memory=cfg.pin_memory, num_workers=cfg.loader_workers, drop_last=True)

    train_loader = _make_loader(args)

    val_args = copy.copy(args)
    val_args.trainOrVal = 'val'
    val_loader = _make_loader(val_args)

    test_args = copy.copy(args)
    test_args.file = args.val_file
    test_args.dtype = 'val'
    test_args.trainOrVal = 'test'
    test_args.sample = False
    test_loader = _make_loader(test_args)

    return train_loader, val_loader, test_loader