In [1]:
import pandas as pd
import numpy as np
#import seaborn as sns
#import matplotlib.pyplot as plt

import math
#from numpy.random import default_rng
#import time

from sklearn import preprocessing

In [2]:

def read_data_from_file(file_name, split=1):

    train_file = file_name
    df = pd.read_csv(train_file, sep=',')

    # Select a percentage from full dataset (used in debugging)
    df = df.head(math.floor(df.shape[0]*split))

    return df


def filter_data(df):

    # Select cars only
    df = df.loc[df['agent_type'] == "car"]
    return df

def filter_length(df):

    # Select only trajectories with the same length=40 and in the same time phase

    g = df.groupby(["object_id"])

    g_equal_len = g.filter(lambda x: x['frame_id'].count() == 40) # This works because the recordings are done with a timestamp_max =4000   # and (x['timestamp_ms'].ge(100).any() and x['timestamp_ms'].le(4000).any()))

    return g_equal_len


In [3]:
def preprocess_dataframe(df):

    # Create a synthetic index variable for easier indexing and search
    df['object_id']=df['case_id'].astype(str)+"-"+df['track_id'].astype(str)

    # Create a synthetic frame_id indexing
    df['frame_ix']=df['case_id'].astype(str)+"-"+df['frame_id'].astype(str)


    # Convert to unique object identifier
    le = preprocessing.LabelEncoder()
    df['object_id'] = le.fit_transform(df['object_id'])

    # Convert to unique frame_ix identifier
    le_frame = preprocessing.LabelEncoder()
    df['frame_ix'] = le_frame.fit_transform(df['frame_ix'])

    # Convert to unique case_id identifier
    le_case = preprocessing.LabelEncoder()
    df['case_id'] = le_case.fit_transform(df['case_id'])


    # Select only vehicles
    df = filter_data(df)

    # Select only same length sequences
    df = filter_length(df)

    # Order by object_id
    group = df.groupby(["object_id"])
    cars = list()

    for ix,seq in group:
        sub_seq = seq.to_numpy()
        cars.append(sub_seq)

    cars = np.array(cars)
    cars = cars.reshape(-1,cars.shape[2])
    df = pd.DataFrame(data = cars, columns = df.columns)

    df = df.iloc[0::2]  #sub-sampling to 5hz, skip one row

    # Add a sequential timestamp required by GluonTS library
    #timestamp = pd.date_range("2023-01-01", periods=df.shape[0], freq="S")
    #df['timestamp'] = timestamp

    # Keep only important features
    df = df.drop(columns=['track_id','timestamp_ms','agent_type','length','width','frame_id'])
    #df = df.drop(columns=['track_id','timestamp_ms','agent_type','length','width'])

    # Reorder columns
    df = df[['case_id', 'object_id', 'frame_ix','x','y', 'vx', 'vy', 'psi_rad']]

    return df


In [60]:
# Create the same splits than A. Quintanar et al.

training_files = ['/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_MA_train.csv',
                  '/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_EP0_train.csv',
                  '/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_EP1_train.csv']

#training_files = ['/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_MA_train.csv',
#                  '/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_GL_train.csv',
#                  '/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_EP0_train.csv']

#training_files = ['/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_MA_train.csv']
#training_files = ['/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_GL_train.csv']
#training_files = ['/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_EP0_train.csv']
#training_files = ['/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_EP1_train.csv']


#testing_files = ['/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_MA_val.csv',
#                  '/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_EP0_val.csv',
#                  '/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_EP1_val.csv']

#testing_files = ['/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_MA_val.csv',
#                  '/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_GL_val.csv',
#                  '/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_EP0_val.csv']



#testing_files = ['/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_MA_val.csv']
#testing_files = ['/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_EP0_val.csv']
#testing_files = ['/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_EP1_val.csv']
#testing_files = ['/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_GL_val.csv']



In [35]:
def group_files(filenames):

    df_full = pd.DataFrame()

    for ix,f in enumerate(filenames):
        df = read_data_from_file(f, split=1)
        df['case_id'] = chr(65+ix) + df['case_id'].astype(int).astype(str)  #Add a prefix letter to idendify the cases of each file

        df_full = pd.concat([df_full,df])

    return df_full

In [45]:
df = group_files(training_files)
df = preprocess_dataframe(df)
df.to_csv('/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/INTERACTION/_train_EP1.csv',  index = False)

  for ix,seq in group:


In [61]:
df = group_files(testing_files)
df = preprocess_dataframe(df)
df.to_csv('/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/INTERACTION/_val_GL.csv',  index = False)


  for ix,seq in group:
