In [None]:
# Install pyproj into the runtime before the imports below run.
# NOTE(review): pyproj is not imported directly in this notebook; presumably the
# local `utils` package needs it — confirm. Consider `%pip` and a pinned version.
!pip install pyproj

In [None]:
import sys
sys.path.insert(0,'/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/')  #Root folder
sys.path.insert(0,'/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/utils') # Libraries folder

from utils import map_vis_without_lanelet

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


import math
from numpy.random import default_rng
import time

from sklearn import preprocessing

from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import shapely

In [None]:
def read_data_from_file(file_name, split=1):
    """Load a comma-separated file and keep only its leading rows.

    Parameters
    ----------
    file_name : path of the CSV file to read.
    split : fraction of the rows to keep, counted from the top of the file
        (1 keeps everything; smaller values are handy while debugging).

    Returns
    -------
    pandas.DataFrame with the first ``floor(n_rows * split)`` rows.
    """
    full = pd.read_csv(file_name, sep=',')

    # Round down so a fractional row count never selects a partial extra row.
    n_keep = math.floor(len(full) * split)

    return full.head(n_keep)


def filter_data(df):
    """Return only the rows of ``df`` whose ``agent_type`` equals ``"car"``.

    The original index and column order are preserved; ``df`` is not modified.
    """
    is_car = df['agent_type'].eq("car")
    return df.loc[is_car]

def filter_length(df, length=40):
    """Keep only the trajectories that contain exactly ``length`` samples.

    Rows are grouped by ``object_id`` and a group is kept when it has exactly
    ``length`` non-null ``frame_id`` values.

    Parameters
    ----------
    df : DataFrame with at least ``object_id`` and ``frame_id`` columns.
    length : required number of samples per trajectory. Defaults to 40, the
        value that used to be hard-coded here (the original comment notes this
        works because recordings are made with a maximum timestamp of 4000).

    Returns
    -------
    DataFrame with the rows of the qualifying trajectories, in their original
    order and with their original index.
    """
    # A plain label (not a one-element list) avoids pandas' tuple-key handling
    # for single-key groupbys; the resulting groups are the same.
    by_object = df.groupby("object_id")

    return by_object.filter(lambda traj: traj['frame_id'].count() == length)


In [None]:
def preprocess_dataframe(df):
    """Turn the raw recording table into fixed-length, sub-sampled car tracks.

    Steps, in order:
      1. build a per-trajectory integer ``object_id`` from ``case_id`` and
         ``track_id``;
      2. keep only cars (``filter_data``);
      3. keep only trajectories of the fixed length (``filter_length``);
      4. order the rows by ``object_id`` (row order inside a trajectory is
         preserved);
      5. keep every second row;
      6. keep the columns ``case_id, object_id, x, y, vx, vy, psi_rad``.

    The input frame is left untouched. Unlike the previous implementation,
    which rebuilt the table through a NumPy object array, the column dtypes
    are preserved and an empty selection yields an empty frame instead of an
    IndexError.

    Parameters
    ----------
    df : DataFrame with at least the columns ``case_id``, ``track_id``,
        ``agent_type``, ``frame_id``, ``x``, ``y``, ``vx``, ``vy``, ``psi_rad``.

    Returns
    -------
    DataFrame with the seven columns listed above.
    """
    # Work on a copy so the caller's DataFrame does not silently gain a column.
    df = df.copy()

    # Create a synthetic index variable for easier indexing and search
    df['object_id'] = df['case_id'].astype(int).astype(str) + "-" + df['track_id'].astype(str)

    # Convert to unique integer object identifier
    le = preprocessing.LabelEncoder()
    df['object_id'] = le.fit_transform(df['object_id'])

    # Select only vehicles
    df = filter_data(df)

    # Select only same length sequences
    df = filter_length(df)

    # Order by object_id. A stable sort keeps the rows of each trajectory in
    # their original order, which is what concatenating the groupby groups did.
    df = df.sort_values('object_id', kind='stable').reset_index(drop=True)

    # Sub-sampling: keep every second row (original note: "to 5hz").
    # This stays aligned with trajectory boundaries only because every
    # trajectory kept by filter_length has the same, even number of rows.
    df = df.iloc[0::2]

    # Keep only important features, in the order expected downstream
    df = df[['case_id', 'object_id', 'x', 'y', 'vx', 'vy', 'psi_rad']]

    return df


In [None]:
import random

def make_data_splits(df, n_splits, name="train", steps=20, out_dir=None, seed=1):
    """Shuffle whole trajectories and write them out as ``n_splits`` CSV shards.

    Trajectories (rows sharing an ``object_id``) are shuffled as units, then
    the table is cut into ``n_splits`` consecutive blocks holding the same
    number of trajectories; the last block also receives the remainder.
    Each block is written to ``<out_dir>/_<name>_split-<i>.csv`` and its shape
    is printed. In the written files ``object_id`` is the first column.

    Parameters
    ----------
    df : DataFrame with an ``object_id`` column; every trajectory is expected
        to span ``steps`` rows.
    n_splits : number of shards to write (must be >= 1).
    name : tag used in the shard file names.
    steps : rows per trajectory (default 20, the previously hard-coded value).
    out_dir : destination directory; defaults to the project data folder that
        used to be hard-coded here.
    seed : seed of the shuffle (default 1, the previously hard-coded value).

    Raises
    ------
    ValueError if ``n_splits`` is smaller than 1.
    """
    import os

    if n_splits < 1:
        raise ValueError("n_splits must be a positive integer")

    if out_dir is None:
        out_dir = '/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/'

    # Shuffle with a private generator so the global `random` state is not
    # reset as a side effect; same seed, same permutation as before.
    ids = df["object_id"].unique().tolist()
    random.Random(seed).shuffle(ids)
    df = df.set_index("object_id").loc[ids].reset_index()

    total_trajectories = df.shape[0] // steps

    # Rows per shard: a whole number of trajectories.
    block_size = (total_trajectories // n_splits) * steps

    print("total size: "+str(df.shape))

    for ix in range(n_splits):
        start = ix * block_size

        if ix != n_splits - 1:
            df_split = df.iloc[start:start + block_size, :]
        else:  # if this is the last split, add the remaining rows
            df_split = df.iloc[start:, :]

        df_split.to_csv(os.path.join(out_dir, '_'+name+'_split-'+str(ix)+'.csv'), index=False)

        print(df_split.shape)


In [None]:
# Converts a polygon in lat,long to a polygon in x,y coords
def latlon2xy_polygon(P):
    """Project every vertex of polygon ``P`` and return the projected polygon.

    Each vertex is passed to the module-level ``projector.latlon2xy`` with the
    vertex's first coordinate as first argument and its second coordinate as
    second argument; the results, in the same vertex order, form the new
    polygon. ``projector`` must exist when this function is called.
    """
    vertices = shapely.get_coordinates(P)

    projected = [
        projector.latlon2xy(first, second)
        for first, second in zip(vertices[:, 0], vertices[:, 1])
    ]

    return Polygon(np.array(projected))

In [None]:
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import shapely

import geopandas as gpd
from shapely.geometry import Polygon

# Quadrilateral that delimits the region to be covered by the grid
# (vertices as given to latlon2xy_polygon, i.e. before projection).
CENTER = Polygon([
(0.0091415,0.008972),
(0.0091498,0.0093028),
(0.009005,0.0093289),
(0.0089777,0.0089797)
])

# Create a projector object to use the lat,lon to x,y method
projector = map_vis_without_lanelet.LL2XYProjector(0.0,0.0)

# Convert the polygon to x,y coordinates
CENTER = latlon2xy_polygon(CENTER)

xmin,ymin,xmax,ymax =  CENTER.bounds

# Size of one grid cell, in the projected x/y units
# (presumably meters — confirm against LL2XYProjector).
width = 5
height = 5

# Enough whole cells to cover the bounding box of CENTER
rows = int(np.ceil((ymax-ymin) /  height))
cols = int(np.ceil((xmax-xmin) / width))

# The grid starts at the top-left corner of the bounding box and is filled
# column by column, each column from top to bottom.
XleftOrigin = xmin
XrightOrigin = xmin + width
YtopOrigin = ymax
YbottomOrigin = ymax- height
polygons = []
for i in range(cols):
    Ytop = YtopOrigin
    Ybottom =YbottomOrigin
    for j in range(rows):
        polygons.append(Polygon([(XleftOrigin, Ytop), (XrightOrigin, Ytop), (XrightOrigin, Ybottom), (XleftOrigin, Ybottom)]))
        Ytop = Ytop - height
        Ybottom = Ybottom - height
    XleftOrigin = XleftOrigin + width
    XrightOrigin = XrightOrigin + width

# Store the cells as a shapefile in the current working directory
grid = gpd.GeoDataFrame({'geometry':polygons})
grid.to_file("grid.shp")

In [None]:
# Read train dataset
df = read_data_from_file('/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_MA.csv', split=1)
df = preprocess_dataframe(df)
# NOTE(review): `grid_labelization` is not defined in the visible part of this
# notebook and is called without arguments, so whatever it returns replaces the
# preprocessed frame. The recorded output of the split cell below shows 7
# columns, i.e. exactly what preprocess_dataframe returns — confirm whether
# this step is still wanted, and pass `df` explicitly if it is.
df = grid_labelization()

# Save the full training table next to the raw file
df.to_csv('/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/_train.csv',  index = False)

  for ix,seq in group:


In [None]:
# Create mini-data splits: shuffle the training trajectories and write them
# as 10 CSV shards named _train_split-<i>.csv
make_data_splits(df, 10, name='train')

total size: (804560, 7)
(80440, 7)
(80440, 7)
(80440, 7)
(80440, 7)
(80440, 7)
(80440, 7)
(80440, 7)
(80440, 7)
(80440, 7)
(80600, 7)


In [None]:
# Read the held-out dataset (the *_val.csv file), preprocess it the same way
# as the training data and save the result as _val.csv.
# Note: `df` is overwritten here, so the training frame is no longer available.
df = read_data_from_file('/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/DR_USA_Intersection_MA_val.csv', split=1)
df = preprocess_dataframe(df)
df.to_csv('/content/drive/Othercomputers/My Laptop/github-repositories/wavenet-trajectory/data/DR_USA_Intersection_MA/_val.csv',  index = False)

  for ix,seq in group:


In [None]:
# Create mini-data splits: 10 CSV shards named _test_split-<i>.csv.
# NOTE(review): `df` was loaded from the *_val.csv file in the previous cell,
# yet the shards are tagged "test" — confirm the intended naming.
make_data_splits(df, 10, name="test")

total size: (148460, 7)
(14840, 7)
(14840, 7)
(14840, 7)
(14840, 7)
(14840, 7)
(14840, 7)
(14840, 7)
(14840, 7)
(14840, 7)
(14900, 7)


In [None]:
def get_section_label(x,y):
    """Return the label of the region that contains the point ``(x, y)``.

    The point is tested against the module-level region polygons ``A`` to
    ``I`` (they must be defined before this function is called). When several
    regions contain the point the last one in alphabetical order wins; when
    none does, ``'U'`` is returned.

    Fix: a duplicated ``I.contains`` check used to overwrite the ``'I'`` label
    with ``'Z'``, so ``'I'`` could never be returned.
    """
    start_point = Point(x, y)

    # Looked up at call time so the polygons may be defined in a later cell.
    regions = (
        ('A', A), ('B', B), ('C', C),
        ('D', D), ('E', E), ('F', F),
        ('G', G), ('H', H), ('I', I),
    )

    entrance = 'U'

    # No early exit: every region is tested and a later match overrides an
    # earlier one, exactly like the former chain of independent `if`s.
    for label, region in regions:
        if region.contains(start_point):
            entrance = label

    return entrance


In [None]:
# Label every sample with the region that contains its (x, y) position.
# Iterating over the two coordinate columns avoids building a Series per row
# (as DataFrame.apply(axis=1) does) and also works on an empty frame.
dataset['section'] = [
    get_section_label(x, y) for x, y in zip(dataset['x'], dataset['y'])
]

In [None]:
# Samples that received the 'U' section label
a = dataset.loc[dataset['section'] == 'U']

In [None]:
# (rows, columns) of the 'U'-labelled subset
a.shape

(26961, 13)

In [None]:
# Inspect the first 10 'U'-labelled samples
a.head(10)

Unnamed: 0,case_id,track_id,frame_id,timestamp_ms,agent_type,x,y,vx,vy,psi_rad,length,width,section
1105,3.0,1,18,1800,pedestrian/bicycle,1003.87,989.406,-4.412,-1.508,,,,U
1120,3.0,1,19,1900,pedestrian/bicycle,1003.434,989.252,-4.298,-1.568,,,,U
1135,3.0,1,20,2000,pedestrian/bicycle,1003.012,989.094,-4.141,-1.602,,,,U
1150,3.0,1,21,2100,pedestrian/bicycle,1002.608,988.933,-3.946,-1.609,,,,U
1165,3.0,1,22,2200,pedestrian/bicycle,1002.224,988.773,-3.727,-1.591,,,,U
1180,3.0,1,23,2300,pedestrian/bicycle,1001.862,988.616,-3.506,-1.55,,,,U
1195,3.0,1,24,2400,pedestrian/bicycle,1001.522,988.464,-3.31,-1.498,,,,U
1210,3.0,1,25,2500,pedestrian/bicycle,1001.198,988.316,-3.164,-1.453,,,,U
1225,3.0,1,26,2600,pedestrian/bicycle,1000.886,988.173,-3.084,-1.425,,,,U
1240,3.0,1,27,2700,pedestrian/bicycle,1000.578,988.03,-3.061,-1.417,,,,U


In [None]:
# Non-null counts per (case_id, object_id, section) combination.
# NOTE(review): the result is never displayed and `group_id` is reassigned in
# the next cell, so this looks like leftover exploration — confirm before removing.
group_id = dataset.groupby(["case_id","object_id","section"]).count()

In [None]:
# Label every trajectory with the region of its first sample ('entrance')
# and of its last sample ('exit').

# Region polygons in test order; a later match overrides an earlier one.
_REGIONS = (
    ('A', A), ('B', B), ('C', C),
    ('D', D), ('E', E), ('F', F),
    ('G', G), ('H', H), ('I', I),
)


def _region_of(point, default='I'):
    """Return the label of the last region in _REGIONS containing ``point``.

    ``default`` is returned when no region contains the point; 'I' is the
    fallback this cell has always used for both entrance and exit.
    """
    label = default
    for name, region in _REGIONS:
        if region.contains(point):
            label = name
    return label


# Group by a plain label: with a one-element list pandas >= 2 yields 1-tuple
# keys, which cannot be compared against / mapped onto the object_id column.
group_id = dataset.groupby("object_id")

entrance_by_id = {}
exit_by_id = {}

for object_id, car in group_id:
    entrance_by_id[object_id] = _region_of(Point(car.x.iloc[0], car.y.iloc[0]))
    exit_by_id[object_id] = _region_of(Point(car.x.iloc[-1], car.y.iloc[-1]))

# Broadcast the per-trajectory labels to all rows in one pass instead of
# scanning the whole frame once per trajectory.
dataset['entrance'] = dataset['object_id'].map(entrance_by_id)
dataset['exit'] = dataset['object_id'].map(exit_by_id)

In [None]:
# Convert entrance/exit region pairs to maneuvers

# Trajectories that start and end in the same region count as 'straight'.
dataset.loc[dataset['entrance'] == dataset['exit'], 'maneuver'] = 'straight'

# (entrance, exit) -> maneuver for the four approaches B, D, F and H.
_MANEUVER_BY_ROUTE = {
    ('B', 'C'): 'right',
    ('D', 'E'): 'right',
    ('F', 'G'): 'right',
    ('H', 'A'): 'right',

    ('B', 'E'): 'straight',
    ('D', 'G'): 'straight',
    ('F', 'A'): 'straight',
    ('H', 'C'): 'straight',

    ('B', 'G'): 'left',
    ('D', 'A'): 'left',
    ('F', 'C'): 'left',
    ('H', 'E'): 'left',
}

for (_src, _dst), _label in _MANEUVER_BY_ROUTE.items():
    dataset.loc[(dataset['entrance'] == _src) & (dataset['exit'] == _dst), 'maneuver'] = _label

# Routes that start or end in region I (but not both) have no defined maneuver.
for _region in 'ABCDEFGH':
    dataset.loc[(dataset['entrance'] == 'I') & (dataset['exit'] == _region), 'maneuver'] = 'undefined'
    dataset.loc[(dataset['entrance'] == _region) & (dataset['exit'] == 'I'), 'maneuver'] = 'undefined'

# Every remaining combination is undefined as well. The fallback used to be
# spelled 'undefinded', which produced a second, distinct label for the same
# class; it now matches the label used above.
dataset['maneuver'] = dataset['maneuver'].fillna('undefined')

In [None]:
# Keep the rows whose entrance or exit label is 'I'.
# NOTE(review): 'I' is also the fallback label assigned by the labelling loop
# when a start/end point lies in no region — confirm that both cases are wanted.
df = dataset.loc[(dataset['entrance']=='I') | (dataset['exit']=='I')]

In [None]:
# Drop the entrance/exit label columns from the selection
df = df.drop(columns=["entrance","exit"])

In [None]:
def create_filtered_dataset(df, max_cars, max_radius, split, output_dir=None):
    """Write the clusters that contain exactly ``max_cars`` vehicles to a CSV.

    Rows are grouped by ``cluster_id``; a cluster is kept when it holds exactly
    ``max_cars`` distinct ``object_id`` values. The kept rows are ordered by
    cluster, then by object (row order inside an object is preserved), the
    ``object_id``, ``cluster_id`` and ``case_id`` columns are cast to int64 and
    the table is written to
    ``<output_dir>/<max_radius>m/center-<max_radius>m-<max_cars>c-<split>.csv``.

    Compared with the previous NumPy round-trip, the remaining column dtypes
    are preserved and an empty or ragged selection no longer raises.

    Parameters
    ----------
    df : DataFrame with at least ``cluster_id``, ``object_id`` and ``case_id``.
    max_cars : exact number of vehicles a cluster must contain.
    max_radius : only used to build the output folder and file name.
    split : tag appended to the file name (e.g. "train" or "test").
    output_dir : base folder of the output; defaults to the project data folder
        that used to be hard-coded here. The ``<max_radius>m`` sub-folder is
        created when missing; the base folder itself must already exist.

    Returns
    -------
    None. The result is only written to disk.
    """
    import os

    if output_dir is None:
        output_dir = '/content/drive/Othercomputers/My Laptop/github-repositories/GNN-trajectory-prediction-2023/data/DR_USA_Intersection_MA/'

    # Number of distinct vehicles in the cluster each row belongs to
    # (rows without a cluster_id get NaN and are therefore never selected).
    cars_per_cluster = df.groupby("cluster_id")["object_id"].transform("nunique")

    keep = (cars_per_cluster == max_cars) & df["object_id"].notna()

    # Two stable sorts give cluster-major, object-minor order while keeping the
    # original row order inside each object.
    filtered_df = (
        df.loc[keep]
        .sort_values("object_id", kind="stable")
        .sort_values("cluster_id", kind="stable")
        .reset_index(drop=True)
    )

    # Types of columns
    filtered_df = filtered_df.astype(
        {"object_id": "int64", "cluster_id": "int64", "case_id": "int64"}
    )

    # Save filtered dataset to file
    target_dir = os.path.join(output_dir, str(max_radius) + "m")
    if not os.path.isdir(target_dir):
        # Only the leaf folder is created, so a missing base folder (e.g. an
        # unmounted drive) still fails loudly.
        os.mkdir(target_dir)

    file_name = "center-" + str(max_radius) + 'm-' + str(max_cars) + 'c-' + split + '.csv'
    filtered_df.to_csv(os.path.join(target_dir, file_name), index=False)
   



In [None]:
# Export the clusters of `df` that contain exactly 3 vehicles;
# max_radius and split only determine the output folder / file name.
create_filtered_dataset(df, max_cars = 3, max_radius=20, split="test")