In [1]:
# Import the helper that installs a simple entry in the warnings filter list.
from warnings import simplefilter
# Ignore every FutureWarning raised after this point (all modules, all lines).
# NOTE(review): this also hides deprecation notices from pandas/gluonts, such as
# the `freq=` argument of pd.Timestamp used further down — confirm that is intended.
simplefilter(action='ignore', category=FutureWarning)

In [None]:
!pip install gluonts==0.9
!pip install pytorchts

from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.evaluation import MultivariateEvaluator
from pts.model.time_grad import TimeGradEstimator
from pts import Trainer

In [3]:
# Put the project checkout at the front of the import search path so that its
# modules take precedence over installed packages with the same name.
# NOTE(review): these are absolute Google Drive mount paths; the notebook will not
# find the project modules on a machine where Drive is mounted elsewhere.
import sys
sys.path.insert(0,'/content/drive/Othercomputers/My Laptop/github-repositories/GNN-trajectory-prediction-2023/')  # Root folder
sys.path.insert(0,'/content/drive/Othercomputers/My Laptop/github-repositories/GNN-trajectory-prediction-2023/utils/') # Libraries folder

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle as pkl
import torch

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

# Train on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# EXPERIMENT PARAMETERS
in_timesteps = 8    # observed steps per trajectory (the estimator cell uses the same value as context_length)
out_timesteps = 12  # forecast steps per trajectory (the estimator cell uses the same value as prediction_length)
in_features = 5     # x, y, vx, vy, psi_rad
out_features = 2    # presumably x and y; not referenced by the cells shown here — TODO confirm

# Number of leading identifier columns that must not be scaled.
meta_features = 2 # "case_id","object_id",

total_features = in_features + meta_features # because we need to keep object_id features and other identifiers
total_timesteps = in_timesteps + out_timesteps  # rows expected per trajectory (8 + 12 = 20)

# Run name
#wandb_project_name = "intersection-motion-predictors"

# Input CSV files. Note that the "test" set used in this notebook is the _val.csv file.
train_dataset_path = "/content/drive/MyDrive/diffusion/data/DR_USA_Intersection_MA/_train.csv"
test_dataset_path = '/content/drive/MyDrive/diffusion/data/DR_USA_Intersection_MA/_val.csv'

# Pickle file holding the scaler fitted on the training data.
scaler_path = '/content/drive/MyDrive/diffusion/models/diffusion/scaler.pkl'

# Folder prefix (with trailing slash) used when writing the predictor and result pickles.
model_save_path = '/content/drive/MyDrive/diffusion/models/diffusion/'
# NOTE(review): model_reload_path appears unused — the reload cell reads from model_save_path.
model_reload_path = '/content/drive/MyDrive/diffusion/models/diffusion/'


def scale_dataframe(dataframe, scaler_path, scaler_reload = True, inverse_transform = False, scaler_type=None, n_meta_columns=None):
    """Scale (or un-scale) the feature columns of a trajectory dataframe.

    The first ``n_meta_columns`` columns are treated as identifiers and are
    passed through untouched; every remaining column is handed to the scaler.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Identifier columns first, feature columns after them.
    scaler_path : str
        Pickle file the scaler is read from (``scaler_reload=True``) or
        written to (``scaler_reload=False``).
    scaler_reload : bool, default True
        True  -> load an already fitted scaler from ``scaler_path`` and apply
                 it without refitting (validation / test data, de-normalizing).
        False -> fit ``scaler_type`` on ``dataframe`` and save it to
                 ``scaler_path`` (training data).
    inverse_transform : bool, default False
        When True the scaler's ``inverse_transform`` is applied instead of the
        forward transform.
    scaler_type : scaler instance, optional
        Unfitted scaler used when ``scaler_reload`` is False. Defaults to a
        fresh ``MinMaxScaler()`` per call. Ignored when a scaler is reloaded.
    n_meta_columns : int, optional
        Number of leading identifier columns. Defaults to the module-level
        ``meta_features``.

    Returns
    -------
    pandas.DataFrame
        Same columns as ``dataframe``; every column is coerced to a numeric
        dtype and ``case_id`` (when present) is downcast to the smallest
        integer type.
    """
    if n_meta_columns is None:
        n_meta_columns = meta_features

    values = dataframe.to_numpy()
    meta = values[:, 0:n_meta_columns]
    features = values[:, n_meta_columns:]

    if scaler_reload:
        # NOTE: unpickling runs arbitrary code; only load scaler files you created.
        scaler = pd.read_pickle(scaler_path)
    else:
        # Build the default here instead of in the signature, so calls never
        # share (and silently refit) one module-wide scaler instance.
        scaler = MinMaxScaler() if scaler_type is None else scaler_type

    if inverse_transform:
        features = scaler.inverse_transform(features)
    elif scaler_reload:
        # A reloaded scaler already carries the training statistics. Refitting
        # it here would scale this data with its own statistics while the
        # persisted scaler (used later to de-normalize) keeps the training ones.
        features = scaler.transform(features)
    else:
        features = scaler.fit_transform(features)

    if not scaler_reload:
        # Training pass: persist the fitted scaler for later reloads.
        with open(scaler_path, "wb") as outfile:
            pkl.dump(scaler, outfile)

    # Return a Pandas DataFrame with the original column labels.
    scaled = pd.DataFrame(data=np.concatenate((meta, features), axis=1), columns=dataframe.columns)
    for column in scaled.columns:
        if column == "case_id":
            scaled[column] = pd.to_numeric(scaled[column], downcast='integer')
        else:
            scaled[column] = pd.to_numeric(scaled[column])

    return scaled

In [4]:
# Read the training CSV and make the two identifier columns numeric in one chain.
# (Timestamp parsing is intentionally left disabled.)
dataframe = pd.read_csv(train_dataset_path, sep=',').assign(
    case_id=lambda frame: pd.to_numeric(frame["case_id"], downcast='integer'),
    object_id=lambda frame: pd.to_numeric(frame["object_id"]),
)

In [5]:
# get only a part of dataset for testing
#dataframe = dataframe.iloc[0:1000,:]

In [5]:
# Scale data: scaler_reload=False makes scale_dataframe use the StandardScaler passed
# here (instead of loading one) and write it to scaler_path for later reuse.
df = scale_dataframe(dataframe, scaler_path, scaler_reload = False, inverse_transform=False, scaler_type= StandardScaler())

In [6]:
# Create a validation split from the full (scaled) training dataframe.

# Shuffle the order of the trajectories (rows of one object_id stay together).
import random
ids = df["object_id"].unique()

# Fixed seed so the shuffled id order is the same on a fresh run.
random.seed(1)
random.shuffle(ids)

# Re-order the rows to follow the shuffled id order; after reset_index the
# "object_id" column comes first in the frame.
# NOTE(review): this overwrites `df`, so re-running the cell shuffles an already
# shuffled frame and yields a different split than a fresh run.
df = df.set_index("object_id").loc[ids].reset_index()

# Make the validation split

num_val_rows = 80000  # 20 steps x 4000 trajectories, approx. 10% of the training dataset

# NOTE(review): the cut is made by row count, which only falls on a trajectory
# boundary if every object_id has exactly 20 rows — confirm against the data.
df_val = df.iloc[0:num_val_rows,:]
df_train = df.iloc[num_val_rows:,:]

In [11]:
# Build the gluonTS training dataset: a single multivariate entry whose target
# holds one row per feature (x, y, vx, vy, psi_rad) over all training rows.

start = pd.Timestamp("1900-1-1 00:00:00", freq="H")

series = np.array([df_train[name].values for name in ('x', 'y', 'vx', 'vy', 'psi_rad')])

data_train = [{"target": series, "start": start}]

In [12]:
# Build the gluonTS validation dataset: a single multivariate entry whose target
# holds one row per feature (x, y, vx, vy, psi_rad) over all validation rows.

start = pd.Timestamp("2000-1-1 00:00:00", freq="H")

series = np.array([df_val[name].values for name in ('x', 'y', 'vx', 'vy', 'psi_rad')])

data_val = [{"target": series, "start": start}]

In [14]:
# Build the gluonTS test datasets: one single-entry dataset per vehicle.

# Load the held-out CSV and make the identifier columns numeric.
# (Timestamp parsing is intentionally left disabled.)
dataframe_test = pd.read_csv(test_dataset_path, sep=',').assign(
    case_id=lambda frame: pd.to_numeric(frame["case_id"], downcast='integer'),
    object_id=lambda frame: pd.to_numeric(frame["object_id"]),
)

# Scale with the scaler stored at scaler_path.
df_test = scale_dataframe(
    dataframe_test,
    scaler_path,
    scaler_reload=True,
    inverse_transform=False,
    scaler_type=StandardScaler(),
)

start = pd.Timestamp("2100-1-1 00:00:00", freq="H")

data_test = []

# One group per vehicle; groups are visited in ascending object_id order.
cars = df_test.groupby(["object_id"])

for ix, seq in cars:
    sub_seq = seq.to_numpy()

    # Columns 2..6 are the feature columns that follow the two identifier columns.
    series = np.array([sub_seq[:, column] for column in range(2, 7)])

    # Each vehicle is wrapped in its own one-element list so it can be passed
    # to the evaluation helper as a stand-alone dataset.
    data_test.append([{"target": series, "start": start}])
    

In [16]:
# Configure the TimeGrad estimator. The series dimension and the window
# lengths come from the experiment parameters defined in the configuration
# cell (in_features=5, in_timesteps=8, out_timesteps=12) instead of being
# repeated here as literals, so the model cannot drift out of sync with them.
estimator = TimeGradEstimator(
    num_parallel_samples = 100,
    target_dim=in_features,
    prediction_length=out_timesteps,
    context_length=in_timesteps,
    cell_type='GRU',
    # NOTE(review): input_size presumably depends on target_dim and the lag/time
    # features TimeGrad derives from freq; re-check it if in_features changes.
    input_size=24,
    freq='H',
    loss_type='l2',
    scaling=False,  # the data is already standardized by scale_dataframe
    diff_steps=100, #100
    beta_end=0.1,
    beta_schedule="linear",
    trainer=Trainer(device=device,
                    epochs=1,
                    learning_rate=1e-3,
                    num_batches_per_epoch=100,
                    batch_size=64)
)

In [17]:
# Fit the estimator on the training series, passing data_val as validation data.
# The returned predictor is what the following cells pickle and use for inference.
predictor = estimator.train(training_data = data_train, validation_data = data_val, num_workers=2)

  0%|          | 0/99 [00:00<?, ?it/s]

  0%|          | 0/99 [00:00<?, ?it/s]

In [18]:
# Save model: pickle the trained predictor to "<model_save_path>predictor.pkl".
# model_save_path already ends with "/", so plain concatenation yields a valid path.
with open(model_save_path+"predictor.pkl", "wb") as outfile:
    pkl.dump(predictor, outfile)

In [19]:
# Load model: read the pickled predictor back from model_save_path
# (model_reload_path is not used here).
# NOTE(review): unpickling executes arbitrary code — only load files you wrote yourself.
predictor = pd.read_pickle(model_save_path+"predictor.pkl")

In [None]:
# Forecast every test vehicle, then save the raw (scaled) arrays and a
# de-normalized table that carries the case_id / object_id of each row.
feature_columns = ['x','y','vx','vy','psi_rad']

results = list()
input_data = list()

for element in data_test:
    # `element` is a one-entry dataset (a single vehicle), so index 0 below
    # addresses that vehicle's forecast and its full series.
    forecast_it, ts_it = make_evaluation_predictions(dataset=element,
                                                     predictor=predictor,
                                                     num_samples=100)

    forecasts = list(forecast_it)
    targets = list(ts_it)
    mean = forecasts[0].mean

    # Observed part of the trajectory followed by the mean forecast.
    # (Named `observed` rather than `input` so the builtin is not shadowed
    # for the rest of the notebook.)
    observed = targets[0].values[0:in_timesteps,:]
    prediction = np.concatenate((observed, mean), axis=0)

    results.append(prediction)
    input_data.append(targets[0].values)


# Flatten (vehicles, steps, features) into one row per time step.
results = np.array(results)
results = results.reshape(-1,len(feature_columns))

input_data = np.array(input_data)
input_data = input_data.reshape(-1,len(feature_columns))

df_results = pd.DataFrame(results, columns = feature_columns)
df_input = pd.DataFrame(input_data, columns = feature_columns)

# save results to file
with open(model_save_path+"df_results.pkl", "wb") as outfile:
    pkl.dump(df_results, outfile)

with open(model_save_path+"df_input.pkl", "wb") as outfile:
    pkl.dump(df_input, outfile)


# Save the results with meta information added

# data_test was built by iterating df_test.groupby("object_id"), which visits
# vehicles in ascending object_id order. The identifier rows must follow the
# same order, otherwise predictions get labelled with the wrong ids whenever
# the CSV is not already sorted. A stable sort keeps the row order inside each
# vehicle, exactly like the groupby iteration does.
meta_columns = df_test.sort_values("object_id", kind="mergesort").to_numpy()
meta_columns = meta_columns[:,0:meta_features]
if len(meta_columns) != len(results):
    raise ValueError(
        "Number of prediction rows (%d) does not match the number of test rows (%d)"
        % (len(results), len(meta_columns))
    )
results_meta = np.concatenate((meta_columns,results), axis=1)

df_results_meta = pd.DataFrame(results_meta, columns = ['case_id','object_id'] + feature_columns)

# Denormalize results
df_results_meta = scale_dataframe(df_results_meta, scaler_path, scaler_reload = True, inverse_transform=True, scaler_type= StandardScaler())

# save results to file
with open(model_save_path+"df_results_meta.pkl", "wb") as outfile:
    pkl.dump(df_results_meta, outfile)

