In [None]:
'''
This notebook compares the results of different route prediction models in terms of evaluation metrics
- experiment data is loaded from file (neptune export)
- plots for subtasks 1 and 2 are generated, and evaluation against the ground truth path vs. the ground truth trajectory is compared
'''

In [None]:
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import numpy as np
from datetime import timedelta, datetime
import time
import networkx as nx
import matplotlib.pyplot as plt
import folium
import warnings
import sys

# Silence all library warnings so the plot cells below stay uncluttered.
warnings.filterwarnings('ignore')

# Report the versions of the geospatial libraries for reproducibility.
for lib_name, lib in (("Geopandas", gpd), ("Movingpandas", mpd)):
    print("{} has version {}".format(lib_name, lib.__version__))

In [None]:
# Load the experiment results (CSV exported from neptune) into a DataFrame
# and print a summary of its columns, dtypes and non-null counts.
eval_df = pd.read_csv(
    '../reports/RoutePredictions-2.csv',
)
eval_df.info()

In [None]:
# Overview of the hyperparameter values present in the experiment results.
# Each entry is (printed label, column name); values are listed in sorted order.
sorted_overview = [
    ('Models', 'Model'),
    ('Task', 'Task'),
    ('n_steps', 'n_steps'),
    ('n_start_nodes', 'n_start_nodes'),
    ('n_training_paths', 'n_training_paths'),
    ('n_test_paths', 'n_test_paths'),
]
for label, column in sorted_overview:
    print(label, sorted(eval_df[column].unique()))

# node feature sets are printed in order of appearance (not sorted)
print('node_features', eval_df['node_features'].unique())

In [None]:
#### Plot performance of subtask 2 (next_node prediction task) for all networks
# One figure per network: mean SSPD error (left, log scale) and choice accuracy (right)
# as a function of the prediction horizon k (column 'n_steps') for each model.

# specify networks; each network is selected in the results by its number of training paths
# (networks, networks_n_paths and ylims are parallel lists, one entry per network)
networks = ['Tromsø', 'Oslo', 'Stavanger']
networks_n_paths = [4449, 21058, 36924]

# define metrics
metric1 = 'mean_abs_err'
metric2 = 'choice_accuracy'
ylims = [[50, 3000], [100,5000], [100,6000]]  # y-limits of the left panel

# node feature set (stored as a string in the export) of the GRETEL runs to plot
gretel_node_features = "['n_members', 'speed', 'cog_before', 'cog_after', 'lat', 'lon']"

# Select the subtask 2 runs evaluated in 'path' mode once and order them by k.
# A sorted copy is used instead of an in-place sort inside the loop, so the shared
# eval_df is left untouched and the loop-invariant work is done only once.
subtask2_df = eval_df[(eval_df['Task']=='next_nodes') & (eval_df['eval_mode']=='path')].sort_values(by='n_steps')

# plot for each network
for network, n, ylim in zip(networks, networks_n_paths, ylims):
    network_df = subtask2_df[subtask2_df['n_training_paths']==n]

    # rows to plot per model; dict order is the plotting order and therefore fixes the line colors
    model_rows = {
        'Markov': network_df[network_df['Model'] == 'Markov'],
        'MOGen': network_df[(network_df['Model'] == 'MOGen') & (network_df['MOGen_optimal_order']==2)],
        'GRETEL': network_df[(network_df['Model'] == 'Gretel') & (network_df['loss']=='target_only') &
                             (network_df['node_features']==gretel_node_features)],
    }

    # x ticks: every k for which at least one of the plotted models has a result
    x = np.sort(pd.concat(list(model_rows.values()))['n_steps'].dropna().unique())

    # prepare figure and axes
    fig, axes = plt.subplots(1, 2, figsize=(12, 4))  # Set the figure size

    axes[0].set_xlabel('k')
    axes[0].set_ylabel('$MD_{SSPD}$')
    axes[0].tick_params(axis='y')
    axes[0].set_xticks(x)
    axes[0].set_yscale('log')
    axes[0].set_ylim(ylim)

    axes[1].set_xlabel('k')
    axes[1].set_ylabel('$CACC$')
    axes[1].tick_params(axis='y')
    axes[1].set_xticks(x)

    # Plot every model against its OWN n_steps values, so x and y always have the same
    # length and each point sits at the k it was measured for.
    for label, rows in model_rows.items():
        axes[0].plot(rows['n_steps'], rows[metric1], label=label)
        axes[1].plot(rows['n_steps'], rows[metric2], label=label)

    # add legend (on the right panel only)
    axes[1].legend()

    # Add title
    fig.suptitle(network)

    fig.tight_layout()  # Adjust layout
    fig.savefig('subtask2_'+network+'_log.pdf')
    plt.show()
    plt.close(fig)  # release the figure; one figure is created per network

In [None]:
#### Plot performance of subtask 1 (path prediction task) for all networks
# Scatter plot of choice accuracy vs. mean (left) and median (right) SSPD error,
# one point per model and network.

# filter data for subtask 1 and order it by number of training paths
# (a sorted copy is used so that the shared eval_df is not mutated)
subtask1_df = eval_df[eval_df.Task=='path'].sort_values(by='n_training_paths')

# specify networks, listed in ascending order of training paths, i.e. in the row order of subtask1_df
networks = ['Tromsø', 'Oslo', 'Stavanger']
networks_n_paths = [4449, 21058, 36924]

# define metrics
metric1 = 'choice_accuracy'
metric2 = 'mean_abs_err'
metric3 = 'median_abs_err'

# prepare figure and axes
fig, ax = plt.subplots(1, 2, figsize=(12, 4))  # Set the figure size

ax[0].set_xlabel('$CACC$')
ax[0].set_ylabel('$MD_{SSPD}$')

ax[1].set_xlabel('$CACC$')
ax[1].set_ylabel('$MedD_{SSPD}$')


def plot_model_results(rows, color, label):
    """Scatter one model's results on both panels and annotate each point with its network name.

    rows  : rows of subtask1_df for one model, ordered by n_training_paths
            (expected: one row per entry of `networks`)
    color : marker color for this model
    label : legend label for this model
    """
    if len(rows) != len(networks):
        # Points are labelled by position, so a different row count means the labels may be off.
        print('Note: {} has {} result rows but {} networks are defined; check the filter'.format(
            label, len(rows), len(networks)))
    for panel, metric in zip(ax, (metric2, metric3)):
        panel.scatter(rows[metric1], rows[metric], color=color, marker='o', s=100, label=label)
        # Annotate each point with the network name (zip stops at the shorter sequence,
        # so a missing result row cannot raise an IndexError)
        for network, cacc, err in zip(networks, rows[metric1], rows[metric]):
            panel.annotate(network, (cacc, err), textcoords="offset points", xytext=(5,5), ha='left')


# only results evaluated in 'path' mode are shown in this figure
path_mode = subtask1_df['eval_mode']=='path'

# plot Dijkstra model results
plot_model_results(
    subtask1_df[(subtask1_df['Model'] == 'Dijkstra') & (subtask1_df['weight']=='inverse_density') & path_mode],
    color='blue', label='Dijkstra')

# plot MOGen model results
plot_model_results(
    subtask1_df[(subtask1_df['Model'] == 'MOGen') & (subtask1_df['MOGen_optimal_order']==2) & path_mode],
    color='orange', label='MOGen')

# plot GRETEL model results
# NOTE(review): runs with n_test_paths == 191 are excluded; the reason is not visible here - confirm
plot_model_results(
    subtask1_df[(subtask1_df['Model'] == 'Gretel') & (subtask1_df['n_test_paths']!=191) & (subtask1_df['loss']=='target_only') &
                (subtask1_df['node_features']=="['n_members', 'speed', 'cog_before', 'cog_after', 'lat', 'lon']") & path_mode],
    color='green', label='GRETEL')

# adjust axes limits
ax[0].set_ylim(0,800)
ax[0].set_xlim(0.3,0.75)
ax[1].set_ylim(0,100)
ax[1].set_xlim(0.3,0.75)

# add legend (labels come from the scatter calls, so they cannot get out of sync with the colors)
ax[0].legend(loc='upper left')

# Add title
fig.suptitle('performance subtask 1')

fig.tight_layout()  # Adjust layout
fig.savefig('subtask1.pdf')
plt.show()

In [None]:
# Plot performance for subtask 2 to compare evaluation against path or trajectory
# For each network, the difference (trajectory - path) of the Markov model's median SSPD error
# is plotted over the prediction horizon k (column 'n_steps').

# specify networks; each network is selected in the results by its number of training paths
networks = ['Tromsø', 'Oslo', 'Stavanger']
networks_n_paths = [4449, 21058, 36924]

# define metrics
metric1 = 'median_abs_err'

# Markov results of subtask 2 for k < 20, ordered by k
# (a sorted copy is used so that the shared eval_df is not mutated)
markov_df = eval_df[(eval_df['Model'] == 'Markov') & (eval_df['Task']=='next_nodes') & (eval_df['n_steps']<20)].sort_values(by='n_steps')

# prepare figure and axes
fig, ax = plt.subplots(figsize=(5, 4))  # Set the figure size
x = np.array([1, 2, 3, 5, 10, 15])  # tick positions on the k axis

ax.set_xlabel('k')
ax.set_ylabel(r'$\Delta MedD_{SSPD}$')  # raw string: '\D' is not a valid escape sequence
ax.tick_params(axis='y')
ax.set_xticks(x)

# plot for each network
for network, n in zip(networks, networks_n_paths):
    network_df = markov_df[markov_df['n_training_paths']==n]

    # retrieve Markov model results for both evaluation modes, indexed by k
    y1_path = network_df[network_df['eval_mode']=='path'].set_index('n_steps')[metric1]
    y1_traj = network_df[network_df['eval_mode']=='trajectory'].set_index('n_steps')[metric1]

    # Subtracting the Series aligns them on k, so each difference compares results for the
    # same horizon and is drawn at the k it belongs to (instead of relying on array position).
    delta = y1_traj - y1_path

    # plot
    ax.plot(delta.index, delta.values, label=network)


# add legend
ax.legend()
ax.set_title('Difference in Median SSPD when evaluating the prediction against ground truth path vs ground truth trajectory')
fig.tight_layout()  # Adjust layout
fig.savefig('subtask2__pathVStraj.pdf')
plt.show()

In [None]:
# Compare performance for subtask 1 for evaluation against path or trajectory
# Shows the Dijkstra (inverse_density weight) errors per network for every evaluation mode.
columns = ['network_name', 'eval_mode', 'median_abs_err', 'mean_abs_err']
is_dijkstra_subtask1 = ((eval_df['Model'] == 'Dijkstra') & (eval_df['weight']=='inverse_density') & (eval_df['Task']=='path'))

# Sort a filtered copy (eval_df itself is left untouched). eval_mode is used as a secondary
# key because rows of different evaluation modes can share the same n_training_paths;
# without it their relative order in the table is not deterministic.
dijkstra_subtask1_df = eval_df[is_dijkstra_subtask1].sort_values(by=['n_training_paths', 'eval_mode'])
dijkstra_subtask1_df[columns]