In [None]:
'''
This notebook performs the evaluation of a network against some test trajectory set:
- network evaluation metrics are computed (either using Trajectory2Path or Leuven map matching) and plotted
- a plot of an example trajectory and its mapped path is created
- network evaluation metrics are broken down by ship category and plotted
'''

In [None]:
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import numpy as np
from datetime import timedelta, datetime
import time
from scipy.sparse import coo_matrix
from shapely.geometry import Point, LineString, MultiLineString
from shapely import ops
import networkx as nx
import matplotlib.pyplot as plt
import folium
import pickle
import warnings
import sys

# suppress all warnings for the rest of the notebook
warnings.filterwarnings('ignore')

# report the versions of the geospatial libraries in use
print(f"Geopandas has version {gpd.__version__}")
print(f"Movingpandas has version {mpd.__version__}")

In [None]:
# add paths for modules
sys.path.append('../src/models')
sys.path.append('../src/visualization')
sys.path.append('../src/features')
# import modules
import visualize
import geometry_utils
from maritime_traffic_network import MaritimeTrafficNetwork

In [None]:
# load network from pickle
model = '202204_waypoints_DP30_HDBSCAN25_stavanger_full_UTM'
model_path = '../models/networks/best_networks/' + model + '.obj'
# NOTE: unpickling can execute arbitrary code contained in the file;
# only load network files that come from a trusted source.
# The context manager closes the file even if unpickling raises.
with open(model_path, 'rb') as fileObj:
    network = pickle.load(fileObj)

In [None]:
# NOTE(review): presumably reports a summary of the trajectories the network was built from — confirm in MaritimeTrafficNetwork
network.get_trajectories_info()
# last expression of the cell, so the notebook displays the hyperparameters stored on the network
network.hyperparameters

In [None]:
# Prune the graph and merge stop points, in case this was not done during network creation.
# Note: the current network generation method performs these steps automatically;
# in that case repeating them here does not change the network.

# settings (these names are also referenced by the disabled experiment-logging cell)
pruning = 1
merge_stops = True
merge_stops_speed = 2

# prune, merge stop points, then prune again
network.prune_graph(pruning)
network.merge_stop_points(max_speed=merge_stops_speed)
network.prune_graph(pruning)

In [None]:
# load evaluation data and transform it to the target coordinate reference system
eval_file = '202205_points_stavanger_cleaned_meta_full_dualSplit_2'
filename = f'../data/processed/{eval_file}.parquet'
crs = 32632  # Coordinate reference system
gdf = gpd.read_parquet(filename).to_crs(crs)  # Transformation
all_trajectories = mpd.TrajectoryCollection(gdf, traj_id_col='mmsi', obj_id_col='mmsi')

# select evaluation data: every selection_step-th entry between selection_start and selection_end
# NOTE(review): the positions index gdf.mmsi.unique(), while selection_end is the number of
# trajectories in the collection — assumes one trajectory per unique 'mmsi' value; confirm
selection_start, selection_end, selection_step = 0, len(all_trajectories), 20
selection = np.arange(selection_start, selection_end, selection_step)
n_trajectories = len(selection)
mmsis = gdf['mmsi'].unique()[selection]
trajectories = all_trajectories.filter('mmsi', mmsis.tolist())

In [None]:
# evaluate network with Trajectory2Path
all_paths, all_evaluation_results, summary, fig = network.evaluate_graph(
    trajectories,
    k_max=500,
    l_max=5,
    algorithm='standard',
)

# alternative: evaluate network with Leuven Map Matching algorithm
# all_paths_MM, all_evaluation_results_MM, summary_MM, fig_MM = network.evaluate_graph(trajectories, algorithm='leuven')

In [None]:
# Generate plot
# The map object is called folium_map (not map) so the builtin map() is not shadowed
# for the rest of the kernel session.
# NOTE(review): location='tromso' although the loaded model and the evaluation data are
# named after Stavanger — confirm the intended map location.
folium_map = network.map_graph(pruned=True, location='tromso', line_weight=1, min_passages=3, opacity=0.3)

# define trajectory to plot
k = 3
mmsi = mmsis[k]
trajectory = trajectories.get_trajectory(mmsi)
path_t2p = all_paths[all_paths.mmsi == mmsi]
#path_leuven = all_paths_MM[all_paths_MM.mmsi == mmsi]

# plot trajectory and path as separate named layers on top of the network map
folium_map = trajectory.to_line_gdf()[['geometry', 'mmsi']].explore(m=folium_map, style_kwds={'weight':4, 'color':'black'},
                                                                      name='trajectory')
folium_map = path_t2p.explore(m=folium_map, style_kwds={'weight':4, 'color':'cyan'}, name='T2P')
#folium_map = path_leuven.explore(m=folium_map, style_kwds={'weight':4, 'color':'orange'}, name='leuven')
folium.LayerControl().add_to(folium_map)

# last expression of the cell, so the notebook renders the map
folium_map
#folium_map.save('compare_leuven_T2P.html')

In [None]:
"""
# Save experiment results with neptune
import neptune


run = neptune.init_run(
    project="project_name",
    api_token="token",
)  # your credentials

run["model"]=model
run["algorithm"]='V7.0(SSPD)'
run["n_points"]=len(network.gdf)
run["n_nodes"]=network.G_pruned.number_of_nodes()
run["n_edges"]=network.G_pruned.number_of_edges()
run["n_isolated"]=nx.number_of_isolates(network.G_pruned)
run["merge_stops"] = merge_stops
run["merge_stops_speed"] = merge_stops_speed
run["pruning"] = pruning

params = network.hyperparameters
params['clustering_metric_V_coord'] = params['clustering_metric_V'][0][0]
params['clustering_metric_V_cog'] = params['clustering_metric_V'][2][2]
params['clustering_metric_V_speed'] = params['clustering_metric_V'][4][4]
run["parameters"] = params

run["test_data"] = {'eval_file':eval_file,
                    'selection_start':selection_start,
                    'selection_end':selection_end,
                    'selection_step':selection_step,
                    'n_trajectories':n_trajectories}

run["plot"].upload(fig)
run["summary"] = summary

run.stop()
"""

In [None]:
#### Enrich evaluation results with ship metadata
# prepare evaluation results for merge with ship metadata
# Keep the original trajectory identifier in column 'id'. The rename is guarded so that
# re-running this cell does not create a second 'id' column (which would break .str below).
if 'id' not in all_evaluation_results.columns:
    all_evaluation_results.rename(columns={'mmsi':'id'}, inplace=True)
# the first 9 characters of the trajectory identifier are converted to the integer 'mmsi' column
all_evaluation_results['mmsi'] = all_evaluation_results['id'].str[:9].astype(int)

In [None]:
# add ship metadata
# the module directory must be on sys.path before the import below can succeed
sys.path.append('../src/datawrangling')
from make_trajectories_from_AIS import add_ship_metadata
meta_file = '../data/external/seilas-2022.csv'
# NOTE(review): presumably joins the ship metadata from meta_file onto the evaluation results;
# the cells below read a 'skipsgruppe' column from the result — confirm in add_ship_metadata
all_evaluation_results_meta = add_ship_metadata(meta_file, all_evaluation_results)

In [None]:
# plot failure rate and mean SSPD by shipgroup
# Mean SSPD per ship group
mae_by_group = all_evaluation_results_meta.groupby(['skipsgruppe'])['SSPD'].mean()
# Failure rate per ship group
msg_by_group = all_evaluation_results_meta.groupby(['skipsgruppe'])['message'].count()
success_by_group = all_evaluation_results_meta[all_evaluation_results_meta.message=='success'].groupby(['skipsgruppe'])['message'].count()
# Groups without a single successful mapping are absent from the filtered count;
# give them 0 successes so their failure rate is 1 instead of NaN.
success_by_group = success_by_group.reindex(msg_by_group.index, fill_value=0)
failure_by_group = 1 - success_by_group / msg_by_group
# X-axis
x = mae_by_group.index

# prepare plot and axes: left y-axis for mean SSPD, right y-axis for failure rate
fig, ax1 = plt.subplots(figsize=[8, 5])
ax1.set_xlabel('ship category')
ax1.set_ylabel('Mean SSPD (m)', color='red')
ax1.tick_params(axis='y', labelcolor='red')
ax2 = ax1.twinx()
ax2.set_ylabel('failure rate', color='blue')
ax2.tick_params(axis='y', labelcolor='blue')

y1 = mae_by_group.values
y2 = failure_by_group.values

# Set the limits for the y-axes
# nanmax ignores groups whose value is NaN (e.g. no SSPD available), which would
# otherwise make the axis limit undefined
max_y1_value = np.nanmax(y1)
max_y2_value = np.nanmax(y2)
ax1.set_ylim(0, max_y1_value+50)
ax2.set_ylim(0, max_y2_value+0.05)

# plot
ax1.scatter(x, y1, color='red')
ax2.scatter(x, y2, color='blue')

plt.title('Network performance depending on ship category')
fig.tight_layout()  # otherwise the right y-label is slightly clipped
#plt.savefig('min_samples.png')
# plt.show() works with the notebook's inline backend; Figure.show() needs a GUI backend
plt.show()

In [None]:
# Visualize mean SSPD and failure rate per ship category in one plot
categories = mae_by_group.index
x = failure_by_group.values
y = mae_by_group.values

fig, ax = plt.subplots(figsize=[5, 4])

# one marker per ship category, coloured by the category's position in the index
scatter = ax.scatter(x, y, c=range(len(categories)), cmap='viridis', marker='o', s=100)

# label every marker with its ship category name
for category, x_pos, y_pos in zip(categories, x, y):
    ax.annotate(category, (x_pos, y_pos), textcoords="offset points", xytext=(5,5), ha='left')

# axis limits with some headroom beyond the largest values;
# the x-axis starts at 0.1, so categories with a lower failure rate are outside the view
max_x_value = max(x)
max_y_value = max(y)
ax.set(
    xlim=(0.1, max_x_value+0.05),
    ylim=(0, max_y_value+50),
    xlabel='failure rate',
    ylabel='Mean SSPD (m)',
    title='Network performance depending on ship category',
)
fig.savefig('Network_performance_by_ship_category.pdf')

plt.show()

In [None]:
# Plot SSPD against the trajectory length (only when mapping was successful)
# Index the results by trajectory id once instead of filtering the whole frame
# twice for every trajectory.
results_by_id = all_evaluation_results_meta.set_index('id')
if not results_by_id.index.is_unique:
    raise ValueError('evaluation results contain duplicate trajectory ids')
# rows in the order of mmsis; raises KeyError if a selected trajectory has no result
selected_results = results_by_id.loc[list(mmsis)]

# x: trajectory length, y: SSPD, z: True where the mapping was successful
x = np.array([trajectories.get_trajectory(mmsi).get_length() for mmsi in mmsis])
y = selected_results['SSPD'].to_numpy()
z = (selected_results['message'] == 'success').to_numpy()

fig, ax = plt.subplots()
ax.scatter(x[z], y[z])
ax.set_xlabel('Trajectory length (m)')
ax.set_ylabel('SSPD (m)')
ax.set_title('SSPD depending on trajectory length (successful mappings)')
plt.show()

In [None]:
# Plot the distribution of distances (only for successfully mapped trajectories)
success_mask = all_evaluation_results['message']=='success'
success_eval_results = all_evaluation_results[success_mask]
# flatten the per-trajectory distance lists into one flat list
distances = [item for sublist in success_eval_results['distances'] for item in sublist]

# display settings shared by the histogram bins, the box plot axis and the title
max_distance = 1000  # upper display limit (m)
bin_width = 20       # histogram bin width (m)

# Plot results
fig, axes = plt.subplots(1, 2, figsize=(12, 6))
# np.arange excludes its stop value, so extend it by one bin width to make
# max_distance the last bin edge (otherwise the top bin would be missing)
bins = np.arange(0, max_distance + bin_width, bin_width).tolist()
axes[0].hist(distances, bins=bins, orientation='horizontal')
axes[0].set_title('Distribution of distance')
axes[0].set_ylabel('Distance (m)')

axes[1].boxplot(distances)
# the title states the cutoff that is actually applied to the axis below
axes[1].set_title(f'Distance between all trajectories \n and edge sequences \n (outlier cutoff at {max_distance}m)')
axes[1].set_ylabel('Distance (m)')
axes[1].set_ylim([0, max_distance])

#plt.savefig('distance_distribution_oslo.pdf')

# last expression of the cell: number of distance values that were plotted
len(distances)