In [None]:
'''
This notebook uses a model based on Dijkstra's algorithm for route prediction with target information.
- specify the maritime traffic network to predict routes on
- specify training data to train the prediction model
- specify test data to evaluate the prediction model
- specify prediction model parameters
The notebook will train the prediction model, evaluate it based on test data and visualize evaluation metrics.
Optionally, predictions can be visualized against the ground truth on a map.
'''

In [None]:
import warnings
import pickle
import sys
import pandas as pd
import time
warnings.filterwarnings('ignore')

# add paths for modules
sys.path.append('../visualization')
sys.path.append('../features')
sys.path.append('../datawrangling')

# import modules
import visualize
import geometry_utils
import dataloader_paths, dataloader_geo

from maritime_traffic_network import MaritimeTrafficNetwork
from dijkstra_path_prediction import DijkstraPathPrediction
import prediction_model_evaluation_metrics as metrics

In [None]:
# --- Maritime traffic network ---------------------------------------------
# These five values are combined into the file name of the pickled network.
model_date = '202208'
location = 'oslo'
datasize = 'full'
DP_tol = 10
min_samples = 20

# --- Training data --------------------------------------------------------
train_dates = ['202206', '202207', '202208']
# Optional filter handed to the training data loader; allowed values:
# None, 'Last', 'Passasjer', 'Tank', 'Fisk', 'Unknown', 'Auxiliary'
train_filter = None

# --- Test data ------------------------------------------------------------
test_dates = ['202209']
# Optional filter handed to the test data loader; same allowed values as train_filter
test_filter = None
# Sampling parameters (start, end, step) handed to the test data loader
selection_start, selection_end, selection_step = 0, -1, 80

# --- Prediction model parameters ------------------------------------------
# Edge weight: 'inverse_density', 'inverse_passages' or 'length'
weight = 'inverse_density'
# Number of observed nodes (defaults to 1)
n_start_nodes = 1
# Evaluation mode:
#   'path'       evaluates against the ground truth path
#   'trajectory' evaluates against the ground truth trajectory
eval_mode = 'path'

In [None]:
# Load maritime traffic network from pickle
# The file name is assembled from the network settings chosen in the configuration cell.
network_name = f'{model_date}_waypoints_DP{DP_tol}_HDBSCAN{min_samples}_{location}_{datasize}_UTM'
network_path = '../../models/networks/best_networks/' + network_name + '.obj'
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load network files that come from a trusted source.
# The context manager closes the file even if unpickling raises.
with open(network_path, 'rb') as network_file:
    network = pickle.load(network_file)
# Last expression of the cell: display the hyperparameters stored on the loaded network
network.hyperparameters

In [None]:
# Load training data from file
# path_prefix is reused by the test-data cell further down.
path_prefix = '../../data/paths/'
training_paths = dataloader_paths.load_path_training_data(
    path_prefix,
    network_name,
    train_dates,
    filter=train_filter,
    data_version='',
)

In [None]:
# Train model
# Creates a DijkstraPathPrediction instance and calls its train() method with the
# network's G_pruned graph and the training paths loaded in the previous cell.
model = DijkstraPathPrediction()
model.train(network.G_pruned, training_paths)

In [None]:
# Load test data from file
# Test paths are sampled with the selection_* settings from the configuration cell.
test_paths = dataloader_paths.load_path_test_data(
    path_prefix, network_name, test_dates,
    selection_start, selection_end, selection_step,
    filter=test_filter, data_version='',
)

# Trajectories for the same test dates, loaded with the network's CRS
traj_path_prefix = '../../data/processed/'
test_trajectories = dataloader_geo.load_trajectories(
    traj_path_prefix, location, network.crs, test_dates
)

# Report the size of both data sets
print('# of training paths:', len(training_paths))
print('# of test paths:', len(test_paths))

In [None]:
# Predict
# Run the trained model on all test paths and report the time taken.
# time.perf_counter() is the clock intended for measuring durations; unlike
# time.time() it is not affected by adjustments of the system clock.
start_time = time.perf_counter()

predictions = model.predict(test_paths, n_start_nodes=n_start_nodes, weight=weight)

end_time = time.perf_counter()  # end timer
elapsed_seconds = end_time - start_time
print(f'Time elapsed: {elapsed_seconds/60:.2f} minutes')
# Guard the rate against a zero elapsed time instead of raising ZeroDivisionError
pps = len(test_paths)/elapsed_seconds if elapsed_seconds > 0 else float('inf')
print('Predictions per second: ', pps)


In [None]:
# Evaluate
# Returns a results table and a figure with the evaluation metrics.
# NOTE(review): the first argument is the literal 'path', independent of the
# configurable eval_mode passed below - confirm against evaluate_given_predictions.
evaluation_results, fig = metrics.evaluate_given_predictions(
    'path',
    predictions,
    test_trajectories,
    network,
    n_start_nodes=n_start_nodes,
    eval_mode=eval_mode,
)

In [None]:
# For visualization: Make predictions with the Dijkstra model and plot
# Pick a single test path, predict the route between its first and last node
# and draw the prediction against the ground truth on a map.
example_index = 2
example = test_paths.iloc[example_index]
mmsi = example.mmsi
start_node = [example.path[0]]
end_node = example.path[-1]
true_path = example.path
trajectory = test_trajectories.get_trajectory(mmsi)

# Use dedicated names for the single-example prediction so that the batch
# `predictions` produced by the Predict cell are not overwritten; the Evaluate
# cell can then be re-run after this one and still see the full results.
example_path, flag = model.predict_path(start_node[-1], end_node)
# Prepend the observed nodes that precede the node the prediction started from
example_path = start_node[:-1] + example_path
# The map function receives a dict keyed by the path tuple
# (NOTE(review): the value 1 is presumably a weight/probability - confirm)
example_prediction = {tuple(example_path): 1}

# Named `prediction_map` rather than `map` to avoid shadowing the builtin for the rest of the session
prediction_map = visualize.map_prediction_and_ground_truth(example_prediction, start_node, trajectory, true_path, network,
                                                           min_passages=2, opacity=0.2, location=location, end_node=end_node)
# Build the output path once and use it for both saving and reporting
map_file = '../../reports/maps/'+network_name+'_dijkstra_predictions.html'
prediction_map.save(map_file)
print(f'Map saved under {map_file}')

In [None]:
from make_trajectories_from_AIS import add_ship_metadata
import matplotlib.pyplot as plt

# Plot prediction performance by ship group

# Add the original mmsi column back to the dataframe.
# This works on a renamed copy instead of renaming in place, so the cell can be re-run:
# a second in-place rename would leave two 'id' columns and break the `.str` access.
# 'id' keeps the original identifier; 'mmsi' is the integer made of its first 9 characters.
evaluation_results_id = (
    evaluation_results
    .rename(columns={'mmsi': 'id'})
    .assign(mmsi=lambda df: df['id'].str[:9].astype(int))
)

# add metadata to each mmsi
meta_file = '../../data/external/seilas-2022.csv'
evaluation_results_meta = add_ship_metadata(meta_file, evaluation_results_id)

# get mean choice accuracy and SSPD by shipgroup (both series share the same group index)
by_group = evaluation_results_meta.groupby(['skipsgruppe'])
sspd_by_group = by_group['SSPD'].mean()
cacc_by_group = by_group['choice_accuracy'].mean()

x = cacc_by_group.values
y = sspd_by_group.values
categories = sspd_by_group.index

# `group_fig` keeps the `fig` returned by the Evaluate cell available
group_fig, ax = plt.subplots(figsize=[6, 4])

# Scatter plot with a different color for each category
ax.scatter(x, y, c=range(len(categories)), cmap='viridis', marker='o', s=100)

# Annotate each point with the ship category name
for category, x_val, y_val in zip(categories, x, y):
    ax.annotate(category, (x_val, y_val), textcoords="offset points", xytext=(5,5), ha='left')

# Axis limits with a small margin around the data.
# Series.min()/max() skip NaN group means, unlike the builtin min()/max() on an array.
ax.set_xlim(cacc_by_group.min()-0.01, cacc_by_group.max()+0.01)
ax.set_ylim(sspd_by_group.min(), sspd_by_group.max()+50)

ax.set_xlabel('$CACC$')
ax.set_ylabel('$MD_{SSPD}$ (m)')
ax.set_title('Prediction performance by Shipgroup')
#group_fig.savefig(f'prediction_performance_by_ship_category_{location}_Dijkstra.pdf')

plt.show()