In [None]:
'''
This notebook uses a statistical model for route prediction without target information.
The model can take the form of a random predictor or a Markov chain.
Usage:
- specify the maritime traffic network to predict routes on
- specify training data to train the prediction model
- specify test data to evaluate the prediction model
- specify prediction model parameters
The notebook then trains the prediction model, evaluates it on the test data and visualizes the evaluation metrics.
Optionally, predictions can be visualized against the ground truth on a map.
'''

In [None]:
# Standard-library and third-party imports
import warnings
import pickle
import sys
import numpy as np
import time
# Suppresses every warning for the rest of the kernel session. This keeps the
# cell output short but also hides genuine problems (e.g. deprecations);
# comment it out when debugging.
warnings.filterwarnings('ignore')

# add paths for modules
# These are relative paths, so they resolve against the kernel's current
# working directory, not against the location of this notebook file.
sys.path.append('../visualization')
sys.path.append('../features')
sys.path.append('../datawrangling')

# import modules
# NOTE(review): these imports must stay below the sys.path.append calls above;
# presumably the project modules live in those directories — confirm against
# the repository layout.
import visualize
import geometry_utils
import dataloader_paths, dataloader_geo

from maritime_traffic_network import MaritimeTrafficNetwork
from random_path_prediction import RandomWalkPathPrediction
import prediction_model_evaluation_metrics as metrics

In [None]:
# ---------------------------------------------------------------------------
# Maritime traffic network
# All five values below are concatenated into the file name of the pickled
# network that is loaded further down.
# ---------------------------------------------------------------------------
datasize = 'full'
location = 'oslo'
model_date = '202208'
DP_tol = 10          # appears in the file name as 'DP<DP_tol>'
min_samples = 20     # appears in the file name as 'HDBSCAN<min_samples>'

# ---------------------------------------------------------------------------
# Training data
# ---------------------------------------------------------------------------
train_dates = ['202206', '202207', '202208']
# Filter passed to the training-data loader; one of
# None, 'Last', 'Passasjer', 'Tank', 'Fisk', 'Unknown', 'Auxiliary'
train_filter = None

# ---------------------------------------------------------------------------
# Test data
# ---------------------------------------------------------------------------
test_dates = ['202209']
# Filter passed to the test-data loader; one of
# None, 'Last', 'Passasjer', 'Tank', 'Fisk', 'Unknown', 'Auxiliary'
test_filter = None
# Start / end / step handed to the subpath sampling step
# (presumably slice-like semantics — confirm in dataloader_paths.sample_path_data)
selection_start, selection_end, selection_step = 0, -1, 200

# ---------------------------------------------------------------------------
# Prediction model parameters
# ---------------------------------------------------------------------------
# 'weighted' selects the Markov chain, 'random' the random predictor
method = 'weighted'
# number of observed nodes (defaults to 1)
n_start_nodes = 1
# prediction horizon
n_steps = 5
# number of random walks for sampling
n_walks = 1000
# the top n_predictions predictions will be output
n_predictions = 1
# evaluation mode: 'path' evaluates against the ground truth path,
#                  'trajectory' against the ground truth trajectory
eval_mode = 'path'

In [None]:
# Load maritime traffic network from pickle
# The file name encodes the network configuration chosen in the settings cell.
network_name = f'{model_date}_waypoints_DP{DP_tol}_HDBSCAN{min_samples}_{location}_{datasize}_UTM'
network_path = '../../models/networks/best_networks/' + network_name + '.obj'
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only load network files that come from a trusted source.
# The context manager guarantees the file handle is closed even when
# unpickling raises (the previous manual open/close leaked it in that case).
with open(network_path, 'rb') as fileObj:
    network = pickle.load(fileObj)
# Last expression of the cell: shown as the cell output
network.hyperparameters

In [None]:
# Read the training paths for the selected network and training dates.
# path_prefix is reused later when the test paths are loaded.
path_prefix = '../../data/paths/'
training_paths = dataloader_paths.load_path_training_data(
    path_prefix,
    network_name,
    train_dates,
    filter=train_filter,
    data_version='',
)

In [None]:
# Fit the random-walk path prediction model on the pruned network graph
# using the training paths loaded above.
training_graph = network.G_pruned
model = RandomWalkPathPrediction()
model.train(training_graph, training_paths)

In [None]:
# Read the test paths and the matching test trajectories from disk.
traj_path_prefix = '../../data/processed/'
# The positional arguments 0, -1, 1 are presumably start / end / step for
# selecting paths, i.e. "load everything" — confirm in
# dataloader_paths.load_path_test_data. Sub-sampling happens in a later cell.
all_test_paths = dataloader_paths.load_path_test_data(
    path_prefix,
    network_name,
    test_dates,
    0,
    -1,
    1,
    filter=test_filter,
)
test_trajectories = dataloader_geo.load_trajectories(
    traj_path_prefix,
    location,
    network.crs,
    test_dates,
)

In [None]:
# Split the test paths into subpaths and sample from them.
# Each subpath has room for the observed nodes plus the prediction horizon.
subpath_length = n_steps + n_start_nodes
sub_paths = dataloader_paths.split_path_data(all_test_paths, subpath_length)
test_paths = dataloader_paths.sample_path_data(
    sub_paths,
    selection_start,
    selection_end,
    selection_step,
)

In [None]:
# Predict
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring durations; time.time() is a wall clock that can jump when the
# system time is adjusted and may have coarse resolution.
start_time = time.perf_counter()

predictions = model.predict(test_paths, n_start_nodes, n_steps, n_predictions, n_walks, method=method)

end_time = time.perf_counter()  # end timer
elapsed = end_time - start_time
print(f'Time elapsed: {elapsed/60:.2f} minutes')
# Guard against a zero-length interval so the throughput report can never
# raise ZeroDivisionError.
pps = len(test_paths)/elapsed if elapsed > 0 else float('inf')
print('Predictions per second: ', pps)

In [None]:
# Evaluate the predictions made in the previous cell against the test
# trajectories. The call returns a pair that is unpacked into
# evaluation_results and fig.
evaluation_results, fig = metrics.evaluate_given_predictions(
    'next_nodes',
    predictions,
    test_trajectories,
    network,
    n_start_nodes=n_start_nodes,
    n_steps=n_steps,
    eval_mode=eval_mode,
)

In [None]:
# For visualization: Make predictions with the model and plot
i = 8    # choose an example from the test set
min_probability = 0.03    # only predictions with a higher probability are drawn
example = test_paths.iloc[i]
mmsi = example.mmsi
start_node = [example.path[0]]
true_path = example.path
trajectory = test_trajectories.get_trajectory(mmsi)

# Keep the sampled paths under their own name: reusing `predictions` would
# overwrite the result of the prediction cell, so the evaluation cell could no
# longer be re-run after this one.
sampled_paths = model.sample_paths(start_node, n_walks, n_steps)
total_sum = sum(sampled_paths.values())
# Normalise the sampled values to probabilities in a new dict instead of
# mutating the object returned by the model.
probabilities = {key: value / total_sum for key, value in sampled_paths.items()}

# filter predictions with a probability > 3%
predictions_filtered = {path: prob for path, prob in probabilities.items() if prob > min_probability}

# Named `prediction_map` rather than `map` so the builtin map() is not shadowed
# for the rest of the kernel session.
prediction_map = visualize.map_prediction_and_ground_truth(predictions_filtered, start_node, trajectory, true_path, network,
                                                           min_passages=2, opacity=0.3, location=location)
# Build the output path once so the saved file and the printed message agree.
map_file = '../../reports/maps/'+network_name+'_random_predictions.html'
prediction_map.save(map_file)
print(f'Map saved under {map_file}')

In [None]:
# Mean number of outgoing edges per node of network.G.
# NOTE(review): the model is trained on network.G_pruned while this statistic
# uses network.G — confirm that the unpruned graph is the intended one.
out_degrees = {node: degree for node, degree in network.G.out_degree()}
n_nodes = len(out_degrees)
average_out_degree = sum(out_degrees.values()) / n_nodes
print(average_out_degree)