In [None]:
'''
This notebook performs a grid search over the hyperparameters of a path prediction model.
Supported models: 'Random', 'Markov', 'MOGen', 'Dijkstra'. GRETEL does not run in this environment.
- Load a maritime traffic network, training data and test data from file
- Specify the model to be trained and the hyperparameters for the grid search
The specified models are trained on the training data and evaluated on the test data. Experiment results are logged with Neptune.
'''

In [None]:
import itertools
import pickle
import sys
import time
import warnings
from ast import literal_eval
from datetime import timedelta, datetime

import geopandas as gpd
import movingpandas as mpd
import neptune
import numpy as np
import pandas as pd
import pathpy as pp

# Silence every warning for the rest of the session to keep cell output readable.
# NOTE(review): this also hides deprecation and numerical warnings raised by the
# libraries imported above - remove this line while debugging.
warnings.filterwarnings('ignore')

In [None]:
# Put the project's source directories on the module search path so that the
# project modules can be imported from this notebook
sys.path.extend([
    '../src/models',
    '../src/features',
    '../src/visualization',
    '../src/datawrangling',
])

import dataloader_paths, dataloader_geo
from maritime_traffic_network import MaritimeTrafficNetwork
from MOGen_path_prediction import MOGenPathPrediction
from dijkstra_path_prediction import DijkstraPathPrediction
from random_path_prediction import RandomWalkPathPrediction
import prediction_model_evaluation_metrics as metrics

In [None]:
# --- Maritime traffic network selection ---
network_date = '202204'   # date tag of the network
location = 'tromso'       # geographic area tag
datasize = 'full'         # dataset size tag
DP_tol = 10               # 'DP' tolerance of the network
min_samples = 13          # 'HDBSCAN' min_samples of the network

# --- Training and test data selection ---
train_dates = ['202204', '202205', '202207']
test_dates = ['202206']

# Ship category filters, one of: None, 'Last', 'Passasjer', 'Tank', 'Fisk', 'Auxiliary', 'Unknown'
train_filter = 'Last'
test_filter = 'Last'

In [None]:
# Load a maritime traffic network from pickle
# The file name is assembled from the network settings chosen above.
network_name = network_date+'_waypoints_DP'+str(DP_tol)+'_HDBSCAN'+str(min_samples)+'_'+location+'_'+datasize+'_UTM'
network_path = '../models/networks/best_networks/' + network_name + '.obj'

# SECURITY NOTE: pickle.load can execute arbitrary code contained in the file.
# Only load network files that you created yourself or that come from a trusted source.
# The context manager guarantees the file handle is closed even if unpickling fails.
with open(network_path, 'rb') as fileObj:
    network = pickle.load(fileObj)

# Display the hyperparameters of the loaded network as the cell output
network.hyperparameters

In [None]:
# Read the training paths of the selected network for all training dates,
# restricted to the ship category given by train_filter
path_prefix = '../data/paths/'
training_paths = dataloader_paths.load_path_training_data(
    path_prefix,
    network_name,
    train_dates,
    filter=train_filter,
    data_version='',
)

In [None]:
# Read the test paths of the selected network for all test dates,
# restricted to the ship category given by test_filter
traj_path_prefix = '../data/processed/'
all_test_paths = dataloader_paths.load_path_test_data(
    path_prefix,
    network_name,
    test_dates,
    0, -1, 1,  # presumably selection start / end / step (i.e. keep every path) - TODO confirm against dataloader_paths
    filter=test_filter,
    data_version='',
)

# Read the test trajectories for the same dates in the CRS of the network
test_trajectories = dataloader_geo.load_trajectories(
    traj_path_prefix,
    location,
    network.crs,
    test_dates,
)

In [None]:
# --- Experiment definition ---
model_type = 'MOGen'             # supported models: 'Random', 'Markov', 'MOGen', 'Dijkstra'. GRETEL does not run in this environment
prediction_task = 'next_nodes'   # 'path' for subtask 1 and 'next_nodes' for subtask 2
eval_mode = 'path'               # evaluate the prediction against 'path' or 'trajectory'

# --- Candidate hyperparameter values ---
vals_n_start_nodes = [1]         # number of observed start nodes
vals_n_steps = [10]              # prediction horizon
vals_n_walks = [1000]            # number of random walks
vals_max_order = [4]             # maximum order of a MOGen model
vals_order = [0]                 # force order of a MOGen model (if 0, optimal order is used by the model)
weight_vals = ['length', 'inverse_passages', 'inverse_density']  # edge weights for Dijkstra model

# MOGen only: 'partial' (recommended) or 'full' (splits paths into subpaths and trains the model on these)
MOGen_training_mode = 'partial'

# --- Sampling of test paths (passed to dataloader_paths.sample_path_data) ---
selection_start, selection_end, selection_step = 0, -1, 2

# run experiments
# Fail fast on an unknown model type: otherwise none of the branches below would
# assign `predictions`, and the evaluation would either raise a NameError or
# silently reuse a stale `predictions` left in the kernel by an earlier execution.
supported_models = ('Random', 'Markov', 'MOGen', 'Dijkstra')
if model_type not in supported_models:
    raise ValueError("Unsupported model_type {!r}; expected one of {}".format(model_type, supported_models))

# Build the parameter grid as the Cartesian product of all value lists that the
# selected model actually uses. Lists the model does not use contribute only their
# first entry, so they never multiply the number of runs and may have any length.
uses_random_walks = model_type in ('Random', 'Markov', 'MOGen')
is_MOGen = model_type == 'MOGen'
is_Dijkstra = model_type == 'Dijkstra'
parameter_grid = list(itertools.product(
    vals_n_steps,
    vals_n_start_nodes,
    vals_n_walks if uses_random_walks else vals_n_walks[:1],
    vals_max_order if is_MOGen else vals_max_order[:1],
    vals_order if is_MOGen else vals_order[:1],
    weight_vals if is_Dijkstra else weight_vals[:1],
))

for n_steps, n_start_nodes, n_walks, MOGen_max_order, order, weight in parameter_grid:
    # One Neptune run per parameter combination.
    # The values below are placeholders. Do not commit real credentials.
    # NOTE(review): Neptune can reportedly read NEPTUNE_PROJECT / NEPTUNE_API_TOKEN
    # from the environment instead - confirm against the Neptune documentation.
    run = neptune.init_run(
        project="project",
        api_token="token",
    )  # your credentials

    try:
        # prepare test data
        if prediction_task == 'next_nodes':
            # split test paths in subpaths and sample from the subpaths
            sub_paths = dataloader_paths.split_path_data(all_test_paths, n_steps+1)
            test_paths = dataloader_paths.sample_path_data(sub_paths, selection_start, selection_end, selection_step)
        else:
            # sample from original test paths
            test_paths = dataloader_paths.sample_path_data(all_test_paths, selection_start, selection_end, selection_step)
        n_test_paths = len(test_paths)

        # train and predict (exactly one branch runs; model_type was validated above)
        if model_type == 'Dijkstra':
            model = DijkstraPathPrediction()
            model.train(network.G_pruned, training_paths)
            predictions = model.predict(test_paths, n_start_nodes=n_start_nodes, weight=weight)
        elif model_type == 'MOGen':
            model = MOGenPathPrediction()
            model.train(training_paths, max_order=MOGen_max_order, model_selection=True, training_mode=MOGen_training_mode)
            predictions = model.predict(prediction_task, test_paths, network.G,
                                        n_start_nodes=n_start_nodes, n_steps=n_steps,
                                        n_predictions=1, n_walks=n_walks, order=order)
        elif model_type == 'Random':
            model = RandomWalkPathPrediction()
            model.train(network.G_pruned, training_paths)
            predictions = model.predict(test_paths, n_start_nodes, n_steps,
                                        1, n_walks, method='random')
        else:  # 'Markov'
            model = RandomWalkPathPrediction()
            model.train(network.G_pruned, training_paths)
            predictions = model.predict(test_paths, n_start_nodes, n_steps,
                                        1, n_walks, method='weighted')

        # evaluate
        evaluation_results, fig = metrics.evaluate_given_predictions(prediction_task, predictions, test_trajectories,
                                                                     network, n_start_nodes=n_start_nodes, n_steps=n_steps, eval_mode=eval_mode)
        # Rows containing any NaN are counted as failures and excluded from the aggregate metrics
        nan_mask = evaluation_results.isna().any(axis=1)
        failure_rate = nan_mask.sum() / len(evaluation_results)
        mean_abs_err = np.mean(evaluation_results[~nan_mask]["SSPD"])
        median_abs_err = np.median(evaluation_results[~nan_mask]["SSPD"])
        choice_accuracy = np.mean(evaluation_results[~nan_mask]["choice_accuracy"])

        # save experiment
        run["network_name"] = network_name
        run["n_points"] = len(network.gdf)
        run["n_nodes"] = network.G.number_of_nodes()
        run["n_edges"] = network.G.number_of_edges()

        # Work on a copy so the derived entries below are not written back into the
        # network object (assumes network.hyperparameters is a dict-like mapping).
        params = dict(network.hyperparameters)
        params['clustering_metric_V_coord'] = params['clustering_metric_V'][0][0]
        params['clustering_metric_V_cog'] = params['clustering_metric_V'][2][2]
        params['clustering_metric_V_speed'] = params['clustering_metric_V'][4][4]
        run["network_parameters"] = params

        run["training_data"] = {'training_dates':str(train_dates),
                                'n_training_paths':len(training_paths)}

        run["test_data"] = {'test_dates':str(test_dates),
                            'selection_start':selection_start,
                            'selection_end':selection_end,
                            'selection_step':selection_step,
                            'n_test_paths':n_test_paths}

        run["prediction_task"] = prediction_task
        run["eval_mode"] = eval_mode
        run["model_type"] = model_type
        run["MOGen_n_walks"] = n_walks
        run["MOGen_max_order"] = MOGen_max_order
        if model_type == 'MOGen':
            run["MOGen_optimal_order"] = model.order
        if model_type == 'Dijkstra':
            run["weight"] = weight
        run["MOGen_training_mode"] = MOGen_training_mode
        run["n_start_nodes"] = n_start_nodes
        run["n_steps"] = n_steps

        run["plot"].upload(fig)
        run["failure_rate"] = failure_rate
        run["mean_abs_err"] = mean_abs_err
        run["median_abs_err"] = median_abs_err
        run["choice_accuracy"] = choice_accuracy
        run["train_filter"] = train_filter
        run["test_filter"] = test_filter
    finally:
        # Always close the Neptune run, also when training or evaluation fails,
        # so that no dangling run is left open.
        run.stop()