In [1]:
import warnings
import pickle
import sys
import os
import json

warnings.filterwarnings('ignore')

# add paths for modules
sys.path.append('../visualization')
sys.path.append('../features')
sys.path.append('../models')
sys.path.append('../datawrangling')
from maritime_traffic_network import MaritimeTrafficNetwork
import dataloader_paths

In [2]:
# Load a previously built traffic network from its pickle file.
# The settings below select which network file is loaded; together they form the file name.
datasize = 'full'
location = 'tromso'
network_date = '202204'
train_dates = ['202205']
DP_tol = 10
min_samples = 13
data_version = ''

network_name = network_date+'_waypoints_DP'+str(DP_tol)+'_HDBSCAN'+str(min_samples)+'_'+location+'_'+datasize+'_UTM'
network_path = '../../models/networks/best_networks/' + network_name + '.obj'

# NOTE(security): pickle.load can execute arbitrary code while unpickling,
# so only load network files that come from a trusted source.
# The context manager closes the file even when unpickling fails.
with open(network_path, 'rb') as fileObj:
    network = pickle.load(fileObj)

# show the hyperparameters the network was built with
network.hyperparameters

{'Data': '../../data/processed/202204_points_tromso_cleaned_meta_full_dualSplit_2.parquet',
 'DP_tolerance': 10,
 'clustering_method': 'HDBSCAN',
 'clustering_metric': 'mahalanobis',
 'clustering_min_samples': 13,
 'clustering_min_cluster_size': 13,
 'clustering_eps': 0,
 'clustering_metric_V': array([[1.  , 0.  , 0.  , 0.  , 0.  ],
        [0.  , 1.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.01, 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.01, 0.  ],
        [0.  , 0.  , 0.  , 0.  , 1.  ]]),
 'graph_generation_max_distance': 20,
 'graph_generation_max_angle': 45}

In [3]:
# Read the stored training paths that belong to this network
# (one entry in train_dates per data set to load)
path_prefix = '../../data/paths/'
training_paths = dataloader_paths.load_path_training_data(
    path_prefix,
    network_name,
    train_dates,
    data_version=data_version,
)

In [4]:
# Destination directory for all exported files (node, edge, path and metadata files).
# Create it up front so that the writer cells below do not fail on a fresh checkout.
dest_path = '../../data/interim/RNN_input/'
os.makedirs(dest_path, exist_ok=True)

In [5]:
# Export the network graph as two tab-separated text files in dest_path:
#   'nodeOSM.txt': one line per node -> node id followed by the remaining node attributes
#   'edgeOSM.txt': one line per edge -> edge id, sender, receiver, 2, lat1, lon1, lat2, lon2
# Also builds edge_dict, which maps (sender, receiver) to the edge id used in 'edgeOSM.txt'.
G = network.G.copy()  # edit the attributes of a copy, not of network.G itself

# node attributes that are not exported
dropped_node_features = ('cog_before', 'cog_after', 'speed', 'n_members')

# split 'position' into separate 'lat'/'lon' attributes and drop the unused attributes
for node, data in G.nodes(data=True):
    if 'position' in data:
        lon, lat = data['position']
        data['lat'] = lat
        data['lon'] = lon
        del data['position']
    for feature in dropped_node_features:
        del data[feature]  # raises KeyError if a node lacks one of these attributes

# write nodes to file (attribute values are written in the order they are stored on the node)
with open(os.path.join(dest_path, 'nodeOSM.txt'), 'w') as f:
    for node_id, features in G.nodes.data():
        line = str(node_id) + "\t" + "\t".join(map(str, features.values())) + "\n"
        f.write(line)

# Build the clusterID -> (lat, lon) lookup once instead of filtering the
# waypoint table four times for every edge.
waypoints = network.waypoints
waypoint_ids = waypoints['clusterID'].tolist()
if len(set(waypoint_ids)) != len(waypoint_ids):
    # a lookup table needs exactly one waypoint per clusterID
    raise ValueError("network.waypoints contains duplicate clusterID values")
waypoint_coords = dict(zip(waypoint_ids,
                           zip(waypoints['lat'].tolist(), waypoints['lon'].tolist())))

# write edges to file and remember the id assigned to each edge
edges = G.edges
edge_dict = {}  # dictionary mapping (sender, receiver) node pair to edge ID
with open(os.path.join(dest_path, 'edgeOSM.txt'), 'w') as f:
    for edge_id, (sender, receiver, _) in enumerate(edges.data()):
        # a KeyError here means the edge endpoint has no waypoint entry
        lat1, lon1 = waypoint_coords[sender]
        lat2, lon2 = waypoint_coords[receiver]
        # the fourth column is the constant 2 (meaning not visible here; kept as in the original format)
        fields = [edge_id, sender, receiver, 2, lat1, lon1, lat2, lon2]
        f.write("\t".join(map(str, fields)) + "\n")
        edge_dict[(sender, receiver)] = edge_id  # save id for later mapping

In [6]:
# Write the training paths to 'paths.txt': one line per path, holding the
# comma-separated ids of the edges the path traverses.
# The paths are sequences of node ids, so each pair of consecutive nodes is
# translated to its edge id via edge_dict (filled while writing 'edgeOSM.txt').
with open(os.path.join(dest_path, "paths.txt"), "w") as f:
    for path in training_paths:
        if len(path) < 2:
            # a path needs at least two nodes to contain an edge
            raise ValueError("path {} has fewer than two nodes".format(path))
        # a KeyError here means the path uses a node pair that is not an edge of the graph
        edge_ids = [edge_dict[(orig, dest)] for orig, dest in zip(path[:-1], path[1:])]
        f.write(",".join(map(str, edge_ids)) + "\n")
    

In [7]:
# Sanity check: print every training path that is_valid_path rejects for graph G.
# No output means that all training paths passed the check.
from geometry_utils import is_valid_path
for path in training_paths:
    if not is_valid_path(G, path):
        print(path)

In [8]:
# length (number of entries) of the longest training path
max(map(len, training_paths))

101

In [9]:
# Save a JSON summary of this export next to the exported files:
# which network was used, its size, and which training data was written.
meta_dict = {'network_name': network_name,
             'n_points': len(network.gdf),
             'n_nodes': network.G.number_of_nodes(),
             'n_edges': network.G.number_of_edges(),
             'training_dates': str(train_dates),  # stored as the string form of the list, not as a JSON array
             'data_version': data_version,
             'n_training_paths': len(training_paths)}
# os.path.join keeps the path correct even if dest_path has no trailing separator
with open(os.path.join(dest_path, 'metadata.json'), 'w') as json_file:
    json.dump(meta_dict, json_file)