In [1]:
import warnings
import pickle
import sys
import os
import json

warnings.filterwarnings('ignore')

# add paths for modules
sys.path.append('../visualization')
sys.path.append('../features')
sys.path.append('../models')
sys.path.append('../datawrangling')
from maritime_traffic_network import MaritimeTrafficNetwork
import dataloader_paths

In [2]:
# Load a model from pickle
# The settings below identify which stored network is loaded; they are
# combined into the file name of the pickled network.
datasize = 'full'
location = 'tromso'
network_date = '202204'
train_dates = ['202205']
DP_tol = 10
min_samples = 13

network_name = network_date+'_waypoints_DP'+str(DP_tol)+'_HDBSCAN'+str(min_samples)+'_'+location+'_'+datasize+'_UTM'
network_path = '../../models/networks/best_networks/' + network_name + '.obj'
# NOTE: pickle.load can execute arbitrary code contained in the file, so only
# load network files that come from a trusted source.
# The context manager closes the file even if unpickling fails.
with open(network_path, 'rb') as fileObj:
    network = pickle.load(fileObj)
# display the hyperparameters the network was built with
network.hyperparameters

{'Data': '../../data/processed/202204_points_tromso_cleaned_meta_full_dualSplit_2.parquet',
 'DP_tolerance': 10,
 'clustering_method': 'HDBSCAN',
 'clustering_metric': 'mahalanobis',
 'clustering_min_samples': 13,
 'clustering_min_cluster_size': 13,
 'clustering_eps': 0,
 'clustering_metric_V': array([[1.  , 0.  , 0.  , 0.  , 0.  ],
        [0.  , 1.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.01, 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.01, 0.  ],
        [0.  , 0.  , 0.  , 0.  , 1.  ]]),
 'graph_generation_max_distance': 20,
 'graph_generation_max_angle': 45}

In [3]:
# Read the training paths stored for the selected network and the dates in train_dates
path_prefix = '../../data/paths/'
training_paths = dataloader_paths.load_path_training_data(
    path_prefix,
    network_name,
    train_dates,
)

In [4]:
# destination_path
dest_path = '../../data/interim/gretel_input/'
# Create the output directory up front: the later open(..., 'w') calls raise
# FileNotFoundError when the directory does not exist yet.
os.makedirs(dest_path, exist_ok=True)

In [5]:
# write graph to files 'nodes.txt', 'edges.txt'
# All edits below are made on a copy of the network's graph.
G = network.G.copy()

# Remove the node attributes that are not exported as node features.
for _node, attrs in G.nodes(data=True):
    # 'position' is optional and is dropped as a whole
    # (it is not expanded into separate lat/lon attributes)
    attrs.pop('position', None)
    # these attributes are expected on every node (KeyError if one is missing)
    for dropped in ('cog_before', 'cog_after', 'speed'):
        del attrs[dropped]

# write nodes to file
# Layout of 'nodes.txt':
#   header line: <number of nodes> TAB <number of features per node>
#   one line per node: <node id> TAB <feature values, TAB separated>
# The feature count is taken from the first node of the graph (0 for an empty
# graph) instead of assuming that a node with id 0 exists; every node is
# expected to carry the same set of features.
first_node_features = next(iter(G.nodes(data=True)), (None, {}))[1]
with open(os.path.join(dest_path, 'nodes.txt'), 'w') as f:
    f.write("{}\t{}\n".format(G.number_of_nodes(), len(first_node_features)))
    # 'node_id' rather than 'id', so the builtin id() is not rebound at notebook scope
    for node_id, features in G.nodes.data():
        line = str(node_id) + "\t" + "\t".join(map(str, features.values())) + "\n"
        f.write(line)

# rearrange edge features
# Delete the edge attributes that are not exported and collect the names of
# the attributes that remain on any edge.
edges = G.edges
unique_edge_features = set()
for _sender, _receiver, attrs in edges.data():
    # both attributes are expected on every edge (KeyError if one is missing);
    # any other attribute is left in place
    for dropped in ('geometry', 'inverse_weight'):
        del attrs[dropped]
    unique_edge_features.update(attrs)
n_edge_features = len(unique_edge_features)

# Maps a (sender, receiver) node pair to its edge ID, i.e. the running index
# of the edge in 'edges.txt'. Used later to translate node paths into edge paths.
edge_dict = {}
edges_file = os.path.join(dest_path, 'edges.txt')
with open(edges_file, 'w') as f:
    # header line: <number of edges> TAB <number of distinct edge features>
    f.write("{}\t{}\n".format(G.number_of_edges(), n_edge_features))
    # one line per edge: <edge id> TAB <sender> TAB <receiver> TAB <feature values>
    for edge_id, (sender, receiver, features) in enumerate(edges.data()):
        id_columns = "\t".join(str(field) for field in (edge_id, sender, receiver))
        feature_columns = "\t".join(str(value) for value in features.values())
        f.write(id_columns + "\t" + feature_columns + "\n")
        edge_dict[(sender, receiver)] = edge_id

In [6]:
# write path data to files 'lengths.txt', 'observations.txt', 'paths.txt'
# Supported path formats:
#   'start2target': every path contributes two observations (its first and last
#                   node) and one line in 'paths.txt' holding all of its edge IDs
#   'node2node':    every node of a path is an observation and every edge gets
#                   its own line in 'paths.txt'
mode = 'start2target' #'node2node' #'start2target'
#training_paths = training_paths[0:200]  # optional: only export a subset of the paths

SUPPORTED_MODES = ('start2target', 'node2node')
if mode not in SUPPORTED_MODES:
    # fail loudly instead of silently writing no files at all
    raise ValueError("unknown mode {!r}; expected one of {}".format(mode, SUPPORTED_MODES))


def path_to_edge_ids(path, edge_lookup):
    """Translate a sequence of node IDs into the IDs of the edges between consecutive nodes.

    Parameters:
        path: sequence of node IDs
        edge_lookup: dictionary mapping (sender, receiver) node pairs to edge IDs

    Returns:
        list of edge IDs, one per consecutive node pair (empty for paths with fewer than 2 nodes)

    Raises:
        KeyError: if a consecutive node pair is not a key of edge_lookup
    """
    edge_ids = []
    for i in range(len(path) - 1):
        node_pair = (path[i], path[i + 1])
        if node_pair not in edge_lookup:
            raise KeyError("training path contains node pair {} that is not an edge of the graph".format(node_pair))
        edge_ids.append(edge_lookup[node_pair])
    return edge_ids


# Convert all paths before any file is opened, so that an invalid path aborts
# the export instead of leaving a partially written set of files behind.
edge_id_paths = [path_to_edge_ids(path, edge_dict) for path in training_paths]

if mode == 'start2target':
    # write path lengths to file 'lengths.txt'
    # in this case, path length is always 2, directly from start observation to target observation
    with open(os.path.join(dest_path, "lengths.txt"), "w") as f:
        for i, _ in enumerate(training_paths):
            f.write("{}\t{}\n".format(i, 2))

    # write observations to file 'observations.txt'
    # for each path, we have 2 observations: start observation and target observation, which each get their own line
    # the value 1 denotes the probability of this observation (peculiarity of gretel)
    with open(os.path.join(dest_path, "observations.txt"), "w") as f:
        f.write("{}\t{}\n".format(2*len(training_paths), 1))
        for path in training_paths:
            f.write("{}\t{}\n".format(path[0], 1.0))
            f.write("{}\t{}\n".format(path[-1], 1.0))

    # write edge sequence to file 'paths.txt'
    # header: number of paths and the maximum number of nodes in any path (0 if there are no paths)
    # each following line holds the edge IDs of one path, every ID followed by a tab
    with open(os.path.join(dest_path, "paths.txt"), "w") as f:
        f.write("{}\t{}\n".format(len(training_paths), max((len(path) for path in training_paths), default=0)))
        for edge_ids in edge_id_paths:
            f.write("".join("{}\t".format(edge_id) for edge_id in edge_ids) + "\n")

else:  # mode == 'node2node'
    # total number of observations (= nodes) over all paths
    all_observations = sum(len(path) for path in training_paths)

    # write path lengths to file 'lengths.txt'
    with open(os.path.join(dest_path, "lengths.txt"), "w") as f:
        for i, path in enumerate(training_paths):
            f.write("{}\t{}\n".format(i, len(path)))

    # write observations to file 'observations.txt'
    # for each path, we have len(path) observations
    # the value 1 denotes the probability of this observation (peculiarity of gretel)
    with open(os.path.join(dest_path, "observations.txt"), "w") as f:
        f.write("{}\t{}\n".format(all_observations, 1))
        for path in training_paths:
            for node in path:
                f.write("{}\t{}\n".format(node, 1.0))

    # write edge between each node pair to file 'paths.txt'
    # header: total number of edges (one fewer than the number of nodes for every
    # non-empty path) and 1, since each line holds exactly one edge ID
    n_path_edges = sum(len(edge_ids) for edge_ids in edge_id_paths)
    with open(os.path.join(dest_path, "paths.txt"), "w") as f:
        f.write("{}\t{}\n".format(n_path_edges, 1))
        for edge_ids in edge_id_paths:
            for edge_id in edge_ids:
                f.write("{}\n".format(edge_id))
    

In [7]:
from geometry_utils import is_valid_path

# Sanity check: print every training path that is_valid_path rejects for the
# exported graph G. No output means that all paths passed the check.
for path in training_paths:
    if not is_valid_path(G, path):
        print(path)

In [8]:
# length (number of entries) of the longest training path
max(map(len, training_paths))

101

In [9]:
# save metadata to file
# Feature names are read from the first node of the exported graph without
# materialising the full node list; an empty graph yields no feature names.
node_features = next(iter(G.nodes(data=True)), (None, {}))[1]

# NOTE(review): the key 'egde_features' is misspelled ('edge_features'). It is
# kept unchanged because consumers of metadata.json may rely on it - confirm
# before renaming.
meta_dict = {'network_name': network_name,
             'n_points': len(network.gdf),
             'n_nodes': network.G.number_of_nodes(),
             'n_edges': network.G.number_of_edges(),
             'training_dates': str(train_dates),
             'n_training_paths': len(training_paths),
             'node_features': str(list(node_features)),
             'egde_features': str(list(unique_edge_features)),
             'path_format': mode}
# os.path.join matches how the other output files are addressed and does not
# depend on dest_path ending with a path separator
with open(os.path.join(dest_path, 'metadata.json'), 'w') as json_file:
    json.dump(meta_dict, json_file)