In [1]:
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
from ast import literal_eval
import time
import warnings
import pickle
import sys
import os

# Suppress every warning for the rest of the session, then report library versions.
warnings.filterwarnings('ignore')
print("Geopandas has version {}".format(gpd.__version__))
print("Movingpandas has version {}".format(mpd.__version__))

# Register the directories holding the project modules on the import path
# (same order as before, so module lookup precedence is unchanged).
sys.path.extend(['../visualization', '../features', '../models'])
from maritime_traffic_network import MaritimeTrafficNetwork

Geopandas has version 0.13.2
Movingpandas has version 0.17.1


In [2]:
# Load a model from pickle
# The settings below identify the stored network; model_name and train_date are
# also used by the next cell to locate the matching path file.
datasize = 'full'
location = 'tromso'
model_date = '202204'
train_date = '202205'
test_date = '202206'
DP_tol = 10
min_samples = 13

model_name = model_date+'_waypoints_DP'+str(DP_tol)+'_HDBSCAN'+str(min_samples)+'_'+location+'_'+datasize+'_UTM'
model_path = '../../models/networks/best_networks/' + model_name + '.obj'
# NOTE: unpickling executes code stored in the file -- only load trusted model files.
# The context manager closes the file even when pickle.load raises.
with open(model_path, 'rb') as fileObj:
    network = pickle.load(fileObj)
# last expression of the cell: display the hyperparameters stored with the model
network.hyperparameters

{'Data': '../../data/processed/202204_points_tromso_cleaned_meta_full_dualSplit_2.parquet',
 'DP_tolerance': 10,
 'clustering_method': 'HDBSCAN',
 'clustering_metric': 'mahalanobis',
 'clustering_min_samples': 13,
 'clustering_min_cluster_size': 13,
 'clustering_eps': 0,
 'clustering_metric_V': array([[1.  , 0.  , 0.  , 0.  , 0.  ],
        [0.  , 1.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.01, 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.01, 0.  ],
        [0.  , 0.  , 0.  , 0.  , 1.  ]]),
 'graph_generation_max_distance': 20,
 'graph_generation_max_angle': 45}

In [3]:
# Read the path table that belongs to the selected model and training month
filename = model_name+'_'+train_date+'_paths.csv'
paths_file = '../../data/paths/'+filename
training_data = pd.read_csv(paths_file)

# the 'path' column is read as text; evaluate each entry back into a Python literal
training_data['path'] = training_data['path'].apply(literal_eval)

# turn the table into a GeoDataFrame: parse the WKT 'geometry' column and reuse the network's CRS
path_geometry = gpd.GeoSeries.from_wkt(training_data['geometry'])
training_data = gpd.GeoDataFrame(training_data, geometry=path_geometry, crs=network.crs)

# keep only the rows whose 'message' column equals 'success'
is_success = training_data['message'] == 'success'
training_data = training_data[is_success]

# plain Python list of the remaining paths, used by the export cells
training_paths = training_data['path'].tolist()
training_data.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 1433 entries, 0 to 1451
Data columns (total 14 columns):
 #   Column            Non-Null Count  Dtype   
---  ------            --------------  -----   
 0   Unnamed: 0        1433 non-null   int64   
 1   mmsi              1433 non-null   object  
 2   SSPD              1433 non-null   float64 
 3   distances         1433 non-null   object  
 4   fraction_covered  1433 non-null   float64 
 5   message           1433 non-null   object  
 6   path              1433 non-null   object  
 7   path_linestring   1433 non-null   object  
 8   lengde            1433 non-null   int64   
 9   bredde            1039 non-null   float64 
 10  dypgaaende        942 non-null    float64 
 11  skipstype         1039 non-null   object  
 12  skipsgruppe       1039 non-null   object  
 13  geometry          1433 non-null   geometry
dtypes: float64(4), geometry(1), int64(2), object(7)
memory usage: 167.9+ KB


In [4]:
# Destination directory for the exported files: the cells below write
# nodes.txt, edges.txt, lengths.txt, observations.txt and paths.txt into it.
dest_path = '../../data/interim/gretel_input/'

In [5]:
# write graph to files 'nodes.txt', 'edges.txt'
# all attribute edits below are made on a copy of the network's graph
G = network.G.copy()

# make sure the output directory exists before the first file is written
os.makedirs(dest_path, exist_ok=True)

# rearrange node features: split position into lat and lon
# NOTE(review): the first coordinate is stored as 'lat' and the second as 'lon';
# confirm that 'position' really is ordered that way (the model name says UTM).
for node, data in G.nodes(data=True):
    if 'position' in data:
        lat, lon = data['position']
        data['lat'] = lat
        data['lon'] = lon
        del data['position']

# write nodes to file
# header line:  <number of nodes> TAB <number of node features>
# node lines:   <node id> TAB <feature values, tab separated>
node_records = list(G.nodes.data())
# Take the feature count from the first node actually present. The previous
# G.nodes[0] lookup raised a KeyError when no node had the id 0 or the graph was empty.
n_node_features = len(node_records[0][1]) if node_records else 0
with open(os.path.join(dest_path, 'nodes.txt'), 'w') as f:
    f.write("{}\t{}\n".format(G.number_of_nodes(), n_node_features))
    for node_id, features in node_records:
        line = str(node_id) + "\t" + "\t".join(map(str, features.values())) + "\n"
        f.write(line)

# rearrange edge features: remove the attributes that are not exported and
# count the distinct feature names that remain (all other attributes are kept)
edges = G.edges
dropped_edge_features = ('geometry', 'inverse_weight')
unique_edge_features = set()
for sender, receiver, features in edges.data():
    for key in dropped_edge_features:
        # pop with a default: an edge that lacks the attribute no longer raises KeyError
        edges[sender, receiver].pop(key, None)
    unique_edge_features.update(features.keys())
n_edge_features = len(unique_edge_features)

# write edges to file
# header line:  <number of edges> TAB <number of distinct edge feature names>
# edge lines:   <edge id> TAB <sender> TAB <receiver> TAB <feature values, tab separated>
# NOTE: each line holds that edge's own attribute values in its own order, so the
# columns only match the header if every edge carries the same attribute keys.
edge_dict = {}  # dictionary mapping (sender, receiver) to the edge ID written to file
with open(os.path.join(dest_path, 'edges.txt'), 'w') as f:
    f.write("{}\t{}\n".format(G.number_of_edges(), n_edge_features))
    for i, (sender, receiver, features) in enumerate(edges.data()):
        line = "\t".join(map(str, [i, sender, receiver])) + \
               "\t" + \
               "\t".join(map(str, features.values())) + "\n"
        f.write(line)
        edge_dict[(sender, receiver)] = i  # save id for later mapping

In [6]:
# write path data to files 'lengths.txt', 'observations.txt', 'paths.txt'
mode = 'one_start_node_to_target'
max_training_paths = 600  # only the first 600 training paths are exported
training_paths = training_paths[0:max_training_paths]

if mode == 'one_start_node_to_target':
    # write path lengths to file 'lengths.txt'
    # in this case, path length is always 2, directly from start observation to target observation
    with open(os.path.join(dest_path, "lengths.txt"), "w") as f:
        for i in range(len(training_paths)):
            f.write("{}\t{}\n".format(i, 2))

    # write observations to file 'observations.txt'
    # for each path, we have 2 observations: start observation and target observation,
    # which each get their own line
    # the value 1 denotes the probability of this observation (peculiarity of gretel)
    with open(os.path.join(dest_path, "observations.txt"), "w") as f:
        f.write("{}\t{}\n".format(2*len(training_paths), 1))
        for path in training_paths:
            f.write("{}\t{}\n".format(path[0], 1.0))
            f.write("{}\t{}\n".format(path[-1], 1.0))

    # write edge sequence to file 'paths.txt'
    # we need to convert the sequence of node ids to a sequence of edge ids
    # header line: <number of paths> TAB <largest number of nodes in any path>
    # (default=0 keeps max() from raising a ValueError when there are no paths)
    with open(os.path.join(dest_path, "paths.txt"), "w") as f:
        longest_path = max((len(path) for path in training_paths), default=0)
        f.write("{}\t{}\n".format(len(training_paths), longest_path))
        for path in training_paths:
            # consecutive node pairs are looked up in edge_dict (built while writing
            # 'edges.txt'); a pair that is not an edge of the graph raises a KeyError
            for i in range(0, len(path)-1):
                orig_dest = (path[i], path[i+1])
                edge_id = edge_dict[orig_dest]
                f.write("{}\t".format(edge_id))
            f.write("\n")
else:
    # fail loudly instead of silently writing no files for an unknown mode
    raise ValueError("unsupported mode: {!r}".format(mode))

In [7]:
# Sanity check: print every training path that is_valid_path rejects for graph G.
from geometry_utils import is_valid_path
for path in training_paths:
    # truthiness test instead of comparing against the literal False
    if not is_valid_path(G, path):
        print(path)