In [1]:
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import numpy as np
from datetime import timedelta, datetime
from maritime_traffic_network import MaritimeTrafficNetwork
import folium
import warnings
import sys
import pickle
# Suppress all warnings emitted by the libraries used below.
warnings.filterwarnings('ignore')

# Report the versions of the geospatial libraries in use.
print(f"Geopandas has version {gpd.__version__}")
print(f"Movingpandas has version {mpd.__version__}")

Geopandas has version 0.13.2
Movingpandas has version 0.17.1


In [2]:
# add paths for modules
# Append each directory only once: re-running this cell would otherwise keep
# adding duplicate entries to sys.path.
for module_dir in ('../visualization', '../features'):
    if module_dir not in sys.path:
        sys.path.append(module_dir)
print(sys.path)

# import modules
# Must come after the sys.path changes above, which make this module importable.
import visualize

['/Users/janhendrikwebert/maritime_route_prediction/src/models', '/Users/janhendrikwebert/miniforge3/envs/env_geo/lib/python311.zip', '/Users/janhendrikwebert/miniforge3/envs/env_geo/lib/python3.11', '/Users/janhendrikwebert/miniforge3/envs/env_geo/lib/python3.11/lib-dynload', '', '/Users/janhendrikwebert/miniforge3/envs/env_geo/lib/python3.11/site-packages', '../visualization', '../features', '../visualization', '../features']


In [3]:
# read data from file
# Dataset selectors; the input path below is assembled from these pieces.
datasize = 'full'
location = 'tromso'
data_date = '202204'
eval_date = '202205'

# Cleaned AIS points for the selected date / location / size.
filename = f'../../data/processed/{data_date}_points_{location}_cleaned_meta_{datasize}_dualSplit_2.parquet'
# Alternative input:
#filename = '../../data/processed/202204_points_stavanger_cleaned_meta_full.parquet'
gdf = gpd.read_parquet(filename)

In [4]:
# filter the data according to ship type (currently disabled: all ship types are kept)
#shiptypes = ['Tank', 'Last', 'Passasjer', None]
#gdf = gdf[gdf.skipsgruppe.isin(shiptypes)]

In [5]:
# Transform to desired CRS
# 4326 for WGS 84 (global) // 32632 for UTM 32N (Norway)
crs = 32632  # Coordinate reference system (EPSG code)
# Rebind the name instead of mutating in place: to_crs returns the
# reprojected frame, and the object loaded in the previous cell stays untouched.
gdf = gdf.to_crs(crs)  # Transformation
gdf.head()

Unnamed: 0_level_0,mmsi,imo_nr,length,lon,lat,sog,cog,true_heading,nav_status,message_nr,bredde,dypgaaende,skipstype,skipsgruppe,fartoynavn,geometry,speed
date_time_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2022-04-01 04:02:37,210055000_4_2022-04-01 04:02:37,9448279,117,18.9728,69.6592,0.7,45.7,313,5,3,19.700001,6.8,Cement Carrier,Last,UBC CORK,POINT (885399.291 7759417.052),0.79562
2022-04-01 04:05:06,210055000_4_2022-04-01 04:02:37,9448279,117,18.9758,69.659,3.1,113.0,295,5,3,19.700001,6.8,Cement Carrier,Last,UBC CORK,POINT (885517.967 7759413.977),0.79562
2022-04-01 04:05:17,210055000_4_2022-04-01 04:02:37,9448279,117,18.9762,69.6589,3.4,112.3,295,5,3,19.700001,6.8,Cement Carrier,Last,UBC CORK,POINT (885535.123 7759405.484),1.737872
2022-04-01 04:05:27,210055000_4_2022-04-01 04:02:37,9448279,117,18.9767,69.6588,3.6,111.5,294,5,3,19.700001,6.8,Cement Carrier,Last,UBC CORK,POINT (885556.115 7759397.623),2.238317
2022-04-01 04:05:38,210055000_4_2022-04-01 04:02:37,9448279,117,18.9773,69.6588,3.9,111.8,293,5,3,19.700001,6.8,Cement Carrier,Last,UBC CORK,POINT (885579.123 7759401.419),2.116917


In [6]:
# initialize maritime traffic network
# Build the network object from the AIS GeoDataFrame and the CRS chosen above.
network = MaritimeTrafficNetwork(gdf, crs)
# Prints a summary: number of AIS messages, number of trajectories and the CRS.
network.get_trajectories_info()

Number of AIS messages: 796177
Number of trajectories: 2119
Coordinate Reference System (CRS): EPSG:32632


In [7]:
# calculate significant turning points using Douglas Peucker algorithm
# NOTE(review): the tolerance is presumably expressed in CRS units (metres for
# EPSG:32632) — confirm. The "0.0002" looks like a previously used value
# (possibly for a degree-based CRS) kept for reference — confirm.
tolerance = 10 # DP tolerance parameter (earlier value: 0.0002)
# Per the printed log, this step also adds course over ground before and after each turn.
network.calc_significant_points_DP(tolerance)

Calculating significant turning points with Douglas Peucker algorithm (tolerance = 10) ...
Number of significant points detected: 64537 (8.11% of AIS messages)
Time elapsed: 0.62 minutes
Adding course over ground before and after each turn ...
Done. Time elapsed: 0.25 minutes


In [8]:
# detect waypoints using spatial clustering
# Clustering configuration (alternatives listed for reference).
method = 'HDBSCAN'      # one of: 'DBSCAN', 'HDBSCAN', 'OPTICS'
metric = 'mahalanobis'  # one of: 'euclidean', 'mahalanobis', 'haversine'
min_samples = 13
min_cluster_size = 13
eps = 0

# Diagonal parameter matrix V for the mahalanobis distance.
# NOTE(review): an earlier note said V = np.diag([1, 1, 0.01, 0.01, 1e-3]) "seems to be good";
# the last entry used here differs from that — confirm which value is intended.
V = np.diag([1, 1, 0.01, 0.01, 1])

network.calc_waypoints_clustering(
    method=method,
    metric=metric,
    min_samples=min_samples,
    min_cluster_size=min_cluster_size,
    eps=eps,
    V=V,
)

Calculating waypoints with HDBSCAN (min_samples = 13) ...
Distance metric: mahalanobis
541 clusters detected
Time elapsed: 0.72 minutes


In [9]:
# make graph from waypoints
# Thresholds forwarded to the graph construction step
# (units are not visible from this notebook — TODO confirm).
max_distance, max_angle = 10, 45
network.make_graph_from_waypoints(
    max_distance=max_distance,
    max_angle=max_angle,
)

Constructing maritime traffic network graph from waypoints and trajectories...
Progress: 10%...20%...30%...40%...50%...60%...70%...80%...90%...Done!
------------------------
Unpruned Graph:
Number of nodes: 541 (1 isolated)
Number of edges: 2209
Network is (weakly) connected: False
------------------------
Time elapsed: 2.79 minutes


In [10]:
# Model name encoding the main settings used to build the network.
# NOTE(review): this name ends in '_UTM_filtered', whereas the pickle written
# at the end of the notebook ends in '_UTM' — confirm which one is intended.
model = f'{data_date}_waypoints_DP{tolerance}_{method}{min_samples}_{location}_{datasize}_UTM_filtered'

merge_stops = True       # switch: merge stop points before pruning
merge_stops_speed = 2    # passed as max_speed to merge_stop_points (units — TODO confirm)
pruning = 1              # argument handed to prune_graph

# Honour the merge_stops switch; previously the merge ran unconditionally,
# so setting the flag to False had no effect.
if merge_stops:
    network.merge_stop_points(max_speed=merge_stops_speed)
network.prune_graph(pruning)

Pruning...
------------------------
Pruned Graph:
Number of nodes: 541 (1 isolated)
Number of edges: 2137
------------------------


In [11]:
# save hyperparameters
# Gather the settings used in the steps above and hand them to the network.
# NOTE(review): merge_stops_speed and pruning are not part of this record.
params = dict(
    Data=filename,
    DP_tolerance=tolerance,
    clustering_method=method,
    clustering_metric=metric,
    clustering_min_samples=min_samples,
    clustering_min_cluster_size=min_cluster_size,
    clustering_eps=eps,
    clustering_metric_V=V,
    graph_generation_max_distance=max_distance,
    graph_generation_max_angle=max_angle,
)
network.set_hyperparameters(params)

In [12]:
# save network as pickle object
# The file name is built from the settings used above; the clustering method is
# taken from `method` rather than a hard-coded 'HDBSCAN', so the name keeps
# matching the network if the method is changed.
network_path = f'../../data/interim/{data_date}_waypoints_DP{tolerance}_{method}{min_samples}_{location}_{datasize}_UTM.obj'
# The context manager closes the file even if pickling raises.
with open(network_path, 'wb') as fileObj:
    pickle.dump(network, fileObj)