In [1]:
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import numpy as np
from datetime import timedelta, datetime
from maritime_traffic_network import MaritimeTrafficNetwork
import folium
import warnings
import sys
import pickle
warnings.filterwarnings('ignore')

print("Geopandas has version {}".format(gpd.__version__))
print("Movingpandas has version {}".format(mpd.__version__))

Geopandas has version 0.13.2
Movingpandas has version 0.17.1


In [2]:
# add paths for modules
sys.path.append('../visualization')
sys.path.append('../features')
print(sys.path)

# import modules
import visualize

['/Users/janhendrikwebert/maritime_route_prediction/src/models', '/Users/janhendrikwebert/miniforge3/envs/env_geo/lib/python311.zip', '/Users/janhendrikwebert/miniforge3/envs/env_geo/lib/python3.11', '/Users/janhendrikwebert/miniforge3/envs/env_geo/lib/python3.11/lib-dynload', '', '/Users/janhendrikwebert/miniforge3/envs/env_geo/lib/python3.11/site-packages', '../visualization', '../features', '../models', '../features', '../visualization', '../features']


In [3]:
# read data from file
datasize = 'full'
location = 'tromso'
data_dates = ['202204']
filenames = []

for i in range(0, len(data_dates)):
    data_date = data_dates[i]
    # load path data from file
    filename = '../../data/processed/'+data_date+'_points_'+location+'_cleaned_meta_'+datasize+'_dualSplit_3.parquet'
    filenames.append(filename)
    gdf_part = gpd.read_parquet(filename)

    if i==0:
        gdf = gdf_part
    else:
        gdf = pd.concat([gdf, gdf_part])

data_date = str(data_dates)
filename = str(filenames)

In [4]:
# filter the data according to ship type
#shiptypes = ['Tank', 'Last', 'Passasjer', None]
#gdf = gdf[gdf.skipsgruppe.isin(shiptypes)]

In [5]:
# Transform to desired CRS
# 4326 for WGS 84 (global) // 32632 for UTM 32N (Norway)
crs = 32632  # Coordinate reference system
gdf.to_crs(crs, inplace=True)  # Transformation
gdf.head()

Unnamed: 0_level_0,mmsi,imo_nr,length,lon,lat,sog,cog,true_heading,nav_status,message_nr,bredde,dypgaaende,skipstype,skipsgruppe,fartoynavn,geometry,speed
date_time_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2022-04-01 05:28:44,210055000_0_2022-04-01 05:28:44,9448279,117,18.8522,69.592,13.2,193.5,190,0,1,19.700001,6.8,Cement Carrier,Last,UBC CORK,POINT (881980.308 7751249.735),7.635761
2022-04-01 05:28:53,210055000_0_2022-04-01 05:28:44,9448279,117,18.8518,69.5914,13.2,193.6,190,0,1,19.700001,6.8,Cement Carrier,Last,UBC CORK,POINT (881975.684 7751181.073),7.635761
2022-04-01 05:29:04,210055000_0_2022-04-01 05:28:44,9448279,117,18.8514,69.5908,13.2,193.8,190,0,1,19.700001,6.8,Cement Carrier,Last,UBC CORK,POINT (881971.059 7751112.412),6.247449
2022-04-01 05:29:13,210055000_0_2022-04-01 05:28:44,9448279,117,18.851,69.5902,13.2,193.7,190,0,1,19.700001,6.8,Cement Carrier,Last,UBC CORK,POINT (881966.433 7751043.750),7.635782
2022-04-01 05:29:23,210055000_0_2022-04-01 05:28:44,9448279,117,18.8506,69.5896,13.2,193.2,192,0,1,19.700001,6.8,Cement Carrier,Last,UBC CORK,POINT (881961.806 7750975.089),6.872213


In [6]:
# initialize maritime traffic network
network = MaritimeTrafficNetwork(gdf, crs)
network.get_trajectories_info()

Number of AIS messages: 771730
Number of trajectories: 1956
Coordinate Reference System (CRS): EPSG:32632


In [7]:
# calculate significant turning points using Douglas Peucker algorithm
tolerance = 10 # DP tolerance parameter 0.0002
network.calc_significant_points_DP(tolerance)

Calculating significant turning points with Douglas Peucker algorithm (tolerance = 10) ...
Number of significant points detected: 61214 (7.93% of AIS messages)
Time elapsed: 0.58 minutes
Adding course over ground before and after each turn ...
Done. Time elapsed: 0.22 minutes


In [8]:
# detect waypoints using spatial clustering
method = 'HDBSCAN'      # 'DBSCAN' , 'HDBSCAN', 'OPTICS'
metric = 'euclidean'  # 'euclidean', 'mahalanobis', 'haversine'
min_samples = 10
min_cluster_size = 10
eps = 0
V = np.diag([1, 1, 0.01, 0.01, 1])  # mahalanobis distance parameter matrix V = np.diag([1, 1, 0.01, 0.01, 1e-3])  seems to be good
network.calc_waypoints_clustering(method=method, min_samples=min_samples, min_cluster_size=min_cluster_size,
                                  eps=eps, metric=metric, V=V)

Calculating waypoints with HDBSCAN (min_samples = 10) ...
Distance metric: euclidean
996 clusters detected
Time elapsed: 0.23 minutes


In [9]:
# make graph from waypoints
max_distance=20
max_angle=45
network.make_graph_from_waypoints(max_distance=max_distance, max_angle=max_angle)

Constructing maritime traffic network graph from waypoints and trajectories...
Progress: 10%...20%...30%...40%...50%...60%...70%...80%...90%...Done!
------------------------
Unpruned Graph:
Number of nodes: 996 (91 isolated)
Number of edges: 3290
Network is (weakly) connected: False
------------------------
Time elapsed: 2.73 minutes


In [10]:
model = data_date+'_waypoints_DP' + str(tolerance) + '_' + method + str(min_samples) +'_'+location+'_'+datasize+'_UTM_filtered'
merge_stops = True
merge_stops_speed = 2
pruning = 1
network.merge_stop_points(max_speed=merge_stops_speed)
network.prune_graph(pruning)

Pruning...
------------------------
Pruned Graph:
Number of nodes: 996 (91 isolated)
Number of edges: 3265
------------------------


In [11]:
# save hyperparameters
params = {
    'Data':filename,
    'DP_tolerance':tolerance,
    'clustering_method':method,
    'clustering_metric':metric,
    'clustering_min_samples':min_samples,
    'clustering_min_cluster_size':min_cluster_size,
    'clustering_eps':eps,
    'clustering_metric_V':V,
    'graph_generation_max_distance':max_distance,
    'graph_generation_max_angle':max_angle,
    'merge_stops_speed':merge_stops_speed
}
network.set_hyperparameters(params)

In [12]:
# save network as pickle object
fileObj = open('../../data/interim/'+data_date+'_waypoints_DP'+str(tolerance)+'_'+method+str(min_samples)+'_'+location+'_'+datasize+'_UTM_euclidean.obj', 'wb')
pickle.dump(network, fileObj)
fileObj.close()