In [None]:
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import numpy as np
from datetime import timedelta, datetime
import folium
import warnings
import sys
# Suppress every warning category for the rest of the notebook session.
warnings.filterwarnings(action='ignore')

# Report the versions of the two geospatial libraries, one per output line.
print(
    "Geopandas has version {}".format(gpd.__version__),
    "Movingpandas has version {}".format(mpd.__version__),
    sep="\n",
)

In [None]:
# add paths for modules
# NOTE: the path is relative, so it is resolved against the kernel's current
# working directory.
module_path = '../visualization'
if module_path not in sys.path:
    # guard the append so that re-running this cell does not add duplicates
    sys.path.append(module_path)
print(sys.path)

# import modules (must come after the path has been registered above)
import visualize

In [None]:
# Load the cleaned point data from a parquet file into a GeoDataFrame.
# Alternative input file, kept for switching data sets by hand:
# filename = '../../data/processed/202204_points_stavanger_cleaned_500k.parquet'
filename = '../../data/processed/202204_points_stavanger_cleaned_full.parquet'
gdf = gpd.read_parquet(filename)

# Preview the first five rows as the cell output.
gdf.head(5)

In [None]:
# Build a TrajectoryCollection from the point data. The 'mmsi' column is used
# both as the trajectory identifier and as the object identifier.
trajectories = mpd.TrajectoryCollection(
    gdf,
    obj_id_col='mmsi',
    traj_id_col='mmsi',
)

In [None]:
# Thin out the trajectory points with Douglas-Peucker generalization
# (for plotting purposes).
tolerance = 0.0005  # smaller tolerance -> more detailed trajectories
dp_generalizer = mpd.DouglasPeuckerGeneralizer(trajectories)
simplified_trajectories = dp_generalizer.generalize(tolerance=tolerance)

# Report how many points remain compared with the raw input.
n_points = len(gdf)
n_DP_points = len(simplified_trajectories.to_point_gdf())
print(f'DP reduced {n_points} AIS messages to {n_DP_points} points ({n_DP_points/n_points*100:.2f}%)')

In [None]:
# Plot n random trajectories against their DP simplified counterparts.
# The map is only built when plot_comparison is True; otherwise the cell
# produces no output.
plot_comparison = False
comparison_map = None  # stays None (nothing rendered) while plotting is disabled
if plot_comparison:
    n_trajectories = 5  # any value <= 0 selects all trajectories
    columns = ['mmsi', 'geometry']  # columns to be plotted
    n_available = len(trajectories)
    if n_trajectories > 0:
        # Sample without replacement so no trajectory is drawn twice, and cap
        # the sample size at the number of available trajectories.
        selection = np.random.choice(n_available, size=min(n_trajectories, n_available), replace=False)
    else:
        selection = range(n_available)
    # The same positional selection is applied to both collections so that the
    # original and simplified versions of the same rows are compared.
    comparison_map = trajectories.to_traj_gdf()[columns].iloc[selection].explore(
        cmap='jet', column='mmsi', name='Trajectories',
        style_kwds={'opacity':0.5, 'weight':1})
    comparison_map = simplified_trajectories.to_traj_gdf()[columns].iloc[selection].explore(
        m=comparison_map, cmap='jet', column='mmsi', name='Simplified trajectories')
    folium.LayerControl().add_to(comparison_map)
# Jupyter only renders the value of the last top-level expression of a cell,
# so the map has to be named here rather than inside the `if` body.
comparison_map

In [None]:
# Detect waypoints: the points that survive a Douglas-Peucker pass with a
# larger (coarser) tolerance than the one used for the plotting simplification.
waypoint_generalizer = mpd.DouglasPeuckerGeneralizer(trajectories)
waypoints = waypoint_generalizer.generalize(tolerance=0.001)
print(f'Number of waypoints detected: {len(waypoints.to_point_gdf())}')

In [None]:
# Keep only the coordinates of the detected waypoints. The explicit copy makes
# df_waypoints an independent frame, so adding columns to it later does not
# write into a slice of the point GeoDataFrame (SettingWithCopyWarning).
df_waypoints = waypoints.to_point_gdf()[['lat', 'lon']].copy()

In [None]:
################
### Apply DBSCAN
################
from sklearn.cluster import DBSCAN
# DBSCAN parameters
eps = 0.001
min_samples = 10

# Cluster on the coordinate columns only, so this cell can be re-run after
# df_waypoints has gained its clusterID / geometry columns further down.
# NOTE(review): eps is applied to raw lat/lon values with DBSCAN's default
# metric -- confirm that a neighbourhood radius in degrees is intended.
waypoint_coords = df_waypoints[['lat', 'lon']]
clustering = DBSCAN(eps=eps, min_samples=min_samples).fit(waypoint_coords)

# assign clusterID to each waypoint (DBSCAN marks noise points with label -1)
clustered_waypoints = waypoint_coords.assign(clusterID=clustering.labels_)

# compute cluster centroids: mean lat/lon per cluster, noise excluded.
# One grouped aggregation replaces growing a DataFrame with pd.concat in a
# loop; the result has a clean RangeIndex and an integer clusterID column.
cluster_centroids = (
    clustered_waypoints[clustered_waypoints['clusterID'] >= 0]
    .groupby('clusterID', as_index=False)[['lat', 'lon']]
    .mean()
)

# convert waypoint and cluster centroid DataFrames to GeoDataFrames
df_waypoints = gpd.GeoDataFrame(
    clustered_waypoints,
    geometry=gpd.points_from_xy(clustered_waypoints.lon, clustered_waypoints.lat),
    crs="EPSG:4326",
).reset_index()  # move the former index into regular column(s)
cluster_centroids = gpd.GeoDataFrame(
    cluster_centroids,
    geometry=gpd.points_from_xy(cluster_centroids.lon, cluster_centroids.lat),
    crs="EPSG:4326",
)
print(f'{len(cluster_centroids)} clusters detected')

In [None]:
# Plot the simplified trajectories, the clustered waypoints and the cluster
# centroids as three layers of one interactive map.
n_trajectories = -1  # -1 selects all trajectories
# Positional row indices of the trajectories to draw: a random draw when a
# positive count is requested, otherwise every trajectory.
selection = (
    np.random.randint(0, high=len(trajectories), size=n_trajectories)
    if n_trajectories > 0
    else range(0, len(trajectories))
)

columns = ['geometry', 'mmsi']  # columns to be plotted

# NOTE(review): `map` shadows the Python builtin; the name is kept here
# because other cells may refer to it.
# Base layer: simplified trajectories.
trajectory_lines = simplified_trajectories.to_traj_gdf()[columns].iloc[selection]
map = trajectory_lines.explore(
    column='mmsi',
    name='Simplified trajectories',
    style_kwds={'weight':1, 'color':'black', 'opacity':0.5},
    legend=False,
)
# Second layer: every waypoint together with its cluster ID.
map = df_waypoints[['clusterID', 'geometry']].explore(
    m=map,
    name='all waypoints with cluster ID',
    legend=False,
    marker_kwds={'radius':2},
    style_kwds={'opacity':0.2},
)
# Third layer: cluster centroids as filled red markers.
map = cluster_centroids[['clusterID', 'geometry']].explore(
    m=map,
    name='cluster centroids',
    legend=False,
    marker_kwds={'radius':3},
    style_kwds={'color':'red', 'fillColor':'red', 'fillOpacity':1},
)
folium.LayerControl().add_to(map)
# Last top-level expression, so the notebook renders the map as cell output.
map