In [None]:
'''
This notebook is for exploratory data analysis of the processed AIS data
'''

In [None]:
import sys
import warnings
from datetime import timedelta, datetime
from pathlib import Path

import folium
import geopandas as gpd
import movingpandas as mpd
import numpy as np
import pandas as pd
# Suppress every warning category so library warnings do not clutter cell output.
warnings.filterwarnings(action='ignore')

# Report the versions of the two geospatial libraries this notebook relies on.
for library_name, library in (("Geopandas", gpd), ("Movingpandas", mpd)):
    print(f"{library_name} has version {library.__version__}")

In [None]:
# Load the processed AIS points and reproject them to the UTM reference system.
filename = '../data/processed/202204_points_stavanger_cleaned_meta_full_dualSplit_2.parquet'
crs = 32632  # target coordinate reference system (UTM)

# Read the parquet file and transform the geometries in one chained step.
gdf = gpd.read_parquet(filename).to_crs(crs)
gdf.head()

In [None]:
# Build a TrajectoryCollection, using the 'mmsi' column both as trajectory id
# and as object id.
trajectory_options = {'traj_id_col': 'mmsi', 'obj_id_col': 'mmsi'}
trajectories = mpd.TrajectoryCollection(gdf, **trajectory_options)

# Short summary of what was loaded.
summary_lines = (
    f'Loaded dataset: {filename}',
    f'AIS messages: {len(gdf)}',
    f'Trajectories: {len(trajectories)}',
)
print('\n'.join(summary_lines))

In [None]:
# Gather the sampling interval (in seconds) of every trajectory, then report
# median, mean and maximum across the whole collection.
sampling_intervals = [
    traj.get_sampling_interval().total_seconds() for traj in trajectories
]
interval_seconds = np.asarray(sampling_intervals)

for label, statistic in (('Median', np.median), ('Mean', np.mean), ('Max', np.max)):
    print(f'{label} sampling interval of all trajectories: {statistic(interval_seconds)} seconds')

In [None]:
# Thin out the trajectories with the Douglas-Peucker algorithm so that fewer
# points have to be drawn when plotting.
generalizer = mpd.DouglasPeuckerGeneralizer(trajectories)
simplified_trajectories = generalizer.generalize(tolerance=10)

# Compare the number of points before and after the generalization.
n_points = len(gdf)
n_DP_points = len(simplified_trajectories.to_point_gdf())
share_kept = n_DP_points / n_points * 100
print(f'DP reduced {n_points} AIS messages to {n_DP_points} points ({share_kept:.2f}%)')

In [None]:
# Plot a random sample of raw trajectories against their Douglas-Peucker
# simplified counterparts on one interactive folium map.
plot_comparison = True
n_trajectories = 5  # number of trajectories to draw; -1 selects all trajectories
random_seed = 42  # fixed seed for a reproducible sample; None draws a new sample each run
columns = ['mmsi', 'geometry']  # columns to be plotted

# Start from None so that, with plotting disabled, the final expression shows
# nothing instead of echoing Python's builtin `map`.
# NOTE: this name shadows the builtin; it is kept because the cell that saves
# the map refers to it as `map`.
map = None

if plot_comparison:
    # Sample from the 'mmsi' values of the trajectories that actually exist in
    # the collection (the same per-trajectory column that filter() is asked to
    # match on), rather than indexing gdf.mmsi.unique() with positions drawn
    # from len(trajectories), which need not line up.
    available_mmsis = np.unique([traj.df['mmsi'].iloc[0] for traj in trajectories])

    if n_trajectories < 0 or n_trajectories >= len(available_mmsis):
        # -1 (or a request for more than exist) means: plot everything.
        mmsis = available_mmsis
    else:
        # replace=False guarantees n distinct vessels; randint could repeat ids.
        rng = np.random.default_rng(random_seed)
        mmsis = rng.choice(available_mmsis, size=n_trajectories, replace=False)

    selected = mmsis.tolist()
    trajs = trajectories.filter('mmsi', selected)
    simplified_trajs = simplified_trajectories.filter('mmsi', selected)

    # One line layer and one point layer per collection, all added to the same map.
    layers = (
        (trajs, 'Trajectories', 'AIS messages'),
        (simplified_trajs, 'Simplified trajectories', 'Significant Points'),
    )
    for collection, line_layer_name, point_layer_name in layers:
        # m=None on the first pass lets explore() create the map; later passes reuse it.
        map = collection.to_traj_gdf()[columns].explore(
            m=map, cmap='jet', column='mmsi', name=line_layer_name,
            style_kwds={'weight': 5},
        )
        # Move the index into a regular column before selecting the plot columns.
        messages = collection.to_point_gdf().reset_index()
        map = messages[columns].explore(
            m=map, cmap='jet', column='mmsi', name=point_layer_name,
            marker_kwds={'radius': 6, 'opacity': 1},
        )

    # Layer switcher so each of the four layers can be toggled individually.
    folium.LayerControl().add_to(map)
map

In [None]:
# Persist the comparison map as a standalone HTML file.
map_path = Path('../reports/maps/rawAIS_and_DP.html')

if isinstance(map, folium.Map):
    # Make sure the output directory exists before writing the file
    # (presumably save() does not create it — TODO confirm).
    map_path.parent.mkdir(parents=True, exist_ok=True)
    map.save(str(map_path))
else:
    # `map` is not a folium map (e.g. the comparison plot was disabled), so
    # there is nothing to write; say so instead of failing with AttributeError.
    print('No comparison map available - nothing saved.')