In [1]:
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
from hvplot import pandas
from datetime import timedelta, datetime
import folium
import warnings
import sys
warnings.filterwarnings('ignore')

print("Geopandas has version {}".format(gpd.__version__))
print("Movingpandas has version {}".format(mpd.__version__))

Geopandas has version 0.13.2
Movingpandas has version 0.17.1


In [2]:
# add paths for modules
sys.path.append('../visualization')
print(sys.path)

# import modules
import visualize

['/Users/janhendrikwebert/maritime_route_prediction/src/datawrangling', '/Users/janhendrikwebert/miniforge3/envs/env_geo/lib/python311.zip', '/Users/janhendrikwebert/miniforge3/envs/env_geo/lib/python3.11', '/Users/janhendrikwebert/miniforge3/envs/env_geo/lib/python3.11/lib-dynload', '', '/Users/janhendrikwebert/miniforge3/envs/env_geo/lib/python3.11/site-packages', '../visualization']


In [3]:
# read data from file
filename = '../../data/raw/AIS_04-09_2022/ais_202204.csv'
df = pd.read_csv(filename, delimiter=';', decimal='.')

# convert to geopandas df
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat), crs="EPSG:4326")
df = []  # free memory

# drop duplicate AIS data (reported by multiple stations)
gdf.drop_duplicates(subset = ['mmsi', 'lat', 'lon'],
                    keep = 'last', inplace=True)

In [4]:
# convert to trajectories
size = 100000
trajectories = mpd.TrajectoryCollection(gdf.iloc[0:size], traj_id_col='mmsi', 
                                        obj_id_col='mmsi', t='date_time_utc')

In [5]:
# add a trajectory splitter
split_trajectories = mpd.ObservationGapSplitter(trajectories).split(gap=timedelta(minutes=30), min_length=100)
print(f'Trajectory splitter split {len(trajectories)} trajectories into {len(split_trajectories)} sub-trajectories')

Trajectory splitter split 78 trajectories into 99 sub-trajectories


In [6]:
# Douglas Peucker trajectory generalization to reduce the number of AIS messages
tolerance = 0.0005  # the smaller the tolerance, the more detailed the trajectories
simplified_trajectories = mpd.DouglasPeuckerGeneralizer(split_trajectories).generalize(tolerance=tolerance)
n_AIS, n_DP_points = len(gdf), len(simplified_trajectories.to_point_gdf())
print(f'DP reduced {n_AIS} AIS messages to {n_DP_points} points ({n_DP_points/n_AIS*100:.2f}%)')

DP reduced 6016737 AIS messages to 2644 points (0.04%)


In [7]:
simplified_trajectories.to_line_gdf().hvplot(geo=True, c='mmsi', tiles='OSM', width=1000, height=1000, cmap='jet')

In [8]:
no = 257201000
print(gdf[gdf.mmsi==no])
traj = trajectories.get_trajectory(no)
traj.to_point_gdf().hvplot(geo=True, tiles='OSM', width=700, height=700)

               mmsi   imo_nr  length        date_time_utc      lon      lat  \
149372    257201000  7319072      50  2022-04-01 02:20:58  5.56179  59.5774   
149373    257201000  7319072      50  2022-04-01 02:21:18  5.56180  59.5774   
149381    257201000  7319072      50  2022-04-01 02:22:47  5.56186  59.5775   
149543    257201000  7319072      50  2022-04-01 02:55:37  5.56160  59.5776   
149587    257201000  7319072      50  2022-04-01 03:04:57  5.56176  59.5774   
...             ...      ...     ...                  ...      ...      ...   
13729428  257201000  7319072      50  2022-04-30 23:58:28  5.71038  58.9831   
13729429  257201000  7319072      50  2022-04-30 23:58:37  5.71039  58.9831   
13729430  257201000  7319072      50  2022-04-30 23:58:58  5.71042  58.9831   
13729433  257201000  7319072      50  2022-04-30 23:59:28  5.71045  58.9831   
13729434  257201000  7319072      50  2022-04-30 23:59:37  5.71046  58.9831   

          sog    cog  true_heading  nav_status  mes