In [1]:
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
from hvplot import pandas
from datetime import timedelta, datetime
import folium
import warnings
import sys
warnings.filterwarnings('ignore')

print("Geopandas has version {}".format(gpd.__version__))
print("Movingpandas has version {}".format(mpd.__version__))

Geopandas has version 0.13.2
Movingpandas has version 0.17.1


In [2]:
# add paths for modules
sys.path.append('../visualization')
print(sys.path)

# import modules
import visualize

['/Users/janhendrikwebert/maritime_route_prediction/src/datawrangling', '/Users/janhendrikwebert/miniforge3/envs/env_geo/lib/python311.zip', '/Users/janhendrikwebert/miniforge3/envs/env_geo/lib/python3.11', '/Users/janhendrikwebert/miniforge3/envs/env_geo/lib/python3.11/lib-dynload', '', '/Users/janhendrikwebert/miniforge3/envs/env_geo/lib/python3.11/site-packages', '../visualization']


In [3]:
# read data from file
filename = '../../data/raw/AIS_04-09_2022/ais_202204.csv'
df = pd.read_csv(filename, delimiter=';', decimal='.')

# convert to geopandas df
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.lon, df.lat), crs="EPSG:4326")
df = []  # free memory

# drop duplicate AIS data (reported by multiple stations)
gdf.drop_duplicates(subset = ['mmsi', 'lat', 'lon'],
                    keep = 'last', inplace=True)

In [4]:
# convert to trajectories
size = 500000
trajectories = mpd.TrajectoryCollection(gdf.iloc[0:size], traj_id_col='mmsi', 
                                        obj_id_col='mmsi', t='date_time_utc')

In [5]:
# add a trajectory splitter
split_trajectories = mpd.ObservationGapSplitter(trajectories).split(gap=timedelta(minutes=10), min_length=100)
print(f'Trajectory splitter split {len(trajectories)} trajectories into {len(split_trajectories)} sub-trajectories')

Trajectory splitter split 221 trajectories into 1030 sub-trajectories


In [6]:
# Douglas Peucker trajectory generalization to reduce the number of AIS messages
tolerance = 0.0005  # the smaller the tolerance, the more detailed the trajectories
simplified_trajectories = mpd.DouglasPeuckerGeneralizer(split_trajectories).generalize(tolerance=tolerance)
n_AIS, n_DP_points = size, len(simplified_trajectories.to_point_gdf())
print(f'DP reduced {n_AIS} AIS messages to {n_DP_points} points ({n_DP_points/n_AIS*100:.2f}%)')

DP reduced 500000 AIS messages to 15299 points (3.06%)


In [7]:
simplified_trajectories.to_traj_gdf().info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   mmsi       1030 non-null   object        
 1   start_t    1030 non-null   datetime64[ns]
 2   end_t      1030 non-null   datetime64[ns]
 3   geometry   1030 non-null   geometry      
 4   length     1030 non-null   float64       
 5   direction  1030 non-null   float64       
dtypes: datetime64[ns](2), float64(2), geometry(1), object(1)
memory usage: 48.4+ KB


In [8]:
simplified_trajectories.to_traj_gdf()[['mmsi', 'geometry']].explore(cmap='jet', column='mmsi', name='DP simplified trajectories')

In [9]:
no = 258315000
print(gdf[gdf.mmsi==no])
traj = trajectories.get_trajectory(no)
traj.to_point_gdf().hvplot(geo=True, tiles='OSM', width=700, height=700)

               mmsi   imo_nr  length        date_time_utc      lon      lat  \
336985    258315000  6514895      51  2022-04-01 00:00:13  6.02349  59.3640   
336991    258315000  6514895      51  2022-04-01 00:01:17  6.13929  58.8628   
336992    258315000  6514895      51  2022-04-01 00:01:36  6.13799  58.8629   
336993    258315000  6514895      51  2022-04-01 00:01:47  6.13714  58.8630   
336994    258315000  6514895      51  2022-04-01 00:01:56  6.13649  58.8631   
...             ...      ...     ...                  ...      ...      ...   
13485084  258315000  6514895      51  2022-04-29 01:03:58  5.37769  59.6721   
13485085  258315000  6514895      51  2022-04-29 01:04:09  5.37813  59.6723   
13485086  258315000  6514895      51  2022-04-29 01:04:19  5.37853  59.6726   
13485087  258315000  6514895      51  2022-04-29 01:04:28  5.37894  59.6728   
13485088  258315000  6514895      51  2022-04-29 01:04:38  5.37934  59.6731   

          sog    cog  true_heading  nav_status  mes