In [1]:
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import numpy as np
import matplotlib.pyplot as plt
import folium
import warnings
import sys
#warnings.filterwarnings('ignore')

# Report the geopandas version the notebook is running against
print(f"Geopandas has version {gpd.__version__}")

Geopandas has version 0.13.2


In [2]:
# Load the ship metadata (semicolon-separated, comma decimals, ISO-8859-1),
# rename the MMSI column to 'mmsi', and keep only the first row per MMSI.
filename = '../../data/external/seilas2019.csv'
df_meta = (
    pd.read_csv(filename, delimiter=';', decimal=',', encoding='ISO-8859-1')
    .rename(columns={'mmsi_nummer': 'mmsi'})
    .drop_duplicates(subset='mmsi')
)

# Load the ship trajectories; `filename` is reused and now points at the parquet file.
filename = '../../data/raw/routes_all.parquet'
df_trajectories = gpd.read_parquet(filename)

In [3]:
# Count the MMSIs that occur in both the metadata and the trajectory frame
n_matching = len(set(df_meta['mmsi']) & set(df_trajectories['mmsi']))

# Report the unique MMSI counts per source next to the size of the overlap
print(
    f'Ship metadata has   {df_meta.mmsi.nunique()} unique MMSIs',
    f'Trajectory file has {df_trajectories.mmsi.nunique()} unique MMSIs',
    f'Overlap:            {n_matching} MMSIs',
    sep='\n',
)

Ship metadata has   4373 unique MMSIs
Trajectory file has 916 unique MMSIs
Overlap:            840 MMSIs


In [4]:
# Attach the selected ship metadata columns to every trajectory row.
# The left join keeps all trajectories; rows whose MMSI has no metadata match
# get missing values in the added columns.
merge_columns = [
    'mmsi',
    'lengde',
    'bredde',
    'dypgaaende',
    'skipstype',
    'skipsgruppe',
    'fartoynavn',
]
df = gpd.GeoDataFrame(
    df_trajectories.merge(df_meta[merge_columns], on='mmsi', how='left')
)

In [5]:
# Summarise the merged frame: column names, non-null counts and dtypes
df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 58546 entries, 0 to 58545
Data columns (total 30 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   mmsi             58546 non-null  int64         
 1   imo              58546 non-null  float64       
 2   length           58546 non-null  float64       
 3   start_lon        58546 non-null  float64       
 4   start_lat        58546 non-null  float64       
 5   stop_lon         58546 non-null  float64       
 6   stop_lat         58546 non-null  float64       
 7   start_loc        58546 non-null  object        
 8   stop_loc         58546 non-null  object        
 9   start_geom       58546 non-null  object        
 10  stop_geom        58546 non-null  object        
 11  start_time       58546 non-null  datetime64[us]
 12  stop_time        58546 non-null  datetime64[us]
 13  cog              58546 non-null  object        
 14  avg_cog          58546 non-nul

In [10]:
# Spot-check a single merged record, selected by integer position
# NOTE(review): 30000 looks like an arbitrary row position — confirm, or name it as a constant
df.iloc[30000]

mmsi                                                       258509000
imo                                                        9481166.0
length                                                          50.0
start_lon                                                    10.7295
start_lat                                                    59.9103
stop_lon                                                   10.729453
stop_lat                                                   59.910153
start_loc                                    POINT (10.7295 59.9103)
stop_loc                 POINT (10.72945294117647 59.91015294117647)
start_geom         0101000020E61000002FDD2406817525403FC6DCB584F4...
stop_geom          0101000020E6100000DDA51CDB7A752540075B3EE47FF4...
start_time                                       2019-12-10 22:03:21
stop_time                                        2019-12-10 22:48:21
cog                [199.1, 196.6, 228.5625, 235.16666666666666, 2...
avg_cog                           

Unnamed: 0,seilas_id,skips_id,imo_nummer,mmsi,kallesignal,fartoynavn,byggeaar,bruttotonnasje_bt,doedvekttonn_dwt,lengde,...,ankomsthavn_kode,ankomsthavn_navn,ankomsttidspunkt,land_ankomst,landkode_ankomst_totegn,fylkesnavn_ankomst,fylkesnr_ankomst,kommunenavn_ankomst,kommunenr_ankomst,lokasjonstype_ankomst
