In [1]:
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import numpy as np
import matplotlib.pyplot as plt
import folium
import warnings
import sys
#warnings.filterwarnings('ignore')

# Report the installed geopandas version for reproducibility.
print(f"Geopandas has version {gpd.__version__}")

Geopandas has version 0.13.2


In [2]:
# --- ship metadata ---
# The CSV is semicolon-separated, uses a decimal comma and is ISO-8859-1 encoded.
filename = '../../data/external/seilas2019.csv'
df_meta = (
    pd.read_csv(filename, sep=';', decimal=',', encoding='ISO-8859-1')
    # give the MMSI column the same name the trajectory data uses
    .rename(columns={'mmsi_nummer': 'mmsi'})
    # keep a single row (the first one encountered) per MMSI
    .drop_duplicates(subset='mmsi')
)

# --- ship trajectories ---
filename = '../../data/raw/routes_all.parquet'
df_trajectories = gpd.read_parquet(filename)

In [3]:
# Count the MMSIs that occur in both the metadata and the trajectory data,
# then report the number of unique MMSIs per source next to the overlap.
n_matching = len(set(df_meta['mmsi']) & set(df_trajectories['mmsi']))
print(f'Ship metadata has   {df_meta["mmsi"].nunique()} unique MMSIs')
print(f'Trajectory file has {df_trajectories["mmsi"].nunique()} unique MMSIs')
print(f'Overlap:            {n_matching} MMSIs')

Ship metadata has   4373 unique MMSIs
Trajectory file has 916 unique MMSIs
Overlap:            840 MMSIs


In [4]:
# Attach the selected ship metadata columns to every trajectory row, keyed on MMSI.
merge_columns = ['mmsi', 'lengde', 'bredde', 'dypgaaende', 'skipstype', 'skipsgruppe', 'fartoynavn']
# A left join keeps every trajectory, including those without a metadata match
# (their metadata columns are then filled with missing values).
# validate='many_to_one' makes the merge raise if df_meta contains more than one
# row per MMSI, instead of silently duplicating trajectory rows.
df = df_trajectories.merge(
    df_meta[merge_columns],
    on='mmsi',
    how='left',
    validate='many_to_one',
)
# make sure the merged result is a GeoDataFrame
df = gpd.GeoDataFrame(df)

In [5]:
# Summarise the merged frame: index range, column names, non-null counts and dtypes.
df.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 58546 entries, 0 to 58545
Data columns (total 30 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   mmsi             58546 non-null  int64         
 1   imo              58546 non-null  float64       
 2   length           58546 non-null  float64       
 3   start_lon        58546 non-null  float64       
 4   start_lat        58546 non-null  float64       
 5   stop_lon         58546 non-null  float64       
 6   stop_lat         58546 non-null  float64       
 7   start_loc        58546 non-null  object        
 8   stop_loc         58546 non-null  object        
 9   start_geom       58546 non-null  object        
 10  stop_geom        58546 non-null  object        
 11  start_time       58546 non-null  datetime64[us]
 12  stop_time        58546 non-null  datetime64[us]
 13  cog              58546 non-null  object        
 14  avg_cog          58546 non-nul

In [15]:
# Inspect a single row (selected by integer position) to see all of its column values.
df.iloc[40_000]

mmsi                                                       257847600
imo                                                        9769219.0
length                                                         144.0
start_lon                                                    10.6541
start_lat                                                    59.4286
stop_lon                                                     10.4927
stop_lat                                                     59.4136
start_loc                                    POINT (10.6541 59.4286)
stop_loc                                     POINT (10.4927 59.4136)
start_geom         0101000020E610000090A0F831E64E25406688635DDCB6...
stop_geom          0101000020E6100000787AA52C43FC244014D044D8F0B4...
start_time                                       2019-08-19 20:31:20
stop_time                                        2019-08-19 21:18:20
cog                [214.5, 200.03333333333333, 216.8, 216.1, 216....
avg_cog                           

In [18]:
# Show all rows whose 'skipsgruppe' value is 'Passasjer'.
df.loc[df['skipsgruppe'] == 'Passasjer']

Unnamed: 0,mmsi,imo,length,start_lon,start_lat,stop_lon,stop_lat,start_loc,stop_loc,start_geom,...,start_label,stop_label,cluster_nr,unique_route_id,lengde,bredde,dypgaaende,skipstype,skipsgruppe,fartoynavn
1,319072300,1012189.0,66.0,10.5514,59.3742,11.043038,58.973550,POINT (10.5514 59.3742),POINT (11.0430375 58.97355),0101000020E61000005F07CE19511A2540E4141DC9E5AF...,...,-1,16,-1,2,65.989998,11.60,3.50,Yacht,Passasjer,ELYSIAN
57,211232340,5221491.0,50.0,10.3862,58.9075,10.531111,59.391000,POINT (10.3862 58.9075),POINT (10.53111052631579 59.391),0101000020E61000002F6EA301BCC524405C8FC2F52874...,...,0,-1,-1,58,49.799999,6.48,2.21,Passenger Ship,Passasjer,THOR HEYERDAHL
58,211232340,5221491.0,50.0,10.5290,59.3919,10.747800,59.907100,POINT (10.529 59.3919),POINT (10.7478 59.9071),0101000020E6100000022B8716D90E2540B5A679C729B2...,...,-1,1,-1,59,49.799999,6.48,2.21,Passenger Ship,Passasjer,THOR HEYERDAHL
59,211232340,5221491.0,50.0,10.7477,59.9072,10.634300,59.488500,POINT (10.7477 59.9072),POINT (10.6343 59.4885),0101000020E61000003B70CE88D27E254032772D211FF4...,...,1,-1,-1,60,49.799999,6.48,2.21,Passenger Ship,Passasjer,THOR HEYERDAHL
60,211232340,5221491.0,50.0,10.6342,59.4888,11.006531,59.012769,POINT (10.6342 59.4888),POINT (11.006530555555557 59.012769444444444),0101000020E6100000143FC6DCB5442540742497FF90BE...,...,-1,-1,-1,61,49.799999,6.48,2.21,Passenger Ship,Passasjer,THOR HEYERDAHL
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58501,257055930,9825805.0,134.0,10.4668,59.4673,10.489513,59.466700,POINT (10.4668 59.4673),POINT (10.4895125 59.4667),0101000020E6100000AC8BDB6800EF2440DDB5847CD0BB...,...,-1,-1,-1,58502,134.000000,21.00,4.75,Passenger/Ro-Ro Ship (Vehicles),Passasjer,HUFTARØY
58502,257055930,9825805.0,134.0,10.4801,59.4274,10.502581,59.457440,POINT (10.4801 59.4274),POINT (10.502580898876404 59.4574404494382),0101000020E6100000849ECDAACFF52440BB270F0BB5B6...,...,3,-1,-1,58503,134.000000,21.00,4.75,Passenger/Ro-Ro Ship (Vehicles),Passasjer,HUFTARØY
58503,257055930,9825805.0,134.0,10.5024,59.4561,10.480500,59.427400,POINT (10.5024 59.4561),POINT (10.4805 59.4274),0101000020E61000005305A3923A01254051DA1B7C61BA...,...,-1,3,-1,58504,134.000000,21.00,4.75,Passenger/Ro-Ro Ship (Vehicles),Passasjer,HUFTARØY
58504,257055930,9825805.0,134.0,10.4807,59.4275,10.480646,59.427400,POINT (10.4807 59.4275),POINT (10.480645833333334 59.4274),0101000020E6100000D95F764F1EF624401F85EB51B8B6...,...,3,3,-1,58505,134.000000,21.00,4.75,Passenger/Ro-Ro Ship (Vehicles),Passasjer,HUFTARØY
