In [None]:
'''
This notebook is for exploratory data analysis of a trajectory file from Brian.
These trajectories have been pre-processed and are not used for maritime traffic network modelling.
'''

In [None]:
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import numpy as np
import matplotlib.pyplot as plt
import folium
import warnings
import sys
#warnings.filterwarnings('ignore')

# report the versions of the geospatial libraries in use
print(f"Geopandas has version {gpd.__version__}")
print(f"Movingpandas has version {mpd.__version__}")

In [None]:
# add paths for modules
module_path = '../src/visualization'
# register the path only once, so re-running this cell does not append
# duplicate entries to sys.path
if module_path not in sys.path:
    sys.path.append(module_path)
print(sys.path)

# import modules (must come after the path has been added above)
import visualize

In [None]:
# load the trajectory data from the parquet file into `df`
filename = '../data/raw/routes_all.parquet'
df = gpd.read_parquet(path=filename)

In [None]:
# print information about loaded data
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit an extra "None" line)
df.info()

In [None]:
# preview the first 5 rows of the loaded data
df.head(n=5)

In [None]:
# some dataset characteristics: number of distinct values per identifier column
columns = ['mmsi', 'imo', 'start_label', 'stop_label', 'cluster_nr', 'unique_route_id']
for column in columns:
    print(f'{df[column].nunique()} unique {column}')

# print start and stop labels
print(sorted(df['start_label'].unique()))
print(sorted(df['stop_label'].unique()))

# plot distribution of mmsi on an explicit Axes so the figure is self-contained
# NOTE: the histogram uses equal-width bins over the numeric MMSI range, so a
# single bin does not necessarily correspond to exactly one MMSI
fig, ax = plt.subplots()
df['mmsi'].hist(bins=df['mmsi'].nunique(), ax=ax)
ax.set_title('Distribution of MMSIs')
ax.set_xlabel('MMSI')
ax.set_ylabel('Number of rows')
plt.show()

In [None]:
# drop superfluous geolocation columns to enable easy plotting
# errors='ignore' keeps this cell idempotent: re-running it after the columns
# have already been removed no longer raises a KeyError
df = df.drop(columns=['start_loc', 'stop_loc', 'loc', 'start_geom', 'stop_geom'], errors='ignore')

In [None]:
# plot data (n random trajectories)
n_trajectories = 10000  # any value <= 0 selects all trajectories
random_seed = 42  # fixed seed so the plotted selection is reproducible across runs
columns = ['mmsi', 'imo', 'start_label', 'stop_label', 'cluster_nr', 'unique_route_id', 'line_geom', 'length']

trajectories = df[columns]
if n_trajectories > 0:
    # sample without replacement so no trajectory is drawn twice, and cap the
    # sample size at the number of available rows
    trajectories = trajectories.sample(
        n=min(n_trajectories, len(trajectories)),
        random_state=random_seed,
    )

# NOTE: `map` shadows the Python builtin; the name is kept because the
# following cell adds further layers to this object
map = trajectories.explore(cmap='jet', column='start_label', name='trajectories')

In [None]:
# overlay the bounding box of the geographic area on the existing map
bbox = visualize.get_bounding_box(df)
map = bbox.explore(m=map, color='red', name='bounds')

# attach a layer control, then show the map as the cell output
layer_control = folium.LayerControl()
layer_control.add_to(map)
map