<a href="https://colab.research.google.com/github/yaobviously/sym-cargo/blob/main/sym_cargo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install plotly-express --quiet

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.neighbors import NearestNeighbors
import plotly_express as px

In [4]:
# every input file lives in one folder; change DATA_DIR to point the notebook
# at a different copy of the data.
# NOTE(review): the path looks like a mounted google drive in colab, but the
# notebook never mounts it - confirm the drive is mounted before running.
DATA_DIR = '/content/drive/MyDrive/Ports'

port_file = f'{DATA_DIR}/ports.csv'
tracking_file = f'{DATA_DIR}/tracking.csv'

In [5]:
def wrangle(port_path=None, tracking_path=None):
  """
  load the port table and the vessel tracking table and prepare both for
  the nearest-port analysis.

  parameters
  ----------
  port_path : path or file-like, optional
      csv with at least the columns 'lat' and 'long' (degrees). defaults to
      the notebook-level `port_file`.
  tracking_path : path or file-like, optional
      csv with at least the columns 'vessel', 'datetime', 'lat' and 'long'.
      defaults to the notebook-level `tracking_file`.

  returns
  -------
  (df1, df2) : tuple of DataFrame
      df1 - ports, with 'lat_rad' / 'long_rad' added and 'lat' / 'long'
            rounded to one decimal place.
      df2 - tracking rows sorted by vessel then datetime with a fresh
            0..n-1 index, with 'lat_rad', 'long_rad', 'month' and 'day' added.
  """
  # fall back to the notebook globals so a bare wrangle() call keeps working
  if port_path is None:
    port_path = port_file
  if tracking_path is None:
    tracking_path = tracking_file

  df1 = pd.read_csv(port_path)

  # converting lat and long to radians to compute haversine distance.
  # this happens before the rounding below, so the radian columns keep the
  # full precision of the file
  df1['lat_rad'] = np.radians(df1['lat'])
  df1['long_rad'] = np.radians(df1['long'])

  # rounding lat and long in port df
  df1['lat'] = df1['lat'].round(1)
  df1['long'] = df1['long'].round(1)

  df2 = pd.read_csv(tracking_path, parse_dates=['datetime'])

  # converting lat and long to radians to compute haversine distance
  df2['lat_rad'] = np.radians(df2['lat'])
  df2['long_rad'] = np.radians(df2['long'])

  df2['month'] = df2['datetime'].dt.month
  df2['day'] = df2['datetime'].dt.day

  # order each vessel's rows chronologically; the index is rebuilt so that
  # it matches the new row order
  df2 = df2.sort_values(['vessel', 'datetime'])
  df2 = df2.reset_index(drop=True)

  return df1, df2

In [6]:
# df1 is the port table read from port_file, df2 the tracking table read from
# tracking_file (sorted by vessel, then datetime)
df1, df2 = wrangle()

In [23]:
# lagged speed: speed_{i}_back is the speed recorded i rows earlier for the
# same vessel (NaN for a vessel's first i rows). the shift counts rows, not
# time. GroupBy.shift is the built-in equivalent of transform(lambda x:
# x.shift(i)) without a python call per vessel
for i in range(1, 8):
  df2[f'speed_{i}_back'] = df2.groupby('vessel')['speed'].shift(i)

In [29]:
# lead speed: speed_{i}_ahead is the speed recorded i rows later for the same
# vessel (NaN for a vessel's last i rows). the shift counts rows, not time.
# GroupBy.shift is the built-in equivalent of transform(lambda x: x.shift(-i))
# without a python call per vessel
for i in range(1, 8):
  df2[f'speed_{i}_ahead'] = df2.groupby('vessel')['speed'].shift(-i)

In [25]:
# lagged draft: draft_{i}_back is the draft recorded i rows earlier for the
# same vessel (NaN for a vessel's first i rows). the shift counts rows, not
# time. GroupBy.shift is the built-in equivalent of transform(lambda x:
# x.shift(i)) without a python call per vessel
for i in range(1, 8):
  df2[f'draft_{i}_back'] = df2.groupby('vessel')['draft'].shift(i)

In [30]:
# lead draft: draft_{i}_ahead is the draft recorded i rows later for the same
# vessel (NaN for a vessel's last i rows). the shift counts rows, not time.
# GroupBy.shift is the built-in equivalent of transform(lambda x: x.shift(-i))
# without a python call per vessel
for i in range(1, 8):
  df2[f'draft_{i}_ahead'] = df2.groupby('vessel')['draft'].shift(-i)

In [115]:
# lookup from port identifier to its (latitude, longitude) pair, both in
# radians
ports = dict(zip(df1['port'], zip(df1['lat_rad'], df1['long_rad'])))

# lookup from the row index of df1 to the port identifier stored on that row
idx_ports = dict(zip(df1.index, df1['port']))

In [110]:
# training a nearest neighbor model to find the closest port when the
# conditions indicating an extended stop have occurred. the metric is haversine
# in order to compute the 'great circle' distance, so inputs are
# [latitude, longitude] in radians and distances come back in radians on a
# unit sphere. so i don't forget, the model returns the *index* of the port,
# not the port's identifying label

ports_train = df1[['lat_rad', 'long_rad']]

neigh_ports = NearestNeighbors(n_neighbors=3, algorithm='ball_tree', metric='haversine')

# fit on the bare numpy values: every query in this notebook passes a plain
# numpy array, and a model fitted on a dataframe makes scikit-learn warn that
# the query "does not have valid feature names"
neigh_ports.fit(ports_train.to_numpy())

# smoke test: the three nearest ports to an arbitrary point (distances, indices)
neigh_ports.kneighbors(np.array([0.9, 1.1]).reshape(1,-1))

(array([[0.35136142, 0.43278349, 0.43782938]]), array([[110,  83,  30]]))

In [10]:
# functions to quickly inspect dataframe to figure out what's up

def get_dates(vessel=111, start='2019-01-01', end='2019-12-25'):
  """
  return the rows of df2 that belong to `vessel` and whose datetime lies in
  [start, end] (both ends inclusive), sorted by datetime.

  note: a date-only string such as '2019-12-25' is compared as midnight of
  that day, so rows later on the end date are not included.
  """
  after_start = df2['datetime'] >= start
  before_end = df2['datetime'] <= end
  same_vessel = df2['vessel'] == vessel

  selected = df2.loc[after_start & before_end & same_vessel]
  return selected.sort_values(by='datetime')

def in_port(lat_bounds=(-33.5, -31.5), long_bounds=(-73, -72.5)):
  """
  return the rows of df2 whose position falls inside a latitude/longitude
  bounding box (edges included).

  parameters
  ----------
  lat_bounds : (min, max) in degrees, default (-33.5, -31.5)
  long_bounds : (min, max) in degrees, default (-73, -72.5)

  the defaults reproduce the box that used to be hard-coded here, so a bare
  in_port() call returns the same rows as before.
  """
  lat_min, lat_max = lat_bounds
  long_min, long_max = long_bounds

  lat_range = (lat_min <= df2['lat']) & (df2['lat'] <= lat_max)
  long_range = (long_min <= df2['long']) & (df2['long'] <= long_max)

  df_ = df2[lat_range & long_range]

  return df_

def where_zero(vessel = 75):
  """
  return the rows of df2 for `vessel` where the recorded speed is at most 0.5
  (i.e. the vessel is stationary or nearly so).
  """
  same_vessel = df2['vessel'] == vessel
  barely_moving = df2['speed'] <= 0.5

  return df2.loc[same_vessel & barely_moving]

In [None]:
# 1. label stoppages
# 2. create a function that returns nearest distance and nearest neighbor during
#    an extended stoppage
# 3. add nearest distance and predicted port to dataframe 


In [103]:
# our test dataframe - using vessel 3 for now.
# get_dates compares the date strings against the datetime column with both
# ends inclusive; the date-only end string is compared as midnight, so rows
# later on 2019-12-31 are left out
v3 = get_dates(vessel=3, start='2019-01-01', end='2019-12-31')

In [105]:
# a row counts as 'stopped' when the current speed and the next two recorded
# speeds are all at most 0.3; a missing (NaN) value fails the comparison, so
# such rows are labelled 'moving'
conditions = (v3[['speed', 'speed_1_ahead', 'speed_2_ahead']] <= 0.3).all(axis=1)
v3['stoppage'] = np.where(conditions, 'stopped', 'moving')

In [173]:
def nearest_port(df, radius=0.05):
  """
  return the identifier of the port nearest to one tracking row, or the
  string "not a port" when no port lies within `radius`.

  parameters
  ----------
  df : a single row (e.g. as passed by DataFrame.apply(..., axis=1)) that
       provides 'lat_rad' and 'long_rad'.
  radius : search radius handed to neigh_ports.radius_neighbors. neigh_ports
       is fitted with the haversine metric in this notebook, so the radius
       is an angle in radians (multiply by the earth's radius, roughly
       6371 km, for a distance). the default 0.05 is the value that used to
       be hard-coded here.
  """
  # radius_neighbors expects a 2-d array: one query row of [lat, long]
  query = np.array([[df['lat_rad'], df['long_rad']]])

  # sort_results=True orders the matches by distance, so position 0 is the
  # closest port
  _, pred = neigh_ports.radius_neighbors(query, radius=radius, sort_results=True)

  if len(pred[0]) == 0:
    return "not a port"

  # the model returns row indices, which idx_ports maps to port identifiers
  return idx_ports[pred[0][0]]

In [172]:
def nearest_distance(df, radius=0.05):
  """
  return the distance from one tracking row to its nearest port, or the
  string "not a port" when no port lies within `radius`.

  parameters
  ----------
  df : a single row (e.g. as passed by DataFrame.apply(..., axis=1)) that
       provides 'lat_rad' and 'long_rad'.
  radius : search radius handed to neigh_ports.radius_neighbors. neigh_ports
       is fitted with the haversine metric in this notebook, so both the
       radius and the returned distance are angles in radians (multiply by
       the earth's radius, roughly 6371 km, for a distance). the default
       0.05 is the value that used to be hard-coded here.

  NOTE(review): the "not a port" sentinel makes the result a mix of floats
  and strings; it is kept so existing callers see the same values, but
  np.nan would keep the column numeric.
  """
  # radius_neighbors expects a 2-d array: one query row of [lat, long]
  query = np.array([[df['lat_rad'], df['long_rad']]])

  # sort_results=True orders the matches by distance, so position 0 is the
  # smallest distance
  dist, _ = neigh_ports.radius_neighbors(query, radius=radius, sort_results=True)

  if len(dist[0]) == 0:
    return "not a port"

  return dist[0][0]


In [174]:
# run both lookups once per row of v3, then attach the results as new columns
v3ports = v3.apply(nearest_port, axis=1)
v3dist = v3.apply(nearest_distance, axis=1)

v3['pred_port'], v3['pred_dist'] = v3ports, v3dist

In [176]:
# quick map of every recorded position in v3; hovering over a point shows the
# port predicted for that row
fig = px.scatter_geo(v3, lat='lat', lon='long', hover_name="pred_port")

# centred title
fig.update_layout(title={'text': 'Vessel Locations', 'x': 0.5})
fig.show()

In [None]:
# empty, column-only frames that will hold the results to submit

# one row per observed voyage
voyages = pd.DataFrame(
  columns=['vessel', 'begin_date', 'end_date', 'begin_port_id', 'end_port_id'],
)

# one row per predicted voyage
predict = pd.DataFrame(
  columns=['vessel', 'begin_port_id', 'end_port_id', 'voyage'],
)