# Pathfinding Model
Objectives:
- build a pathfinding model wrapped inside a function called `find_shortest_path`
- this function should accept the user's starting and ending geocoordinates along with the current time
- data has been preprocessed in a previous notebook and filtered to include only regular weekday service; i.e., stop times are for any given Monday to Friday

---
## Import Modules & Data

In [1]:
import pandas as pd
from datetime import timedelta as td
import time

In [2]:
# Load the four preprocessed model tables (Feather files) in a fixed order:
# schedule, stop_times, trips, stops.
schedule, stop_times, trips, stops = (
    pd.read_feather(feather_path)
    for feather_path in (
        'data/model/schedule.ftr',
        'data/model/stop_times.ftr',
        'data/model/trips.ftr',
        'data/model/stops.ftr',
    )
)

In [3]:
# Preview a single row of `schedule` to confirm its columns.
schedule.head(1)

Unnamed: 0,trip_id,route_short_name,route_long_name,shape_id,trip_headsign,stop_sequence,stop_time,stop_time_delta,stop_id,stop_code,stop_name,stop_lat,stop_lon
0,42990004,10,VAN HORNE,886387,EAST - 10 VAN HORNE towards VICTORIA PARK,1,7:00:29,0 days 07:00:29,14155,14633,Don Mills Station,43.776222,-79.347048


In [4]:
# Preview a single row of `stop_times` to confirm its columns.
stop_times.head(1)

Unnamed: 0,trip_id,stop_sequence,stop_id,stop_time,stop_time_delta
0,42990004,1,14155,7:00:29,0 days 07:00:29


In [5]:
# Preview a single row of `trips` to confirm its columns.
trips.head(1)

Unnamed: 0,trip_id,shape_id,route_short_name,route_long_name,trip_headsign
0,42990004,886387,10,VAN HORNE,EAST - 10 VAN HORNE towards VICTORIA PARK


In [6]:
# Preview a single row of `stops` to confirm its columns.
stops.head(1)

Unnamed: 0,stop_id,stop_code,stop_name,stop_lat,stop_lon
0,262,662,Danforth Rd at Kennedy Rd,43.714379,-79.260939


---
## Create Helper Tables

In [7]:
# Reference table: one row per distinct (shape_id, stop_sequence, stop_id)
# combination found in `schedule`, ordered by shape and then by stop sequence.
shapes = (
    schedule[['shape_id', 'stop_sequence', 'stop_id']]
    .drop_duplicates()
    .sort_values(by=['shape_id', 'stop_sequence'])
    .reset_index(drop=True)
)
shapes.head(3)

Unnamed: 0,shape_id,stop_sequence,stop_id
0,886387,1,14155
1,886387,2,3807
2,886387,3,6904


---
## Define Helper Functions

In [8]:
def find_closest_stop_id(input_lat, input_lon):
    """Return the `stop_id` of the stop nearest to the given coordinates.

    Distance is the straight-line (Euclidean) distance computed directly on
    the latitude/longitude values of the module-level `stops` table.

    Parameters
    ----------
    input_lat, input_lon : float
        Coordinates of the query point, in the same units as
        `stops.stop_lat` / `stops.stop_lon`.

    Returns
    -------
    The `stop_id` value of the closest stop.

    Raises
    ------
    ValueError
        If the `stops` table has no rows.
    """
    if stops.empty:
        raise ValueError('stops table is empty; cannot find a closest stop')

    # Compute distances as a standalone Series instead of adding a column to
    # a slice of `stops`.
    lat_diff = stops['stop_lat'] - input_lat
    lon_diff = stops['stop_lon'] - input_lon
    distance = (lat_diff**2 + lon_diff**2) ** 0.5

    # Only the minimum is needed, so a single linear scan replaces sorting
    # the whole table. `argmin` is positional, hence the `.iloc` lookup.
    return stops['stop_id'].iloc[distance.argmin()]

In [9]:
def find_closest_stops(input_lat, input_lon):
    """Rank every stop by straight-line distance from the given coordinates.

    Returns a new frame with the columns `stop_id`, `stop_lat`, `stop_lon`
    and `distance`, sorted by ascending `distance` with a fresh 0..n-1 index.
    The distance is computed directly on the latitude/longitude values of
    the module-level `stops` table.
    """
    candidates = stops.loc[:, ['stop_id', 'stop_lat', 'stop_lon']]
    lat_gap = abs(input_lat - candidates['stop_lat'])
    lon_gap = abs(input_lon - candidates['stop_lon'])
    ranked = candidates.assign(distance=(lat_gap**2 + lon_gap**2) ** (1 / 2))
    ranked = ranked.sort_values(by='distance')
    return ranked.reset_index(drop=True)

In [10]:
def walking_speed_estimate(stop_id_A=917, stop_id_B=9946, time_in_minutes=20):
    """Estimate walking speed in coordinate units per second.

    The speed is calibrated from one reference walk: the Manhattan distance
    (|dlat| + |dlon|) between two stops in the module-level `stops` table,
    divided by the time taken to walk between them.

    Parameters
    ----------
    stop_id_A, stop_id_B : int, optional
        `stop_id` values of the two reference stops.
    time_in_minutes : float, optional
        Duration of the reference walk between the two stops, in minutes.
        NOTE(review): the default of 20 minutes is presumably an observed or
        estimated walk between the default stops; confirm.

    Returns
    -------
    float
        Distance covered per second, in the same units as
        `stops.stop_lat` / `stops.stop_lon`. This is not a speed in metres.

    Raises
    ------
    ValueError
        If `time_in_minutes` is not positive.
    IndexError
        If either stop id is missing from `stops`.
    """
    if time_in_minutes <= 0:
        raise ValueError('time_in_minutes must be positive')

    def stop_coordinates(stop_id):
        # One lookup per stop instead of one per coordinate.
        match = stops.loc[stops.stop_id == stop_id, ['stop_lat', 'stop_lon']]
        if match.empty:
            raise IndexError(f'stop_id {stop_id} not found in stops')
        return match['stop_lat'].values[0], match['stop_lon'].values[0]

    lat_a, lon_a = stop_coordinates(stop_id_A)
    lat_b, lon_b = stop_coordinates(stop_id_B)
    distance = abs(lat_b - lat_a) + abs(lon_b - lon_a)
    return distance / (time_in_minutes * 60)

In [11]:
def build_shortest_path_table(start_stop_id, current_time_delta):
    """Create the initial arrival table, assuming every stop is reached on foot.

    Starts from a copy of the module-level `stops` table and adds:

    - `arrival_time_delta`: `current_time_delta` plus the walking time from
      the start stop (Manhattan distance on lat/lon divided by
      `walking_speed_estimate()`, rounded to whole seconds)
    - `previous_stop`: the start stop id
    - `previous_mode`: 'W'
    - `trip_id`: None
    - `visited`: False

    The rows are returned sorted by `arrival_time_delta` with a fresh index.
    """
    is_start = stops.stop_id == start_stop_id
    origin_lat = stops.loc[is_start, 'stop_lat'].values[0]
    origin_lon = stops.loc[is_start, 'stop_lon'].values[0]

    table = stops.copy()
    lat_offset = abs(table['stop_lat'] - origin_lat)
    lon_offset = abs(table['stop_lon'] - origin_lon)
    seconds_on_foot = round((lat_offset + lon_offset) / walking_speed_estimate(), 0)
    table['arrival_time_delta'] = current_time_delta + pd.to_timedelta(seconds_on_foot, 'seconds')
    table = table.sort_values(by='arrival_time_delta').reset_index(drop=True)

    # Bookkeeping columns, identical for every row at the start.
    initial_values = {
        'previous_stop': start_stop_id,
        'previous_mode': 'W',
        'trip_id': None,
        'visited': False,
    }
    for column, value in initial_values.items():
        table[column] = value
    return table

In [12]:
def build_stop_schedule(start_stop_id, current_time_delta):
    """Return the next departure of each shape serving a stop.

    Rows of the module-level `schedule` for `start_stop_id` are kept. Any
    departure strictly earlier than `current_time_delta` is treated as the
    following day's run: it is flagged `next_day = True` and its
    `stop_time_delta` is pushed forward by one day. The rows are then sorted
    by `stop_time_delta` and reduced to the first row per `shape_id`.

    Parameters
    ----------
    start_stop_id
        `stop_id` whose departures are wanted.
    current_time_delta : timedelta
        Current time as an offset comparable with `schedule.stop_time_delta`.

    Returns
    -------
    pandas.DataFrame
        The `schedule` columns plus a boolean `next_day` column, one row per
        `shape_id`, with a fresh index. Empty if the stop has no rows in
        `schedule`.
    """
    # Filter first and copy only the matching rows: this avoids duplicating
    # the whole schedule on every call and gives a frame that is safe to edit.
    stop_schedule = schedule.loc[schedule.stop_id == start_stop_id].copy()

    # Computed once, before any time is shifted.
    already_departed = stop_schedule['stop_time_delta'] < current_time_delta
    stop_schedule['next_day'] = already_departed
    stop_schedule.loc[already_departed, 'stop_time_delta'] = (
        stop_schedule.loc[already_departed, 'stop_time_delta'] + td(days = 1)
    )

    stop_schedule = stop_schedule.sort_values(by = 'stop_time_delta').reset_index(drop = True)
    stop_schedule = stop_schedule.drop_duplicates(subset = 'shape_id', keep = 'first', ignore_index = True)
    return stop_schedule

In [13]:
def update_shortest_path(shortest_path, stop_schedule, current_time_delta, visiting_stop_id):
    """Relax transit arrivals reachable from the stop being visited.

    For every trip in `stop_schedule`, the stops that come later on that trip
    (larger `stop_sequence`) are looked up in the module-level `schedule`.
    Trips flagged `next_day` have their times pushed forward by one day. For
    each downstream stop the earliest scheduled time is kept, and whenever it
    is strictly earlier than the stop's current `arrival_time_delta`, the row
    is updated with that time, `previous_stop = visiting_stop_id`,
    `previous_mode = 'T'` and the trip's `trip_id`.

    Parameters
    ----------
    shortest_path : pandas.DataFrame
        Arrival table with `stop_id`, `arrival_time_delta`, `previous_stop`,
        `previous_mode`, `trip_id` and `visited` columns. Not modified.
    stop_schedule : pandas.DataFrame
        Boardable trips at the visited stop; needs `trip_id`,
        `stop_sequence` and `next_day`. May be empty.
    current_time_delta
        Unused; kept so existing callers keep working.
    visiting_stop_id
        `stop_id` of the stop being expanded; it is marked as visited.

    Returns
    -------
    pandas.DataFrame
        Updated copy sorted by `arrival_time_delta` with a fresh index.

    Notes
    -----
    Downstream stops that do not appear in `shortest_path` are ignored.
    """
    # Work on a copy so the caller's table is never changed behind its back.
    shortest_path = shortest_path.copy()

    # A stop with no scheduled departures simply has nothing to relax.
    if len(stop_schedule) > 0:
        boardings = stop_schedule[['trip_id', 'stop_sequence', 'next_day']].rename(
            columns = {'stop_sequence': 'boarding_sequence'}
        )

        # One pass over `schedule` for all boardable trips, then keep only the
        # stops that come after the boarding position on each trip.
        on_board = schedule.loc[schedule['trip_id'].isin(boardings['trip_id'])]
        upcoming = on_board.merge(boardings, on = 'trip_id', how = 'inner')
        upcoming = upcoming.loc[upcoming['stop_sequence'] > upcoming['boarding_sequence']].copy()

        shift_day = upcoming['next_day'].astype(bool)
        upcoming.loc[shift_day, 'stop_time_delta'] = (
            upcoming.loc[shift_day, 'stop_time_delta'] + td(days = 1)
        )

        # Earliest scheduled arrival (and its trip) per downstream stop.
        earliest = (
            upcoming.sort_values(by = 'stop_time_delta')
            .drop_duplicates(subset = 'stop_id', keep = 'first')
            .set_index('stop_id')
        )

        if not earliest.empty:
            # Stops not served by any boardable trip map to NaT, which never
            # compares as an improvement.
            transit_arrival = shortest_path['stop_id'].map(earliest['stop_time_delta'])
            improved = transit_arrival < shortest_path['arrival_time_delta']

            shortest_path.loc[improved, 'arrival_time_delta'] = transit_arrival[improved]
            shortest_path.loc[improved, 'previous_stop'] = visiting_stop_id
            shortest_path.loc[improved, 'previous_mode'] = 'T'
            shortest_path.loc[improved, 'trip_id'] = (
                shortest_path.loc[improved, 'stop_id'].map(earliest['trip_id'])
            )

    shortest_path = shortest_path.sort_values(by = 'arrival_time_delta').reset_index(drop = True)
    shortest_path.loc[shortest_path.stop_id == visiting_stop_id, 'visited'] = True

    return shortest_path

In [14]:
def update_walking_path(shortest_path, current_time_delta, visiting_stop_id):
    """Relax arrivals reachable by walking from the stop being visited.

    The walking time from `visiting_stop_id` to every stop in `shortest_path`
    is the Manhattan distance on lat/lon divided by
    `walking_speed_estimate()`, rounded to whole seconds. Wherever
    `current_time_delta` plus that walking time is strictly earlier than the
    stop's current `arrival_time_delta`, the row is updated with the walking
    arrival, `previous_stop = visiting_stop_id`, `previous_mode = 'W'` and
    `trip_id = None`.

    Parameters
    ----------
    shortest_path : pandas.DataFrame
        Arrival table; must carry `stop_lat`, `stop_lon`,
        `arrival_time_delta`, `previous_stop`, `previous_mode` and `trip_id`.
        Not modified.
    current_time_delta
        Time at which the walk starts from the visited stop.
    visiting_stop_id
        `stop_id` of the stop to walk from; looked up in the module-level
        `stops` table.

    Returns
    -------
    pandas.DataFrame
        Updated copy sorted by `arrival_time_delta` with a fresh index.
    """
    # Work on a copy so the caller's table is never changed behind its back.
    shortest_path = shortest_path.copy()

    is_visiting = stops.stop_id == visiting_stop_id
    visiting_stop_lat = stops.loc[is_visiting, 'stop_lat'].values[0]
    visiting_stop_lon = stops.loc[is_visiting, 'stop_lon'].values[0]

    walking_distance = (
        abs(shortest_path['stop_lat'] - visiting_stop_lat)
        + abs(shortest_path['stop_lon'] - visiting_stop_lon)
    )
    walking_time = (walking_distance / walking_speed_estimate()).round(0)

    # Kept as a local Series rather than a scratch column on the table.
    walking_arrival = current_time_delta + pd.to_timedelta(walking_time, 'seconds')

    faster_on_foot = shortest_path['arrival_time_delta'] > walking_arrival
    shortest_path.loc[faster_on_foot, 'arrival_time_delta'] = walking_arrival[faster_on_foot]
    shortest_path.loc[faster_on_foot, 'previous_stop'] = visiting_stop_id
    shortest_path.loc[faster_on_foot, 'previous_mode'] = 'W'
    shortest_path.loc[faster_on_foot, 'trip_id'] = None

    return shortest_path.sort_values(by = 'arrival_time_delta').reset_index(drop = True)

---
## Define Master Function

In [29]:
def find_shortest_path(start_lat, start_lon, end_lat, end_lon, start_time_delta, max_iterations=20):
    """Build the earliest-arrival table outward from the stop nearest the start.

    The stop closest to (`start_lat`, `start_lon`) seeds a table in which
    every stop is initially reached on foot. The loop then repeatedly takes
    the unvisited stop with the earliest arrival, relaxes walking transfers
    from it (only when that stop was itself reached by transit), relaxes the
    transit trips that can be boarded there, and marks it visited.

    Parameters
    ----------
    start_lat, start_lon : float
        Coordinates of the starting point.
    end_lat, end_lon : float
        Coordinates of the destination.
        NOTE(review): these are accepted but not read anywhere in this
        function yet; the result is the whole arrival table, not a path to
        the destination.
    start_time_delta : timedelta
        Departure time as an offset comparable with `schedule.stop_time_delta`.
    max_iterations : int or None, optional
        Maximum number of stops to expand. Defaults to 20; pass None to
        expand until every stop has been visited.

    Returns
    -------
    pandas.DataFrame
        The arrival table after the last expansion, sorted by
        `arrival_time_delta`.
    """
    start_stop_id = find_closest_stop_id(start_lat, start_lon)
    shortest_path = build_shortest_path_table(start_stop_id, start_time_delta)

    # Each iteration visits exactly one stop, so the table size is a hard cap.
    iteration_limit = len(shortest_path)
    if max_iterations is not None:
        iteration_limit = min(iteration_limit, max_iterations)

    for _ in range(iteration_limit):
        unvisited = shortest_path.loc[~shortest_path['visited'].astype(bool)]
        if unvisited.empty:
            break

        # The table is kept sorted by arrival time, so the first unvisited
        # row is the earliest-reached stop still to be expanded.
        next_stop_record = unvisited.iloc[0]
        current_stop_id = next_stop_record['stop_id']
        current_time_delta = next_stop_record['arrival_time_delta']

        # Walking transfers are only expanded from stops reached by transit.
        if next_stop_record['previous_mode'] == 'T':
            shortest_path = update_walking_path(shortest_path, current_time_delta, current_stop_id)

        stop_schedule = build_stop_schedule(current_stop_id, current_time_delta)
        shortest_path = update_shortest_path(shortest_path, stop_schedule, current_time_delta, current_stop_id)

    return shortest_path

---
## Test Functions

In [16]:
# Sample [latitude, longitude] points used to exercise the model.
home = [43.76008911645013, -79.33181124795766]
longos = [43.75447805630398, -79.35689569243047]
gonoe = [43.7459232592541, -79.34612864369309]

# Query time as an offset from midnight (19:00:00).
current_time_delta = td(hours = 19)

In [30]:
%%time

# Run the model from `home` towards `longos` at the sample query time.
# Each point is a [lat, lon] pair, unpacked into the positional arguments.
output = find_shortest_path(*home, *longos, current_time_delta)

CPU times: user 2.84 s, sys: 836 ms, total: 3.68 s
Wall time: 3.04 s


In [18]:
# Display the result table.
# NOTE(review): the saved output of this cell is a NameError, i.e. it was last
# run before `output` was assigned; re-run it after the cell that calls
# find_shortest_path.
output

NameError: name 'output' is not defined

In [None]:
# Write the result table to disk as a Feather file.
output.to_feather('data/export/output.ftr')

In [None]:
# Look up the stop nearest to the `gonoe` sample point.
find_closest_stop_id(gonoe[0], gonoe[1])

In [None]:
# Display the result table again for inspection.
output

In [None]:
# Inspect the result row for stop 917.
output.query('stop_id == 917')

In [None]:
# Show the scheduled rows of trip 43027160 at stops 9834 and 917.
schedule.query('(stop_id == 9834 | stop_id == 917) & trip_id == 43027160')

In [None]:
# Inspect the result row for stop 9834.
output.query('stop_id == 9834')

In [None]:
# Inspect the result row for stop 4590.
output.query('stop_id == 4590')

In [None]:
# Show the scheduled rows of trip 43000162 at stops 4590 and 3736.
schedule.query('(stop_id == 4590 | stop_id == 3736) & trip_id == 43000162')

In [None]:
# Inspect the result row for stop 3736.
output.query('stop_id == 3736')

In [None]:
# Inspect the result row for stop 5740.
output.query('stop_id == 5740')

In [None]:
# Display the result table once more at the end of the session.
output