# Pathfinding Model
Objectives:
- build a pathfinding model wrapped inside a function called `find_shortest_path`
- this function should accept the user's starting and ending geocoordinates along with the current time
- data has been preprocessed in a previous notebook and filtered only include regular weekday service; ie. stop times are for any given Monday to Friday

---
## Import Modules & Data

In [1]:
import time
import numpy as np
import pandas as pd
import numexpr as ne
from datetime import timedelta as td

In [2]:
%load_ext line_profiler

In [3]:
stops = pd.read_feather('data/model/stops.ftr')
schedule = pd.read_feather('data/model/schedule.ftr')

In [4]:
schedule.head(1)

Unnamed: 0,trip_id,route_short_name,route_long_name,shape_id,trip_headsign,stop_sequence,stop_time,stop_time_delta,stop_id,stop_code,stop_name,stop_lat,stop_lon
0,42990004,10,VAN HORNE,886387,EAST - 10 VAN HORNE towards VICTORIA PARK,1,7:00:29,0 days 07:00:29,14155,14633,Don Mills Station,43.776222,-79.347048


In [5]:
stops.head(1)

Unnamed: 0,stop_id,stop_code,stop_name,stop_lat,stop_lon
0,262,662,Danforth Rd at Kennedy Rd,43.714379,-79.260939


---
## Define Helper Functions

In [6]:
def find_closest_stops(input_lat, input_lon):
    df = stops.loc[:, ['stop_id', 'stop_lat', 'stop_lon']]
    df['distance'] = ( abs(input_lat - df['stop_lat'])**2 + abs(input_lon - df['stop_lon'])**2 )**(1/2)
    df = df.sort_values(by = 'distance').reset_index(drop = True)
    return df

---
## Define Functions

## test arguments

In [7]:
test_start_time_delta = td(hours = 19)

## find closest stop id

In [8]:
def find_closest_stop_id(input_lat, input_lon):
    df = stops.loc[:, ['stop_id', 'stop_lat', 'stop_lon']]
    df['distance'] = ( abs(input_lat - df['stop_lat'])**2 + abs(input_lon - df['stop_lon'])**2 )**(1/2)
    closest_stop_id = df.sort_values(by = 'distance').stop_id.values[0]
    return closest_stop_id

In [9]:
home = [43.76008911645013, -79.33181124795766]
test_stop_id = find_closest_stop_id(home[0], home[1])
test_stop_id

917

## walking speed

In [10]:
def walking_speed(stop_id_A = 917, stop_id_B = 9946, walk_time = 20 * 60):
    stop_A_lat = stops.loc[stops.stop_id == stop_id_A, 'stop_lat'].values[0]
    stop_A_lon = stops.loc[stops.stop_id == stop_id_A, 'stop_lon'].values[0]
    stop_B_lat = stops.loc[stops.stop_id == stop_id_B, 'stop_lat'].values[0]
    stop_B_lon = stops.loc[stops.stop_id == stop_id_B, 'stop_lon'].values[0]
    distance = abs(stop_B_lat - stop_A_lat) + abs(stop_B_lon - stop_A_lon)
    walking_speed = distance / walk_time
    return walking_speed

In [11]:
walking_speed()

1.4267500000002541e-05

## build shortest path

In [12]:
stops.head(1)

Unnamed: 0,stop_id,stop_code,stop_name,stop_lat,stop_lon
0,262,662,Danforth Rd at Kennedy Rd,43.714379,-79.260939


In [13]:
stops_mini = stops.loc[:, ['stop_id', 'stop_lat', 'stop_lon']].copy().set_index('stop_id')
stops_mini.head(1)

Unnamed: 0_level_0,stop_lat,stop_lon
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1
262,43.714379,-79.260939


In [14]:
def build_shortest_path(stops_df, start_stop_id, current_time_delta):
    start_stop_lat = stops_df.at[start_stop_id, 'stop_lat']
    start_stop_lon = stops_df.at[start_stop_id, 'stop_lon']
    walking_distance = abs(stops_df['stop_lat'] - start_stop_lat) + abs(stops_df['stop_lon'] - start_stop_lon)
    walking_time = round(walking_distance / walking_speed(), 0)
    stops_df['arrival_time_delta'] = current_time_delta + pd.to_timedelta(walking_time, 'seconds')
    stops_df['previous_stop'] = start_stop_id
    stops_df['previous_mode'] = 'W'
    stops_df['trip_id'] = None
    stops_df['visited'] = False
    return stops_df

In [15]:
test_shortest_path = build_shortest_path(stops_mini, test_stop_id, test_start_time_delta)
test_shortest_path.head(1)

Unnamed: 0_level_0,stop_lat,stop_lon,arrival_time_delta,previous_stop,previous_mode,trip_id,visited
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
262,43.714379,-79.260939,0 days 21:15:48,917,W,,False


## select unvisited stop with the earliest arrival

In [16]:
test_unvisited_stops = test_shortest_path.loc[test_shortest_path.visited == False]
test_unvisited_stops.head(1)

Unnamed: 0_level_0,stop_lat,stop_lon,arrival_time_delta,previous_stop,previous_mode,trip_id,visited
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
262,43.714379,-79.260939,0 days 21:15:48,917,W,,False


In [17]:
test_current_stop_id = test_unvisited_stops.loc[test_unvisited_stops.arrival_time_delta == test_unvisited_stops.arrival_time_delta.min(), 'previous_stop'].values[0]
test_current_stop_id

917

In [18]:
test_current_time_delta = test_shortest_path.at[test_current_stop_id, 'arrival_time_delta']
test_current_time_delta

Timedelta('0 days 19:00:00')

In [19]:
test_previous_mode = test_shortest_path.at[test_current_stop_id, 'previous_mode']
test_previous_mode

'W'

## build stop schedule

In [21]:
schedule.head(1)

Unnamed: 0,trip_id,route_short_name,route_long_name,shape_id,trip_headsign,stop_sequence,stop_time,stop_time_delta,stop_id,stop_code,stop_name,stop_lat,stop_lon
0,42990004,10,VAN HORNE,886387,EAST - 10 VAN HORNE towards VICTORIA PARK,1,7:00:29,0 days 07:00:29,14155,14633,Don Mills Station,43.776222,-79.347048


In [88]:
schedule_mini = schedule.loc[:, ['trip_id', 'shape_id', 'stop_sequence', 'stop_id', 'stop_time_delta']].copy()
schedule_mini['trip_seq'] = (schedule_mini.trip_id.astype(str) + schedule_mini.stop_sequence.astype(str).str.zfill(2)).astype(int)
schedule_mini = schedule_mini.set_index('trip_seq')
schedule_mini = schedule_mini.sort_values(by = 'stop_time_delta')
schedule_mini['next_day'] = False
schedule_mini.head(1)

Unnamed: 0_level_0,trip_id,shape_id,stop_sequence,stop_id,stop_time_delta,next_day
trip_seq,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4300346901,43003469,887148,1,8533,0 days 03:32:05,False


In [109]:
def build_stop_schedule(schedule_df, current_stop_id, current_time_delta):
    stop_id_values = schedule_df.stop_id.values
    stop_schedule = schedule_df[ne.evaluate('(stop_id_values == current_stop_id)')].copy()
    stop_schedule['next_day'] = np.where(stop_schedule.stop_time_delta < current_time_delta, True, False)
    stop_schedule['stop_time_delta'] = np.where(stop_schedule.next_day.values == True, stop_schedule.stop_time_delta + td(days = 1), stop_schedule.stop_time_delta)
    stop_schedule = pd.concat([stop_schedule[stop_schedule.next_day == False], stop_schedule[stop_schedule.next_day == True]])
    stop_schedule = stop_schedule.drop_duplicates(subset = 'shape_id')
    return stop_schedule

In [77]:
def build_table(df):
    
    select_condition = ((df.stop_id.values == 917) & (df.stop_time_delta > td(hours = 19)).values)
    df['select_trip'] = np.where(select_condition, df.trip_id.values, 0)
    trip_list = np.unique(df.select_trip.values)
    df['apply_trip'] = np.where(np.isin(df.trip_id.values, trip_list), df.trip_id.values, 0)

    df['select_shape'] = np.where(select_condition, df.shape_id.values, 0)
    shape_list = np.unique(df.shape_id.values)
    df['apply_shape'] = np.where(np.isin(df.shape_id.values, shape_list), df.shape_id.values, 0)

    df['select_sequence'] = np.where(select_condition, df.stop_sequence.values, 0)
    df2 = df.loc[select_condition, ['select_shape', 'select_sequence']].drop_duplicates()
    df['apply_sequence'] = df.apply_shape.map(df2.set_index('select_shape')['select_sequence'])

    compare_selection = ((df.apply_trip.values != 0) & (df.stop_sequence.values > df.apply_sequence.values))
    df['compare_sequence'] = np.where(compare_selection, True, False)
    df = df[compare_selection].drop_duplicates(subset = 'stop_id')

    return df

In [110]:
test_stop_schedule = build_stop_schedule(schedule_mini, test_current_stop_id, test_current_time_delta)
test_stop_schedule.head(1)

Unnamed: 0_level_0,trip_id,shape_id,stop_sequence,stop_id,stop_time_delta,next_day
trip_seq,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4302716048,43027160,888274,48,917,0 days 19:00:42,False


In [111]:
test_stop_schedule

Unnamed: 0_level_0,trip_id,shape_id,stop_sequence,stop_id,stop_time_delta,next_day
trip_seq,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4302716048,43027160,888274,48,917,0 days 19:00:42,False
4302930021,43029300,888390,21,917,0 days 19:02:47,False
4302715917,43027159,888280,17,917,0 days 19:07:45,False
4302721539,43027215,888281,39,917,0 days 20:20:45,False
4300560453,43005604,887271,53,917,1 days 02:52:20,False


In [94]:
%lprun -f build_stop_schedule build_stop_schedule(schedule_mini, test_current_stop_id, test_current_time_delta)

Timer unit: 1e-06 s

Total time: 0.050779 s
File: <ipython-input-90-b41ec0588bad>
Function: build_stop_schedule at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def build_stop_schedule(schedule_df, current_stop_id, current_time_delta):
     2         1         88.0     88.0      0.2      stop_id_values = schedule_df.stop_id.values
     3         1       5800.0   5800.0     11.4      stop_schedule = schedule_df[ne.evaluate('(stop_id_values == current_stop_id)')].copy()
     4         1        780.0    780.0      1.5      stop_schedule['next_day'] = np.where(stop_schedule.stop_time_delta < current_time_delta, True, False)
     5         1        986.0    986.0      1.9      stop_schedule['stop_time_delta'] = np.where(stop_schedule.next_day.values == True, stop_schedule.stop_time_delta + td(days = 1), stop_schedule.stop_time_delta)
     6         1       7978.0   7978.0     15.7      stop_schedule = pd.concat([stop_

In [20]:
# Do Not Delete!

def find_shortest_path(start_lat, start_lon, end_lat, end_lon, start_time_delta): 
    
    start_stop_id = find_closest_stop_id(start_lat, start_lon) # update parameters to include use of stops df
    shortest_path = build_shortest_path(stops_mini, start_stop_id, start_time_delta)
    
    for i in range(0, len(shortest_path)):
    # for i in range(0, 94):
                
        unvisited_stops = shortest_path.loc[shortest_path.visited == False]
        current_stop_id = unvisited_stops.loc[unvisited_stops.arrival_time_delta == unvisited_stops.arrival_time_delta.min(), 'previous_stop'].values[0]
        current_time_delta = shortest_path.at[next_visit_stop_id, 'arrival_time_delta']
        previous_mode = shortest_path.at[next_visit_stop_id, 'previous_mode']

        # *** IGNORE FOR NOW ***
        # if (previous_mode == 'T'):
            # shortest_path = update_walking_path(shortest_path, current_time_delta, current_stop_id)
        # **********************
        
        stop_schedule = build_stop_schedule(schedule_mini, current_stop_id, current_time_delta)
        upcoming_trip_stops = build_upcoming_trip_stops(stop_schedule)
        shortest_path = update_shortest_path(shortest_path, upcoming_trip_stops, current_time_delta, current_stop_id)
        
        shortest_path.loc[next_visit_stop_id, 'visited'] = True
        
    return shortest_path

# Notes
# include stops & schedule df (modified) as a parameter vs referencing it as a global variable?

#### can you filter a list of every stop with a trip that leaves from the current stop, and the associated earliest trip which will get me there

#### create a set of all trips that will leave from the current stop (mark True); for each trip hardcode (even if duplicated) the next stop sequence in question (mark True all stop seq greater than this value); where both these values are True; select these into a further step to achieve perhaps same result without a for loop

#### is there a way to get rid of the stops table all together and rely completely on the schedule df (even if data is duplicated)

## build upcoming trip stops

In [95]:
test_stop_schedule

Unnamed: 0_level_0,trip_id,shape_id,stop_sequence,stop_id,stop_time_delta,next_day
trip_seq,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4302716048,43027160,888274,48,917,0 days 19:00:42,False
4302930021,43029300,888390,21,917,0 days 19:02:47,False
4302715917,43027159,888280,17,917,0 days 19:07:45,False
4302721539,43027215,888281,39,917,0 days 20:20:45,False
4300560453,43005604,887271,53,917,1 days 02:52:20,False


In [105]:
## BREAK THE FOR LOOP VIA THE FUNCTION BEFORE
# First grab each required column in its entirety
# Ultimately create the filter to search schedule_mini for the requirements 
# (trips where trip_id is in stop_schedule and where stop_sequence is greater than the one on stop_schedule for the same trip in question)
# Then filter out duplicate records on stop_id
# END RESULT should be df with a list of unique stop_id values as the key and the associated earliest trip_id that will get you to that stop_id from the current_stop_id in question

In [97]:
def build_upcoming_trip_stops(stop_schedule):
    trip_id = stop_schedule.trip_id.values
    stop_sequence = stop_schedule.stop_sequence.values
    next_day = stop_schedule.next_day.values
    
    return upcoming_trip_stops

In [97]:
def build_upcoming_trip_stops(stop_schedule):
    upcoming_trip_stops = []
    for i in range(0, len(stop_schedule)):
        current_trip_id = stop_schedule['trip_id'].values[i]
        current_stop_sequence = stop_schedule['stop_sequence'].values[i]
        next_day = stop_schedule['next_day'].values[i]
        trip_id = schedule.trip_id.values
        stop_sequence = schedule.stop_sequence.values
        next_stops = schedule[ne.evaluate('(trip_id == current_trip_id) & (stop_sequence > current_stop_sequence)')].copy()
        if (next_day):
            next_stops.stop_time_delta = next_stops.stop_time_delta + td(days = 1)        
        upcoming_trip_stops.append(next_stops)
    upcoming_trip_stops = pd.concat(upcoming_trip_stops)
    upcoming_trip_stops = upcoming_trip_stops.sort_values(by = 'stop_time_delta').drop_duplicates(subset = 'stop_id', keep = 'first').reset_index(drop = True)
    return upcoming_trip_stops

In [100]:
test_upcoming_trip_stops = build_upcoming_trip_stops(test_stop_schedule)
test_upcoming_trip_stops.head(1)

Unnamed: 0,trip_id,route_short_name,route_long_name,shape_id,trip_headsign,stop_sequence,stop_time,stop_time_delta,stop_id,stop_code,stop_name,stop_lat,stop_lon
0,43027160,95,YORK MILLS,888274,WEST - 95 YORK MILLS towards YORK MILLS STATION,49,19:01:15,0 days 19:01:15,5191,11397,1200-1202 York Mills Rd,43.759132,-79.333893


In [101]:
test_upcoming_trip_stops

Unnamed: 0,trip_id,route_short_name,route_long_name,shape_id,trip_headsign,stop_sequence,stop_time,stop_time_delta,stop_id,stop_code,stop_name,stop_lat,stop_lon
0,43027160,95,YORK MILLS,888274,WEST - 95 YORK MILLS towards YORK MILLS STATION,49,19:01:15,0 days 19:01:15,5191,11397,1200-1202 York Mills Rd,43.759132,-79.333893
1,43027160,95,YORK MILLS,888274,WEST - 95 YORK MILLS towards YORK MILLS STATION,50,19:02:24,0 days 19:02:24,7238,9079,York Mills Rd at Lochinvar Cres,43.757624,-79.339319
2,43027160,95,YORK MILLS,888274,WEST - 95 YORK MILLS towards YORK MILLS STATION,51,19:02:46,0 days 19:02:46,5738,9087,York Mills Rd at Valentine Dr,43.757207,-79.341181
3,43027160,95,YORK MILLS,888274,WEST - 95 YORK MILLS towards YORK MILLS STATION,52,19:03:52,0 days 19:03:52,9834,9071,York Mills Rd at Don Mills Rd,43.756053,-79.3466
4,43027160,95,YORK MILLS,888274,WEST - 95 YORK MILLS towards YORK MILLS STATION,53,19:05:13,0 days 19:05:13,6330,10605,900 York Mills Rd,43.755228,-79.350256
5,43027160,95,YORK MILLS,888274,WEST - 95 YORK MILLS towards YORK MILLS STATION,54,19:06:05,0 days 19:06:05,916,9078,York Mills Rd at Lesmill Rd West Side,43.754734,-79.352534
6,43027160,95,YORK MILLS,888274,WEST - 95 YORK MILLS towards YORK MILLS STATION,55,19:07:58,0 days 19:07:58,9974,13469,York Mills Rd at Scarsdale Rd,43.75366,-79.357427
7,43027160,95,YORK MILLS,888274,WEST - 95 YORK MILLS towards YORK MILLS STATION,56,19:09:09,0 days 19:09:09,944,9077,York Mills Rd at Leslie St West Side,43.753001,-79.360766
8,43027160,95,YORK MILLS,888274,WEST - 95 YORK MILLS towards YORK MILLS STATION,57,19:10:02,0 days 19:10:02,1509,7466,York Mills Rd at Banbury Rd (East),43.752639,-79.365268
9,43027160,95,YORK MILLS,888274,WEST - 95 YORK MILLS towards YORK MILLS STATION,58,19:11:03,0 days 19:11:03,9959,7464,York Mills Rd at Banbury Rd (West),43.751639,-79.370302


In [103]:
%lprun -f build_upcoming_trip_stops build_upcoming_trip_stops(test_stop_schedule)

Timer unit: 1e-06 s

Total time: 0.038005 s
File: <ipython-input-97-4ec59065ae06>
Function: build_upcoming_trip_stops at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def build_upcoming_trip_stops(stop_schedule):
     2         1          2.0      2.0      0.0      upcoming_trip_stops = []
     3         6         18.0      3.0      0.0      for i in range(0, len(stop_schedule)):
     4         5        180.0     36.0      0.5          current_trip_id = stop_schedule['trip_id'].values[i]
     5         5         91.0     18.2      0.2          current_stop_sequence = stop_schedule['stop_sequence'].values[i]
     6         5         80.0     16.0      0.2          next_day = stop_schedule['next_day'].values[i]
     7         5        166.0     33.2      0.4          trip_id = schedule.trip_id.values
     8         5        128.0     25.6      0.3          stop_sequence = schedule.stop_sequence.values
     9       

In [104]:
%%timeit
build_upcoming_trip_stops(test_stop_schedule)

28.1 ms ± 1.47 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## update shortest path

In [23]:
def update_shortest_path(shortest_path, upcoming_trip_stops, current_time_delta, visiting_stop_id):
    
    for i in range(0, len(upcoming_trip_stops)):
        current_stop_id = upcoming_trip_stops.loc[i, 'stop_id']
        current_stop_time_delta = upcoming_trip_stops.loc[i, 'stop_time_delta']
        current_trip_id = upcoming_trip_stops.loc[i, 'trip_id']
        current_arrival_time = shortest_path.loc[shortest_path.stop_id == current_stop_id, 'arrival_time_delta'].values[0]
        
        if (current_arrival_time > current_stop_time_delta):
            shortest_path.loc[shortest_path.stop_id == current_stop_id, 'arrival_time_delta'] = current_stop_time_delta
            shortest_path.loc[shortest_path.stop_id == current_stop_id, 'previous_stop'] = visiting_stop_id
            shortest_path.loc[shortest_path.stop_id == current_stop_id, 'previous_mode'] = 'T'
            shortest_path.loc[shortest_path.stop_id == current_stop_id, 'trip_id'] = current_trip_id
            
    shortest_path = shortest_path.sort_values(by = 'arrival_time_delta').reset_index(drop = True)
    shortest_path.loc[shortest_path.stop_id == visiting_stop_id, 'visited'] = True
    
    return shortest_path

In [70]:
input_shortest_path

Unnamed: 0,stop_id,stop_code,stop_name,stop_lat,stop_lon,arrival_time_delta,previous_stop,previous_mode,trip_id,visited
0,917,9083,York Mills Rd at Sandover Dr (1222 York Mills),43.759813,-79.331751,0 days 19:00:00,917,W,,False
1,8081,9088,York Mills Rd at Valley Woods Rd East Side,43.759595,-79.332036,0 days 19:00:35,917,W,,False
2,10256,11533,113 Valley Woods Rd,43.758960,-79.332778,0 days 19:02:12,917,W,,False
3,6913,11527,Valley Woods Rd at York Mills Rd,43.758994,-79.333070,0 days 19:02:30,917,W,,False
4,5191,11397,1200-1202 York Mills Rd,43.759132,-79.333893,0 days 19:03:18,917,W,,False
...,...,...,...,...,...,...,...,...,...,...
9414,4305,1632,The West Mall at Sherway Dr North Side,43.609781,-79.560638,1 days 02:22:38,917,W,,False
9415,7093,1631,The West Mall at Trillium Health Centre (North...,43.610381,-79.561480,1 days 02:22:55,917,W,,False
9416,880,1633,The West Mall at Sherway Dr (Trillium Health C...,43.609464,-79.560579,1 days 02:22:56,917,W,,False
9417,477,4435,Long Branch Loop,43.592111,-79.543700,1 days 02:23:29,917,W,,False


In [71]:
input_shortest_path.set_index('stop_id')

Unnamed: 0_level_0,stop_code,stop_name,stop_lat,stop_lon,arrival_time_delta,previous_stop,previous_mode,trip_id,visited
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
917,9083,York Mills Rd at Sandover Dr (1222 York Mills),43.759813,-79.331751,0 days 19:00:00,917,W,,False
8081,9088,York Mills Rd at Valley Woods Rd East Side,43.759595,-79.332036,0 days 19:00:35,917,W,,False
10256,11533,113 Valley Woods Rd,43.758960,-79.332778,0 days 19:02:12,917,W,,False
6913,11527,Valley Woods Rd at York Mills Rd,43.758994,-79.333070,0 days 19:02:30,917,W,,False
5191,11397,1200-1202 York Mills Rd,43.759132,-79.333893,0 days 19:03:18,917,W,,False
...,...,...,...,...,...,...,...,...,...
4305,1632,The West Mall at Sherway Dr North Side,43.609781,-79.560638,1 days 02:22:38,917,W,,False
7093,1631,The West Mall at Trillium Health Centre (North...,43.610381,-79.561480,1 days 02:22:55,917,W,,False
880,1633,The West Mall at Sherway Dr (Trillium Health C...,43.609464,-79.560579,1 days 02:22:56,917,W,,False
477,4435,Long Branch Loop,43.592111,-79.543700,1 days 02:23:29,917,W,,False


In [53]:
ex_s_path = pd.merge(input_shortest_path, upcoming_trip_stops, 'left', 'stop_id', suffixes = ('', '_uts'))

In [61]:
ex_s_path.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,9409,9410,9411,9412,9413,9414,9415,9416,9417,9418
stop_id,917,8081,10256,6913,5191,4880,6061,6223,10406,6235,...,6979,9397,4588,881,1523,4305,7093,880,477,1750
stop_code,9083,9088,11533,11527,11397,9072,9073,13959,11532,13958,...,1271,1638,3169,11119,1269,1632,1631,1633,4435,5511
stop_name,York Mills Rd at Sandover Dr (1222 York Mills),York Mills Rd at Valley Woods Rd East Side,113 Valley Woods Rd,Valley Woods Rd at York Mills Rd,1200-1202 York Mills Rd,York Mills Rd at Fenside Dr,York Mills Rd at Fenside Dr,Lynedock Cres at Fenside Dr East Side,Opposite 44 Valley Woods Rd,Fenside Dr at Slidell Cres,...,Markland Dr at Cherry Post Cres (North),The West Mall at The Queensway,The West Mall at Sherway Dr (Trillium Health C...,The West Mall at Queensway South Side,Markland Dr (West) at Bloor St West North Side,The West Mall at Sherway Dr North Side,The West Mall at Trillium Health Centre (North...,The West Mall at Sherway Dr (Trillium Health C...,Long Branch Loop,Long Branch Loop
stop_lat,43.759813,43.759595,43.75896,43.758994,43.759132,43.760561,43.760782,43.761865,43.755229,43.76331,...,43.633138,43.611806,43.609417,43.61175,43.629667,43.609781,43.610381,43.609464,43.592111,43.591811
stop_lon,-79.331751,-79.332036,-79.332778,-79.33307,-79.333893,-79.327819,-79.327546,-79.328158,-79.333336,-79.329061,...,-79.582671,-79.562012,-79.559813,-79.562334,-79.580248,-79.560638,-79.56148,-79.560579,-79.5437,-79.544124
arrival_time_delta,0 days 19:00:00,0 days 19:00:35,0 days 19:02:12,0 days 19:02:30,0 days 19:03:18,0 days 19:05:28,0 days 19:06:03,0 days 19:06:36,0 days 19:07:12,0 days 19:07:14,...,1 days 02:21:05,1 days 02:21:53,1 days 02:22:06,1 days 02:22:19,1 days 02:22:19,1 days 02:22:38,1 days 02:22:55,1 days 02:22:56,1 days 02:23:29,1 days 02:24:20
previous_stop,917,917,917,917,917,917,917,917,917,917,...,917,917,917,917,917,917,917,917,917,917
previous_mode,W,W,W,W,W,W,W,W,W,W,...,W,W,W,W,W,W,W,W,W,W
trip_id,,,,,,,,,,,...,,,,,,,,,,
visited,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False


In [205]:
def update_shortest_path(s_path, upcoming_trip_stops, current_time_delta, visiting_stop_id):
    
    #s_path = s_path.set_index('stop_id')
    #upcoming_trip_stops = upcoming_trip_stops.set_index('stop_id')
    
    s_path = s_path.join(upcoming_trip_stops, on = 'stop_id', how = 'left', rsuffix = '_uts')
    
    s_path.previous_stop = np.where(
        s_path.stop_time_delta.values < s_path.arrival_time_delta.values,
        visiting_stop_id,
        s_path.previous_stop.values
    )
    s_path.previous_mode = np.where(
        s_path.stop_time_delta.values < s_path.arrival_time_delta.values,
        'T',
        s_path.previous_mode.values
    )
    s_path.trip_id = np.where(
        s_path.stop_time_delta.values < s_path.arrival_time_delta.values,
        s_path.trip_id_uts.values,
        s_path.trip_id.values
    )
    s_path.arrival_time_delta = np.where(
        s_path.stop_time_delta.values < s_path.arrival_time_delta.values,
        s_path.stop_time_delta.values,
        s_path.arrival_time_delta.values
    )
            
    s_path.loc[visiting_stop_id, 'visited'] = True
    s_path = s_path.sort_values(by = 'arrival_time_delta').reset_index(drop = False)
    
    return s_path.loc[:, 'stop_id':'visited']

In [186]:
input_shortest_path.set_index('stop_id', inplace = True)
upcoming_trip_stops.set_index('stop_id', inplace = True)

In [187]:
input_shortest_path.head(3)

Unnamed: 0_level_0,stop_code,stop_name,stop_lat,stop_lon,arrival_time_delta,previous_stop,previous_mode,trip_id,visited
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
917,9083,York Mills Rd at Sandover Dr (1222 York Mills),43.759813,-79.331751,0 days 19:00:00,917,W,,False
8081,9088,York Mills Rd at Valley Woods Rd East Side,43.759595,-79.332036,0 days 19:00:35,917,W,,False
10256,11533,113 Valley Woods Rd,43.75896,-79.332778,0 days 19:02:12,917,W,,False


In [188]:
upcoming_trip_stops.head(3)

Unnamed: 0_level_0,trip_id,route_short_name,route_long_name,shape_id,trip_headsign,stop_sequence,stop_time,stop_time_delta,stop_code,stop_name,stop_lat,stop_lon
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
5191,43027160,95,YORK MILLS,888274,WEST - 95 YORK MILLS towards YORK MILLS STATION,49,19:01:15,0 days 19:01:15,11397,1200-1202 York Mills Rd,43.759132,-79.333893
7238,43027160,95,YORK MILLS,888274,WEST - 95 YORK MILLS towards YORK MILLS STATION,50,19:02:24,0 days 19:02:24,9079,York Mills Rd at Lochinvar Cres,43.757624,-79.339319
5738,43027160,95,YORK MILLS,888274,WEST - 95 YORK MILLS towards YORK MILLS STATION,51,19:02:46,0 days 19:02:46,9087,York Mills Rd at Valentine Dr,43.757207,-79.341181


In [206]:
input_shortest_path = build_shortest_path(917, td(hours = 19))
stop_schedule = build_stop_schedule(917, td(hours = 19))
upcoming_trip_stops = build_upcoming_trip_stops(stop_schedule)

In [207]:
%%time
s_path = update_shortest_path(input_shortest_path, upcoming_trip_stops, td(hours = 19), 917)

CPU times: user 22.9 ms, sys: 3.63 ms, total: 26.5 ms
Wall time: 25.4 ms


In [208]:
%%timeit
s_path = update_shortest_path(input_shortest_path, upcoming_trip_stops, td(hours = 19), 917)

15.9 ms ± 457 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
s_path

In [201]:
s_path

Unnamed: 0,stop_id,stop_code,stop_name,stop_lat,stop_lon,arrival_time_delta,previous_stop,previous_mode,trip_id,visited
0,917,9083,York Mills Rd at Sandover Dr (1222 York Mills),43.759813,-79.331751,0 days 19:00:00,917,W,,True
1,8081,9088,York Mills Rd at Valley Woods Rd East Side,43.759595,-79.332036,0 days 19:00:35,917,W,,False
2,5191,11397,1200-1202 York Mills Rd,43.759132,-79.333893,0 days 19:01:15,917,T,43027160.0,False
3,10256,11533,113 Valley Woods Rd,43.758960,-79.332778,0 days 19:02:12,917,W,,False
4,7238,9079,York Mills Rd at Lochinvar Cres,43.757624,-79.339319,0 days 19:02:24,917,T,43027160.0,False
...,...,...,...,...,...,...,...,...,...,...
9414,4305,1632,The West Mall at Sherway Dr North Side,43.609781,-79.560638,1 days 02:22:38,917,W,,False
9415,7093,1631,The West Mall at Trillium Health Centre (North...,43.610381,-79.561480,1 days 02:22:55,917,W,,False
9416,880,1633,The West Mall at Sherway Dr (Trillium Health C...,43.609464,-79.560579,1 days 02:22:56,917,W,,False
9417,477,4435,Long Branch Loop,43.592111,-79.543700,1 days 02:23:29,917,W,,False


In [181]:
s_path

Unnamed: 0,stop_code,stop_name,stop_lat,stop_lon,arrival_time_delta,previous_stop,previous_mode,trip_id,visited,trip_id_uts,...,route_long_name,shape_id,trip_headsign,stop_sequence,stop_time,stop_time_delta,stop_code_uts,stop_name_uts,stop_lat_uts,stop_lon_uts
0,9083,York Mills Rd at Sandover Dr (1222 York Mills),43.759813,-79.331751,0 days 19:00:00,917,W,,True,,...,,,,,,NaT,,,,
1,9088,York Mills Rd at Valley Woods Rd East Side,43.759595,-79.332036,0 days 19:00:35,917,W,,False,,...,,,,,,NaT,,,,
2,11397,1200-1202 York Mills Rd,43.759132,-79.333893,0 days 19:01:15,917,T,43027160.0,False,43027160.0,...,YORK MILLS,888274.0,WEST - 95 YORK MILLS towards YORK MILLS STATION,49.0,19:01:15,0 days 19:01:15,11397.0,1200-1202 York Mills Rd,43.759132,-79.333893
3,11533,113 Valley Woods Rd,43.75896,-79.332778,0 days 19:02:12,917,W,,False,,...,,,,,,NaT,,,,
4,9079,York Mills Rd at Lochinvar Cres,43.757624,-79.339319,0 days 19:02:24,917,T,43027160.0,False,43027160.0,...,YORK MILLS,888274.0,WEST - 95 YORK MILLS towards YORK MILLS STATION,50.0,19:02:24,0 days 19:02:24,9079.0,York Mills Rd at Lochinvar Cres,43.757624,-79.339319
5,11527,Valley Woods Rd at York Mills Rd,43.758994,-79.33307,0 days 19:02:30,917,W,,False,,...,,,,,,NaT,,,,
6,9087,York Mills Rd at Valentine Dr,43.757207,-79.341181,0 days 19:02:46,917,T,43027160.0,False,43027160.0,...,YORK MILLS,888274.0,WEST - 95 YORK MILLS towards YORK MILLS STATION,51.0,19:02:46,0 days 19:02:46,9087.0,York Mills Rd at Valentine Dr,43.757207,-79.341181
7,9071,York Mills Rd at Don Mills Rd,43.756053,-79.3466,0 days 19:03:52,917,T,43027160.0,False,43027160.0,...,YORK MILLS,888274.0,WEST - 95 YORK MILLS towards YORK MILLS STATION,52.0,19:03:52,0 days 19:03:52,9071.0,York Mills Rd at Don Mills Rd,43.756053,-79.3466
8,10605,900 York Mills Rd,43.755228,-79.350256,0 days 19:05:13,917,T,43027160.0,False,43027160.0,...,YORK MILLS,888274.0,WEST - 95 YORK MILLS towards YORK MILLS STATION,53.0,19:05:13,0 days 19:05:13,10605.0,900 York Mills Rd,43.755228,-79.350256


In [209]:
%lprun -f update_shortest_path update_shortest_path(input_shortest_path, upcoming_trip_stops, td(hours = 19), 917)

Timer unit: 1e-06 s

Total time: 0.02572 s
File: <ipython-input-205-68356ea5be88>
Function: update_shortest_path at line 1

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           def update_shortest_path(s_path, upcoming_trip_stops, current_time_delta, visiting_stop_id):
     2                                               
     3                                               #s_path = s_path.set_index('stop_id')
     4                                               #upcoming_trip_stops = upcoming_trip_stops.set_index('stop_id')
     5                                               
     6         1       7121.0   7121.0     27.7      s_path = s_path.join(upcoming_trip_stops, on = 'stop_id', how = 'left', rsuffix = '_uts')
     7                                               
     8         2        151.0     75.5      0.6      s_path.previous_stop = np.where(
     9         1        250.0    250.0      1.0          s_path.stop_tim

## update walking path

In [10]:
def update_walking_path(shortest_path, current_time_delta, visiting_stop_id):
    visiting_stop_lat = stops.loc[stops.stop_id == visiting_stop_id, 'stop_lat'].values[0]
    visiting_stop_lon = stops.loc[stops.stop_id == visiting_stop_id, 'stop_lon'].values[0]
    walking_distance = abs(shortest_path['stop_lat'] - visiting_stop_lat) + abs(shortest_path['stop_lon'] - visiting_stop_lon)
    walking_x    
    mask = shortest_path.arrival_time_delta > shortest_path.walking_arrival_time_delta
    shortest_path.loc[mask, 'arrival_time_delta'] = shortest_path.loc[mask, 'walking_arrival_time_delta']
    shortest_path.loc[mask, 'previous_stop'] = visiting_stop_id
    shortest_path.loc[mask, 'previous_mode'] = 'W'
    shortest_path.loc[mask, 'trip_id'] = None
    
    shortest_path = shortest_path.sort_values(by = 'arrival_time_delta').reset_index(drop = True)
    shortest_path = shortest_path.drop(columns = 'walking_arrival_time_delta')
    
    return shortest_path

---
## Define Master Function

In [56]:
def find_shortest_path(start_lat, start_lon, end_lat, end_lon, start_time_delta):
    
    start_stop_id = find_closest_stop_id(start_lat, start_lon)
    shortest_path = build_shortest_path(stops_mini, start_stop_id, start_time_delta)
    
    #for i in range(0, len(shortest_path)):
    for i in range(0, 94):
                
        next_stop_record = shortest_path.loc[shortest_path.visited == False].iloc[0]
        current_stop_id = next_stop_record.stop_id
        current_time_delta = next_stop_record.arrival_time_delta
        previous_mode = next_stop_record.previous_mode
                
        if (previous_mode == 'T'):
            shortest_path = update_walking_path(shortest_path, current_time_delta, current_stop_id)
        stop_schedule = build_stop_schedule(current_stop_id, current_time_delta)
        upcoming_trip_stops = build_upcoming_trip_stops(stop_schedule)
        shortest_path = update_shortest_path(shortest_path, upcoming_trip_stops, current_time_delta, current_stop_id)
        
    return shortest_path

---
## Test Functions

In [51]:
home = [43.76008911645013, -79.33181124795766]
longos = [43.75447805630398, -79.35689569243047]
gonoe = [43.7459232592541, -79.34612864369309]
current_time_delta = td(hours = 19, minutes = 0, seconds = 0)

In [57]:
%%time

output = find_shortest_path(home[0], home[1], longos[0], longos[1], td(hours = 19))

AttributeError: 'Series' object has no attribute 'stop_id'

In [220]:
output

Unnamed: 0,stop_id,stop_code,stop_name,stop_lat,stop_lon,arrival_time_delta,previous_stop,previous_mode,trip_id,visited
0,917,9083,York Mills Rd at Sandover Dr (1222 York Mills),43.759813,-79.331751,0 days 19:00:00,917,W,,False
1,8081,9088,York Mills Rd at Valley Woods Rd East Side,43.759595,-79.332036,0 days 19:00:35,917,W,,False
2,10256,11533,113 Valley Woods Rd,43.758960,-79.332778,0 days 19:02:12,917,W,,False
3,6913,11527,Valley Woods Rd at York Mills Rd,43.758994,-79.333070,0 days 19:02:30,917,W,,False
4,5191,11397,1200-1202 York Mills Rd,43.759132,-79.333893,0 days 19:03:18,917,W,,False
...,...,...,...,...,...,...,...,...,...,...
9414,4305,1632,The West Mall at Sherway Dr North Side,43.609781,-79.560638,1 days 02:22:38,917,W,,False
9415,7093,1631,The West Mall at Trillium Health Centre (North...,43.610381,-79.561480,1 days 02:22:55,917,W,,False
9416,880,1633,The West Mall at Sherway Dr (Trillium Health C...,43.609464,-79.560579,1 days 02:22:56,917,W,,False
9417,477,4435,Long Branch Loop,43.592111,-79.543700,1 days 02:23:29,917,W,,False


In [221]:
output.head(50)

Unnamed: 0,stop_id,stop_code,stop_name,stop_lat,stop_lon,arrival_time_delta,previous_stop,previous_mode,trip_id,visited
0,917,9083,York Mills Rd at Sandover Dr (1222 York Mills),43.759813,-79.331751,0 days 19:00:00,917,W,,False
1,8081,9088,York Mills Rd at Valley Woods Rd East Side,43.759595,-79.332036,0 days 19:00:35,917,W,,False
2,10256,11533,113 Valley Woods Rd,43.75896,-79.332778,0 days 19:02:12,917,W,,False
3,6913,11527,Valley Woods Rd at York Mills Rd,43.758994,-79.33307,0 days 19:02:30,917,W,,False
4,5191,11397,1200-1202 York Mills Rd,43.759132,-79.333893,0 days 19:03:18,917,W,,False
5,4880,9072,York Mills Rd at Fenside Dr,43.760561,-79.327819,0 days 19:05:28,917,W,,False
6,6061,9073,York Mills Rd at Fenside Dr,43.760782,-79.327546,0 days 19:06:03,917,W,,False
7,6223,13959,Lynedock Cres at Fenside Dr East Side,43.761865,-79.328158,0 days 19:06:36,917,W,,False
8,10406,11532,Opposite 44 Valley Woods Rd,43.755229,-79.333336,0 days 19:07:12,917,W,,False
9,6235,13958,Fenside Dr at Slidell Cres,43.76331,-79.329061,0 days 19:07:14,917,W,,False


In [213]:
# current best mini run benchmark
%lprun -f find_shortest_path find_shortest_path(home[0], home[1], longos[0], longos[1], current_time_delta)

KeyError: Timedelta('0 days 19:00:00')