# Build the reference table containing the weights between all connected stops

In [1]:
import numpy as np
import pandas as pd

In [2]:
from datetime import timedelta as td

In [3]:
# Stop table; it carries a `stop_id` column that a later cell turns into the index.
# NOTE(review): pickle files execute code on load — only read trusted, locally built artifacts.
stops = pd.read_pickle('data/model/model_stops.pickle')
# NOTE(review): `schedule` is not referenced again in the cells visible here — confirm it is still needed.
schedule = pd.read_feather('data/model/schedule.ftr')

In [4]:
# Divisor applied further down to the lat/lon distance between two stops to obtain a walking duration.
f = pd.read_pickle('data/model/f.pickle')

# Transit edges: a `duration` column indexed by (stop_id, next_stop_id).
t_w = pd.read_pickle('data/model/weights_transit.pickle')
# Transit neighbors per stop_id: a `neighbors` tuple and its size `num`.
t_n = pd.read_pickle('data/model/neighbors_transit.pickle')

# Per stop_id: stop coordinates plus a `walk` tuple of walkable stops
# (in the rows previewed below the tuple also contains the stop itself).
w_n = pd.read_pickle('data/model/weights_walking.pickle')

### Making sure t_w and t_n include exactly the same list of stop_id values
- w_n has been prefiltered to use only this set of stops

In [5]:
# Distinct origin stop ids found on the transit weight table.
stop_list = t_w.reset_index()['stop_id'].unique()

In [6]:
stop_list

array([  263,   264,   265, ..., 24420, 24421, 24422])

In [7]:
# Distinct stop ids found on the transit neighbor table.
stop_list2 = t_n.reset_index()['stop_id'].unique()

In [8]:
# Fail loudly if the two tables disagree. np.array_equal also copes with arrays of
# different lengths, where the element-wise `==` below does not give a per-stop answer.
assert np.array_equal(stop_list, stop_list2), 't_w and t_n do not cover the same stop_id values'
pd.Series(stop_list == stop_list2).value_counts()

True    8901
dtype: int64

---

In [9]:
w_n.head(3)

Unnamed: 0_level_0,stop_code,stop_name,stop_lat,stop_lon,walk,len
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
263,929,Davenport Rd at Bedford Rd,43.674448,-79.399659,"(263, 264, 751, 1405, 1642, 1646, 1937, 2106, ...",21
264,940,Davenport Rd at Dupont St,43.675511,-79.401938,"(263, 264, 751, 1642, 3631, 4165, 4968, 5616, ...",18
265,1871,Davisville Ave at Cleveland St,43.702088,-79.378112,"(265, 3463, 4995, 5177, 7466, 7942, 8013, 10375)",8


In [10]:
t_w.head(3)

Unnamed: 0_level_0,Unnamed: 1_level_0,duration
stop_id,next_stop_id,Unnamed: 2_level_1
263,264,76
264,4165,69
265,10375,63


In [11]:
t_n.head(3)

Unnamed: 0_level_0,neighbors,num
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1
263,"(264,)",1
264,"(4165,)",1
265,"(10375,)",1


## First create E: combine the two lists of neighbors (walking and transit) into a single list of unique neighbors for each stop

In [46]:
t_n

Unnamed: 0_level_0,neighbors,num
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1
263,"(264,)",1
264,"(4165,)",1
265,"(10375,)",1
266,"(7773,)",1
267,"(4040,)",1
...,...,...
24418,"(24416,)",1
24419,"(24420,)",1
24420,"(24421,)",1
24421,"(24418,)",1


In [12]:
# Start E from a copy so the transit neighbor table itself is left untouched.
E = t_n.copy(deep=True)

In [13]:
# The neighbor count column is called `length` on E.
E = E.rename({'num': 'length'}, axis = 'columns')

In [14]:
E

Unnamed: 0_level_0,neighbors,length
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1
263,"(264,)",1
264,"(4165,)",1
265,"(10375,)",1
266,"(7773,)",1
267,"(4040,)",1
...,...,...
24418,"(24416,)",1
24419,"(24420,)",1
24420,"(24421,)",1
24421,"(24418,)",1


In [15]:
E.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8901 entries, 263 to 24422
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   neighbors  8901 non-null   object
 1   length     8901 non-null   int64 
dtypes: int64(1), object(1)
memory usage: 208.6+ KB


In [16]:
# For every stop, merge its transit neighbors with its walkable stops into one
# tuple of unique stop ids and record how many there are.
for stop in t_n.index.values:
    # Concatenate transit then walking ids and de-duplicate through a set.
    merged = set(tuple(t_n.loc[stop, 'neighbors']) + tuple(w_n.loc[stop, 'walk']))
    # A stop is not its own neighbor. discard() is a no-op when the stop is absent,
    # whereas list.remove() raised ValueError in that case.
    merged.discard(stop)
    merged = tuple(merged)
    E.at[stop, 'neighbors'] = merged
    E.at[stop, 'length'] = len(merged)

In [17]:
# The count column is no longer needed once the neighbor tuples are built.
E = E.drop('length', axis = 1)

In [18]:
# From here on E is the Series of neighbor tuples keyed by stop_id.
E = E['neighbors']

In [19]:
E[263]

(264,
 1937,
 10140,
 6826,
 3631,
 9653,
 6457,
 2106,
 2492,
 4165,
 9689,
 6117,
 1642,
 2410,
 1646,
 751,
 5616,
 4337,
 1405,
 7934)

In [20]:
E[917]

(3590, 5191, 6379, 6061, 6223, 4880, 8081, 10256, 6235)

In [86]:
# Snapshot of E taken before the filtering below.
# NOTE(review): EE is not referenced again in the cells visible here — confirm it is still needed.
EE = E.copy()

In [61]:
E

stop_id
263      (264, 1937, 10140, 6826, 3631, 9653, 6457, 210...
264      (6944, 4165, 6117, 263, 4968, 1642, 6826, 7015...
265            (4995, 7942, 3463, 10375, 7466, 8013, 5177)
266          (7076, 4040, 267, 5109, 375, 376, 7641, 7773)
267          (7076, 4040, 266, 5109, 375, 376, 7641, 7773)
                               ...                        
24418    (24416, 2976, 24417, 24420, 1061, 24421, 688, ...
24419    (7809, 7690, 7703, 4891, 8991, 8223, 3886, 196...
24420    (7809, 7690, 4891, 8991, 8223, 3886, 1966, 688...
24421    (7809, 7690, 4891, 8991, 8223, 1061, 1966, 388...
24422    (1060, 5188, 1926, 9382, 4779, 7984, 8435, 376...
Name: neighbors, Length: 8901, dtype: object

In [65]:
test = E[917]

In [84]:
# All stop ids known to the stop table. reset_index() makes this work whether
# stop_id is still a column (notebook order: the set_index cell comes later) or is
# already the index (cell re-run after set_index); `stops.index.values` only
# returned stop ids in the latter case.
stop_index = stops.reset_index()['stop_id'].unique()

In [75]:
# Spot check: every neighbor of the sample stop is a known stop id.
# Uses stop_index; the previous name `index` is not defined on a fresh kernel.
np.all(np.isin(test, stop_index))

True

In [83]:
# Per-neighbor membership mask for stop 263 against the known stop ids.
# Uses stop_index; the previous name `index` is not defined on a fresh kernel.
np.isin(E[263], stop_index)

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True])

In [92]:
# Keep only neighbors that appear in stop_index, reporting every stop whose
# neighbor tuple had to be trimmed.
# Iterate over a snapshot because E is updated inside the loop.
for stop_id, neighbors in list(E.items()):
    known = np.isin(neighbors, stop_index)
    if np.all(known):
        continue
    # Build the filtered list in a single pass. The previous version removed items
    # from the list it was iterating over, which skips the element that follows
    # each removal and can leave unknown ids behind.
    kept = [n_id for n_id, ok in zip(neighbors, known) if ok]
    print(stop_id)
    print(neighbors)
    print(kept)
    E[stop_id] = tuple(kept)

In [91]:
# Persist the per-stop neighbor tuples (Series keyed by stop_id).
pd.to_pickle(E, 'data/model/list_of_neighbors.pickle')

---
### Create W, the DataFrame containing the consolidated list of weights from both walking and transit methods

In [22]:
W = pd.DataFrame()

In [23]:
# Declare the expected columns on the empty frame.
# Note: W is rebound to a plain list a few cells further down, so this frame only
# sketches the schema.
for column, default in (('stop_id', 0), ('next_stop_id', 0), ('duration', 0), ('transit', False)):
    W[column] = default

In [24]:
W

Unnamed: 0,stop_id,next_stop_id,duration,transit


In [25]:
# One placeholder row [stop, neighbor, duration=0, transit=False] for every
# (stop, neighbor) pair listed in E.
W = [[stop, neighbor, 0, False] for stop in stop_list for neighbor in E[stop]]

In [26]:
W = pd.DataFrame(W, columns = ['stop_id', 'next_stop_id', 'duration', 'transit'])

In [27]:
# Key each row by its (origin, destination) pair.
W = W.set_index(keys = ['stop_id', 'next_stop_id'])

In [28]:
W

Unnamed: 0_level_0,Unnamed: 1_level_0,duration,transit
stop_id,next_stop_id,Unnamed: 2_level_1,Unnamed: 3_level_1
263,264,0,False
263,1937,0,False
263,10140,0,False
263,6826,0,False
263,3631,0,False
...,...,...,...
24422,7984,0,False
24422,8435,0,False
24422,3764,0,False
24422,2102,0,False


In [29]:
# Index the stop table by stop_id for the coordinate lookups that follow.
# Guarded so that re-running this cell does not raise KeyError once stop_id has
# already been moved into the index.
if stops.index.name != 'stop_id':
    stops = stops.set_index('stop_id')

In [30]:
stops

Unnamed: 0_level_0,stop_code,stop_name,stop_lat,stop_lon
stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
263,929,Davenport Rd at Bedford Rd,43.674448,-79.399659
264,940,Davenport Rd at Dupont St,43.675511,-79.401938
265,1871,Davisville Ave at Cleveland St,43.702088,-79.378112
266,11700,Disco Rd at Attwell Dr,43.701362,-79.594843
267,3478,Disco Rd at Attwell Dr,43.701043,-79.595806
...,...,...,...,...
24418,16031,Gerrard St East at River St,43.663948,-79.358666
24419,16032,Broadview Ave at Gerrard St East North Side,43.665766,-79.352651
24420,16033,Jack Layton Way at St Matthews Rd,43.665695,-79.354843
24421,16034,St Matthews Rd at Gerrard St East,43.664960,-79.355329


In [31]:
# Fill in walking durations: |dlat| + |dlon| between the two stops, divided by f.
for stop in w_n.index.values:
    s_lat = w_n.loc[stop, 'stop_lat']
    s_lon = w_n.loc[stop, 'stop_lon']
    for neighbor in w_n.loc[stop, 'walk']:
        # Skip the stop itself instead of calling list.remove(stop), which raised
        # ValueError when the stop was missing from its own walk tuple and left a
        # self-edge behind if it was listed more than once.
        if neighbor == stop:
            continue
        n_lat = stops.loc[neighbor, 'stop_lat']
        n_lon = stops.loc[neighbor, 'stop_lon']
        distance = abs(s_lat - n_lat) + abs(s_lon - n_lon)
        duration = distance / f
        W.loc[(stop, neighbor), 'duration'] = duration

In [32]:
# Overlay the transit edges: their duration replaces the value stored for the pair
# and the pair is flagged as a transit connection.
for edge in t_w.index:
    W.loc[edge, 'duration'] = t_w.loc[edge, 'duration']
    W.loc[edge, 'transit'] = True

In [33]:
W.loc[917]

Unnamed: 0_level_0,duration,transit
next_stop_id,Unnamed: 1_level_1,Unnamed: 2_level_1
3590,472.262134,False
5191,33.0,True
6379,456.071491,False
6061,362.642369,False
6223,395.654459,False
4880,328.018223,False
8081,35.25495,False
10256,131.768004,False
6235,433.642895,False


In [34]:
W.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 107904 entries, (263, 264) to (24422, 1593)
Data columns (total 2 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   duration  107904 non-null  float64
 1   transit   107904 non-null  bool   
dtypes: bool(1), float64(1)
memory usage: 6.0 MB


In [35]:
# Persist the consolidated edge table: `duration` and `transit` keyed by (stop_id, next_stop_id).
pd.to_pickle(W, 'data/model/weights.pickle')