# Step 3: Cap Haitien GTFS Accessibility Analysis
## Economic Indicators

In [119]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [120]:
import osmnx as ox
import pandas as pd
import geopandas as gpd
import networkx as nx
import numpy as np
from shapely.geometry import Point

In [121]:
import os, sys

In [122]:
# Get reference to GOSTNets
import sys
sys.path.append(r'C:\repos\GOSTnets')
import GOSTnets as gn

In [123]:
# read back your graph from step 1 from you saved pickle
G = nx.read_gpickle(r"C:\Users\war-machine\Documents\world_bank_work\haiti_gtfs_project\output_gtfs_graph_24hr_frame\gtfs_export_cap_haitien_merged_impute_walk_v4_service0002.pickle")

In [124]:
print(nx.info(G))

Name: 
Type: MultiDiGraph
Number of nodes: 13194
Number of edges: 36058
Average in degree:   2.7329
Average out degree:   2.7329


In [125]:
list(G.edges)[0:15]

[(247844908, 3806054858, 0),
 (330521785, 2353905402, 0),
 (330521785, 330540551, 0),
 (330521785, 330567999, 0),
 (330522696, 330569698, 0),
 (330522696, 3875208374, 0),
 (330522696, 3875208369, 0),
 (330523218, 3600503070, 0),
 (330523218, 2472620389, 0),
 (330523218, 2472620422, 0),
 (330523479, 330548415, 0),
 (330523479, 620028871, 0),
 (330523479, 2510265415, 0),
 (330523638, 2314667832, 0),
 (330523638, 2306542320, 0)]

## Identify only the largest graph

In [126]:
# compatible with NetworkX 2.4
list_of_subgraphs = list(G.subgraph(c).copy() for c in nx.weakly_connected_components(G))
max_graph = None
max_edges = 0
for i in list_of_subgraphs:
    if i.number_of_edges() > max_edges:
        max_edges = i.number_of_edges()
        max_graph = i

# set your graph equal to the largest sub-graph
G_largest = max_graph

In [127]:
# print info about the largest sub-graph
print(nx.info(G_largest))

Name: 
Type: MultiDiGraph
Number of nodes: 13194
Number of edges: 36058
Average in degree:   2.7329
Average out degree:   2.7329


In [128]:
# inspect
# gn.save(G_largest,"gtfs_export_cap_haitian_merged_impute_walk_largest",r"C:\Users\war-machine\Documents\world_bank_work\haiti_gtfs_project\output_gtfs_graph_24hr_frame")

In [129]:
# insert origins
origins = gpd.read_file(r"C:\repos\GOST_PublicGoods\Implementations\gtfs_example\input_folder\cap_haitien_worldpop_pts2.shp")

In [130]:
# snap the origins to the road graph
snapped_origins = gn.pandana_snap(G, origins, source_crs = 'epsg:4326', target_crs = 'epsg:32619')

In [131]:
snapped_origins

Unnamed: 0,VALUE,geometry,NN,NN_dist
0,3.630365,POINT (-72.38833 19.81667),616755742,375.410681
1,3.881798,POINT (-72.38667 19.81667),616755742,351.747309
2,4.394933,POINT (-72.38250 19.81667),616755742,590.012777
3,4.479361,POINT (-72.38167 19.81667),616755742,662.445228
4,3.633273,POINT (-72.38833 19.81583),616755742,291.331109
...,...,...,...,...
182971,1.172181,POINT (-72.18667 19.49583),616038064,6766.523465
182972,1.175843,POINT (-72.18583 19.49583),616038064,6733.274830
182973,1.067793,POINT (-72.18500 19.49583),616038064,6701.005202
182974,1.201748,POINT (-72.18417 19.49583),616038064,6669.728790


In [132]:
# filter out origins with a distance more than 2km
snapped_origins_filtered = snapped_origins[snapped_origins.NN_dist < 2000]

In [133]:
snapped_origins_filtered

Unnamed: 0,VALUE,geometry,NN,NN_dist
0,3.630365,POINT (-72.38833 19.81667),616755742,375.410681
1,3.881798,POINT (-72.38667 19.81667),616755742,351.747309
2,4.394933,POINT (-72.38250 19.81667),616755742,590.012777
3,4.479361,POINT (-72.38167 19.81667),616755742,662.445228
4,3.633273,POINT (-72.38833 19.81583),616755742,291.331109
...,...,...,...,...
181500,8.905331,POINT (-71.74000 19.52250),2496045487,635.151029
181501,9.385510,POINT (-71.73917 19.52250),2496045487,583.872113
181593,8.597751,POINT (-71.74250 19.52167),2496045514,743.485636
181594,9.031757,POINT (-71.74167 19.52167),2496045514,753.712825


In [134]:
originNodes = list(snapped_origins_filtered['NN'].unique())

In [135]:
originNodes

[616755742,
 330739389,
 616556652,
 616556665,
 616553566,
 616556661,
 616553549,
 616556670,
 616553537,
 616556642,
 616556668,
 616722848,
 616722841,
 616722834,
 616722832,
 616722835,
 330748594,
 2322398723,
 2378164779,
 330728487,
 2378159627,
 7101639468,
 2378164775,
 2378159625,
 2378168605,
 2378168608,
 2378158461,
 763384845,
 2774734763,
 3158035816,
 2378159620,
 2225575669,
 330745690,
 2275968008,
 2225604782,
 620843684,
 330736645,
 330743585,
 2225575422,
 330747430,
 2225604715,
 620855485,
 620855507,
 330739265,
 620855481,
 620855480,
 7101671123,
 'XHQ66_stop_0176',
 2239959280,
 6647538635,
 2322396300,
 7101671182,
 616085737,
 2239959285,
 2239959278,
 'XHQ66_stop_0074',
 620855475,
 2239959268,
 2239959275,
 'XHQ66_stop_0036',
 330751393,
 2285106105,
 2413706138,
 620855473,
 620855476,
 2413706133,
 620855897,
 620855464,
 620855460,
 2322391135,
 330743292,
 2385795482,
 2275907685,
 620843229,
 7101671051,
 2240032201,
 2413706127,
 6733460167,
 620

In [136]:
# insert destinations
destinations = gpd.read_file(r"C:\repos\GOST_PublicGoods\Implementations\gtfs_example\output_folder\osm_infrastructure\osm_shops_and_amenities.shp")

In [137]:
# snap the destinations to the road graph
snapped_destinations = gn.pandana_snap(G, destinations, source_crs = 'epsg:4326', target_crs = 'epsg:32619')

In [138]:
snapped_destinations

Unnamed: 0,amenity,name,geometry,NN,NN_dist
0,,Digicel,POINT (-72.20152 19.76129),614923497,23.868344
1,,Kokiyaj Market,POINT (-72.19571 19.76432),1943288770,20.542019
2,,,POINT (-72.19690 19.76243),632753456,25.368709
3,,Kaina Auto Parts,POINT (-72.20864 19.74606),2308830942,16.645110
4,,,POINT (-72.21934 19.73902),XHQ66_stop_0203,11.556574
...,...,...,...,...,...
1322,pharmacy,,POINT (-72.22745 19.72820),2126717272,17.620091
1323,social_facility,Orphelinat Martin- Lutter,POINT (-71.83110 19.66323),2351342145,12.791557
1324,fuel,Station Saint Joseph de Fort Liberté,POINT (-71.82606 19.63550),3448864609,11.866230
1325,restaurant,,POINT (-72.25573 19.71959),6279224863,6.925306


In [139]:
destinationsNodes = list(snapped_destinations['NN'].unique())

## Calculate OD Matrix

In [140]:
OD_matrix = gn.calculate_OD(G, originNodes, destinationsNodes, fail_value=-1, weight='length')

In [141]:
OD_matrix

array([[16142.41726913, 16567.57361602, 16336.90561602, ...,
        22163.34669169, 14530.3493461 , 16414.55361602],
       [15415.02446913, 15840.18081602, 15609.51281602, ...,
        21435.95389169, 13802.9565461 , 15687.16081602],
       [15865.89886913, 16291.05521602, 16060.38721602, ...,
        21886.82829169, 14253.8309461 , 16138.03521602],
       ...,
       [ 8481.11204557,  8851.64374612,  8620.97574612, ...,
         3622.4217951 ,  9598.78800448,  8698.62374612],
       [ 8813.19604557,  9183.72774612,  8953.05974612, ...,
         3954.5057951 ,  9930.87200448,  9030.70774612],
       [ 8518.53444557,  8889.06614612,  8658.39814612, ...,
         3659.8441951 ,  9636.21040448,  8736.04614612]])

In [142]:
type(OD_matrix)

numpy.ndarray

In [143]:
OD_df = pd.DataFrame(OD_matrix, columns = destinationsNodes , index = originNodes)

In [144]:
OD_df

Unnamed: 0,614923497,1943288770,632753456,2308830942,XHQ66_stop_0203,616796838,2213085650,617193536,617193530,2339710105,...,2275907586,614882995,2473182596,331018289,619018153,XHQ66_stop_0238,2351342145,3448864609,6279224863,330749918
616755742,16142.417269,16567.573616,16336.905616,14959.541802,14719.484275,15477.381136,18095.252932,17919.908132,18018.681732,17731.186504,...,17249.335216,15888.104816,21486.175442,14488.952000,14090.323200,14597.973980,25073.143016,22163.346692,14530.349346,16414.553616
330739389,15415.024469,15840.180816,15609.512816,14232.149002,13992.091475,14749.988336,17367.860132,17192.515332,17291.288932,17003.793704,...,16521.942416,15160.712016,20758.782642,13761.559200,13362.930400,13870.581180,24345.750216,21435.953892,13802.956546,15687.160816
616556652,15865.898869,16291.055216,16060.387216,14683.023402,14442.965875,15200.862736,17818.734532,17643.389732,17742.163332,17454.668104,...,16972.816816,15611.586416,21209.657042,14212.433600,13813.804800,14321.455580,24796.624616,21886.828292,14253.830946,16138.035216
616556665,15811.186869,16236.343216,16005.675216,14628.311402,14388.253875,15146.150736,17764.022532,17588.677732,17687.451332,17399.956104,...,16918.104816,15556.874416,21154.945042,14157.721600,13759.092800,14266.743580,24741.912616,21832.116292,14199.118946,16083.323216
616553566,15623.132469,16048.288816,15817.620816,14440.257002,14200.199475,14958.096336,17575.968132,17400.623332,17499.396932,17211.901704,...,16730.050416,15368.820016,20966.890642,13969.667200,13571.038400,14078.689180,24553.858216,21644.061892,14011.064546,15895.268816
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2496045465,8479.614446,8850.146146,8619.478146,8104.851681,8320.965535,8009.446446,7979.713488,7804.368688,7903.142288,7615.647060,...,9531.907746,8170.677346,3501.236800,13491.345571,13092.716771,8402.365891,6530.720519,3620.924195,9597.290404,8697.126146
2496352753,8535.999246,8906.530946,8675.862946,8161.236481,8377.350335,8065.831246,8036.098288,7860.753488,7959.527088,7672.031860,...,9588.292546,8227.062146,3557.621600,13547.730371,13149.101571,8458.750691,6587.105319,3677.308995,9653.675204,8753.510946
2496045460,8481.112046,8851.643746,8620.975746,8106.349281,8322.463135,8010.944046,7981.211088,7805.866288,7904.639888,7617.144660,...,9533.405346,8172.174946,3502.734400,13492.843171,13094.214371,8403.863491,6532.218119,3622.421795,9598.788004,8698.623746
2496352697,8813.196046,9183.727746,8953.059746,8438.433281,8654.547135,8343.028046,8313.295088,8137.950288,8236.723888,7949.228660,...,9865.489346,8504.258946,3834.818400,13824.927171,13426.298371,8735.947491,6864.302119,3954.505795,9930.872004,9030.707746


In [145]:
# 60 minute threshold
threshold_in_sec = 60 * 60
threshold_in_sec

1800

## calculate accessibility
number of economic indicators within 30 minutes or less

In [146]:
accessibility_measure = OD_df[OD_df <= threshold_in_sec].count(axis=1)

In [147]:
len(list(accessibility_measure))

8887

In [148]:
results = pd.DataFrame([originNodes, list(accessibility_measure)]).transpose()
colName = "accessibility_measure"
results.columns = ['NN', colName]
results

Unnamed: 0,NN,accessibility_measure
0,616755742,0
1,330739389,0
2,616556652,0
3,616556665,0
4,616553566,0
...,...,...
8882,2496045465,0
8883,2496352753,0
8884,2496045460,0
8885,2496352697,0


In [149]:
output = snapped_origins_filtered.copy()
output = pd.merge(output, results, on="NN")
output

Unnamed: 0,VALUE,geometry,NN,NN_dist,accessibility_measure
0,3.630365,POINT (-72.38833 19.81667),616755742,375.410681,0
1,3.881798,POINT (-72.38667 19.81667),616755742,351.747309,0
2,4.394933,POINT (-72.38250 19.81667),616755742,590.012777,0
3,4.479361,POINT (-72.38167 19.81667),616755742,662.445228,0
4,3.633273,POINT (-72.38833 19.81583),616755742,291.331109,0
...,...,...,...,...,...
162507,8.280589,POINT (-71.75333 19.52500),2496352697,209.087792,0
162508,7.810264,POINT (-71.75250 19.52417),2496352697,138.256164,0
162509,8.254992,POINT (-71.75250 19.52500),2496352745,149.820614,0
162510,8.222400,POINT (-71.75167 19.52500),2496352745,63.375339,0


In [150]:
#convert travel_time_to_closest_facility to number
output["accessibility_measure"] = pd.to_numeric(output["accessibility_measure"])

In [151]:
# save a shapefile...
destinations_gpd = gpd.GeoDataFrame(output, crs = "epsg:4326", geometry = 'geometry')
destinations_gpd.to_file(r"C:\repos\GOST_PublicGoods\Implementations\gtfs_example\output_folder\cap_haitien_accessibility_economic.shp")

## Testing: find a shortest path

In [152]:
nx.dijkstra_path(G, 763384845, 2240032201, weight='length')

[763384845, 2225575422, 616085737, 6970559909, 2240032154, 2240032201]

In [153]:
# Look up route cost via graph
nx.dijkstra_path_length(G, 763384845, 2240032201, weight='length')

969.01952

In [154]:
nx.dijkstra_path(G, 629215322, 2333192306, weight='length')

[629215322,
 'XHQ66_stop_0150',
 'XHQ66_stop_0104',
 'XHQ66_stop_0121',
 2333192291,
 2333192306]

In [155]:
nx.dijkstra_path(G, 2333192306,629215322, weight='length')

[2333192306,
 2333192291,
 'XHQ66_stop_0200',
 'XHQ66_stop_0156',
 'XHQ66_stop_0123',
 'XHQ66_stop_0004',
 'XHQ66_stop_0041',
 629215322]