# Step 3: Floods Cap Haitien GTFS Accessibility Analysis, Flooded Scenario
Running travel time to closest facility for health facilities, schools, and markets

In [905]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [906]:
import osmnx as ox
import pandas as pd
import geopandas as gpd
import networkx as nx
import numpy as np
from shapely.geometry import Point

In [907]:
print(gpd.__version__)

0.9.0


In [908]:
import os, sys

In [909]:
# Get reference to GOSTNets
import sys
sys.path.append(r'C:\repos\GOSTnets')
import GOSTnets as gn

### Load graphs

In [910]:
# original graph
original_G = nx.read_gpickle(r"temp\flooded_clipped_cap_haitien_walk_w_ferries_via_osmnx.pickle")

In [911]:
G_walk_flooded = nx.read_gpickle(r"temp\flooded4_clipped_cap_haitien_walk_w_ferries_via_osmnx_w_time_adv_snap.pickle")

## When the G_walk_flooded graph was merged with the GTFS feed there were two spots on the map where the GTFS feed is in a flooded area. The merged G_walk_flooded and GTFS output from Step 2 was loaded into QGIS and saved as a shapefile, then the edges that intersected a flood zone were manually deleted. Then the shapefile was saved as a CSV edges file. We will now read the CSV edges file and the merged G_walk_flooded and GTFS nodes CSV and build a new graph.

In [912]:
fpath = r"temp\modded_4_G_walk_flooded_adv_snap_cap_haitien_gtfs_merged_impute_walk_service0001_edges.csv"
fpath_nodes = r"temp\4_G_walk_flooded_adv_snap_cap_haitien_gtfs_merged_impute_walk_service0001_nodes.csv"

### The edges_and_nodes_csv_to_graph function needs to be used (not the 'combo_csv_to_graph' function) because we need the 'x' and 'y' values from the nodes CSV to populate the attributes for the nodes in the graph. Not all of the edges have a geometry field, this is why we need this.

In [913]:
G_walk_flooded_w_gtfs = gn.edges_and_nodes_csv_to_graph(fpath_nodes, fpath, u_tag = 'stnode', v_tag = 'endnode', geometry_tag = 'WKT', largest_G = False)

  if (await self.run_code(code, result,  async_=asy)):


In [914]:
gn.example_node(G_walk_flooded_w_gtfs,5)

(330725346, {'Unnamed: 0': 0, 'y': 19.7546605, 'boarding_cost': 0.0, 'x': -72.204247, 'highway': nan, 'modes': nan, 'street_count': 4.0, 'geometry': 'POINT (-72.204247 19.7546605)'})
(614908804, {'Unnamed: 0': 1, 'y': 19.7545136, 'boarding_cost': 0.0, 'x': -72.2038608, 'highway': nan, 'modes': nan, 'street_count': 4.0, 'geometry': 'POINT (-72.2038608 19.7545136)'})
(614908783, {'Unnamed: 0': 2, 'y': 19.7548238, 'boarding_cost': 0.0, 'x': -72.2046789, 'highway': nan, 'modes': nan, 'street_count': 3.0, 'geometry': 'POINT (-72.2046789 19.7548238)'})
(616793092, {'Unnamed: 0': 3, 'y': 19.7542363, 'boarding_cost': 0.0, 'x': -72.2044334, 'highway': nan, 'modes': nan, 'street_count': 4.0, 'geometry': 'POINT (-72.2044334 19.7542363)'})
(9990000828, {'Unnamed: 0': 2716, 'y': 19.75498165089413, 'boarding_cost': 0.0, 'x': -72.20411588826202, 'highway': 'projected_pap', 'modes': nan, 'street_count': nan, 'geometry': 'POINT (-72.20411588826202 19.75498165089413)'})


In [915]:
graphs = {'G_walk_flooded': G_walk_flooded, 'G_walk_flooded_w_gtfs': G_walk_flooded_w_gtfs}

## Load Data

In [916]:
# load destinations
health_destinations = gpd.read_file(r"input_folder\clipped_cap_haitien_health_pts2.shp")
school_destinations = gpd.read_file(r"input_folder\clipped_cap_haitien_schools_DPCE_20180709.shp")
market_destinations = gpd.read_file(r"input_folder\clipped_cap_haitien_markets.shp")
#shops_and_amenities = gpd.read_file(r"output_folder\osm_infrastructure\osm_shops_and_amenities.shp")

In [917]:
#destinations_dict = {'market_destinations': market_destinations}
destinations_dict = {'health_destinations': health_destinations, 'school_destinations': school_destinations, 'market_destinations': market_destinations}

### Load Origins this way because using Advanced Snapping

In [918]:
# read in origin_nodes
originNodes = gpd.read_file(r"temp/flooded_clipped_origin_nodes_walk4.csv", GEOM_POSSIBLE_NAMES="geometry", KEEP_GEOM_COLUMNS="NO")

  for feature in features_lst:


In [919]:
originNodes

Unnamed: 0,field_1,node_ID,VALUE,geometry
0,2,1110000002,14.6941843,POINT (-72.24083 19.78750)
1,3,1110000003,14.85298538,POINT (-72.24000 19.78750)
2,4,1110000004,5.29464912,POINT (-72.23917 19.78750)
3,5,1110000005,4.81778097,POINT (-72.23833 19.78750)
4,6,1110000006,4.59485197,POINT (-72.23750 19.78750)
...,...,...,...,...
6603,13452,1110013452,6.29746342,POINT (-72.16500 19.68750)
6604,13453,1110013453,5.65966034,POINT (-72.16417 19.68750)
6605,13454,1110013454,4.99599981,POINT (-72.16333 19.68750)
6606,13456,1110013456,4.72098398,POINT (-72.16167 19.68750)


In [920]:
originNodes['node_ID'] = pd.to_numeric(originNodes['node_ID'])
originNodes['VALUE'] = pd.to_numeric(originNodes['VALUE'])

In [921]:
originNodes_list = list(originNodes['node_ID'])

In [922]:
originNodes_list

[1110000002,
 1110000003,
 1110000004,
 1110000005,
 1110000006,
 1110000015,
 1110000016,
 1110000017,
 1110000018,
 1110000019,
 1110000020,
 1110000021,
 1110000022,
 1110000023,
 1110000039,
 1110000040,
 1110000041,
 1110000042,
 1110000043,
 1110000044,
 1110000045,
 1110000046,
 1110000047,
 1110000048,
 1110000049,
 1110000050,
 1110000065,
 1110000066,
 1110000067,
 1110000068,
 1110000069,
 1110000070,
 1110000071,
 1110000072,
 1110000073,
 1110000074,
 1110000075,
 1110000076,
 1110000077,
 1110000078,
 1110000079,
 1110000080,
 1110000081,
 1110000103,
 1110000104,
 1110000105,
 1110000106,
 1110000107,
 1110000108,
 1110000109,
 1110000110,
 1110000111,
 1110000112,
 1110000113,
 1110000114,
 1110000115,
 1110000116,
 1110000117,
 1110000118,
 1110000119,
 1110000120,
 1110000121,
 1110000122,
 1110000123,
 1110000124,
 1110000125,
 1110000126,
 1110000127,
 1110000155,
 1110000156,
 1110000157,
 1110000158,
 1110000159,
 1110000160,
 1110000161,
 1110000162,
 1110000163,

In [923]:
len(originNodes_list)

6608

### Test one of the graphs to see if I get any fail values

In [924]:
snapped_destinations = gn.pandana_snap(original_G, school_destinations, source_crs = 'epsg:4326', target_crs = 'epsg:32619')
destinationsNodes = list(snapped_destinations['NN'].unique())

In [925]:
# Calculate OD Matrix
OD_matrix = gn.calculate_OD(G_walk_flooded, originNodes_list, destinationsNodes, fail_value=-1, weight='length')

In [926]:
OD_matrix

array([[ 8293.48197231, 13823.32849887,  2704.29938726, ...,
        10670.63698807,  9044.64072461, 13993.86326815],
       [ 8205.99287156, 13735.83939811,  2616.8102865 , ...,
        10583.14788731,  8957.15162385, 13906.37416739],
       [ 8137.03346011, 13666.87998667,  2547.85087506, ...,
        10514.18847587,  8888.19221241, 13837.41475595],
       ...,
       [12574.66483031,  7120.46464197, 21785.78459309, ...,
        10596.93817343, 11890.04195258,  6942.03711785],
       [12335.70065361,  6881.50046527, 21546.82041639, ...,
        10357.97399673, 11651.07777588,  6703.07294115],
       [12321.48687764,  6867.2866893 , 21532.60664042, ...,
        10343.76022075, 11636.86399991,  6688.85916518]])

In [927]:
type(OD_matrix)

numpy.ndarray

In [928]:
np.where(OD_matrix < 0)

(array([], dtype=int64), array([], dtype=int64))

In [929]:
print(np.count_nonzero(OD_matrix <0))

0


In [930]:
np.mean(OD_matrix)

7244.352407175028

In [931]:
OD_matrix.min(axis=1)

array([926.77265848, 839.28355772, 770.32414628, ..., 636.01687747,
       723.25368485, 709.03990888])

In [932]:
np.mean(OD_matrix.min(axis=1)) / 60

16.08482398417933

## tests to weigh travel times by population

In [933]:
OD_df = pd.DataFrame(OD_matrix, columns = destinationsNodes, index = originNodes_list)

In [934]:
OD_df

Unnamed: 0,614882981,619785271,2413706127,8886785154,619322127,7101624518,616806362,616773664,619304918,7340142826,...,623954708,623954674,2293738946,2277664896,623954794,620901904,2313555229,617013318,616811956,2840492078
1110000002,8293.481972,13823.328499,2704.299387,6850.525644,11183.871463,7499.513367,8466.191571,10939.926100,12656.016358,8757.526195,...,19431.850144,18998.702989,21356.404639,12958.817892,17211.593902,18609.591708,22591.074309,10670.636988,9044.640725,13993.863268
1110000003,8205.992872,13735.839398,2616.810287,6763.036543,11096.382362,7412.024266,8378.702470,10852.436999,12568.527257,8670.037094,...,19344.361043,18911.213888,21268.915538,12871.328791,17124.104802,18522.102608,22503.585209,10583.147887,8957.151624,13906.374167
1110000004,8137.033460,13666.879987,2547.850875,6694.077132,11027.422950,7343.064855,8309.743059,10783.477588,12499.567846,8601.077683,...,19275.401631,18842.254477,21199.956127,12802.369380,17055.145390,18453.143196,22434.625797,10514.188476,8888.192212,13837.414756
1110000005,8064.282660,13594.129187,2475.100075,6621.326332,10954.672150,7270.314055,8236.992259,10710.726788,12426.817046,8528.326883,...,19202.650832,18769.503677,21127.205327,12729.618580,16982.394590,18380.392396,22361.874997,10441.437676,8815.441413,13764.663956
1110000006,8054.715761,13584.562288,2465.533176,6611.759433,10945.105251,7260.747156,8227.425360,10701.159889,12417.250147,8518.759984,...,19193.083932,18759.936778,21117.638428,12720.051681,16972.827691,18370.825497,22352.308098,10431.870777,8805.874513,13755.097057
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1110013452,12691.868210,7237.668021,21902.987972,14468.311174,12831.641390,13510.399678,12546.554541,12587.696027,14303.786285,13102.303541,...,21079.620071,20646.472916,23004.174566,14606.587819,18859.363829,14236.332337,4851.153655,10714.141553,12007.245332,7059.240497
1110013453,12656.951944,7202.751756,21868.071707,14433.394908,12796.725124,13475.483413,12511.638276,12552.779762,14268.870020,13067.387275,...,21044.703806,20611.556651,22969.258301,14571.671554,18824.447564,14201.416072,4816.237390,10679.225287,11972.329067,7024.324232
1110013454,12574.664830,7120.464642,21785.784593,14351.107794,12714.438010,13393.196299,12429.351162,12470.492648,14186.582906,12985.100161,...,20962.416692,20529.269537,22886.971187,14489.384440,18742.160450,14119.128958,4733.950276,10596.938173,11890.041953,6942.037118
1110013456,12335.700654,6881.500465,21546.820416,14112.143618,12475.473834,13154.232122,12190.386986,12231.528471,13947.618729,12746.135985,...,20723.452515,20290.305360,22648.007010,14250.420263,18503.196274,13880.164781,4494.986099,10357.973997,11651.077776,6703.072941


In [935]:
closest_facility_per_origin = OD_matrix.min(axis=1)
results = pd.DataFrame([originNodes_list, closest_facility_per_origin]).transpose()
colName = "travel_time_to_closest_facility"
results.columns = ['NN', colName]

In [936]:
results

Unnamed: 0,NN,travel_time_to_closest_facility
0,1.110000e+09,926.772658
1,1.110000e+09,839.283558
2,1.110000e+09,770.324146
3,1.110000e+09,697.573346
4,1.110000e+09,688.006447
...,...,...
6603,1.110013e+09,753.220257
6604,1.110013e+09,718.303991
6605,1.110013e+09,636.016877
6606,1.110013e+09,723.253685


In [937]:
originNodes

Unnamed: 0,field_1,node_ID,VALUE,geometry
0,2,1110000002,14.694184,POINT (-72.24083 19.78750)
1,3,1110000003,14.852985,POINT (-72.24000 19.78750)
2,4,1110000004,5.294649,POINT (-72.23917 19.78750)
3,5,1110000005,4.817781,POINT (-72.23833 19.78750)
4,6,1110000006,4.594852,POINT (-72.23750 19.78750)
...,...,...,...,...
6603,13452,1110013452,6.297463,POINT (-72.16500 19.68750)
6604,13453,1110013453,5.659660,POINT (-72.16417 19.68750)
6605,13454,1110013454,4.996000,POINT (-72.16333 19.68750)
6606,13456,1110013456,4.720984,POINT (-72.16167 19.68750)


In [938]:
results['travel_time_to_closest_facility'] * originNodes['VALUE']

0       13618.168248
1       12465.866413
2        4078.596063
3        3360.755594
4        3161.287780
            ...     
6603     4743.377014
6604     4065.356613
6605     3177.540199
6606     3414.469060
6607     3244.752392
Length: 6608, dtype: float64

In [939]:
(results['travel_time_to_closest_facility'] * originNodes['VALUE']).sum()

101243916.389462

In [940]:
# weighted average
#https://pbpython.com/weighted-average.html
(results['travel_time_to_closest_facility'] * originNodes['VALUE']).sum() / originNodes['VALUE'].sum() / 60

6.096887969222319

#### For each destination for each type of graph do an accessibility analysis
We are going to snap destinations to the original graph because we don't want the destinations snapping to a newly created origin node or to a GTFS line in the merged graph. We are also building statistics tables.

### comment on analysis output: Compared to the non-flooded version. The GTFS flooded averages are faster. This is because the flooded network is smaller than the non-flooded network, and only origin nodes are included that are within 800 meters of the nearest node. Therefore the origin nodes that were too far away are not being included.

### Also, even though only a difference in 2,883 in population, the average travel time is not weighted by population, that is why there is a bigger difference...

In [942]:
d2 = {'label_graph':[],'label_destination':[],'avg_travel_time_minutes':[]}

for G in graphs.items():
    for destination in destinations_dict.items():
        d = {'label':[],'population':[]}
        # snap the destinations to the road graph
        snapped_destinations = gn.pandana_snap(original_G, destination[1], source_crs = 'epsg:4326', target_crs = 'epsg:32619')
        destinationsNodes = list(snapped_destinations['NN'].unique())
        #print("print destinationsNodes")
        #print(destinationsNodes)
        
        #check destinationsNodes in graph
        
        # Calculate OD Matrix
        OD_matrix = gn.calculate_OD(G[1], originNodes_list, destinationsNodes, fail_value=-1, weight='length')
        
        if np.count_nonzero(OD_matrix < 0) > 1:
            print(f"warning: number of failed routes is: {np.count_nonzero(OD_matrix <0)}")
        
        # calculate accessibility
        # For each row, the closest facility is the smallest value in the row
        closest_facility_per_origin = OD_matrix.min(axis=1)
        results = pd.DataFrame([originNodes_list, closest_facility_per_origin]).transpose()
        colName = "travel_time_to_closest_facility"
        results.columns = ['node_ID', colName]
        
        # weighted average
        #https://pbpython.com/weighted-average.html
        avg_trip_time_weighted = (results['travel_time_to_closest_facility'] * originNodes['VALUE']).sum() / originNodes['VALUE'].sum()
        avg_trip_time = results['travel_time_to_closest_facility'].mean()
        
        #avg_trip_time = np.mean(closest_facility_per_origin)
        print(f"weighted average trip time for closest {destination[0]} with the {G[0]} graph is: {avg_trip_time_weighted/60} minutes")
        print(f"average trip time for closest {destination[0]} with the {G[0]} graph is: {avg_trip_time/60} minutes")
        
        output = originNodes.copy()
        output = pd.merge(output, results, on="node_ID")
        # convert travel_time_to_closest_facility to number
        output["travel_time_to_closest_facility"] = pd.to_numeric(output["travel_time_to_closest_facility"])
        output["trav_t_min"] = output["travel_time_to_closest_facility"] / 60
                
        # build statistics table
        
        d2['label_graph'].append(f'{G[0]}')
        d2['label_destination'].append(f'{destination[0]}')
        d2['avg_travel_time_minutes'].append(f'{avg_trip_time_weighted/60}')
        
        # 0-15min
        print("0-15min")
        print(output[output['travel_time_to_closest_facility'] <= 900]['VALUE'].sum())
        d['label'].append('0-15min population sum')
        d['population'].append(output[output['travel_time_to_closest_facility'] <= 900]['VALUE'].sum())
        # 15-30min
        print("15-30min")
        print(output[(output['travel_time_to_closest_facility'] > 900) & (output['travel_time_to_closest_facility'] <= 1800)]['VALUE'].sum())
        d['label'].append('15-30min population sum')
        d['population'].append(output[(output['travel_time_to_closest_facility'] > 900) & (output['travel_time_to_closest_facility'] <= 1800)]['VALUE'].sum())
        # 30-45min
        print("30-45min")
        print(output[(output['travel_time_to_closest_facility'] > 1800) & (output['travel_time_to_closest_facility'] <= 2700)]['VALUE'].sum())
        d['label'].append('30-45min')
        d['population'].append(output[(output['travel_time_to_closest_facility'] > 1800) & (output['travel_time_to_closest_facility'] <= 2700)]['VALUE'].sum())
        # 45-60min
        print("45-60min population sum:")
        print(output[(output['travel_time_to_closest_facility'] > 2700) & (output['travel_time_to_closest_facility'] <= 3600)]['VALUE'].sum())
        d['label'].append('45-60min')
        d['population'].append(output[(output['travel_time_to_closest_facility'] > 2700) & (output['travel_time_to_closest_facility'] <= 3600)]['VALUE'].sum())
        # 60min or greater
        print("60min or greater")
        print(output[(output['travel_time_to_closest_facility'] > 3600)]['VALUE'].sum())
        d['label'].append('60min or greater population sum')
        d['population'].append(output[(output['travel_time_to_closest_facility'] > 3600)]['VALUE'].sum())
        
        d_df = pd.DataFrame(d)
        d_df['population_pct'] = round(d_df.population / d_df.population.sum() * 100,2)
        
        # save a CSV of Table
        d_df.to_csv(fr"output_folder\flooded4_clipped_table_cap_haitien_accessibility_adv_snap_{destination[0]}_{G[0]}.csv")
        
        # save a shapefile...
        destinations_gpd = gpd.GeoDataFrame(output, crs = "epsg:4326", geometry = 'geometry')
        destinations_gpd.to_file(fr"output_folder\flooded4_clipped_cap_haitien_accessibility_adv_snap_{destination[0]}_{G[0]}.shp")
        
d_df2 = pd.DataFrame(d2)
# save a CSV of Table
d_df2.to_csv(fr"output_folder\table_floods_cap_haitien_accessibility_adv_snap4.csv")


weighted average trip time for closest health_destinations with the G_walk_flooded graph is: 14.541676062836833 minutes
average trip time for closest health_destinations with the G_walk_flooded graph is: 23.304588071267474 minutes
0-15min
171935.62801662
15-30min
81411.05894509
30-45min
16858.21601945
45-60min population sum:
5496.2677661
60min or greater
1062.7479944299998


  destinations_gpd.to_file(fr"output_folder\flooded4_clipped_cap_haitien_accessibility_adv_snap_{destination[0]}_{G[0]}.shp")


weighted average trip time for closest school_destinations with the G_walk_flooded graph is: 6.096887969222319 minutes
average trip time for closest school_destinations with the G_walk_flooded graph is: 16.08482398417933 minutes
0-15min
249714.84537032997
15-30min
20483.57837628
30-45min
5129.8021159
45-60min population sum:
1283.24837717
60min or greater
152.44450201


  destinations_gpd.to_file(fr"output_folder\flooded4_clipped_cap_haitien_accessibility_adv_snap_{destination[0]}_{G[0]}.shp")


weighted average trip time for closest market_destinations with the G_walk_flooded graph is: 44.9095102685568 minutes
average trip time for closest market_destinations with the G_walk_flooded graph is: 60.307049548763054 minutes
0-15min
6675.879079820001
15-30min
48130.64125737
30-45min
104475.21112371
45-60min population sum:
72716.27844837
60min or greater
44765.90883242


  destinations_gpd.to_file(fr"output_folder\flooded4_clipped_cap_haitien_accessibility_adv_snap_{destination[0]}_{G[0]}.shp")


weighted average trip time for closest health_destinations with the G_walk_flooded_w_gtfs graph is: 12.669725761311858 minutes
average trip time for closest health_destinations with the G_walk_flooded_w_gtfs graph is: 20.96192028479575 minutes
0-15min
200822.97645091
15-30min
56437.13834292999
30-45min
14206.749404
45-60min population sum:
4338.28596912
60min or greater
958.76857473


  destinations_gpd.to_file(fr"output_folder\flooded4_clipped_cap_haitien_accessibility_adv_snap_{destination[0]}_{G[0]}.shp")


weighted average trip time for closest school_destinations with the G_walk_flooded_w_gtfs graph is: 5.803625415574818 minutes
average trip time for closest school_destinations with the G_walk_flooded_w_gtfs graph is: 14.727948065066386 minutes
0-15min
252444.48956688002
15-30min
18940.56583995
30-45min
4527.666905640001
45-60min population sum:
776.1728464
60min or greater
75.02358281999999


  destinations_gpd.to_file(fr"output_folder\flooded4_clipped_cap_haitien_accessibility_adv_snap_{destination[0]}_{G[0]}.shp")


weighted average trip time for closest market_destinations with the G_walk_flooded_w_gtfs graph is: 33.67790299980872 minutes
average trip time for closest market_destinations with the G_walk_flooded_w_gtfs graph is: 46.52702861561093 minutes
0-15min
8619.08599091
15-30min
122501.97048064001
30-45min
105083.47470001
45-60min population sum:
25101.022774279998
60min or greater
15458.364795849999


  destinations_gpd.to_file(fr"output_folder\flooded4_clipped_cap_haitien_accessibility_adv_snap_{destination[0]}_{G[0]}.shp")
