# Step 3: Cap Haitien GTFS Accessibility Analysis
Running travel time to closest facility for health facilities, schools, and markets

In [449]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [450]:
import osmnx as ox
import pandas as pd
import geopandas as gpd
import networkx as nx
import numpy as np
from shapely.geometry import Point

In [451]:
import os, sys

In [452]:
# Get reference to GOSTNets
import sys
sys.path.append(r'C:\repos\GOSTnets')
import GOSTnets as gn

### Load graphs

In [453]:
# original graph
original_G = nx.read_gpickle(r"temp\cap_haitien_walk_w_ferries_via_osmnx.pickle")

In [454]:
# read back your graphs from step 2 from your saved pickle
G_service0001 = nx.read_gpickle(r"temp\gtfs_export_cap_haitien_merged_impute_walk_adv_snap_service0001.pickle")
G_service0002 = nx.read_gpickle(r"temp\gtfs_export_cap_haitien_merged_impute_walk_adv_snap_service0002.pickle")

In [455]:
#print(nx.info(G))

In [456]:
#list(G.edges)[0:15]

In [457]:
graphs = {'G_service0001': G_service0001, 'G_service0002': G_service0002}
#graphs = {'G_service0002': G_service0002}

## Load Data

In [458]:
# load destinations
health_destinations = gpd.read_file(r"input_folder\cap_haitien_health_pts.shp")
school_destinations = gpd.read_file(r"input_folder\cap_haitien_schools_DPCE_20180709.shp")
market_destinations = gpd.read_file(r"input_folder\cap_haitien_markets.shp")
shops_and_amenities = gpd.read_file(r"output_folder\osm_infrastructure\osm_shops_and_amenities.shp")

In [459]:
destinations_dict = {'health_destinations': health_destinations, 'school_destinations': school_destinations, 'market_destinations': market_destinations}
#destinations_dict = {'market_destinations': market_destinations}
#destinations_dict = {'shops_and_amenities': shops_and_amenities}

### Load Origins this way because using Advanced Snapping

In [460]:
# read in origin_nodes
originNodes = gpd.read_file(r"temp/origin_nodes.csv", GEOM_POSSIBLE_NAMES="geometry", KEEP_GEOM_COLUMNS="NO")

In [461]:
originNodes['node_ID'] = pd.to_numeric(originNodes['node_ID'])
originNodes['VALUE'] = pd.to_numeric(originNodes['VALUE'])

In [462]:
originNodes_list = list(originNodes['node_ID'])

In [463]:
originNodes_list

[1110000000,
 1110000001,
 1110000002,
 1110000003,
 1110000004,
 1110000005,
 1110000006,
 1110000007,
 1110000008,
 1110000009,
 1110000010,
 1110000011,
 1110000012,
 1110000013,
 1110000014,
 1110000015,
 1110000016,
 1110000017,
 1110000018,
 1110000019,
 1110000020,
 1110000021,
 1110000022,
 1110000023,
 1110000024,
 1110000025,
 1110000026,
 1110000027,
 1110000028,
 1110000029,
 1110000030,
 1110000031,
 1110000032,
 1110000033,
 1110000034,
 1110000035,
 1110000036,
 1110000037,
 1110000038,
 1110000039,
 1110000040,
 1110000041,
 1110000042,
 1110000043,
 1110000044,
 1110000045,
 1110000046,
 1110000047,
 1110000048,
 1110000049,
 1110000050,
 1110000051,
 1110000052,
 1110000053,
 1110000054,
 1110000055,
 1110000056,
 1110000057,
 1110000058,
 1110000059,
 1110000060,
 1110000061,
 1110000062,
 1110000063,
 1110000064,
 1110000065,
 1110000066,
 1110000067,
 1110000068,
 1110000073,
 1110000074,
 1110000075,
 1110000076,
 1110000077,
 1110000078,
 1110000079,
 1110000086,

#### For each destination for each type of graph do an accessibility analysis
We are going to snap destinations to the original graph because we don't want the destinations snapping to a newly created origin node or to a GTFS line in the merged graph. We are also building statistics tables.

In [464]:
for G in graphs.items():
    for destination in destinations_dict.items():
        d = {'label':[],'population':[]}
        # snap the destinations to the road graph
        snapped_destinations = gn.pandana_snap(original_G, destination[1], source_crs = 'epsg:4326', target_crs = 'epsg:32619')
        destinationsNodes = list(snapped_destinations['NN'].unique())
        #print("print destinationsNodes")
        #print(destinationsNodes)
        # Calculate OD Matrix
        OD_matrix = gn.calculate_OD(G[1], originNodes_list, destinationsNodes, fail_value=-1, weight='length')
        avg_trip_time = np.mean(OD_matrix)
        print(f"average trip time for {destination[0]} with the {G[0]} graph is: {avg_trip_time/60} minutes")
        # calculate accessibility
        # For each row, the closest facility is the smallest value in the row
        closest_facility_per_origin = OD_matrix.min(axis=1)
        results = pd.DataFrame([originNodes_list, closest_facility_per_origin]).transpose()
        colName = "travel_time_to_closest_facility"
        results.columns = ['node_ID', colName]
        # output = snapped_origins_filtered.copy()
        output = originNodes.copy()
        output = pd.merge(output, results, on="node_ID")
        # convert travel_time_to_closest_facility to number
        output["travel_time_to_closest_facility"] = pd.to_numeric(output["travel_time_to_closest_facility"])
        output["trav_t_min"] = output["travel_time_to_closest_facility"] / 60
        
        # build statistics table
        # 0-15min
        print("0-15min")
        print(output[output['travel_time_to_closest_facility'] <= 900]['VALUE'].sum())
        d['label'].append('0-15min population sum')
        d['population'].append(output[output['travel_time_to_closest_facility'] <= 900]['VALUE'].sum())
        # 15-30min
        print("15-30min")
        print(output[(output['travel_time_to_closest_facility'] > 900) & (output['travel_time_to_closest_facility'] <= 1800)]['VALUE'].sum())
        d['label'].append('15-30min population sum')
        d['population'].append(output[(output['travel_time_to_closest_facility'] > 900) & (output['travel_time_to_closest_facility'] <= 1800)]['VALUE'].sum())
        # 30-45min
        print("30-45min")
        print(output[(output['travel_time_to_closest_facility'] > 1800) & (output['travel_time_to_closest_facility'] <= 2700)]['VALUE'].sum())
        d['label'].append('30-45min')
        d['population'].append(output[(output['travel_time_to_closest_facility'] > 1800) & (output['travel_time_to_closest_facility'] <= 2700)]['VALUE'].sum())
        # 45-60min
        print("45-60min population sum:")
        print(output[(output['travel_time_to_closest_facility'] > 2700) & (output['travel_time_to_closest_facility'] <= 3600)]['VALUE'].sum())
        d['label'].append('45-60min')
        d['population'].append(output[(output['travel_time_to_closest_facility'] > 2700) & (output['travel_time_to_closest_facility'] <= 3600)]['VALUE'].sum())
        # 60min or greater
        print("60min or greater")
        print(output[(output['travel_time_to_closest_facility'] > 3600)]['VALUE'].sum())
        d['label'].append('60min or greater population sum')
        d['population'].append(output[(output['travel_time_to_closest_facility'] > 3600)]['VALUE'].sum())
        
        d_df = pd.DataFrame(d)
        d_df['population_pct'] = round(d_df.population / d_df.population.sum() * 100,2)
        
        # save a CSV of Table
        d_df.to_csv(fr"output_folder\table_cap_haitien_accessibility_adv_snap_{destination[0]}_{G[0]}.csv")
        
        # save a shapefile...
        destinations_gpd = gpd.GeoDataFrame(output, crs = "epsg:4326", geometry = 'geometry')
        destinations_gpd.to_file(fr"output_folder\cap_haitien_accessibility_adv_snap_{destination[0]}_{G[0]}.shp")


average trip time for shops_and_amenities with the G_service0001 graph is: 192.43857955905656 minutes
0-15min
414990.03854997
15-30min
175022.61785535
30-45min
115389.57555358001
45-60min population sum:
85371.77736549999
60min or greater
229559.45484783
average trip time for shops_and_amenities with the G_service0002 graph is: 192.57686342713976 minutes
0-15min
415271.27134067
15-30min
175301.78506045998
30-45min
114006.52753292
45-60min population sum:
85088.32213756
60min or greater
230665.55810062


## Create accessibility statistics for walking times to GTFS stops

In [445]:
# load stops
import partridge as ptg
path = r'input_folder/cap_haitien_gtfs.zip'
# from: http://simplistic.me/playing-with-gtfs.html
import datetime

service_ids_by_date = ptg.read_service_ids_by_date(path)
service_ids = service_ids_by_date[datetime.date(2019, 6, 29)]

print(f"service_ids is {service_ids}")

# view lets you filter before you load the feed. For example, below you are filtering by the service_ids
feed = ptg.load_feed(path, view={
    'trips.txt': {
        'service_id': service_ids,
    },
})

stops = feed.stops[['stop_id','stop_lat','stop_lon']]
stops_gdf_service0001 = gpd.GeoDataFrame(stops, geometry=gpd.points_from_xy(stops.stop_lon, stops.stop_lat), crs='epsg:4326')

# from: http://simplistic.me/playing-with-gtfs.html
import datetime

service_ids_by_date = ptg.read_service_ids_by_date(path)
service_ids = service_ids_by_date[datetime.date(2019, 7, 1)]

print(f"service_ids is {service_ids}")

# view lets you filter before you load the feed. For example, below you are filtering by the service_ids
feed = ptg.load_feed(path, view={
    'trips.txt': {
        'service_id': service_ids,
    },
})

stops = feed.stops[['stop_id','stop_lat','stop_lon']]

stops_gdf_service0002 = gpd.GeoDataFrame(stops, geometry=gpd.points_from_xy(stops.stop_lon, stops.stop_lat), crs='epsg:4326')

service_ids is frozenset({'service_0001'})
service_ids is frozenset({'service_0002'})


In [446]:
stops_gdf_service0002.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [447]:
stops_dict = {'stops_gdf_service0001': stops_gdf_service0001,'stops_gdf_service0002': stops_gdf_service0002}

In [448]:
for stop in stops_dict.items():
    d = {'label':[],'population':[]}
    # snap the stops to the road graph
    snapped_destinations = gn.pandana_snap(original_G, stop[1], source_crs = 'epsg:4326', target_crs = 'epsg:32619')
    destinationsNodes = list(snapped_destinations['NN'].unique())
    #print("print destinationsNodes")
    #print(destinationsNodes)
    # Calculate OD Matrix
    OD_matrix = gn.calculate_OD(G[1], originNodes_list, destinationsNodes, fail_value=-1, weight='length')
    avg_trip_time = np.mean(OD_matrix)
    print(f"average trip time for {stop[0]} with the walking graph is: {avg_trip_time/60} minutes")
    # calculate accessibility
    # For each row, the closest facility is the smallest value in the row
    closest_facility_per_origin = OD_matrix.min(axis=1)
    results = pd.DataFrame([originNodes_list, closest_facility_per_origin]).transpose()
    colName = "travel_time_to_closest_facility"
    results.columns = ['node_ID', colName]
    # output = snapped_origins_filtered.copy()
    output = originNodes.copy()
    output = pd.merge(output, results, on="node_ID")
    # convert travel_time_to_closest_facility to number
    output["travel_time_to_closest_facility"] = pd.to_numeric(output["travel_time_to_closest_facility"])
    output["trav_t_min"] = output["travel_time_to_closest_facility"] / 60

    # build statistics table
    # 0-15min
    print("0-15min")
    print(output[output['travel_time_to_closest_facility'] <= 900]['VALUE'].sum())
    d['label'].append('0-15min population sum')
    d['population'].append(output[output['travel_time_to_closest_facility'] <= 900]['VALUE'].sum())
    # 15-30min
    print("15-30min")
    print(output[(output['travel_time_to_closest_facility'] > 900) & (output['travel_time_to_closest_facility'] <= 1800)]['VALUE'].sum())
    d['label'].append('15-30min population sum')
    d['population'].append(output[(output['travel_time_to_closest_facility'] > 900) & (output['travel_time_to_closest_facility'] <= 1800)]['VALUE'].sum())
    # 30-45min
    print("30-45min")
    print(output[(output['travel_time_to_closest_facility'] > 1800) & (output['travel_time_to_closest_facility'] <= 2700)]['VALUE'].sum())
    d['label'].append('30-45min')
    d['population'].append(output[(output['travel_time_to_closest_facility'] > 1800) & (output['travel_time_to_closest_facility'] <= 2700)]['VALUE'].sum())
    # 45-60min
    print("45-60min population sum:")
    print(output[(output['travel_time_to_closest_facility'] > 2700) & (output['travel_time_to_closest_facility'] <= 3600)]['VALUE'].sum())
    d['label'].append('45-60min')
    d['population'].append(output[(output['travel_time_to_closest_facility'] > 2700) & (output['travel_time_to_closest_facility'] <= 3600)]['VALUE'].sum())
    # 60min or greater
    print("60min or greater")
    print(output[(output['travel_time_to_closest_facility'] > 3600)]['VALUE'].sum())
    d['label'].append('60min or greater population sum')
    d['population'].append(output[(output['travel_time_to_closest_facility'] > 3600)]['VALUE'].sum())

    d_df = pd.DataFrame(d)
    d_df['population_pct'] = round(d_df.population / d_df.population.sum() * 100,2)

    # save a CSV of Table
    d_df.to_csv(fr"output_folder\table_cap_haitien_accessibility_adv_snap_{stop[0]}_{G[0]}.csv")

average trip time for stops_gdf_service0001 with the walking graph is: 170.06307687635066 minutes
0-15min
289201.49201159
15-30min
194315.23239854
30-45min
126393.14602309
45-60min population sum:
89265.83113386
60min or greater
321157.76260515
average trip time for stops_gdf_service0002 with the walking graph is: 170.06307687635066 minutes
0-15min
289201.49201159
15-30min
194315.23239854
30-45min
126393.14602309
45-60min population sum:
89265.83113386
60min or greater
321157.76260515
