# Step 3 - Snap CXB OD Estimates

This notebook is the final step of calculating the base OD matrices which underlie all GOSTNets-derived accessibility analysis. It's also the slowest step.

In [5]:
import geopandas as gpd
import pandas as pd
import os, sys, time, importlib

# add to your system path the location of the LoadOSM.py and GOSTnet.py scripts
# sys.path.append("../../../GOSTnets/GOSTnets")

import GOSTnets as gn
import importlib
# import Network_Clean as gnClean
importlib.reload(gn)

import networkx as nx
import osmnx
from shapely.geometry import Point
import numpy as np

import rasterio
from rasterio import features
from shapely.wkt import loads

import ipyparallel as ipp

### Setup

paths

In [6]:
# Directory locations, relative to the notebook's working directory.
# input_pth is built with os.path.join so the separator is valid on every
# platform; the earlier raw string r'inputs\\dests' held two literal
# backslashes, which is not a usable path outside Windows.
input_pth = os.path.join('inputs', 'dests')
geo_pth = r'../../../GEO'
# All snapped CSVs, graph pickles and OD outputs are read from / written to fin_pth.
fin_pth = 'final'

pickles

In [7]:
# File names (inside fin_pth) of the pickled network graphs, one per scenario.
fin_pckle = "final_current_G.pickle"
upgr_all = "final_upgrade_all_G.pickle"
upgr_nosouth = "final_upgrade_nosouth_G.pickle"
upgr_noferry = "final_upgrade_noferry_G.pickle"

In [8]:
# Load the four scenario graphs from fin_pth in one pass.
# NOTE(review): nx.read_gpickle was removed in NetworkX 3.0, so this cell
# needs networkx < 3 -- confirm the pinned environment. Pickles execute code
# on load; only open files produced by the earlier steps of this workflow.
G_current, G_upgr_all, G_upgr_nosouth, G_upgr_noferry = (
    nx.read_gpickle(os.path.join(fin_pth, pickle_name))
    for pickle_name in (fin_pckle, upgr_all, upgr_nosouth, upgr_noferry)
)

origins

In [10]:
# Snapped origin points. Alternative origins file:
# grid_name = r'hrsl_2018_cxb_pts_snapped.csv'
grid_name = 'growth_center_origins_snapped.csv'

# `grid` and `origins` are two names for the SAME DataFrame object, so a
# column added through one name is visible through the other.
grid = origins = pd.read_csv(os.path.join(fin_pth, grid_name))

destinations

In [11]:
# Source file names for each destination layer.
cxb_fil = "cxb_ctr.shp"
chitt_fil = "chittagong.shp"
health_fil = "hc_merge_200324_4326.shp"
primary_fil = "schools/school_category_primary.gpkg"
secondary_fil = "schools/school_category_secondary.gpkg"
tertiary_fil = "schools/school_category_tertiary.gpkg"
matar_fil = "martarbari.shp"
mkts_fil = "mkts_merge_4326.shp"
gc_fil = "cxb_lged_gc_moved_4326.shp"

In [12]:
# Destination label -> source file name. The functions in this notebook build
# their input/output file names from the labels (keys) only.
dests = {
    "CXB": cxb_fil,
    "Chittagong": chitt_fil,
    "Health": health_fil,
    "Primary_education": primary_fil,
    "Secondary_education": secondary_fil,
    "Tertiary_education": tertiary_fil,
    "Martarbari": matar_fil,
    "All_markets": mkts_fil,
    "Growth_centers": gc_fil,
}

# note you can use smaller / larger dest dictionaries as needed
# This is helpful for going back and re-running only certain destinations, or adding in new ones.

# dests = {"CXB" : cxb_fil}

scenarios (for looping)

In [13]:
# Scenario label -> [graph, suffix of that scenario's snapped-destination CSVs].
# The dict lets the analysis loop over scenarios, matching each label to its pickle.

scenarios = {
    'current': [G_current, '_current_snapped.csv'],
    'upgrade_all': [G_upgr_all, '_ua_snapped.csv'],
    'upgrade_nosouth': [G_upgr_nosouth, '_uns_snapped.csv'],
    'upgrade_noferry': [G_upgr_noferry, '_unf_snapped.csv'],
}

Settings

In [14]:
# Walking speed used to turn snap distances into seconds via
# distance / 1000 * 3600 / walk_speed (presumably km/h with distances in
# metres -- confirm against the snapping step).
walk_speed = 4.5

# CRS settings: geographic (EPSG 4326) and the CRS used for measuring (EPSG 32646).
WGS = dict(epsg='4326')
measure_crs = dict(epsg='32646')
# date = 'May2020'

#### Troubleshooting

In [10]:
# Troubleshooting scratch cell (kept commented out).
# Uncomment to run only the 'current' scenario against a reduced set of
# destinations. od_routine and add_walking_time must already be defined.

# dests = {"All_markets" : mkts_fil, "Growth_centers" : gc_fil}
# cur = {'current' : [G_current,'_current_snapped.csv']}

# for scen, values in cur.items():
# #     namestr(values, globals())
#     od_routine(values[0],scen,values[1])

# for scen, values in cur.items():
#     add_walking_time(scen,values[1])

In [14]:
# Debug check (commented out): print infra_type for the edges of G_upgr_all
# whose osm_id is '244861071'.
# for u, v, data in G_upgr_all.edges(data=True):
#     if data['osm_id'] == '244861071':
#         print(data['infra_type'])

In [15]:
# Debug check (commented out): print infra_type for the edges of G_upgr_nosouth
# whose osm_id is '244861071'.
# for u, v, data in G_upgr_nosouth.edges(data=True):
#     if data['osm_id'] == '244861071':
#         print(data['infra_type'])

#### Define functions that will generate OD matrices, and then generate direct walking times from origins to destinations

The benefit of functions is they allow us to loop the analysis over the dictionary items

These functions are big, messy, and adapted from old GOSTNets code. Though functional, the code could likely be streamlined as it's tricky to troubleshoot.

In [15]:
def od_routine(G_input,scenario='',snap_ending='_snapped.csv'):
    """Compute and export an origin-destination time matrix for every destination type.

    For each key in the module-level ``dests`` dict, the snapped destination
    file ``<dest_type><snap_ending>`` is read from ``fin_pth``, an OD matrix
    between the unique origin nodes and the unique destination nodes is
    computed with ``gn.calculate_OD`` (edge weight ``'time'``), and two CSVs
    are written to ``fin_pth``:

    * ``OD_matrix_{scenario}_NN_{dest_type}.csv`` -- the raw matrix, rows are
      origin nodes and columns are destination nodes;
    * ``origins_walktime_{scenario}_NN_{dest_type}.csv`` -- the origins table
      joined to the matrix on its ``NN`` column.

    Parameters
    ----------
    G_input : graph
        Network passed straight to ``gn.calculate_OD``.
    scenario : str
        Label embedded in the output file names.
    snap_ending : str
        Suffix appended to each destination label to find its snapped CSV.

    Returns
    -------
    None

    Notes
    -----
    Relies on the module-level ``origins``, ``dests``, ``fin_pth`` and
    ``walk_speed``. As a side effect, the column ``NN_dist_seconds`` is added
    to (or overwritten on) the global ``origins`` frame.
    ``fail_value=99999999`` is handed to ``gn.calculate_OD``; presumably that
    is the value stored for unreachable pairs -- confirm against GOSTnets.
    """
    # Many origins snap to the same node; route once per unique node.
    origins_pop_nodes = list(set(origins.NN))

    # Walk time from each origin to its snapped node. It does not depend on
    # the destination, so compute it once rather than on every iteration.
    origins['NN_dist_seconds'] = ((origins.NN_dist / 1000) / walk_speed) * 60 * 60

    for dest_type in dests:

        snapfile = dest_type + snap_ending

        print(dest_type)

        dest = pd.read_csv(os.path.join(fin_pth,snapfile))
        dest_nodes = list(set(dest.NN))

        print(len(dest_nodes))

        od_time = gn.calculate_OD(G_input, origins=origins_pop_nodes,
                                  destinations=dest_nodes, fail_value=99999999, weight='time')

        od_time_df = pd.DataFrame(od_time, index=origins_pop_nodes, columns=dest_nodes)

        print(od_time_df.shape)

        # Attach each origin's row of the matrix via its snapped node id.
        origins_join = origins.join(od_time_df, on='NN', rsuffix="dist_")

        origins_join.to_csv(os.path.join(fin_pth,'origins_walktime_{}_NN_{}.csv'.format(scenario,dest_type)))
        od_time_df.to_csv(os.path.join(fin_pth,'OD_matrix_{}_NN_{}.csv'.format(scenario,dest_type)))

In [16]:
def add_walking_time(scenario='',snap_ending='_snapped.csv'):
    """Add walking legs to the on-network OD times and export one grid per destination type.

    For every key in the module-level ``dests`` dict this function:

    1. reads ``OD_matrix_{scenario}_NN_{dest_type}.csv`` from ``fin_pth``;
    2. adds, for every destination, the walk time between the destination and
       its snapped node, and keeps the minimum over destinations for each
       origin node (``on_network_time``);
    3. adds the walk time between each origin and its snapped node
       (``total_time_net``);
    4. snaps origins directly to destinations with ``gn.pandana_snap_points``
       and converts the resulting ``idx_dist`` into ``walk_time_direct``;
    5. stores the faster of the two options in ``PLOT_TIME_SECS`` /
       ``PLOT_TIME_MINS`` and which one won in ``choice`` (``'walk'`` or
       ``'net'``);
    6. writes ``final_cxb_{scenario}_od_grid_{dest_type}.csv`` to ``fin_pth``.

    Parameters
    ----------
    scenario : str
        Label used in the input OD matrix name and the output file name.
    snap_ending : str
        Suffix appended to each destination label to find its snapped CSV.

    Returns
    -------
    None

    Notes
    -----
    Relies on the module-level ``dests``, ``fin_pth``, ``grid_name`` and
    ``walk_speed``.
    NOTE(review): only +/-inf is converted to NaN here. ``od_routine`` passes
    ``fail_value=99999999`` to ``gn.calculate_OD``, so unreachable pairs are
    presumably stored as that sentinel and flow through as very large times --
    confirm this is intended.
    """
    def _walk_seconds(distance):
        # Same expression as used throughout this function; presumably
        # metres and km/h in, seconds out -- confirm units of the snap files.
        return distance / 1000 * 3600 / walk_speed

    print(scenario)

    # The origin grid is identical for every destination type, so read it and
    # parse its WKT geometry once; each iteration works on a fresh copy.
    grid_base = pd.read_csv(os.path.join(fin_pth, grid_name))
    grid_base = grid_base.rename(columns = {'NN':'O_ID','NN_dist':'walk_to_road_net'})
    # O_ID is kept both as the index (for alignment) and as a column.
    grid_base = grid_base.set_index(grid_base['O_ID'])
    grid_base['walk_to_road_net'] = _walk_seconds(grid_base['walk_to_road_net'])
    grid_base['geometry'] = grid_base['geometry'].apply(loads)

    for dest_type in dests:

        snapfile = dest_type + snap_ending

        print(dest_type)

        # On-network times: rows are origin nodes, columns are destination
        # nodes (column labels are strings because they come from the CSV header).
        OD_name = r'OD_matrix_{}_NN_{}.csv'.format(scenario,dest_type)
        OD = pd.read_csv(os.path.join(fin_pth, OD_name))
        OD = OD.rename(columns = {'Unnamed: 0':'O_ID'}).set_index('O_ID')
        OD = OD.replace([np.inf, -np.inf], np.nan)

        # Snapped destinations: read once, used both for the walk leg and for
        # the direct-snapping GeoDataFrame.
        dest = pd.read_csv(os.path.join(fin_pth,snapfile))

        # Walk time between each destination and its snapped node. Node ids
        # become strings to match the OD column labels. When several
        # destinations share a node, only the shortest walk can produce the
        # minimum total, so keep that one per node.
        dest_walk = _walk_seconds(dest.set_index('NN')['NN_dist'])
        dest_walk.index = dest_walk.index.map(str)
        dest_walk = dest_walk.groupby(level=0).min()

        # Per origin node: fastest (network time + destination walk) over all
        # destinations. NaN entries are skipped by min().
        min_time = OD.add(dest_walk, axis=1).min(axis=1)

        # NOTE(review): the {'init': ...} CRS form is deprecated and is a likely
        # source of the pyproj warning in the saved output. It is left as-is
        # because gn.pandana_snap_points is called with matching 'epsg:4326'
        # strings -- confirm before modernising.
        dest_gdf = gpd.GeoDataFrame(dest.assign(geometry = dest['geometry'].apply(loads)),
                                    geometry = 'geometry', crs = {'init':'epsg:4326'})

        # Add walk time from origin to network

        grid = grid_base.copy()
        grid['on_network_time'] = min_time
        grid['total_time_net'] = grid['on_network_time'] + grid['walk_to_road_net']

        o_2_d = gpd.GeoDataFrame(grid, crs = {'init':'epsg:4326'}, geometry = 'geometry')

        # Snapping!

        print('start of snapping: %s\n' % time.ctime())
        o_2_d = gn.pandana_snap_points(o_2_d,
                                   dest_gdf, # eventually just use dest_gdf
                                   source_crs='epsg:4326',
                                   target_crs='epsg:32646',
                                   add_dist_to_node_col = True)
        print('\nend of snapping: %s' % time.ctime())
        print('\n--- processing complete')

        # Convert the straight-line snap distance into a direct walking time.
        o_2_d['walk_time_direct'] = _walk_seconds(o_2_d['idx_dist'])

        grid['walk_time_direct'] = o_2_d['walk_time_direct']

        # Prefer walking whenever it beats on-network travel. The column always
        # exists at this point; rows without a direct walk time hold NaN, which
        # min() skips and which compares False below (so they resolve to 'net').
        grid['PLOT_TIME_SECS'] = grid[['walk_time_direct','total_time_net']].min(axis = 1)
        grid['PLOT_TIME_MINS'] = grid['PLOT_TIME_SECS'] / 60
        grid['choice'] = np.where(grid['walk_time_direct'] < grid['total_time_net'], 'walk', 'net')

        # Export

        grid.to_csv(os.path.join(fin_pth,'final_cxb_{}_od_grid_{}.csv'.format(scenario,dest_type)))

### Load origins and destinations, get unique origin nodes, run OD, export

Run the OD routine function on all destinations for each of the four scenarios defined above

In [17]:
# Build and export the OD matrices for every scenario.
for scen, (scenario_graph, snap_suffix) in scenarios.items():
    od_routine(scenario_graph, scen, snap_suffix)

CXB
1
(34, 1)
CXB
1
(34, 1)
CXB
1
(34, 1)
CXB
1
(34, 1)


### Import completed OD matrix, calculate walking times from origins to the destinations

In [18]:
# Add the walking legs for every scenario; only the snapped-file suffix is
# needed here, the graph itself is not used.
for scen, (_scenario_graph, snap_suffix) in scenarios.items():
    add_walking_time(scen, snap_suffix)

current
CXB
start of snapping: Fri Jun 12 14:54:38 2020


  return _prepare_from_string(" ".join(pjargs))




end of snapping: Fri Jun 12 14:54:39 2020

--- processing complete
upgrade_all
CXB


  return _prepare_from_string(" ".join(pjargs))


start of snapping: Fri Jun 12 14:54:39 2020


end of snapping: Fri Jun 12 14:54:39 2020

--- processing complete
upgrade_nosouth
CXB


  return _prepare_from_string(" ".join(pjargs))


start of snapping: Fri Jun 12 14:54:39 2020


end of snapping: Fri Jun 12 14:54:40 2020

--- processing complete
upgrade_noferry
CXB


  return _prepare_from_string(" ".join(pjargs))


start of snapping: Fri Jun 12 14:54:40 2020


end of snapping: Fri Jun 12 14:54:40 2020

--- processing complete


### That's it

You now have completed OD matrices for everything