## Market Access for Central America using GOSTnets Raster
Defines a custom Market Access Indicator from urban clusters to economic hubs

### Countries included: Costa Rica, Dominican Republic, Nicaragua, Guatemala, Panama, El Salvador

### Inputs:
    - origins: urban clusters
    - destinations: economic hubs
    
The friction layer is the 2020 motorized (driving) Global Friction Surface raster from the Malaria Atlas Project (https://malariaatlas.org/project-resources/accessibility-to-healthcare/)

## As of May 2023, use a python 3.8 environment, so that pandana can be installed

In [70]:
import sys, os, importlib

import rasterio as rio

import numpy as np
import pandas as pd
import geopandas as gpd
import osmnx as ox

import skimage.graph as graph

from rasterio.mask import mask
from rasterio import features

from shapely.geometry import box, Point, Polygon
from scipy.ndimage import generic_filter
#from pandana.loaders import osm

sys.path.append("../")



In [71]:
from tqdm import tqdm

In [72]:
sys.path.append(r"/Users/tomgertin/repos/GOSTnets")
import GOSTnets as gn

In [73]:
sys.path.append(r"/Users/tomgertin/repos/gostrocks/src")
import GOSTRocks.rasterMisc as rMisc
importlib.reload(rMisc)

<module 'GOSTRocks.rasterMisc' from '/Users/tomgertin/repos/gostrocks/src/GOSTRocks/rasterMisc.py'>

In [74]:
sys.path.append(r"/Users/tomgertin/repos/GOSTNets_Raster/src")
import GOSTNetsRaster.market_access as ma
importlib.reload(ma)

<module 'GOSTNetsRaster.market_access' from '/Users/tomgertin/repos/GOSTNets_Raster/src/GOSTNetsRaster/market_access.py'>

In [75]:
# Path to the global 2020 motorized friction surface (the raw input raster,
# before it is clipped to the study area).
global_friction = r"/Users/tomgertin/data/2020_motorized_friction_surface.geotiff"

In [76]:
# Load the destination layer (large urban centers used as economic hubs)
# directly into a GeoDataFrame.
destinations_econ_hubs = gpd.read_file(r"inputs/large_urban_centers_cen_america.shp")

In [77]:
# The clipping extent must be the minimum bounding box of the study area, not
# the actual administrative outline; clipping to the outline itself produced
# odd-looking results.
admin = r"inputs/central_america_min_bounding_box.shp"

In [78]:
# Folder for notebook outputs. `exist_ok=True` replaces the separate
# exists-check, so the cell is safe to re-run and has no check-then-create race.
out_folder = r"outputs"
os.makedirs(out_folder, exist_ok=True)

In [79]:
# Path of the friction surface clipped to the Central America bounding box.
# The file is created by the clipping cell if it does not exist yet.
# Previous clipped file, kept for reference:
#global_friction_clipped = r"/Users/tomgertin/data/2020_motorized_friction_cen_america.geotiff"
global_friction_clipped = r"/Users/tomgertin/data/central_america_min_bounding_box_clipped2.geotiff"

In [80]:
#rMisc.clipRaster?

In [81]:
# Clip the global friction surface to the admin bounding box. The step is
# skipped when the clipped raster already exists on disk.
if not os.path.exists(global_friction_clipped):
    # Open the source raster in a context manager so its file handle is
    # released as soon as clipping has finished.
    with rio.open(global_friction) as global_friction_src:
        rMisc.clipRaster(global_friction_src, gpd.read_file(admin), global_friction_clipped)

In [82]:
# Open the clipped friction surface. The dataset is deliberately left open:
# later cells use it for cell-to-coordinate lookups (`inR.xy`) and pass it to
# `ma.calculate_travel_time`.
inR = rio.open(global_friction_clipped)

## create travel time surface

The actual units within the friction surface raster are minutes required to travel one meter. Therefore, multiply by 1000 to get the approximate time in minutes it takes to cross a pixel, because a 30-arcsec resolution pixel is approx. 1 km by 1 km at the equator.

At the equator, an arc-second of longitude approximately equals 30.87 meters. Arc-seconds of longitude decrease in a trigonometric cosine-based fashion as one moves toward the earth's poles (https://www.esri.com/news/arcuser/0400/wdside.html).

In [83]:
# Alternative, latitude-corrected pixel width (kept for reference, not used):
# import math

# # COS of 23.5 degrees * length of arc-sec at equator * 30-arcsec  (~849 m)
# math.cos(math.radians(23.5)) * 30.87 * 30

#frictionD = inR.read()[0,:,:] * 849
# Read the first band of the clipped friction raster as a 2-D array.
frictionD = inR.read()[0,:,:]
# Convert the friction surface to a per-cell traversal time. The source values
#    are minutes to travel 1 m, so multiplying by 1000 gives the approximate
#    minutes needed to cross a ~1 km cell.
frictionD = frictionD * 1000

# Replace negative (no-data) values with a very large cost so those cells are
# effectively impassable.
frictionD[frictionD < 0] = 99999999

# Minimum-cost-path object built over the per-cell crossing times; it is passed
# to `ma.calculate_travel_time` for every destination.
mcp = graph.MCP_Geometric(frictionD)

# type(frictionD)

# inR.crs

In [84]:
type(inR)

rasterio.io.DatasetReader

In [85]:
frictionD.shape

(1668, 3000)

In [86]:
# Every (row, col) cell index of the raster in row-major order, i.e. the same
# order `res.flatten()` produces when travel-time columns are added to res_df.
indices = list(np.ndindex(frictionD.shape))

In [87]:
#indices

In [88]:
# Look up the (x, y) coordinate the raster reports for each (row, col) cell.
xys = [inR.xy(row_idx, col_idx) for row_idx, col_idx in indices]

In [89]:
xys

[(-92.73750000000001, 20.495833333333334),
 (-92.72916666666667, 20.495833333333334),
 (-92.72083333333335, 20.495833333333334),
 (-92.7125, 20.495833333333334),
 (-92.70416666666668, 20.495833333333334),
 (-92.69583333333334, 20.495833333333334),
 (-92.68750000000001, 20.495833333333334),
 (-92.67916666666667, 20.495833333333334),
 (-92.67083333333333, 20.495833333333334),
 (-92.66250000000001, 20.495833333333334),
 (-92.65416666666667, 20.495833333333334),
 (-92.64583333333334, 20.495833333333334),
 (-92.6375, 20.495833333333334),
 (-92.62916666666668, 20.495833333333334),
 (-92.62083333333334, 20.495833333333334),
 (-92.61250000000001, 20.495833333333334),
 (-92.60416666666667, 20.495833333333334),
 (-92.59583333333335, 20.495833333333334),
 (-92.5875, 20.495833333333334),
 (-92.57916666666668, 20.495833333333334),
 (-92.57083333333334, 20.495833333333334),
 (-92.56250000000001, 20.495833333333334),
 (-92.55416666666667, 20.495833333333334),
 (-92.54583333333333, 20.495833333333334)

In [90]:
# One row per raster cell: its (row, col) index and its (x, y) coordinate.
res_df = pd.DataFrame(dict(spatial_index=indices, xy=xys))

In [91]:
res_df

Unnamed: 0,spatial_index,xy
0,"(0, 0)","(-92.73750000000001, 20.495833333333334)"
1,"(0, 1)","(-92.72916666666667, 20.495833333333334)"
2,"(0, 2)","(-92.72083333333335, 20.495833333333334)"
3,"(0, 3)","(-92.7125, 20.495833333333334)"
4,"(0, 4)","(-92.70416666666668, 20.495833333333334)"
...,...,...
5003995,"(1667, 2995)","(-67.77916666666667, 6.604166666666666)"
5003996,"(1667, 2996)","(-67.77083333333334, 6.604166666666666)"
5003997,"(1667, 2997)","(-67.7625, 6.604166666666666)"
5003998,"(1667, 2998)","(-67.75416666666668, 6.604166666666666)"


In [92]:
res_df.index

RangeIndex(start=0, stop=5004000, step=1)

## Calculate Travel Time

It will loop through each destination and calculate the travel time for each pixel

In [42]:
# For each destination, compute the travel time from every raster cell (origin).
# `iterrows()` is a generator with no length, so `total` is passed explicitly to
# give tqdm a real progress bar and ETA.
for idx, dest in tqdm(destinations_econ_hubs.iterrows(), total=len(destinations_econ_hubs)):
    # Wrap the single destination row in its own GeoDataFrame.
    # NOTE(review): the CRS is hard-coded to EPSG:4326 here, while the results
    # GeoDataFrame later uses destinations_econ_hubs.crs -- confirm they match.
    dest_gdf = gpd.GeoDataFrame([dest], geometry='geometry', crs='EPSG:4326')
    res = ma.calculate_travel_time(inR, mcp, dest_gdf)[0]
    # `.loc[:, idx]` addresses data by label: the colon selects all rows and
    # `idx` names the column. This adds one new column per destination holding
    # the travel time from every cell, flattened to match the row order of res_df.
    res_df.loc[:, idx] = res.flatten()

17it [00:44,  2.62s/it]


In [43]:
# Keep only the per-destination travel-time columns (they are labelled with the
# index values of destinations_econ_hubs) and convert them to a NumPy array.
od_cities = np.array(res_df[destinations_econ_hubs.index])

In [44]:
od_cities.shape

(5004000, 17)

In [45]:
od_cities

array([[2.75213641e+03, 2.72926724e+03, 6.53731730e+03, ...,
        2.35066441e+03, 3.36435019e+03, 3.38729258e+03],
       [2.75082749e+03, 2.72795833e+03, 6.53600839e+03, ...,
        2.34935550e+03, 3.36304128e+03, 3.38598367e+03],
       [2.74865269e+03, 2.72578353e+03, 6.53377988e+03, ...,
        2.34718070e+03, 3.36086648e+03, 3.38380887e+03],
       ...,
       [1.49498058e+09, 1.49498060e+09, 1.49498090e+09, ...,
        1.49498093e+09, 1.49497991e+09, 1.49497989e+09],
       [1.53640193e+09, 1.53640195e+09, 1.53640225e+09, ...,
        1.53640229e+09, 1.53640127e+09, 1.53640125e+09],
       [1.57782329e+09, 1.57782331e+09, 1.57782361e+09, ...,
        1.57782364e+09, 1.57782262e+09, 1.57782260e+09]])

In [46]:
# Back to a DataFrame; columns get default integer labels 0..n-1, one per
# destination, and rows get a default 0..N-1 index, one per raster cell.
od_cities_df = pd.DataFrame(od_cities)

In [47]:
od_cities_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,2.752136e+03,2.729267e+03,6.537317e+03,6.508016e+03,6.495039e+03,6.342083e+03,6.445641e+03,1.849834e+03,1.873097e+03,1.461351e+03,1.616666e+03,2.018199e+03,1.846459e+03,1.975641e+03,2.350664e+03,3.364350e+03,3.387293e+03
1,2.750827e+03,2.727958e+03,6.536008e+03,6.506707e+03,6.493730e+03,6.340774e+03,6.444332e+03,1.848525e+03,1.871788e+03,1.460042e+03,1.615357e+03,2.016890e+03,1.845150e+03,1.974332e+03,2.349355e+03,3.363041e+03,3.385984e+03
2,2.748653e+03,2.725784e+03,6.533780e+03,6.504479e+03,6.491501e+03,6.338546e+03,6.442103e+03,1.847216e+03,1.870479e+03,1.458733e+03,1.614048e+03,2.014715e+03,1.842975e+03,1.972157e+03,2.347181e+03,3.360866e+03,3.383809e+03
3,2.745493e+03,2.722624e+03,6.530620e+03,6.501319e+03,6.488341e+03,6.335386e+03,6.438943e+03,1.845907e+03,1.869170e+03,1.457424e+03,1.612739e+03,2.011555e+03,1.839815e+03,1.968997e+03,2.344021e+03,3.357706e+03,3.380649e+03
4,2.744184e+03,2.721315e+03,6.529311e+03,6.500010e+03,6.487032e+03,6.334077e+03,6.437635e+03,1.844598e+03,1.867861e+03,1.456115e+03,1.611430e+03,2.010246e+03,1.838506e+03,1.967688e+03,2.342712e+03,3.356398e+03,3.379340e+03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5003995,1.412138e+09,1.412138e+09,1.412138e+09,1.412138e+09,1.412138e+09,1.412138e+09,1.412138e+09,1.412139e+09,1.412139e+09,1.412139e+09,1.412139e+09,1.412139e+09,1.412139e+09,1.412139e+09,1.412138e+09,1.412137e+09,1.412137e+09
5003996,1.453559e+09,1.453559e+09,1.453560e+09,1.453560e+09,1.453560e+09,1.453560e+09,1.453560e+09,1.453560e+09,1.453560e+09,1.453560e+09,1.453560e+09,1.453560e+09,1.453560e+09,1.453560e+09,1.453560e+09,1.453559e+09,1.453559e+09
5003997,1.494981e+09,1.494981e+09,1.494981e+09,1.494981e+09,1.494981e+09,1.494981e+09,1.494981e+09,1.494981e+09,1.494981e+09,1.494982e+09,1.494982e+09,1.494982e+09,1.494982e+09,1.494981e+09,1.494981e+09,1.494980e+09,1.494980e+09
5003998,1.536402e+09,1.536402e+09,1.536402e+09,1.536402e+09,1.536402e+09,1.536402e+09,1.536402e+09,1.536403e+09,1.536403e+09,1.536403e+09,1.536403e+09,1.536403e+09,1.536403e+09,1.536403e+09,1.536402e+09,1.536401e+09,1.536401e+09


### 4. Calculate Market Access

We are hoping to calculate a market access indicator, which is defined by:

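$$MA_j = \sum_{i} \frac{P_i}{D_{ij}}$$

That is, Market Access is the summation of ($P_i$ / $D_{ij}$) for every origin–destination pair where the destination is within reach of the origin. The specification states this cutoff as 500 km; the `calculate_ma` function below applies it as a travel-time cutoff of 8 hours (480 minutes).


where 

j – is the origin for which we are calculating it,

i – is a large city within 8 hours of travel time of origin j,

$P_i$ – is the population of large city i,

$D_{ij}$ – is the travel time between large city i and origin j with an added penalty of 180 minutes per border crossing,

and the market access indicator is the sum of this ratio for all large cities within the cutoff of origin j (specified as 500 km, implemented as 8 hours of travel time).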

In [48]:
def calculate_ma(od, dWeight=None, max_travel_time=8 * 60):
    """Compute a market access indicator for every origin.

    For each origin (row) the indicator is the sum over destinations (columns)
    of ``weight / travel_time``, counting only destinations whose travel time
    does not exceed ``max_travel_time``.

    Parameters
    ----------
    od : pandas.DataFrame
        Origin-destination matrix of travel times; one row per origin and one
        column per destination.
    dWeight : sequence of numbers, optional
        One weight per destination column (e.g. city population), in column
        order. If omitted, or if its length does not match the number of
        columns, every destination gets weight 1 (a warning is issued when a
        non-empty sequence of the wrong length is supplied).
    max_travel_time : number, optional
        Travel-time cutoff in the same units as ``od``. The default is
        ``8 * 60``, i.e. eight hours when ``od`` is in minutes.

    Returns
    -------
    pandas.DataFrame
        Same index and columns as ``od``, holding ``weight / travel_time`` for
        reachable pairs and ``-1`` for pairs beyond the cutoff, plus an ``ma``
        column with the row-wise sum over the reachable pairs.
    """
    import warnings

    n_destinations = od.shape[1]
    if dWeight is None or len(dWeight) != n_destinations:
        if dWeight is not None and len(dWeight) > 0:
            # Keep the historical fallback, but make it visible: silently
            # dropping the weights changes the meaning of the indicator.
            warnings.warn(
                "dWeight has %d entries but od has %d columns; "
                "falling back to unit weights" % (len(dWeight), n_destinations)
            )
        weights = [1] * n_destinations
    else:
        # A plain list is matched to the columns by position, never by label.
        weights = list(dWeight)

    # Pairs whose travel time exceeds the cutoff do not contribute.
    too_far = od > max_travel_time

    # weight / travel_time for every pair. Dividing `od` itself (rather than a
    # separately built weights frame) keeps od's own index, so frames with a
    # non-default index are handled correctly and no tiled copy is needed.
    ratios = od.rdiv(weights, axis="columns")

    # Flag out-of-range pairs with -1 in the returned table.
    od2 = ratios.mask(too_far, -1)

    # Sum only the reachable pairs. Using the boolean mask directly means the
    # result does not depend on the -1 sentinel; NaN entries are skipped.
    od2['ma'] = ratios.mask(too_far).sum(axis=1)

    return od2

## small test

In [49]:
# Toy origin-destination matrix: one key per destination column, one list
# entry per origin row.
od = {
    '0': [5, 490, 15, 22],
    '1': [2, 4, 6, 43],
    '2': [500, 12, 16, 98],
}

In [50]:
# Turn the toy dictionary into a DataFrame (keys become columns).
od = pd.DataFrame(od)

In [51]:
od

Unnamed: 0,0,1,2
0,5,2,500
1,490,4,12
2,15,6,16
3,22,43,98


In [52]:
# One weight per column of the toy matrix.
weight = [2,4,1]

In [53]:
# Run the indicator on the toy data; the entries above 8*60 (490 and 500)
# should come back as -1.
t_r1 = calculate_ma(od,weight)

In [54]:
t_r1

Unnamed: 0,0,1,2,ma
0,0.4,2.0,-1.0,2.4
1,-1.0,1.0,0.083333,1.083333
2,0.133333,0.666667,0.0625,0.8625
3,0.090909,0.093023,0.010204,0.194136


## Calculate Market Access

In [56]:
# Market access for every raster cell, weighting each destination by the value
# in the hubs layer's `population` column.
access_cities_df = calculate_ma(od_cities_df, dWeight = list(destinations_econ_hubs['population']))

In [57]:
access_cities_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,ma
0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0
1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0
2,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0
3,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0
4,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5003995,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0
5003996,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0
5003997,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0
5003998,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0


In [59]:
access_cities_df.shape

(5004000, 18)

In [60]:
# Give the result the same row index as res_df so the two line up row-for-row.
access_cities_df.index = res_df.index

In [62]:
# Attach each cell's (x, y) coordinate tuple from res_df.
access_cities_df['xy'] = res_df['xy']

In [63]:
# One shapely Point per raster cell, built from its (x, y) tuple.
geoms = [Point(coord) for coord in access_cities_df['xy']]

In [64]:
# Combine the market access table with the point geometries, using the same
# CRS as the destination hubs layer.
access_cities_results = gpd.GeoDataFrame(access_cities_df, geometry=geoms, crs=destinations_econ_hubs.crs)

In [93]:
access_cities_results

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,11,12,13,14,15,16,ma,max_ma,xy,geometry
0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,"(-92.73750000000001, 20.495833333333334)",POINT (-92.73750 20.49583)
1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,"(-92.72916666666667, 20.495833333333334)",POINT (-92.72917 20.49583)
2,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,"(-92.72083333333335, 20.495833333333334)",POINT (-92.72083 20.49583)
3,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,"(-92.7125, 20.495833333333334)",POINT (-92.71250 20.49583)
4,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,"(-92.70416666666668, 20.495833333333334)",POINT (-92.70417 20.49583)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5003995,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,"(-67.77916666666667, 6.604166666666666)",POINT (-67.77917 6.60417)
5003996,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,"(-67.77083333333334, 6.604166666666666)",POINT (-67.77083 6.60417)
5003997,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,"(-67.7625, 6.604166666666666)",POINT (-67.76250 6.60417)
5003998,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,"(-67.75416666666668, 6.604166666666666)",POINT (-67.75417 6.60417)


### 5. Save results in raster format

In [67]:
sys.path.append(r"/Users/tomgertin/repos/INFRA_SAP")
#import infrasap.market_access as ma
from infrasap import aggregator
#importlib.reload(ma)

In [68]:
# Folder that receives the rasterized result; `exist_ok=True` makes the cell
# safe to re-run without a separate exists-check.
# NOTE(review): this resolves to "output", whereas the `out_folder` created
# near the top of the notebook is "outputs" -- confirm which one is intended.
output_path = os.path.join("", "output")
os.makedirs(output_path, exist_ok=True)

In [94]:
# Write the per-cell 'ma' values to a GeoTIFF, passing the clipped friction
# raster as the template. Plain string literals replace the placeholder-free
# f-strings; the values are unchanged.
aggregator.rasterize_gdf(
    access_cities_results,
    'ma',
    template=global_friction_clipped,
    outFile=os.path.join(output_path, "cen_america_ma.tif"),
)
