# Testing more efficient way to run calculate_gravity

### Inputs:
    - origins: urban clusters
    - destinations: economic hubs
    
Friction layer is the 2020 driving raster Global Friction Surface from the Malaria Atlas Project (https://malariaatlas.org/project-resources/accessibility-to-healthcare/).

## As of May 2023, use a Python 3.8 environment so that pandana can be installed

In [1]:
import sys, os, importlib

import rasterio as rio

import numpy as np
import pandas as pd
import geopandas as gpd
import osmnx as ox

import skimage.graph as graph

from rasterio.mask import mask
from rasterio import features

from shapely.geometry import box, Point, Polygon
from scipy.ndimage import generic_filter
#from pandana.loaders import osm

sys.path.append("../")



In [2]:
from tqdm import tqdm

In [3]:
sys.path.append(r"/Users/tomgertin/repos/GOSTnets")
import GOSTnets as gn

In [4]:
sys.path.append(r"/Users/tomgertin/repos/gostrocks/src")
import GOSTRocks.rasterMisc as rMisc
importlib.reload(rMisc)

<module 'GOSTRocks.rasterMisc' from '/Users/tomgertin/repos/gostrocks/src/GOSTRocks/rasterMisc.py'>

In [5]:
sys.path.append(r"/Users/tomgertin/repos/GOSTNets_Raster/src")
import GOSTNetsRaster.market_access as ma
importlib.reload(ma)

<module 'GOSTNetsRaster.market_access' from '/Users/tomgertin/repos/GOSTNets_Raster/src/GOSTNetsRaster/market_access.py'>

## Load data

In [6]:
global_friction = r"/Users/tomgertin/data/2020_motorized_friction_surface.geotiff"

In [7]:
destinations_econ_hubs = r"inputs/large_urban_centers_cen_america_subset.shp"
destinations_econ_hubs = gpd.read_file(destinations_econ_hubs)

In [8]:
# You need the bounding box to be a min bounding box, not the actual shape or else the results are weird 
admin = r"inputs/central_america_min_bounding_box_small.shp"

In [9]:
# Folder that receives the notebook outputs. exist_ok=True makes the call a
# no-op when the folder is already there and avoids the check-then-create race.
out_folder = r"outputs"
os.makedirs(out_folder, exist_ok=True)

In [10]:
#global_friction_clipped = r"/Users/tomgertin/data/2020_motorized_friction_cen_america.geotiff"
global_friction_clipped = r"/Users/tomgertin/data/central_america_min_bounding_box_small.geotiff"

In [11]:
#rMisc.clipRaster?

In [12]:
# Clip the global friction surface to the admin area; skipped when the clipped
# raster already exists on disk.
if not os.path.exists(global_friction_clipped):
    # Open the source raster in a context manager so its file handle is
    # released as soon as the clip has been written.
    with rio.open(global_friction) as global_friction_raster:
        rMisc.clipRaster(global_friction_raster, gpd.read_file(admin), global_friction_clipped)

In [13]:
# open friction surface
inR = rio.open(global_friction_clipped)

## create travel time surface

The actual units within the friction surface raster are minutes required to travel one meter. Therefore, multiply by 1000 to get the approximate time in minutes it takes to cross a pixel, because a 30-arcsec resolution pixel is approx. 1 km by 1 km at the equator.

At the equator, an arc-second of longitude approximately equals 30.87 meters. Arc-seconds of longitude decrease in a trigonometric cosine-based fashion as one moves toward the earth's poles (https://www.esri.com/news/arcuser/0400/wdside.html).

In [14]:
# import math

# # COS of 23.5 degrees * length of arc-sec at equator * 30-arcsec
# math.cos(math.radians(23.5)) * 30.87 * 30

#frictionD = inR.read()[0,:,:] * 849
# The friction surface stores the minutes needed to travel 1 m. Scaling the
# first band by 1000 turns that into the approximate minutes needed to cross
# a cell.
frictionD = inR.read()[0, :, :] * 1000

# Negative cells are treated as no-data and given a very large traversal cost
frictionD[frictionD < 0] = 99999999

# Minimum-cost-path graph built on the per-cell traversal times
mcp = graph.MCP_Geometric(frictionD)

# type(frictionD)

# inR.crs

In [15]:
type(inR)

rasterio.io.DatasetReader

In [16]:
frictionD.shape

(643, 1130)

In [17]:
indices = list(np.ndindex(frictionD.shape))

In [18]:
#indices

In [19]:
# Coordinates returned by the raster for every (row, col) cell index
xys = [inR.xy(row, col) for row, col in indices]

In [20]:
xys

[(-92.49583333333334, 18.04583333333333),
 (-92.4875, 18.04583333333333),
 (-92.47916666666667, 18.04583333333333),
 (-92.47083333333333, 18.04583333333333),
 (-92.4625, 18.04583333333333),
 (-92.45416666666667, 18.04583333333333),
 (-92.44583333333334, 18.04583333333333),
 (-92.4375, 18.04583333333333),
 (-92.42916666666666, 18.04583333333333),
 (-92.42083333333333, 18.04583333333333),
 (-92.4125, 18.04583333333333),
 (-92.40416666666667, 18.04583333333333),
 (-92.39583333333333, 18.04583333333333),
 (-92.3875, 18.04583333333333),
 (-92.37916666666666, 18.04583333333333),
 (-92.37083333333334, 18.04583333333333),
 (-92.3625, 18.04583333333333),
 (-92.35416666666667, 18.04583333333333),
 (-92.34583333333333, 18.04583333333333),
 (-92.3375, 18.04583333333333),
 (-92.32916666666667, 18.04583333333333),
 (-92.32083333333334, 18.04583333333333),
 (-92.3125, 18.04583333333333),
 (-92.30416666666666, 18.04583333333333),
 (-92.29583333333333, 18.04583333333333),
 (-92.2875, 18.04583333333333)

In [21]:
# One row per raster cell: its (row, col) index and its coordinate pair
cell_columns = {'spatial_index': indices, 'xy': xys}
res_df = pd.DataFrame(cell_columns)

In [22]:
res_df

Unnamed: 0,spatial_index,xy
0,"(0, 0)","(-92.49583333333334, 18.04583333333333)"
1,"(0, 1)","(-92.4875, 18.04583333333333)"
2,"(0, 2)","(-92.47916666666667, 18.04583333333333)"
3,"(0, 3)","(-92.47083333333333, 18.04583333333333)"
4,"(0, 4)","(-92.4625, 18.04583333333333)"
...,...,...
726585,"(642, 1125)","(-83.12083333333334, 12.69583333333333)"
726586,"(642, 1126)","(-83.1125, 12.69583333333333)"
726587,"(642, 1127)","(-83.10416666666667, 12.69583333333333)"
726588,"(642, 1128)","(-83.09583333333333, 12.69583333333333)"


In [23]:
res_df.index

RangeIndex(start=0, stop=726590, step=1)

## Calculate Travel Time

It will loop through each destination and calculate the travel time for each pixel

In [24]:
# For each destination, compute the travel cost from every raster cell and
# store it as a new column of res_df labelled with the destination's index.
for idx, dest in tqdm(destinations_econ_hubs.iterrows()):
    # Wrap the single destination row in a one-row GeoDataFrame (labelled EPSG:4326)
    dest_gdf = gpd.GeoDataFrame([dest], geometry='geometry', crs='EPSG:4326')
    # Only the first element of the calculate_travel_time result is used
    res = ma.calculate_travel_time(inR, mcp, dest_gdf)[0]
    # .loc[:, idx] selects all rows (the colon) of the column labelled idx,
    # so a new column is added that holds the flattened travel-time values.
    # NOTE(review): assumes the flattened order matches the np.ndindex order
    # used to build res_df - confirm.
    res_df.loc[:,idx] = res.flatten()

7it [00:02,  2.73it/s]


In [25]:
# Keep only the travel-time columns (one per destination) as a NumPy array
od_cities = np.array(res_df[destinations_econ_hubs.index])
# DataFrame form of the same matrix. Later cells reference `od_cities_df`,
# which was otherwise never defined when running the notebook top to bottom.
od_cities_df = pd.DataFrame(od_cities)

In [26]:
od_cities.shape

(726590, 7)

In [27]:
od_cities

array([[1.04213809e+03, 1.06540129e+03, 6.53655526e+02, ...,
        1.21050356e+03, 1.03876355e+03, 1.16794510e+03],
       [1.01813809e+03, 1.04140129e+03, 6.29655526e+02, ...,
        1.18650356e+03, 1.01476355e+03, 1.14394510e+03],
       [1.00538809e+03, 1.02865129e+03, 6.16905526e+02, ...,
        1.17375356e+03, 1.00201355e+03, 1.13119510e+03],
       ...,
       [5.00014404e+07, 5.00014093e+07, 5.00018082e+07, ...,
        5.00015682e+07, 5.00014438e+07, 5.00012025e+07],
       [5.00014417e+07, 5.00014106e+07, 5.00018095e+07, ...,
        5.00015695e+07, 5.00014451e+07, 5.00012038e+07],
       [7.07121205e+07, 7.07120893e+07, 7.07124882e+07, ...,
        7.07122482e+07, 7.07121239e+07, 7.07118826e+07]])

In [78]:
# generate list of random weights
od_cities_df.shape[0]

726590

In [80]:
import random

oWeight_list = []
# One random integer weight in [0, 10000] for every origin row
random_list = [random.randint(0, 10000) for _ in range(od_cities_df.shape[0])]

In [81]:
random_list

[1746,
 615,
 6191,
 4019,
 6077,
 7750,
 3136,
 2267,
 9009,
 7091,
 1977,
 1355,
 5375,
 1854,
 2559,
 5577,
 1949,
 5826,
 7737,
 1142,
 1678,
 7409,
 7421,
 5794,
 5692,
 7216,
 2483,
 9270,
 9678,
 3385,
 6840,
 923,
 5315,
 7676,
 2717,
 6027,
 177,
 3980,
 6927,
 6208,
 3883,
 2974,
 3099,
 612,
 1467,
 5572,
 5066,
 8560,
 9943,
 4486,
 241,
 5473,
 1066,
 8553,
 2618,
 5108,
 6701,
 7171,
 2913,
 6162,
 9867,
 7885,
 9347,
 5425,
 4085,
 3420,
 112,
 3105,
 1801,
 4224,
 8176,
 2877,
 9909,
 131,
 8132,
 8087,
 5863,
 3062,
 5641,
 2352,
 1001,
 6101,
 4708,
 2233,
 790,
 3667,
 3584,
 162,
 7428,
 2788,
 4996,
 6511,
 9502,
 5224,
 3326,
 7646,
 3430,
 4637,
 9310,
 9611,
 7687,
 6629,
 2539,
 9595,
 59,
 9029,
 2940,
 5685,
 5375,
 6330,
 5911,
 4037,
 9305,
 5156,
 6765,
 2094,
 8975,
 7388,
 5535,
 5351,
 7328,
 743,
 893,
 3307,
 5160,
 7599,
 9849,
 9273,
 9521,
 4649,
 2109,
 4875,
 9407,
 2137,
 2481,
 603,
 2110,
 8857,
 1910,
 4454,
 7420,
 6485,
 7946,
 4427,
 9485,

## Test 1, use original GOSTnets.calculate_od_raw function

In [1]:
# for reference here is the existing code

# if len(oWeight) != od.shape[0]:
#         oWeight = [1] * od.shape[0]
#     if len(dWeight) != od.shape[1]:
#         dWeight = [1] * od.shape[1]
#     allRes = []
#     for dist_decay in decayVals:
#         outOD = od * 0
#         decayFunction = lambda x: np.exp(-1 * dist_decay * x)
#         for row in range(0, od.shape[0]):
#             curRow = od[row,:]
#             decayedRow = decayFunction(curRow)
#             weightedRow = decayedRow * oWeight[row] * dWeight
#             outOD[row,:] = weightedRow
#         summedVals = np.sum(outOD, axis=1)
#         allRes.append(summedVals)
#     res = pd.DataFrame(allRes).transpose()
#     res.columns = columns=['d_%s' % d for d in decayVals]
#     return(res)

In [30]:
import GOSTnets.calculate_od_raw as calcOD

In [82]:
%%time
access_cities = calcOD.calculate_gravity(od_cities, oWeight = random_list, dWeight = list(destinations_econ_hubs['population']))

CPU times: user 44.5 s, sys: 344 ms, total: 44.9 s
Wall time: 44.9 s


In [83]:
access_cities

Unnamed: 0,d_0.01,d_0.005,d_0.001,d_0.0007701635,d_0.0003850818,d_0.0001925409,d_9.62704e-05,d_3.85082e-05,d_1e-05
0,2.064355e+06,1.248483e+08,4.612576e+09,5.739153e+09,8.299895e+09,9.994336e+09,1.097075e+10,1.160301e+10,1.192870e+10
1,9.243703e+05,4.958255e+07,1.664169e+09,2.059236e+09,2.950647e+09,3.536647e+09,3.873205e+09,4.090749e+09,4.202699e+09
2,1.057071e+07,5.319868e+08,1.696760e+10,2.093420e+10,2.984937e+10,3.568975e+10,3.903815e+10,4.120043e+10,4.231256e+10
3,6.952580e+06,3.476164e+08,1.102925e+10,1.360352e+10,1.938703e+10,2.317449e+10,2.534552e+10,2.674736e+10,2.746833e+10
4,1.057829e+07,5.272550e+08,1.668734e+10,2.057929e+10,2.932152e+10,3.504559e+10,3.832643e+10,4.044478e+10,4.153423e+10
...,...,...,...,...,...,...,...,...,...
726585,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1.065199e-209
726586,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,2.335176e-207
726587,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,4.344930e-207
726588,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,7.310852e-209


## Test 2, use custom calculate_gravity function

### build custom calculate_gravity function, use whole matrix operations vs looping through rows

In [74]:
def custom_calculate_gravity(od, oWeight=None, dWeight=None, decayVals=None):
    """Calculate gravity-model accessibility with whole-matrix operations.

    For every decay value ``b`` the value for origin ``i`` is
    ``oWeight[i] * sum_j(dWeight[j] * exp(-b * od[i, j]))``. Missing (NaN)
    terms are skipped in the sum.

    Args:
        od: 2-D origin-destination cost matrix (NumPy array or DataFrame),
            one row per origin and one column per destination.
        oWeight: Optional origin weights, one per row of ``od``. When omitted,
            empty, or of the wrong length, every origin gets weight 1.
        dWeight: Optional destination weights, one per column of ``od``. When
            omitted, empty, or of the wrong length, every destination gets
            weight 1.
        decayVals: Optional iterable of decay coefficients. Defaults to the
            nine standard coefficients listed in the function body.

    Returns:
        pandas.DataFrame with one row per origin (keeping the index of ``od``
        when it is a DataFrame) and one column per decay value, named
        ``d_<decay>``.
    """
    import warnings

    if decayVals is None:
        # Half-life of each coefficient is ln(2) / coefficient, expressed in
        # the cost units of ``od`` (e.g. 900 units = 15 min if od is seconds).
        decayVals = [
            0.01,
            0.005,
            0.001,
            0.0007701635,   # access halves every 900 cost units
            0.0003850818,   # access halves every 1800 cost units
            0.0001925409,   # access halves every 3600 cost units
            0.0000962704,   # access halves every 7200 cost units
            0.0000385082,   # access halves every 18000 cost units
            0.00001,
        ]
    else:
        decayVals = list(decayVals)

    od_df = pd.DataFrame(od)
    travel_cost = od_df.to_numpy(dtype=float)
    n_origins, n_destinations = travel_cost.shape

    def as_weights(weights, expected_len, label):
        # Use the supplied weights only when they line up with the matrix;
        # otherwise fall back to unit weights, as the original code did.
        if weights is not None and len(weights) == expected_len:
            return np.asarray(weights, dtype=float)
        if weights is not None and len(weights) > 0:
            # Previously this mismatch was ignored silently.
            warnings.warn(
                "%s has length %d but %d was expected; using unit weights"
                % (label, len(weights), expected_len),
                stacklevel=3,
            )
        return np.ones(expected_len)

    origin_weights = as_weights(oWeight, n_origins, "oWeight")
    destination_weights = as_weights(dWeight, n_destinations, "dWeight")

    per_decay = [
        # nansum mirrors the NaN-skipping row sum of the DataFrame version
        np.nansum(np.exp(-1 * dist_decay * travel_cost) * destination_weights, axis=1)
        * origin_weights
        for dist_decay in decayVals
    ]

    values = np.column_stack(per_decay) if per_decay else np.empty((n_origins, 0))
    return pd.DataFrame(
        values,
        index=od_df.index,
        columns=['d_%s' % d for d in decayVals],
    )
    

In [75]:
#access_cities_df = calculate_ma(od_cities_df, dWeight = list(destinations_econ_hubs['population']))

In [84]:
%%time
access_cities2 = custom_calculate_gravity(od_cities_df, oWeight = random_list, dWeight = list(destinations_econ_hubs['population']))


CPU times: user 3.52 s, sys: 82.4 ms, total: 3.6 s
Wall time: 3.51 s


In [85]:
access_cities2

Unnamed: 0,d_0.01,d_0.005,d_0.001,d_0.0007701635,d_0.0003850818,d_0.0001925409,d_9.62704e-05,d_3.85082e-05,d_1e-05
0,2.064355e+06,1.248483e+08,4.612576e+09,5.739153e+09,8.299895e+09,9.994336e+09,1.097075e+10,1.160301e+10,1.192870e+10
1,9.243703e+05,4.958255e+07,1.664169e+09,2.059236e+09,2.950647e+09,3.536647e+09,3.873205e+09,4.090749e+09,4.202699e+09
2,1.057071e+07,5.319868e+08,1.696760e+10,2.093420e+10,2.984937e+10,3.568975e+10,3.903815e+10,4.120043e+10,4.231256e+10
3,6.952580e+06,3.476164e+08,1.102925e+10,1.360352e+10,1.938703e+10,2.317449e+10,2.534552e+10,2.674736e+10,2.746833e+10
4,1.057829e+07,5.272550e+08,1.668734e+10,2.057929e+10,2.932152e+10,3.504559e+10,3.832643e+10,4.044478e+10,4.153423e+10
...,...,...,...,...,...,...,...,...,...
726585,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,1.065199e-209
726586,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,2.335176e-207
726587,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,4.344930e-207
726588,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,7.310852e-209
