## Accessibility Analysis for Bangladesh using GOSTnets Raster
Use GOSTnets raster and GOSTnets to calculate Market Access using a gravity model.

### Inputs:
    - origins: population grid
    - destinations: Ports
    
Friction layer is the Global Friction Surface 2019 from the Malaria Access Project (https://malariaatlas.org/research-project/accessibility-to-healthcare/)

In [1]:
import sys, os, importlib
import rasterio

import numpy as np
import pandas as pd
import geopandas as gpd
import osmnx as ox
sys.path.append(r"C:\repos\GOSTnets")
import GOSTnets as gn
import skimage.graph as graph

from rasterio.mask import mask
from rasterio import features

from shapely.geometry import box, Point, Polygon
from scipy.ndimage import generic_filter
from pandana.loaders import osm

sys.path.append("../")

# sys.path.append(r"C:\repos\INFRA_SAP")
# import infrasap.market_access as ma
# importlib.reload(ma)



In [2]:
sys.path.append(r"C:\repos\gostrocks\src")
import GOSTRocks.rasterMisc as rMisc
importlib.reload(rMisc)

<module 'GOSTRocks.rasterMisc' from 'C:\\repos\\gostrocks\\src\\GOSTRocks\\rasterMisc.py'>

In [3]:
sys.path.append(r"C:\repos\GOSTNets_Raster\src")
import GOSTNetsRaster.market_access as ma
importlib.reload(ma)

<module 'GOSTNetsRaster.market_access' from 'C:\\repos\\GOSTNets_Raster\\src\\GOSTNetsRaster\\market_access.py'>

In [4]:
global_friction = r"D:\data\global_friction_surface\2020_motorized_friction_surface.geotiff"

In [5]:
# WorldPop
origins_file = r"inputs\bgd_pd_2020_1km.tif"

In [6]:
admin = r"inputs\BGD_corridor1_and_2_min_dissolved2.shp"

In [7]:
# You need the bounding box to be a min bounding box, not the actual shape or else the results are weird 
admin_bound = r"inputs\BGD_corridor1_and_2_min_bounds.shp"

In [8]:
out_folder = r"outputs"
if not os.path.exists(out_folder):
    os.makedirs(out_folder)

In [9]:
friction_file = os.path.join(out_folder, "friction_surface3.tif")

In [10]:
#clip global friction surface to admin area bounding box
if not os.path.exists(friction_file):
    rMisc.clipRaster(rasterio.open(global_friction), gpd.read_file(admin_bound), friction_file)

In [11]:
travel_costs_output_raster = os.path.join(out_folder, "BGD_ma_least_cost_travel_time_ports.tif")

## 1. Standardize or Co-register population file to match the friction surface raster

In [12]:
# create friction surface
inR = rasterio.open(friction_file)

In [13]:
inPop = rasterio.open(origins_file)
# Make sure that both rasters have the exact same resolution, crs, and number of pixels
out_pop_surface_std = os.path.join(out_folder, "BGD_pd_2020_1km_STD_MA.tif")
if not os.path.exists(out_pop_surface_std):
    rMisc.standardizeInputRasters(inPop, inR, out_pop_surface_std, data_type="C")

## 2. Import urban extent centroids

In [14]:
from rasterstats import zonal_stats

In [15]:
admin4_polygons = r"inputs\BGD_admin4_overlapping_corridors1_and_2.shp"
admin4_polygons = gpd.read_file(admin4_polygons)

In [16]:
admin4_polygons[:2]

Unnamed: 0,ID_0,ISO,NAME_0,ID_1,NAME_1,ID_2,NAME_2,ID_3,NAME_3,ID_4,NAME_4,VARNAME_4,TYPE_4,ENGTYPE_4,VALIDFR_4,VALIDTO_4,REMARKS_4,Shape_Leng,Shape_Area,geometry
0,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3641,Dhaka,8886,Nawabganj Dh,Dhaka,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,0.881763,0.025416,"POLYGON ((90.15996 23.74899, 90.16160 23.74695..."
1,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3641,Dhaka,8887,Savar,,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,1.033487,0.030875,"POLYGON ((90.34810 24.03533, 90.34845 24.03257..."


In [17]:
#read in the co-registered population surface file
popR = rasterio.open(out_pop_surface_std)
popD = popR.read()

In [18]:
# code to replace all negative values with 0
popD[popD<0] = 0

In [19]:
# save pop raster too for later
out_meta = popR.meta.copy()
Pop_raster = os.path.join(out_folder, "Pop_raster.tif")
with rasterio.open(Pop_raster, "w", **out_meta) as dest:
    dest.write(popD.astype(out_meta['dtype']))

In [20]:
# summarize the population sum per admin area
statsPop = zonal_stats(admin4_polygons, Pop_raster, stats=['sum'])

In [21]:
# join the population sum to each admin area
admin4_centroids_w_PopSUM = admin4_polygons.join(pd.DataFrame(statsPop))

In [22]:
admin4_centroids_w_PopSUM.rename(columns={'sum':'popsum'}, inplace=True)

In [23]:
admin4_centroids_w_PopSUM[:3]

Unnamed: 0,ID_0,ISO,NAME_0,ID_1,NAME_1,ID_2,NAME_2,ID_3,NAME_3,ID_4,...,VARNAME_4,TYPE_4,ENGTYPE_4,VALIDFR_4,VALIDTO_4,REMARKS_4,Shape_Leng,Shape_Area,geometry,popsum
0,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3641,Dhaka,8886,...,Dhaka,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,0.881763,0.025416,"POLYGON ((90.15996 23.74899, 90.16160 23.74695...",539776.5
1,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3641,Dhaka,8887,...,,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,1.033487,0.030875,"POLYGON ((90.34810 24.03533, 90.34845 24.03257...",2428921.0
2,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3641,Dhaka,8888,...,,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,0.768723,0.026482,"POLYGON ((90.46078 23.88508, 90.46143 23.87958...",15358978.0


In [24]:
admin4_centroids_w_PopSUM.set_index("ID_4", inplace=True)

In [25]:
#replace polygon geometry with centroid geometry
admin4_centroids_w_PopSUM.geometry = admin4_centroids_w_PopSUM.representative_point()

In [26]:
admin4_centroids_w_PopSUM[:3]

Unnamed: 0_level_0,ID_0,ISO,NAME_0,ID_1,NAME_1,ID_2,NAME_2,ID_3,NAME_3,NAME_4,VARNAME_4,TYPE_4,ENGTYPE_4,VALIDFR_4,VALIDTO_4,REMARKS_4,Shape_Leng,Shape_Area,geometry,popsum
ID_4,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
8886,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3641,Dhaka,Nawabganj Dh,Dhaka,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,0.881763,0.025416,POINT (90.14986 23.65921),539776.5
8887,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3641,Dhaka,Savar,,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,1.033487,0.030875,POINT (90.29510 23.89213),2428921.0
8888,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3641,Dhaka,Tejgaon,,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,0.768723,0.026482,POINT (90.40635 23.78716),15358978.0


In [27]:
admin4_centroids_w_PopSUM

Unnamed: 0_level_0,ID_0,ISO,NAME_0,ID_1,NAME_1,ID_2,NAME_2,ID_3,NAME_3,NAME_4,VARNAME_4,TYPE_4,ENGTYPE_4,VALIDFR_4,VALIDTO_4,REMARKS_4,Shape_Leng,Shape_Area,geometry,popsum
ID_4,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
8886,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3641,Dhaka,Nawabganj Dh,Dhaka,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,0.881763,0.025416,POINT (90.14986 23.65921),5.397765e+05
8887,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3641,Dhaka,Savar,,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,1.033487,0.030875,POINT (90.29510 23.89213),2.428921e+06
8888,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3641,Dhaka,Tejgaon,,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,0.768723,0.026482,POINT (90.40635 23.78716),1.535898e+07
8889,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3642,Gazipur,Gazipur S.,,Upazila|Thana|Po,Sub-district,Unknown,Unknown,May be the capital city of zila,1.045429,0.030622,POINT (90.39520 24.02962),3.671253e+06
8890,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3642,Gazipur,Kaliakair,,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,0.903742,0.027321,POINT (90.28579 24.12798),9.753927e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9044,23,BGD,Bangladesh,328,Khulna,3110,Khulna,3666,Shatkhira,Kalaroa,,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,0.948097,0.021863,POINT (89.03846 22.87506),3.152081e+05
9047,23,BGD,Bangladesh,328,Khulna,3110,Khulna,3666,Shatkhira,Shyamnagar,,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,10.511822,0.134612,POINT (89.11709 22.24333),4.098161e+05
9048,23,BGD,Bangladesh,328,Khulna,3110,Khulna,3666,Shatkhira,Tala,,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,1.557520,0.029202,POINT (89.21982 22.73178),3.974542e+05
9190,23,BGD,Bangladesh,330,Sylhet,3117,Sylhet,3686,Hobiganj,Lakhai,,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,0.721331,0.016889,POINT (91.25148 24.30097),2.246393e+05


## 3. Prepare origins (population grid)
Convert from raster format to csv/geopandas data frame

In [28]:
pop_surf = rasterio.open(out_pop_surface_std)
# You can get a Numpy masked array that covers up nodata values from Rasterio by adding a keyword argument: src.read(1, masked=True)
# https://gis.stackexchange.com/questions/224043/excluding-nodata-value-in-band-calculation-with-rasterio
pop = pop_surf.read(1, masked=True)

In [29]:
indices = list(np.ndindex(pop.shape))

In [30]:
indices

[(0, 0),
 (0, 1),
 (0, 2),
 (0, 3),
 (0, 4),
 (0, 5),
 (0, 6),
 (0, 7),
 (0, 8),
 (0, 9),
 (0, 10),
 (0, 11),
 (0, 12),
 (0, 13),
 (0, 14),
 (0, 15),
 (0, 16),
 (0, 17),
 (0, 18),
 (0, 19),
 (0, 20),
 (0, 21),
 (0, 22),
 (0, 23),
 (0, 24),
 (0, 25),
 (0, 26),
 (0, 27),
 (0, 28),
 (0, 29),
 (0, 30),
 (0, 31),
 (0, 32),
 (0, 33),
 (0, 34),
 (0, 35),
 (0, 36),
 (0, 37),
 (0, 38),
 (0, 39),
 (0, 40),
 (0, 41),
 (0, 42),
 (0, 43),
 (0, 44),
 (0, 45),
 (0, 46),
 (0, 47),
 (0, 48),
 (0, 49),
 (0, 50),
 (0, 51),
 (0, 52),
 (0, 53),
 (0, 54),
 (0, 55),
 (0, 56),
 (0, 57),
 (0, 58),
 (0, 59),
 (0, 60),
 (0, 61),
 (0, 62),
 (0, 63),
 (0, 64),
 (0, 65),
 (0, 66),
 (0, 67),
 (0, 68),
 (0, 69),
 (0, 70),
 (0, 71),
 (0, 72),
 (0, 73),
 (0, 74),
 (0, 75),
 (0, 76),
 (0, 77),
 (0, 78),
 (0, 79),
 (0, 80),
 (0, 81),
 (0, 82),
 (0, 83),
 (0, 84),
 (0, 85),
 (0, 86),
 (0, 87),
 (0, 88),
 (0, 89),
 (0, 90),
 (0, 91),
 (0, 92),
 (0, 93),
 (0, 94),
 (0, 95),
 (0, 96),
 (0, 97),
 (0, 98),
 (0, 99),
 (0, 100),

In [31]:
xys = [pop_surf.xy(ind[0], ind[1]) for ind in indices]

In [32]:
res_df = pd.DataFrame({
    'spatial_index': indices, 
    'xy': xys, 
    'pop': pop.flatten()
})

In [33]:
res_df

Unnamed: 0,spatial_index,xy,pop
0,"(0, 0)","(88.69583333333333, 24.42916666666667)",907.638000
1,"(0, 1)","(88.70416666666667, 24.42916666666667)",930.021179
2,"(0, 2)","(88.71249999999999, 24.42916666666667)",1120.448486
3,"(0, 3)","(88.72083333333333, 24.42916666666667)",1113.935303
4,"(0, 4)","(88.72916666666666, 24.42916666666667)",1093.856567
...,...,...,...
198907,"(443, 443)","(92.38749999999999, 20.737500000000004)",
198908,"(443, 444)","(92.39583333333333, 20.737500000000004)",
198909,"(443, 445)","(92.40416666666665, 20.737500000000004)",
198910,"(443, 446)","(92.4125, 20.737500000000004)",


## 4. Create friction surface

The actual units within the friction surface raster are minutes required to travel one meter. Therefore multiple by 1000 to get an approximate time in minutes it takes to cross a pixel, because 30-arcsec resolution pixel is approx. 1km by 1km at the equator. However, Bangladesh is about 23.5 degrees north of the equator. 

At the equator, an arc-second of longitude approximately equals 30.87 meters. Arc-seconds of longitude decrease in a trigonometric cosine-based fashion as one moves toward the earth's poles (https://www.esri.com/news/arcuser/0400/wdside.html). 

In [3]:
import math

In [9]:
# COS of 23.5 degrees * length of arc-sec at equator * 30-arcsec
math.cos(math.radians(23.5)) * 30.87 * 30

849.2893348880634

In [34]:
frictionD = inR.read()[0,:,:] * 849

In [35]:
type(frictionD)

numpy.ndarray

In [36]:
inR.crs

CRS.from_epsg(4326)

In [37]:
# Correct no data values
frictionD[frictionD < 0] = 99999999

In [38]:
mcp = graph.MCP_Geometric(frictionD)

## 5. Calculate Travel Time
We are building the OD-matrix. To do this we are looping through the admin centroids, these will be the destinations; and each loop will find the travel time from all origins. All the loops will build the total OD-matrix.

In [39]:
### intermediary test

In [40]:
#travel_costs_output_raster = os.path.join(out_folder, "BGD_corridor1_and_2_least_cost_test1.tif")

In [41]:
#adm4_centroids = r"inputs\BGD_admin4_overlapping_corridors1_and_2_centroids.shp"
#adm4_centroids = gpd.read_file(adm4_centroids)

In [42]:
#travel_costs, traceback = ma.calculate_travel_time(inR, mcp, adm4_centroids, out_raster=travel_costs_output_raster)

In [43]:
# for each destination get cost of travel for every origin
for idx, row in admin4_centroids_w_PopSUM.iterrows():
    print(idx)
    #print(dest)
    dest_gdf = gpd.GeoDataFrame([row], geometry='geometry', crs='EPSG:4326')
    res = ma.calculate_travel_time(inR, mcp, dest_gdf)[0]
    res_df.loc[:,idx] = res.flatten()

8886
8887
8888
8889
8890
8891
8896
8897
8898
8900
8926
8927
8928
8929
8930
8931
8932
8757
8758
8759
8760
8761
8762
8763
8764
8765
8766
8767
8768
8769
8770
8771
8772
8773
8774
8775
8776
8777
8785
8787
8788
8789
8793
8794
8796
8797
8798
8802
8803
8804
8805
8806
8807
8808
8809
8810
8811
8812
8813
8814
8815
8816
8817
8818
8819
8820
8821
8822
8823
8824
8825
8826
8827
8828
8829
8830
8831
8833
8835
8836
8837
8838
8839
8840
8841
8842
8843
8844
8845
8846
8847
8848
8849
8852
8854
8858
8859
8860
8861
8862
8864
8867
8868
8869
8872
8877
8878
8881
8883
8884
8885
8901
8902
8903
8904
8905
8906
8907
8908
8909
8910
8911
8914
8915
8916
8918
8919
8920
8921
8922
8923
8924
8925
8933
8934
8935
8936
8938
8939
8940
8941
8942
8943
8956
8957
8958
8998
8999
9002
9003
9004
9005
9006
9007
9008
9009
9012
9013
9014
9016
9017
9018
9019
9020
9021
9022
9023
9024
9025
9026
9027
9028
9029
9030
9031
9032
9033
9034
9035
9036
9037
9038
9039
9040
9041
9042
9044
9047
9048
9190
9191


In [44]:
res.shape

(444, 448)

In [45]:
444*448

198912

In [46]:
res_df

Unnamed: 0,spatial_index,xy,pop,8886,8887,8888,8889,8890,8891,8896,...,9038,9039,9040,9041,9042,9044,9047,9048,9190,9191
0,"(0, 0)","(88.69583333333333, 24.42916666666667)",907.638000,7.071100e+07,7.071095e+07,7.071097e+07,7.071095e+07,7.071094e+07,7.071098e+07,7.071104e+07,...,7.071095e+07,7.071099e+07,7.071100e+07,7.071141e+07,7.071104e+07,7.071097e+07,7.071107e+07,7.071100e+07,7.071112e+07,7.071111e+07
1,"(0, 1)","(88.70416666666667, 24.42916666666667)",930.021179,5.000032e+07,5.000027e+07,5.000029e+07,5.000027e+07,5.000026e+07,5.000030e+07,5.000035e+07,...,5.000027e+07,5.000031e+07,5.000032e+07,5.000073e+07,5.000036e+07,5.000029e+07,5.000039e+07,5.000031e+07,5.000044e+07,5.000042e+07
2,"(0, 2)","(88.71249999999999, 24.42916666666667)",1120.448486,5.000032e+07,5.000026e+07,5.000028e+07,5.000027e+07,5.000025e+07,5.000030e+07,5.000035e+07,...,5.000027e+07,5.000030e+07,5.000032e+07,5.000073e+07,5.000036e+07,5.000029e+07,5.000039e+07,5.000031e+07,5.000044e+07,5.000042e+07
3,"(0, 3)","(88.72083333333333, 24.42916666666667)",1113.935303,5.000030e+07,5.000024e+07,5.000026e+07,5.000025e+07,5.000023e+07,5.000028e+07,5.000033e+07,...,5.000025e+07,5.000028e+07,5.000029e+07,5.000071e+07,5.000034e+07,5.000026e+07,5.000037e+07,5.000029e+07,5.000042e+07,5.000040e+07
4,"(0, 4)","(88.72916666666666, 24.42916666666667)",1093.856567,5.000030e+07,5.000025e+07,5.000026e+07,5.000025e+07,5.000023e+07,5.000028e+07,5.000033e+07,...,5.000025e+07,5.000029e+07,5.000030e+07,5.000071e+07,5.000034e+07,5.000027e+07,5.000037e+07,5.000029e+07,5.000042e+07,5.000040e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198907,"(443, 443)","(92.38749999999999, 20.737500000000004)",,5.000062e+07,5.000061e+07,5.000058e+07,5.000062e+07,5.000064e+07,5.000061e+07,5.000074e+07,...,5.000076e+07,5.000074e+07,5.000073e+07,5.000101e+07,5.000082e+07,5.000082e+07,5.000086e+07,5.000080e+07,5.000063e+07,5.000062e+07
198908,"(443, 444)","(92.39583333333333, 20.737500000000004)",,5.000063e+07,5.000062e+07,5.000059e+07,5.000063e+07,5.000065e+07,5.000062e+07,5.000075e+07,...,5.000077e+07,5.000075e+07,5.000074e+07,5.000102e+07,5.000083e+07,5.000083e+07,5.000087e+07,5.000081e+07,5.000064e+07,5.000063e+07
198909,"(443, 445)","(92.40416666666665, 20.737500000000004)",,5.000063e+07,5.000062e+07,5.000059e+07,5.000062e+07,5.000065e+07,5.000062e+07,5.000075e+07,...,5.000077e+07,5.000075e+07,5.000073e+07,5.000102e+07,5.000083e+07,5.000083e+07,5.000087e+07,5.000081e+07,5.000064e+07,5.000063e+07
198910,"(443, 446)","(92.4125, 20.737500000000004)",,5.000062e+07,5.000061e+07,5.000058e+07,5.000062e+07,5.000064e+07,5.000061e+07,5.000074e+07,...,5.000076e+07,5.000074e+07,5.000072e+07,5.000101e+07,5.000082e+07,5.000082e+07,5.000086e+07,5.000080e+07,5.000063e+07,5.000062e+07


In [47]:
# remove values where pop is 0 or nan
res_df = res_df.loc[res_df['pop']!=0].copy()
res_df = res_df.loc[~(res_df['pop'].isna())].copy()

In [48]:
# only extract the point (pixels) that are the centroids
od_cities = np.array(res_df[admin4_centroids_w_PopSUM.index])

In [49]:
od_cities

array([[7.07110043e+07, 7.07109488e+07, 7.07109683e+07, ...,
        7.07109956e+07, 7.07111209e+07, 7.07111074e+07],
       [5.00003216e+07, 5.00002661e+07, 5.00002856e+07, ...,
        5.00003129e+07, 5.00004382e+07, 5.00004247e+07],
       [5.00003204e+07, 5.00002649e+07, 5.00002844e+07, ...,
        5.00003118e+07, 5.00004371e+07, 5.00004235e+07],
       ...,
       [6.11219444e+02, 5.97615849e+02, 5.72699108e+02, ...,
        7.90437267e+02, 6.20994389e+02, 6.06893333e+02],
       [6.12357193e+02, 5.98753598e+02, 5.73836858e+02, ...,
        7.91575017e+02, 6.22132138e+02, 6.08031082e+02],
       [5.00006127e+07, 5.00005991e+07, 5.00005742e+07, ...,
        5.00007919e+07, 5.00006225e+07, 5.00006084e+07]])

In [50]:
od_cities.shape

(111196, 191)

## 6. Calculate Market Access
Calculates Market Access for each pixel

In [51]:
import GOSTnets.calculate_od_raw as calcOD

In [52]:
# for testing purposes try running without any popsum weight....
#access_cities = calcOD.calculate_gravity(od_cities)

In [53]:
access_cities = calcOD.calculate_gravity(od_cities, dWeight = admin4_centroids_w_PopSUM['popsum'])

In [54]:
access_cities.index = res_df.index

In [55]:
access_cities_results = res_df.join(access_cities).rename(columns = {
        'd_0.01': 'd_1',
        'd_0.005': 'd_2',
        'd_0.001' : 'd_3',
        'd_0.0007701635' : 'd_4',   # Market access halves every 15 mins
        'd_0.0003850818' : 'd_5',   # Market access halves every 30 mins
        'd_0.0001925409' : 'd_6',   # Market access halves every 60 mins
        'd_9.62704e-05' : 'd_7',   # Market access halves every 120 mins
        'd_3.85082e-05' : 'd_8',   # Market access halves every 300 mins
        'd_1e-05' : 'd_9'
    })

In [56]:
geoms = [Point(xy) for xy in access_cities_results.xy]

In [57]:
access_cities_results = gpd.GeoDataFrame(access_cities_results, geometry=geoms, crs=admin4_centroids_w_PopSUM.crs)

In [58]:
access_cities_results[:3]

Unnamed: 0,spatial_index,xy,pop,8886,8887,8888,8889,8890,8891,8896,...,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,geometry
0,"(0, 0)","(88.69583333333333, 24.42916666666667)",907.638,70711000.0,70710950.0,70710970.0,70710950.0,70710940.0,70710980.0,70711040.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.204512e-300,POINT (88.69583 24.42917)
1,"(0, 1)","(88.70416666666667, 24.42916666666667)",930.021179,50000320.0,50000270.0,50000290.0,50000270.0,50000260.0,50000300.0,50000350.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.234441e-210,POINT (88.70417 24.42917)
2,"(0, 2)","(88.71249999999999, 24.42916666666667)",1120.448486,50000320.0,50000260.0,50000280.0,50000270.0,50000250.0,50000300.0,50000350.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.234525e-210,POINT (88.71250 24.42917)


## 7. Save results in raster format

In [59]:
def rasterize_gdf(inD, field, template, outFile=None, nodata=np.nan):
    ''' Convert geopandas GeoDataFrame to raster of equal size/res to template raster
    
    INPUT
    inD [ geopandas data frame / path ]
    outFile [ string ] - path to save output raster
    field [ string ] - field to rasterize
    template [ string ] - path to template raster
    nodata [ int ] - value for no data
    
    RETURNS
    Raster file. If no outFile is specified, the function returns burned features as numpy array
    '''
    raster_template = rasterio.open(template)
    # get info from template file
    xRes = raster_template.res[1]
    yRes = raster_template.res[0]
    trans = raster_template.transform
    x_pixels = raster_template.shape[1]
    y_pixels = raster_template.shape[0]
    raster_template.close()
    
    shapes = ((row.geometry,row[field]) for idx, row in inD.iterrows())
    burned = features.rasterize(shapes=shapes, fill=nodata, out_shape=raster_template.shape, transform=trans)
    burned = burned.astype(str(inD[field].dtype))
    
    if outFile:
        with rasterio.open(
            outFile, 'w', driver = 'GTiff',
            height = y_pixels, width = x_pixels,
            count=1, dtype=str(inD[field].dtype),
            crs=raster_template.crs,
            transform=trans, nodata=nodata
        ) as new_dataset:
            new_dataset.write_band(1, burned)
    else:
        return burned

In [60]:
for d in range(3,10):
    print(f'writing {d}')
    rasterize_gdf(access_cities_results, f'd_{d}', template=friction_file, outFile=os.path.join(out_folder, f"access_cities_d{d}.tif"))

writing 3
writing 4
writing 5
writing 6
writing 7
writing 8
writing 9


In [61]:
for d in range(3,10):
    print(f'writing {d}')
    # clip rasters
    rasterized_gdf_file = os.path.join(out_folder, f"access_cities_d{d}.tif")
    clipped_rasterized_gdf_file = os.path.join(out_folder, f"clipped_access_cities_d{d}2.tif")
    if not os.path.exists(clipped_rasterized_gdf_file):
        rMisc.clipRaster(rasterio.open(rasterized_gdf_file), gpd.read_file(admin), clipped_rasterized_gdf_file)

writing 3
writing 4
writing 5
writing 6
writing 7
writing 8
writing 9


### Convert travel time to minutes and get minimum travel cost to nearest city

In [62]:
admin4_centroids_w_PopSUM[:3]

Unnamed: 0_level_0,ID_0,ISO,NAME_0,ID_1,NAME_1,ID_2,NAME_2,ID_3,NAME_3,NAME_4,VARNAME_4,TYPE_4,ENGTYPE_4,VALIDFR_4,VALIDTO_4,REMARKS_4,Shape_Leng,Shape_Area,geometry,popsum
ID_4,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
8886,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3641,Dhaka,Nawabganj Dh,Dhaka,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,0.881763,0.025416,POINT (90.14986 23.65921),539776.5
8887,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3641,Dhaka,Savar,,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,1.033487,0.030875,POINT (90.29510 23.89213),2428921.0
8888,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3641,Dhaka,Tejgaon,,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,0.768723,0.026482,POINT (90.40635 23.78716),15358978.0


In [63]:
access_cities_results[:3]

Unnamed: 0,spatial_index,xy,pop,8886,8887,8888,8889,8890,8891,8896,...,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,geometry
0,"(0, 0)","(88.69583333333333, 24.42916666666667)",907.638,70711000.0,70710950.0,70710970.0,70710950.0,70710940.0,70710980.0,70711040.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.204512e-300,POINT (88.69583 24.42917)
1,"(0, 1)","(88.70416666666667, 24.42916666666667)",930.021179,50000320.0,50000270.0,50000290.0,50000270.0,50000260.0,50000300.0,50000350.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.234441e-210,POINT (88.70417 24.42917)
2,"(0, 2)","(88.71249999999999, 24.42916666666667)",1120.448486,50000320.0,50000260.0,50000280.0,50000270.0,50000250.0,50000300.0,50000350.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.234525e-210,POINT (88.71250 24.42917)


In [64]:
access_cities_results_fields = list(access_cities_results)
access_cities_results_fields

['spatial_index',
 'xy',
 'pop',
 8886,
 8887,
 8888,
 8889,
 8890,
 8891,
 8896,
 8897,
 8898,
 8900,
 8926,
 8927,
 8928,
 8929,
 8930,
 8931,
 8932,
 8757,
 8758,
 8759,
 8760,
 8761,
 8762,
 8763,
 8764,
 8765,
 8766,
 8767,
 8768,
 8769,
 8770,
 8771,
 8772,
 8773,
 8774,
 8775,
 8776,
 8777,
 8785,
 8787,
 8788,
 8789,
 8793,
 8794,
 8796,
 8797,
 8798,
 8802,
 8803,
 8804,
 8805,
 8806,
 8807,
 8808,
 8809,
 8810,
 8811,
 8812,
 8813,
 8814,
 8815,
 8816,
 8817,
 8818,
 8819,
 8820,
 8821,
 8822,
 8823,
 8824,
 8825,
 8826,
 8827,
 8828,
 8829,
 8830,
 8831,
 8833,
 8835,
 8836,
 8837,
 8838,
 8839,
 8840,
 8841,
 8842,
 8843,
 8844,
 8845,
 8846,
 8847,
 8848,
 8849,
 8852,
 8854,
 8858,
 8859,
 8860,
 8861,
 8862,
 8864,
 8867,
 8868,
 8869,
 8872,
 8877,
 8878,
 8881,
 8883,
 8884,
 8885,
 8901,
 8902,
 8903,
 8904,
 8905,
 8906,
 8907,
 8908,
 8909,
 8910,
 8911,
 8914,
 8915,
 8916,
 8918,
 8919,
 8920,
 8921,
 8922,
 8923,
 8924,
 8925,
 8933,
 8934,
 8935,
 8936,
 8938,
 

In [65]:
# ignore keep at min
# if column name in row of access_cities_results exists as a row index in admin4_centroids_w_PopSUM then convert to hours
#access_cities_results_hr = access_cities_results.apply(lambda x: x/60 if x.name in admin4_centroids_w_PopSUM.index else x)

In [66]:
# For each row find the min travel time 
access_cities_results.loc[:, "tt_min"] = access_cities_results[admin4_centroids_w_PopSUM.index].min(axis=1)

In [67]:
access_cities_results

Unnamed: 0,spatial_index,xy,pop,8886,8887,8888,8889,8890,8891,8896,...,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,geometry,tt_min
0,"(0, 0)","(88.69583333333333, 24.42916666666667)",907.638000,7.071100e+07,7.071095e+07,7.071097e+07,7.071095e+07,7.071094e+07,7.071098e+07,7.071104e+07,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,8.204512e-300,POINT (88.69583 24.42917),7.071089e+07
1,"(0, 1)","(88.70416666666667, 24.42916666666667)",930.021179,5.000032e+07,5.000027e+07,5.000029e+07,5.000027e+07,5.000026e+07,5.000030e+07,5.000035e+07,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,7.234441e-210,POINT (88.70417 24.42917),5.000021e+07
2,"(0, 2)","(88.71249999999999, 24.42916666666667)",1120.448486,5.000032e+07,5.000026e+07,5.000028e+07,5.000027e+07,5.000025e+07,5.000030e+07,5.000035e+07,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,7.234525e-210,POINT (88.71250 24.42917),5.000021e+07
3,"(0, 3)","(88.72083333333333, 24.42916666666667)",1113.935303,5.000030e+07,5.000024e+07,5.000026e+07,5.000025e+07,5.000023e+07,5.000028e+07,5.000033e+07,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,7.236015e-210,POINT (88.72083 24.42917),5.000019e+07
4,"(0, 4)","(88.72916666666666, 24.42916666666667)",1093.856567,5.000030e+07,5.000025e+07,5.000026e+07,5.000025e+07,5.000023e+07,5.000028e+07,5.000033e+07,...,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00,7.235945e-210,POINT (88.72917 24.42917),5.000019e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198451,"(442, 435)","(92.32083333333333, 20.745833333333337)",355.519226,6.118758e+02,5.982722e+02,5.733555e+02,6.054160e+02,6.290322e+02,5.983819e+02,7.264784e+02,...,9.473649e+06,5.966973e+07,6.733019e+07,8.268701e+07,9.176122e+07,9.669808e+07,9.979749e+07,1.013666e+08,POINT (92.32083 20.74583),4.007962e+01
198452,"(442, 436)","(92.32916666666667, 20.745833333333337)",383.392883,6.373116e+02,6.237080e+02,5.987913e+02,6.308519e+02,6.544680e+02,6.238177e+02,7.519142e+02,...,8.342267e+06,5.817112e+07,6.602404e+07,8.188105e+07,9.131293e+07,9.646159e+07,9.969979e+07,1.013408e+08,POINT (92.32917 20.74583),6.551544e+01
198453,"(442, 437)","(92.33749999999999, 20.745833333333337)",398.624969,6.112194e+02,5.976158e+02,5.726991e+02,6.047597e+02,6.283758e+02,5.977255e+02,7.258220e+02,...,9.504791e+06,5.970891e+07,6.736423e+07,8.270791e+07,9.177282e+07,9.670419e+07,9.980001e+07,1.013673e+08,POINT (92.33750 20.74583),3.942325e+01
198454,"(442, 438)","(92.34583333333333, 20.745833333333337)",446.424927,6.123572e+02,5.987536e+02,5.738369e+02,6.058974e+02,6.295136e+02,5.988633e+02,7.269598e+02,...,9.450874e+06,5.964101e+07,6.730523e+07,8.267168e+07,9.175272e+07,9.669360e+07,9.979564e+07,1.013661e+08,POINT (92.34583 20.74583),4.056100e+01


In [68]:
rasterize_gdf(access_cities_results, 'tt_min', template=friction_file, outFile=os.path.join(out_folder, f"BGD_cities_min_tt.tif"))

In [69]:
#rasterized_gdf = rasterize_gdf(access_cities_results, 'tt_hrs', template=friction_file)

In [70]:
# clip raster
rasterized_gdf_file = os.path.join(out_folder, "BGD_cities_min_tt.tif")
clipped_file_admins = os.path.join(out_folder, "BGD_corridor1_and_2_cities_min_tt_clipped.tif")
if not os.path.exists(clipped_file_admins):
    rMisc.clipRaster(rasterio.open(rasterized_gdf_file), gpd.read_file(admin), clipped_file_admins)

### Clip results to corridor admin

In [71]:
clipped_file_ports = os.path.join(out_folder, "BGD_corridor2_least_cost_travel_time_ports_clipped.tif")
if not os.path.exists(clipped_file_ports):
    rMisc.clipRaster(rasterio.open(travel_costs_output_raster), gpd.read_file(admin), clipped_file_ports)

## Do Zonal Stats: Population weighted average

In [72]:
# do zonal stats (pop weighted average) as found here: https://github.com/worldbank/GOST_PublicGoods/blob/master/Implementations/FY21/URB_TurkeyUrbanizationReview/URB_TurkeyUrbanizationReview.ipynb

In [73]:
ttR_ports = rasterio.open(travel_costs_output_raster)
ttD_ports = ttR_ports.read()

popR = rasterio.open(out_pop_surface_std)
popD = popR.read()

In [74]:
ttD_ports.shape

(1, 444, 448)

In [75]:
popD.shape

(1, 444, 448)

In [76]:
ttD_ports

array([[[7.07108785e+07, 5.00001961e+07, 5.00001950e+07, ...,
         5.00002833e+07, 5.00002792e+07, 7.07109595e+07],
        [5.00001961e+07, 1.85550932e+02, 1.84463622e+02, ...,
         2.77796918e+02, 2.73727038e+02, 5.00002792e+07],
        [5.00001751e+07, 1.73738432e+02, 1.72651122e+02, ...,
         2.64026941e+02, 2.60170924e+02, 5.00002683e+07],
        ...,
        [5.00006055e+07, 6.04154252e+02, 6.01529252e+02, ...,
         5.55598251e+01, 5.56447493e+01, 5.00000562e+07],
        [5.00006068e+07, 6.05439553e+02, 6.02843548e+02, ...,
         6.18160750e+01, 5.70447494e+01, 5.00000579e+07],
        [7.07112855e+07, 5.00006068e+07, 5.00006042e+07, ...,
         5.00000672e+07, 5.00000579e+07, 7.07107364e+07]]])

In [77]:
popD

array([[[   907.638 ,    930.0212,   1120.4485, ..., -99999.    ,
         -99999.    , -99999.    ],
        [  1382.4071,   1033.267 ,   1150.532 , ..., -99999.    ,
         -99999.    , -99999.    ],
        [  1275.4652,   1290.4519,   1465.0599, ..., -99999.    ,
         -99999.    , -99999.    ],
        ...,
        [-99999.    , -99999.    , -99999.    , ..., -99999.    ,
         -99999.    , -99999.    ],
        [-99999.    , -99999.    , -99999.    , ..., -99999.    ,
         -99999.    , -99999.    ],
        [-99999.    , -99999.    , -99999.    , ..., -99999.    ,
         -99999.    , -99999.    ]]], dtype=float32)

In [78]:
# code to replace all negative value with 0
popD[popD<0] = 0

In [79]:
popD

array([[[ 907.638 ,  930.0212, 1120.4485, ...,    0.    ,    0.    ,
            0.    ],
        [1382.4071, 1033.267 , 1150.532 , ...,    0.    ,    0.    ,
            0.    ],
        [1275.4652, 1290.4519, 1465.0599, ...,    0.    ,    0.    ,
            0.    ],
        ...,
        [   0.    ,    0.    ,    0.    , ...,    0.    ,    0.    ,
            0.    ],
        [   0.    ,    0.    ,    0.    , ...,    0.    ,    0.    ,
            0.    ],
        [   0.    ,    0.    ,    0.    , ...,    0.    ,    0.    ,
            0.    ]]], dtype=float32)

In [80]:
ttPop_ports = popD * ttD_ports

In [81]:
ttPop_ports

array([[[6.41798804e+10, 4.65012413e+10, 5.60226428e+10, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [6.91206262e+10, 1.91723649e+05, 2.12231296e+05, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [6.37734838e+10, 2.24201091e+05, 2.52944241e+05, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        ...,
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
        [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
         0.00000000e+00, 0.00000000e+00, 0.00000000e+00]]])

In [82]:
out_meta = ttR_ports.meta.copy()
with rasterio.open(os.path.join(out_folder, "ttPop_ports_raster.tif"), "w", **out_meta) as dest:
    dest.write(ttPop_ports.astype(out_meta['dtype']))

In [83]:
# save pop raster too for later
out_meta = ttR_ports.meta.copy()
with rasterio.open(os.path.join(out_folder, "Pop_raster.tif"), "w", **out_meta) as dest:
    dest.write(popD.astype(out_meta['dtype']))

In [84]:
#rasterStats, ignore negative values
from rasterstats import zonal_stats

In [85]:
ttPop_ports_raster = r"outputs\ttPop_ports_raster.tif"

In [86]:
Pop_raster = r"outputs\Pop_raster.tif"

In [87]:
#stats = zonal_stats(admin1, ttPop_raster, geojson_out=True)

### consider using an admin or an urban exent file here for better summaries

In [88]:
#BGD_adm1_df = gpd.GeoDataFrame.from_file(r"inputs\BGD_corridor1_dissolved.shp")
adm_df = gpd.GeoDataFrame.from_file(admin)

In [89]:
ttPop_ports_sum_stats = zonal_stats(admin, ttPop_ports_raster, stats='sum')

In [90]:
#pd.DataFrame(ttPop_HF_sum_stats)

In [91]:
ttPop_ports_sum_stats_joined = adm_df.join(pd.DataFrame(ttPop_ports_sum_stats))

In [92]:
#ttPop_HF_sum_stats_joined

In [93]:
# Now need to do rasterstats for just pop, then divide the sum of each of the ttPop zones with the sum pop for each zone

In [94]:
statsPop = zonal_stats(admin, Pop_raster, stats=['sum'])

In [95]:
statsPop_df = pd.DataFrame(statsPop)

In [96]:
statsPop_df.rename(columns={'sum':'popsum'}, inplace=True)

In [97]:
statsPop_df

Unnamed: 0,popsum
0,103054500.0


In [98]:
ttPop_ports_sum_stats_joined2 = ttPop_ports_sum_stats_joined.join(pd.DataFrame(statsPop_df))

In [99]:
ttPop_ports_sum_stats_joined2

Unnamed: 0,ID_0,ISO,NAME_0,ID_1,NAME_1,ID_2,NAME_2,ID_3,NAME_3,ID_4,...,TYPE_4,ENGTYPE_4,VALIDFR_4,VALIDTO_4,REMARKS_4,Shape_Leng,Shape_Area,geometry,sum,popsum
0,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3641,Dhaka,8886,...,Upazila|Thana|Po,Sub-district,Unknown,Unknown,,0.881763,0.025416,"MULTIPOLYGON (((92.22418 21.12555, 92.22400 21...",1441588000.0,103054500.0


In [100]:
ttPop_ports_sum_stats_joined2['avg_tt'] = ttPop_ports_sum_stats_joined2['sum'] / ttPop_ports_sum_stats_joined2['popsum']

In [101]:
ttPop_ports_sum_stats_joined2

Unnamed: 0,ID_0,ISO,NAME_0,ID_1,NAME_1,ID_2,NAME_2,ID_3,NAME_3,ID_4,...,ENGTYPE_4,VALIDFR_4,VALIDTO_4,REMARKS_4,Shape_Leng,Shape_Area,geometry,sum,popsum,avg_tt
0,23,BGD,Bangladesh,327,Dhaka,3103,Dhaka,3641,Dhaka,8886,...,Sub-district,Unknown,Unknown,,0.881763,0.025416,"MULTIPOLYGON (((92.22418 21.12555, 92.22400 21...",1441588000.0,103054500.0,13.988602


In [102]:
ttPop_ports_sum_stats_joined2.to_file(final_sum_stats_output)

NameError: name 'final_sum_stats_output' is not defined

In [None]:
# Calculate Market Access
#https://github.com/worldbank/GOST_PublicGoods/blob/master/Implementations/FY21/ACC_Raster_MarketAccess_Template/ACC_Benin_RasterMarketAccess.ipynb