# Market access and population

This notebook combines market access calculations based on the global friction surface (the code below reads the 2020 motorized friction surface) with population from WorldPop and urban/rural calculations based on the JDC calculations. Together these form a complete, flexible workflow for measuring access to features.

### Required input data
1. Destinations
2. ISO3 code (for extent extraction)
3. Travel time thresholds in minutes

### Workflow
1. Calculate travel time to destinations  
   a. Calculate binary travel time layers
2. Calculate urban and rural  
   a. Calculate urban and rural population layers  
3. Combine travel time layers (#1a) with population layers (#2a)
4. Run zonal stats on #3


# TODO
1. Rasters may need to be standardized to each other

In [1]:
import sys, os
import rasterio

import pandas as pd
import geopandas as gpd
import numpy as np
import skimage.graph as graph

from rasterio.mask import mask
from rasterio import features
from shapely.geometry import box, Point, Polygon
from scipy.ndimage import generic_filter
from pandana.loaders import osm

sys.path.append("../../../GOST_Urban")
import src.UrbanRaster as urban

sys.path.append("../../")
sys.path.append('/home/wb514197/Repos/GOSTnets')
import infrasap.market_access as ma
import infrasap.rasterMisc as rMisc
from infrasap.misc import tPrint

In [72]:
# User inputs: destination facilities, country ISO3 code, output folder and
# the travel-time thresholds (in minutes) used for the binary access layers.
destinations = "/home/public/Data/COUNTRY/PAK/HEALTH/pakistan_health_facilities/Pakistan_Health_Facilities.shp"
iso3 = "PAK"
# out_folder = "/home/wb411133/data/Country/PAK/HEALTH/"
out_folder = "/home/wb514197/data/PAK/HEALTH/"

thresholds = [30, 60, 120, 180]

# exist_ok=True keeps the cell re-runnable and avoids the check-then-create race
os.makedirs(out_folder, exist_ok=True)

# Read in destinations
inH = gpd.read_file(destinations)

In [73]:
# Filter destinations here, if desired
# Each scenario is a list of facility 'Category' values.
scenario1 = [
    'GENERAL HOSPITALS',
    'CHILDREN HOSPITAL',
    'TEHSIL HEADQUARTER HOSPITAL',
    'DISTRICT HEADQUARTER HOSPITAL',
    'AGENCY HEADQUARTER HOSPITAL',
]
scenario2 = [
    'GENERAL PHYSICIAN',
    'BASIC HEALTH UNIT',
    'GENERAL HOSPITALS',
    'MATERNITY HOME',
    'RURAL HEALTH CENTER',
    'SPECIALIST',
    'CHILDREN HOSPITAL',
    'MCH CENTRE',
    'SUB-HEALTH CENTER',
    'DIAGNOSTIC CENTRE',
    'TEHSIL HEADQUARTER HOSPITAL',
    'DISTRICT HEADQUARTER HOSPITAL',
    'FAMILY WELFARE CENTER',
    'URBAN HEALTH CENTRE',
    'AGENCY HEADQUARTER HOSPITAL',
]

#inH = inH.loc[inH['Category'].isin(scenario2)]
# Initialise both flag columns to 0 (cat1 first, then cat2) ...
scenario_columns = (('cat1', scenario1), ('cat2', scenario2))
for flag_col, _ in scenario_columns:
    inH[flag_col] = 0

# ... then set the flag to 1 where the facility's Category is in the scenario.
for flag_col, categories in scenario_columns:
    in_scenario = inH['Category'].isin(categories)
    inH.loc[in_scenario, flag_col] = 1

# inH.to_file("/home/wb514197/data/PAK/HEALTH_FACILITIES.shp")

In [74]:
# Keep only the facilities flagged for the active scenario (scenario 2 here);
# swap the commented lines to run scenario 1 instead.
# inH = inH.loc[inH['cat1']==1]
inH = inH.loc[inH['cat2']==1]

# Outputs for each scenario go to their own folder.
# out_folder = "/home/wb514197/data/PAK/HEALTH/"
# out_folder = "/home/wb514197/data/PAK/HEALTH_SCENARIO1/"
out_folder = "/home/wb514197/data/PAK/HEALTH_SCENARIO2/"
# exist_ok=True keeps the cell re-runnable and avoids the check-then-create race
os.makedirs(out_folder, exist_ok=True)

In [5]:
# Open the global friction and population rasters, select the boundaries for
# the country given by `iso3`, and clip both rasters to that country.
global_friction_surface = "/home/public/Data/GLOBAL/INFRA/FRICTION_2020/2020_motorized_friction_surface.geotiff"
global_population = "/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/ppp_2020_1km_Aggregated.tif"
inG = rasterio.open(global_friction_surface)
inP = rasterio.open(global_population)

# Read in country bounds
global_bounds = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp"
admin1 = "/home/public/Data/GLOBAL/ADMIN/Admin1_Polys.shp"
admin2 = "/home/wb514197/data/PAK/pakistan_indicators.shp"

# Select by the user-supplied ISO3 code rather than a hard-coded country
inB = gpd.read_file(global_bounds)
inB = inB.loc[inB['ISO3'] == iso3]
if inB.empty:
    raise ValueError("No Admin0 boundary found for ISO3 code %r" % iso3)
inB = inB.to_crs(inG.crs)
inB1 = gpd.read_file(admin1)
inB1 = inB1.loc[inB1['ISO3'] == iso3]
inB1 = inB1.to_crs(inG.crs)
# NOTE(review): the admin2 path is country specific and is not filtered by iso3
inB2 = gpd.read_file(admin2)
inB2 = inB2.to_crs(inG.crs)

# Clip the travel raster to ISO3
out_travel_surface = os.path.join(out_folder, "TRAVEL_SURFACE.tif")
rMisc.clipRaster(inG, inB, out_travel_surface)

# Clip the population raster to ISO3
out_pop_surface = os.path.join(out_folder, "POP_2020_NEW.tif")
rMisc.clipRaster(inP, inB, out_pop_surface)

In [6]:
# Open the two clipped rasters written by the previous cell
pop_surf = rasterio.open(out_pop_surface)
travel_surf = rasterio.open(out_travel_surface)

In [29]:
# Path of the standardized population raster (the file written by the
# rMisc.standardizeInputRasters call in the next cell).
out_pop_surface_std = os.path.join(out_folder, "POP_2020_NEW_STD.tif")

In [17]:
# Standardize the population raster against the travel surface and write the
# result to out_pop_surface_std (reusing the path variable instead of
# rebuilding the same file name by hand).
# NOTE(review): presumably this puts the population on the travel-surface grid;
# confirm against rMisc.standardizeInputRasters.
rMisc.standardizeInputRasters(pop_surf, travel_surf, out_pop_surface_std, data_type="C")

In [20]:
# Re-open the inputs; pop_surf now points at the standardized population raster
pop_surf = rasterio.open(out_pop_surface_std)
travel_surf = rasterio.open(out_travel_surface)

In [25]:
# Reload the admin-2 polygons and reproject them to the raster CRS
inB2 = gpd.read_file(admin2).to_crs(inG.crs)

In [26]:
# Keep the 2018 records and expose the original row index as an "FID" column,
# leaving a fresh 0..n-1 index on the frame.
inB2 = (
    inB2.loc[inB2["Year"] == 2018]
    .rename_axis("FID")
    .reset_index()
)

In [76]:
# Build the minimum-cost-path (MCP) object from the clipped friction surface
inG = rasterio.open(out_travel_surface)
# NOTE(review): the factor 1000 presumably converts a per-metre friction value
# to ~1 km cells - confirm against the friction surface documentation.
inG_data = inG.read() * 1000
# Negative cells (no data) are replaced with a very large cost
negative_cells = inG_data < 0
inG_data[negative_cells] = 99999999
# The MCP graph is built on the first (only) band
mcp = graph.MCP_Geometric(inG_data[0])

In [77]:
# Calculate the cumulative travel cost from every cell to its nearest
# destination and write it out as a single-band raster.
out_file = os.path.join(out_folder, "HEALTH_TRAVEL_TIME_MINUTES.tif")
facility_cells = ma.get_mcp_dests(inG, inH)
costs, traceback = mcp.find_costs(facility_cells)
# Infinite and NaN costs are written as 0.
# NOTE(review): a 0 here also satisfies every `tt_d < thresh` test downstream,
# so such cells count as "within reach" - confirm this is intended.
costs[~np.isfinite(costs)] = 0
# Reuse the travel-surface profile, switching the dtype to that of the costs
meta = dict(inG.meta, dtype=costs.dtype)
with rasterio.open(out_file, 'w', **meta) as tt_writer:
    tt_writer.write(costs, 1)

# Calculate urban

In [32]:
# Delineate urban extents and write the urban mask and urban population rasters.
urban_raster = os.path.join(out_folder, "URBAN.tif")
urban_pop_raster = os.path.join(out_folder, "URBAN_POP.tif")
# Use the standardized population raster: the urban population layer is later
# multiplied cell-by-cell with the travel-time grid, so both need the same grid.
calc_urban = urban.urbanGriddedPop(out_pop_surface_std)
urban_extents = calc_urban.calculateUrban(densVal=300, totalPopThresh=5000,
                          raster=urban_raster, raster_pop=urban_pop_raster, 
                          print_message=iso3, verbose=True)

10:48:55	PAK: Read in urban data
10:48:57	PAK: Creating Shape 0
10:49:06	PAK: Creating Shape 1000
10:49:12	PAK: Creating Shape 2000
10:49:18	PAK: Creating Shape 3000


# Combine traveltime and population

In [78]:
# cur_thresh = thresholds[0]
# Open the travel-time, population and urban-population rasters and read band 1.
tt_raster = rasterio.open(out_file)
# The standardized population raster is used so that it can be combined
# cell-by-cell with the travel-time grid.
pop_raster = rasterio.open(out_pop_surface_std)
urban_pop = rasterio.open(urban_pop_raster)

tt_d = tt_raster.read(1)
pop_d = pop_raster.read(1)
urban_pop_d = urban_pop.read(1)

# Fail early, with a clear message, if the grids cannot be combined element-wise
assert tt_d.shape == pop_d.shape == urban_pop_d.shape, (
    "Raster shapes differ (travel time %s, population %s, urban population %s); "
    "standardize the rasters to each other first"
    % (tt_d.shape, pop_d.shape, urban_pop_d.shape)
)

In [79]:
# Inspect the smallest population value to see whether negative cells are present
pop_d.min()

-3.4028235e+38

In [80]:
# Inspect the smallest urban population value
urban_pop_d.min()

-0.0

In [81]:
# Zero out negative population cells in place
np.putmask(pop_d, pop_d < 0, 0)
# -0.0 compares equal to 0, so this rewrites every zero (of either sign) as +0
np.putmask(urban_pop_d, urban_pop_d == -0., 0)

In [84]:
# For every travel-time threshold, write the total and the urban population
# located in cells whose travel time is below that threshold.
base_raster_name = os.path.join(out_folder, "TT_POP_%s.tif")
base_urban_raster_name = os.path.join(out_folder, "TT_POP_%s_URBAN.tif")
# The output profile comes from the population raster the products are derived
# from, not from a variable left over from the travel-time cell.
out_meta = pop_raster.meta.copy()
# pop_raster.name is the path the population dataset was opened from, so the
# raster summarised as the total is the same one multiplied below.
out_rasters = [pop_raster.name, urban_pop_raster]
for thresh in thresholds:
    cur_out_file = base_raster_name % thresh
    cur_out_urban_file = base_urban_raster_name % thresh
    out_rasters.append(cur_out_file)
    out_rasters.append(cur_out_urban_file)
#     if not os.path.exists(cur_out_file) or not os.path.exists(cur_out_urban_file):
    # 1 where travel time is under the threshold, 0 elsewhere
    cur_tt_d = (tt_d < thresh).astype('int')
    cur_pop = pop_d * cur_tt_d
    cur_urban_pop = urban_pop_d * cur_tt_d
    for cur_path, cur_data in ((cur_out_file, cur_pop), (cur_out_urban_file, cur_urban_pop)):
        # Declare the dtype of the array actually being written
        cur_meta = dict(out_meta, dtype=cur_data.dtype.name)
        with rasterio.open(cur_path, 'w', **cur_meta) as outR:
            outR.write_band(1, cur_data)

In [85]:
from rasterstats import zonal_stats

In [89]:
# del(final)
# del(res)
# del(all_res)

In [90]:
# Sum every population raster inside each admin-2 polygon. Each raster adds
# one "<raster name>_SUM" column; rows follow the order of inB2.
zonal_frames = []
for pop_R_file in out_rasters:
    # Read the band and transform, then close the dataset straight away
    with rasterio.open(pop_R_file) as src:
        data = src.read(1)
        src_transform = src.transform
    data[data<0] = 0
    # Rewrite negative zeros as +0 so that an all-zero zone does not sum to -0.0
    data[data==-0.]=0
    name = os.path.basename(pop_R_file).replace(".tif", "")
    col = "%s_%s" % (name, "SUM")
    res = pd.DataFrame(
        zonal_stats(inB2, data, affine=src_transform, stats='sum', nodata=pop_raster.nodata)
    ).rename(columns = {'sum':col})
    zonal_frames.append(res)

# Build `final` from scratch on every run so that a `final` left over from an
# earlier execution can never leak into the result.
final = pd.concat(zonal_frames, axis=1)

In [91]:
# Keep the *_SUM columns and attach the identifying columns of each polygon
output = final.filter(regex="SUM").copy()
for id_col in ('FID', 'Province', 'District'):
    output[id_col] = inB2[id_col]

In [92]:
# List the columns of the combined table
output.columns

Index(['POP_2020_NEW_STD_SUM', 'URBAN_POP_SUM', 'TT_POP_30_SUM',
       'TT_POP_30_URBAN_SUM', 'TT_POP_60_SUM', 'TT_POP_60_URBAN_SUM',
       'TT_POP_120_SUM', 'TT_POP_120_URBAN_SUM', 'TT_POP_180_SUM',
       'TT_POP_180_URBAN_SUM', 'FID', 'Province', 'District'],
      dtype='object')

In [93]:
# Write the combined admin-2 table next to the rasters in out_folder
out_csv = os.path.join(out_folder, "COMBINED_ZONAL_STATS_Admin2_V2.csv")
output.to_csv(out_csv)

In [95]:
# output = final.filter(regex="SUM")
# output['NAME'] = inB1['WB_ADM1_NA']
# output['CODE'] = inB1['WB_ADM1_CO']
# output.to_csv(os.path.join(out_folder, "COMBINED_ZONAL_STATS.csv"))

In [96]:
# output['NAME'] = inB1['WB_ADM1_NA']
# output.head()