## Country-level INFRA-SAP

Create summary statistics at the admin-2 and country levels.

In [50]:
import os, sys, time, importlib

import geopandas as gpd
import pandas as pd
import networkx as nx
sys.path.append('/home/wb514197/Repos/GOSTnets')

import GOSTnets as gn
import rasterio as rio
from osgeo import gdal
import GOSTnets.calculate_od_raw as calcOD
import numpy as np

%load_ext autoreload
%autoreload 2

sys.path.append('/home/wb514197/Repos/INFRA_SAP')
from infrasap import aggregator
from shapely.wkt import loads
import infrasap.rai_calculator as rai

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
country = 'costarica' # name of the OSM extract; used to build the "<country>-latest.osm.pbf" file name
iso3 = 'CRI' # ISO3 code; used as the per-country folder name and in the graph pickle file name
epsg = 32616 # EPSG code passed to rai.extract_rai_network (32616 = WGS 84 / UTM zone 16N)

### Load data

In [3]:
# Input data for the country is read from a per-ISO3 folder.
base_in = "/home/public/Data/PROJECTS/INFRA_SAP"
in_folder = os.path.join(base_in, iso3)

# define data paths
focal_admin2 = os.path.join(in_folder, "admin.shp")
focal_osm = os.path.join(in_folder, f"{country}-latest.osm.pbf")
pop_name = "WP_2020_1km"
wp_1km = os.path.join(in_folder, f"{pop_name}.tif")
urban_extents = os.path.join(in_folder, "urban_extents.shp")
airports = os.path.join(in_folder, "airports.shp")
ports = os.path.join(in_folder, "ports.shp")
borders = os.path.join(in_folder, "borders.shp")
G_path = os.path.join(in_folder, 'graph', f"G_{iso3}.pickle")

# Outputs are written to a separate, user-writable location
# (writing under the public folder raised "permission denied").
base_out = "/home/wb514197/data/INFRA_SAP"
out_folder = os.path.join(base_out, iso3)
# exist_ok=True keeps the cell re-runnable and avoids the check-then-create race.
os.makedirs(out_folder, exist_ok=True)

In [82]:
# Folder that receives the Excel tables exported by this notebook.
tables_path = os.path.join(out_folder, 'tables')
# exist_ok=True keeps the cell re-runnable; makedirs also creates missing parent folders.
os.makedirs(tables_path, exist_ok=True)

In [5]:
# Admin boundaries that every table below is aggregated to.
# Later cells rely on the columns 'OBJECTID', 'geometry', 'Shape_Leng' and 'Shape_Area' being present.
target = gpd.read_file(focal_admin2)

### 1. RAI

In [7]:
%%time
# Build the road network used for the RAI from the OSM .pbf extract.
# Slow: about 6.5 minutes of wall time in the recorded run.
# NOTE: `rai` must still be the imported infrasap.rai_calculator module here. A later cell
# rebinds `rai` to a DataFrame, so re-run the import cell before re-running this one.
rai_roadnetwork = rai.extract_rai_network(focal_osm, epsg=epsg)

  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))


CPU times: user 6min 31s, sys: 2.01 s, total: 6min 33s
Wall time: 6min 33s


In [10]:
# Reproject the extracted road network to EPSG:4326 before the RAI calculation.
# NOTE(review): presumably this matches the CRS of the admin layer and population raster — confirm.
rai_roadnetwork = rai_roadnetwork.to_crs('EPSG:4326')

In [51]:
%%time
rai = rai.calculate_rai(target, "OBJECTID", rio.open(wp_1km), rai_roadnetwork, tables_path)

CPU times: user 865 ms, sys: 16 ms, total: 881 ms
Wall time: 881 ms


In [52]:
# Preview the RAI table: one row per ID with POP and the RAI_POP_1..RAI_POP_4 columns.
rai.head()

Unnamed: 0,RAI_POP_1,ID,RAI_POP_2,RAI_POP_3,RAI_POP_4,POP
0,206833.40625,13111,287415.28125,292675.0,292714.90625,292959.96875
1,474.20639,13112,9697.74707,11722.016602,11722.016602,11762.019531
2,23474.837891,13113,29490.84375,30737.425781,30737.425781,30737.425781
3,10339.826172,13114,72908.796875,82282.96875,82282.96875,82451.890625
4,7432.836426,13115,10965.994141,17723.382812,17723.382812,18218.400391


In [53]:
# Share of the population covered at each RAI level.
# The result is a 0-1 fraction (RAI_POP_n / POP), not multiplied by 100 despite the column name.
for rai_level in range(1, 5):
    rai[f'RAI_Percent_{rai_level}'] = rai[f'RAI_POP_{rai_level}'] / rai['POP']

In [54]:
# Keep the identifier, total population and RAI columns, and rename 'ID' to 'OBJECTID'
# so the table can be merged with the admin layer on that key.
# The rename is chained (not inplace) because an inplace rename on a column subset
# triggers pandas' SettingWithCopyWarning.
rai = rai[['ID','POP','RAI_POP_1', 'RAI_POP_2', 'RAI_POP_3', 'RAI_POP_4',
           'RAI_Percent_1', 'RAI_Percent_2', 'RAI_Percent_3', 'RAI_Percent_4']].rename(columns={'ID':'OBJECTID'})

In [58]:
# Join the RAI columns onto the admin attributes, discard the geometry-related
# columns, and export the result as table 1.
rai = (
    target
    .merge(rai, on="OBJECTID")
    .drop(['geometry','Shape_Leng','Shape_Area'], axis=1)
)
rai_xlsx = os.path.join(tables_path, "1Rural Access to Roads.xlsx")
rai.to_excel(rai_xlsx, index=False)

### 2. Reload OD Matrix

In [64]:
# Reload the previously saved origin-destination matrix.
# header=[0,1] yields two-level columns; the top-level keys used below are
# 'city', 'port', 'border' and 'airport'. The first CSV column becomes the index.
OD = pd.read_csv(os.path.join(out_folder, 'OD_08_03.csv'), header=[0,1], index_col=0)

In [66]:
# Divide every destination group by 3600 (presumably seconds -> hours, matching the
# hour-based thresholds used below). Running this cell twice divides twice.
for dest_group in ('city', 'port', 'border', 'airport'):
    OD[dest_group] = OD[dest_group].apply(lambda col: (col/3600))

### 3. District-level summaries

**Percentage of rural population with access to a main city (>50k inhabitants) in XX hours**

In [69]:
dest_type = 'city'
rural = True
# Bin edges in hours; the final 1000 is an upper bound for the last bin, which is
# relabelled "> 10.0" after the calculation.
thresholds = [0,0.5,1,1.5,2,3,4,5,6,7,10,1000] # hours
# Pass the named settings rather than repeating their literal values, so that changing
# dest_type / rural above actually changes the calculation.
rural_access = aggregator.calculate_access_percentages(OD, target, dest_type, rural, urban_extents, thresholds=thresholds)

In [79]:
# Relabel the last (10 h - 1000 h) bin as "> 10.0" and remove the geometry-related columns.
rural_access = (
    rural_access
    .rename(columns={
        '(10.0, 1000.0]': '> 10.0',
        '(10.0, 1000.0] pop': '> 10.0 pop',
    })
    .drop(columns=['geometry','Shape_Leng','Shape_Area'])
)

In [None]:
# Export the rural-access table as table 2.
rural_access_xlsx = os.path.join(tables_path, "2Rural Access to Cities.xlsx")
rural_access.to_excel(rural_access_xlsx, index=False)

**Travel time to closest city**

In [84]:
# Access shares by travel time to the nearest city: compute, relabel the last bin
# as "> 10.0", remove the geometry-related columns, then export as table 3.
city_tt = (
    aggregator.calculate_access_percentages(OD, target, 'city', thresholds=thresholds)
    .rename(columns={
        '(10.0, 1000.0]': '> 10.0',
        '(10.0, 1000.0] pop': '> 10.0 pop',
    })
    .drop(columns=['geometry','Shape_Leng','Shape_Area'])
)
city_tt_xlsx = os.path.join(tables_path, "3Travel Time to Nearest City.xlsx")
city_tt.to_excel(city_tt_xlsx, index=False)

**Travel time to capital**

In [87]:
dest_all = pd.read_csv(os.path.join(out_folder, 'destination_all.csv'), index_col=0)
# Index label of the destination with the largest 'Pop' value.
# idxmax avoids sorting the whole table and always returns the first maximum on ties.
# NOTE(review): this treats the most populous destination as the capital — confirm per country.
cap_idx = dest_all['Pop'].idxmax()

In [94]:
# Access shares by travel time to the capital (selected through the `capital` argument):
# compute, relabel the last bin as "> 10.0", remove the geometry-related columns,
# then export as table 4.
capital_tt = (
    aggregator.calculate_access_percentages(OD, target, 'city', thresholds=thresholds, capital=str(cap_idx))
    .rename(columns={
        '(10.0, 1000.0]': '> 10.0',
        '(10.0, 1000.0] pop': '> 10.0 pop',
    })
    .drop(columns=['geometry','Shape_Leng','Shape_Area'])
)
capital_tt_xlsx = os.path.join(tables_path, "4Travel Time to Capital.xlsx")
capital_tt.to_excel(capital_tt_xlsx, index=False)

**Time to drive to the closest land border point**

In [97]:
# Access shares by travel time to the nearest border point: compute, relabel the last bin
# as "> 10.0", remove the geometry-related columns, then export as table 6.
border_tt = (
    aggregator.calculate_access_percentages(OD, target, 'border', thresholds=thresholds)
    .rename(columns={
        '(10.0, 1000.0]': '> 10.0',
        '(10.0, 1000.0] pop': '> 10.0 pop',
    })
    .drop(columns=['geometry','Shape_Leng','Shape_Area'])
)
border_tt_xlsx = os.path.join(tables_path, "6Travel Time to Nearest Border.xlsx")
border_tt.to_excel(border_tt_xlsx, index=False)

**Time to drive to the closest airport**

In [99]:
# Access shares by travel time to the nearest airport: compute, relabel the last bin
# as "> 10.0", remove the geometry-related columns, then export as table 5.
airport_tt = (
    aggregator.calculate_access_percentages(OD, target, 'airport', thresholds=thresholds)
    .rename(columns={
        '(10.0, 1000.0]': '> 10.0',
        '(10.0, 1000.0] pop': '> 10.0 pop',
    })
    .drop(columns=['geometry','Shape_Leng','Shape_Area'])
)
airport_tt_xlsx = os.path.join(tables_path, "5Travel Time to Nearest Airport.xlsx")
airport_tt.to_excel(airport_tt_xlsx, index=False)

**Time to drive to the closest port**

In [101]:
# Access shares by travel time to the nearest port: compute, relabel the last bin
# as "> 10.0", remove the geometry-related columns, then export as table 7.
port_tt = (
    aggregator.calculate_access_percentages(OD, target, 'port', thresholds=thresholds)
    .rename(columns={
        '(10.0, 1000.0]': '> 10.0',
        '(10.0, 1000.0] pop': '> 10.0 pop',
    })
    .drop(columns=['geometry','Shape_Leng','Shape_Area'])
)
port_tt_xlsx = os.path.join(tables_path, "7Travel Time to Nearest Port.xlsx")
port_tt.to_excel(port_tt_xlsx, index=False)

#### Aggregate to the country-level

In [106]:
# Collapse each district table into the country-level table. The first call starts
# the table; every later call receives the running table as its third argument.
country_level = aggregator.agregate_to_country(rural_access, "Rural pop. with access to a main city")
for access_table, row_label in [
    (city_tt, "Travel time to closest city"),
    (capital_tt, "Travel time to capital"),
    (border_tt, "Travel time to closest border"),
    (airport_tt, "Travel time to closest airport"),
    (port_tt, "Travel time to closest port"),
]:
    country_level = aggregator.agregate_to_country(access_table, row_label, country_level)

In [124]:
# Display the country-level table: one row per indicator (held in the index), one column per travel-time bin.
country_level

Unnamed: 0,"(0.0, 0.5] pop","(0.5, 1.0] pop","(1.0, 1.5] pop","(1.5, 2.0] pop","(2.0, 3.0] pop","(3.0, 4.0] pop","(4.0, 5.0] pop","(5.0, 6.0] pop","(6.0, 7.0] pop","(7.0, 10.0] pop",> 10.0 pop
Rural pop. with access to a main city,0.027794,0.096762,0.121908,0.128795,0.263096,0.150579,0.107248,0.054467,0.04387,0.003236,0.002244
Travel time to closest city,0.042079,0.102564,0.121199,0.126698,0.256198,0.1463,0.104121,0.053023,0.042507,0.003135,0.002174
Travel time to capital,0.012913,0.029203,0.039237,0.0523,0.126876,0.138564,0.169447,0.14423,0.10112,0.179317,0.006794
Travel time to closest border,0.009612,0.020369,0.028512,0.041661,0.083324,0.103444,0.155445,0.174268,0.119525,0.256435,0.007404
Travel time to closest airport,0.055462,0.091808,0.116738,0.114179,0.227812,0.190796,0.110044,0.057195,0.033211,0.000581,0.002174
Travel time to closest port,0.01248,0.035056,0.049239,0.066428,0.192382,0.180864,0.079525,0.059966,0.11191,0.208519,0.003631


#### Pop-weighted average country-level

In [122]:
# National averages from aggregator.pop_weighted_average_national, one call per travel-time raster.
# `wp_1km` is passed directly: the `raster_path` alias is only assigned in a later cell,
# so referring to it here raised NameError on a fresh top-to-bottom run.
travel_time_dir = os.path.join(out_folder, 'travel_time')
national_average = aggregator.pop_weighted_average_national(
    os.path.join(travel_time_dir, 'cities_min_tt.tif'), wp_1km, 'Travel time to closest city')
# Every later call receives the running table as its fourth argument.
for tt_raster_name, row_label in [
    ('capital_tt.tif', 'Travel time to capital'),
    ('borders_min_tt.tif', 'Travel time to closest border'),
    ('airport_min_tt.tif', 'Travel time to closest airport'),
    ('port_min_tt.tif', 'Travel time to closest port'),
]:
    national_average = aggregator.pop_weighted_average_national(
        os.path.join(travel_time_dir, tt_raster_name), wp_1km, row_label, national_average)

In [127]:
# Append the national-average column(s) to the country-level table, aligned on the index.
# NOTE(review): presumably both tables are indexed by indicator label — confirm.
# Re-running this cell joins the same columns a second time.
country_level = country_level.join(national_average)

In [130]:
# Remove the " pop" suffix from the travel-time bin columns for presentation.
country_level = country_level.rename(columns={
    '(0.0, 0.5] pop':'(0.0, 0.5]',
    '(0.5, 1.0] pop':'(0.5, 1.0]',
    '(1.0, 1.5] pop':'(1.0, 1.5]',
    '(1.5, 2.0] pop':'(1.5, 2.0]',
    '(2.0, 3.0] pop':'(2.0, 3.0]',
    '(3.0, 4.0] pop':'(3.0, 4.0]',
    '(4.0, 5.0] pop':'(4.0, 5.0]',
    '(5.0, 6.0] pop':'(5.0, 6.0]',
    '(6.0, 7.0] pop':'(6.0, 7.0]',
    '(7.0, 10.0] pop':'(7.0, 10.0]',
    '> 10.0 pop':'> 10.0',
})
# The index holds the indicator names (e.g. "Travel time to capital"), so it is written
# out; with index=False the exported rows could not be told apart.
country_level.to_excel(
    os.path.join(tables_path, "9National Level Aggregates.xlsx"),
    index=True,
    index_label="Indicator",
)

#### Pop-weighted average by district

In [109]:
# Alias for the population raster path; passed as the raster argument to the aggregator calls.
raster_path = wp_1km

In [112]:
# District-level results from aggregator.pop_weighted_average for every travel-time and
# travel-distance raster. The first call starts the table; each later call is handed
# the running table so the results accumulate in one object.
tt_dir = os.path.join(out_folder, 'travel_time')
dist_dir = os.path.join(out_folder, 'travel_distance')

weighted_average = aggregator.pop_weighted_average(
    target, os.path.join(tt_dir, 'cities_min_tt.tif'), raster_path, 'tt_min_city')

# Remaining travel-time rasters (running table passed through the `table` keyword).
for tt_name, tt_field in [
    ('capital_tt.tif', 'tt_capital'),
    ('borders_min_tt.tif', 'tt_border'),
    ('airport_min_tt.tif', 'tt_airport'),
    ('port_min_tt.tif', 'tt_port'),
]:
    weighted_average = aggregator.pop_weighted_average(
        target, os.path.join(tt_dir, tt_name), raster_path, tt_field, table=weighted_average)

# Travel-distance rasters (running table passed as the fifth positional argument).
for dist_name, dist_field in [
    ('cities_min_dist.tif', 'dist_cities'),
    ('airport_min_dist.tif', 'dist_airport'),
    ('port_min_dist.tif', 'dist_ports'),
    ('borders_min_dist.tif', 'dist_borders'),
    ('capital_dist.tif', 'dist_capital'),
]:
    weighted_average = aggregator.pop_weighted_average(
        target, os.path.join(dist_dir, dist_name), raster_path, dist_field, weighted_average)

#### Market Access

In [134]:
# Folder holding the access_cities_d*.tif rasters read in the next cell.
out_access = os.path.join(out_folder, 'access')

In [135]:
# One aggregator.pop_weighted_average table per market-access raster (d5 ... d9);
# each raster's result is stored under the field name ma_<d>.
ma5, ma6, ma7, ma8, ma9 = (
    aggregator.pop_weighted_average(
        target,
        os.path.join(out_access, f"access_cities_d{decay}.tif"),
        raster_path,
        f"ma_{decay}",
    )
    for decay in range(5, 10)
)

In [136]:
# Side-by-side concatenation: the full ma5 table plus only the ma_<d> column of the others.
# axis=1 aligns rows on the index, so all five tables must share the same index.
ma_parts = [ma5] + [
    ma_table[[ma_field]]
    for ma_table, ma_field in ((ma6, 'ma_6'), (ma7, 'ma_7'), (ma8, 'ma_8'), (ma9, 'ma_9'))
]
ma = pd.concat(ma_parts, axis=1)

#### Combine district-level data

In [152]:
# Start the district summary from the weighted-average table and attach the
# market-access columns by OBJECTID.
ma_columns = ['OBJECTID','ma_5','ma_6','ma_7','ma_8','ma_9']
district_summary = weighted_average.merge(ma[ma_columns], on="OBJECTID")

In [153]:
# Attach the RAI share columns to the district summary by OBJECTID.
# If `rai` is no longer in memory it can be reloaded first with:
#   rai = pd.read_excel(os.path.join(tables_path, '1Rural Access to Roads.xlsx'))
rai_share_columns = ['RAI_Percent_1','RAI_Percent_2', 'RAI_Percent_3', 'RAI_Percent_4']
rai = rai[['OBJECTID'] + rai_share_columns]
district_summary = district_summary.merge(rai, on='OBJECTID')

In [154]:
# Attach travel time to the regional hub. The workbook read here is not written by any
# cell shown in this notebook, so it must already exist in the tables folder.
hubs = (
    pd.read_excel(os.path.join(tables_path, '8Travel Time to Regional Hub.xlsx'))
    .rename(columns={'Time (hrs)':'tt_regional', 'Regional Hub Name':'hub_name'})
    .loc[:, ['OBJECTID','tt_regional','hub_name']]
)
district_summary = district_summary.merge(hubs, on='OBJECTID')

In [155]:
# Write the combined district layer (geometry plus all summary columns) as a shapefile.
# NOTE(review): ESRI Shapefile field names are limited to 10 characters, so longer column
# names (e.g. 'RAI_Percent_1', 'dist_airport') are shortened on write — confirm this is acceptable.
district_summary.to_file(os.path.join(out_folder,'admin_join.shp'), driver='ESRI Shapefile')

In [156]:
# Tabular version of the district summary: remove the geometry-related columns
# (district_summary itself is left untouched) and export as table 10.
district_table = district_summary.drop(columns=['geometry','Shape_Leng','Shape_Area'])
district_table_xlsx = os.path.join(tables_path, "10District Level Summary.xlsx")
district_table.to_excel(district_table_xlsx, index=False)