## Country-level INFRA-SAP

Create summary statistics at the admin-2 and country levels.

In [1]:
import os, sys, time, importlib

import geopandas as gpd
import pandas as pd
import networkx as nx
sys.path.append('/home/wb514197/Repos/GOSTnets')

import GOSTnets as gn
import rasterio as rio
from osgeo import gdal
import GOSTnets.calculate_od_raw as calcOD
import numpy as np

%load_ext autoreload
%autoreload 2

sys.path.append('/home/wb514197/Repos/INFRA_SAP')
from infrasap import aggregator
from shapely.wkt import loads
import infrasap.rai_calculator as rai
from utm_zone import epsg as epsg_get
import json

In [2]:
# Country under analysis: `country` fills in the OSM extract file name and
# `iso3` names the per-country data folder.
country, iso3 = 'mauritania', 'MRT'

### Load data

In [3]:
# Root of the INFRA-SAP data tree. The default is the original author's
# location; set the INFRA_SAP_DATA environment variable to run the notebook
# elsewhere without editing this cell.
# (Shared alternative: "/home/public/Data/PROJECTS/INFRA_SAP" -- writing
# outputs there failed with "permission denied".)
base_in = os.environ.get("INFRA_SAP_DATA", "/home/wb514197/data/INFRA_SAP")
in_folder = os.path.join(base_in, iso3)

# Input datasets, all expected inside the country folder.
focal_admin2 = os.path.join(in_folder, "admin.shp")
focal_osm = os.path.join(in_folder, f"{country}-latest.osm.pbf")
pop_name = "WP_2020_1km"
wp_1km = os.path.join(in_folder, f"{pop_name}.tif")
urban_extents = os.path.join(in_folder, "urban_extents.shp")
airports = os.path.join(in_folder, "airports.shp")
ports = os.path.join(in_folder, "ports.shp")
borders = os.path.join(in_folder, "borders.shp")

# Results are written to an "output" sub-folder next to the inputs.
out_folder = os.path.join(in_folder, "output")

# exist_ok=True replaces the exists()/makedirs() pair: no check-then-create
# race, and the cell stays safe to re-run.
os.makedirs(out_folder, exist_ok=True)

In [4]:
# Read the admin boundaries, convert them to a GeoJSON-style dict and hand that
# to utm_zone's epsg_get. The result is passed to extract_rai_network as `epsg`.
# presumably this is the EPSG code of the UTM zone covering the country -- TODO confirm
bounds = gpd.read_file(focal_admin2)
bounds_json = json.loads(bounds.to_json())
epsg = epsg_get(bounds_json)

In [5]:
# Display the output folder to confirm where results will be written.
out_folder

'/home/wb514197/data/INFRA_SAP/MRT/output'

In [6]:
# Folder that receives every Excel table written by this notebook.
tables_path = os.path.join(out_folder, 'tables')
# makedirs(exist_ok=True) replaces the exists()/mkdir() pair: it is race-free,
# re-runnable, and also creates `out_folder` if it is somehow missing.
os.makedirs(tables_path, exist_ok=True)

In [7]:
# Admin polygons used as the aggregation units for the tables below
# (read from the same shapefile as `bounds`).
target = gpd.read_file(focal_admin2)

### 1. RAI

In [8]:
%%time
# Resolve the calculator module explicitly instead of through the `rai` alias:
# a later cell rebinds `rai` to the RAI results table, after which
# `rai.extract_rai_network` would raise AttributeError when this cell is re-run.
rai_calculator = importlib.import_module("infrasap.rai_calculator")

# Extract the road network used for the Rural Access Index from the OSM
# extract, using the EPSG code derived from the admin boundaries.
rai_roadnetwork = rai_calculator.extract_rai_network(focal_osm, epsg=epsg)

  return _prepare_from_string(" ".join(pjargs))
  return _prepare_from_string(" ".join(pjargs))


CPU times: user 4min 29s, sys: 2.77 s, total: 4min 32s
Wall time: 4min 31s


In [9]:
# Reproject the road network to geographic coordinates (EPSG:4326) before it is
# combined with the population raster and admin polygons.
# presumably those layers are in EPSG:4326 as well -- TODO confirm
rai_roadnetwork = rai_roadnetwork.to_crs('EPSG:4326')

In [10]:
%%time
# The result below is stored under the name `rai`, which hides the
# `infrasap.rai_calculator` alias imported at the top. Looking the module up
# explicitly keeps this cell re-runnable after `rai` has become a DataFrame.
rai_calculator = importlib.import_module("infrasap.rai_calculator")

# Open the population raster in a context manager so the dataset handle is
# closed once the RAI populations per admin unit (keyed by OBJECTID) are computed.
with rio.open(wp_1km) as pop_raster:
    rai = rai_calculator.calculate_rai(target, "OBJECTID", pop_raster, rai_roadnetwork, tables_path)

CPU times: user 1min 58s, sys: 486 ms, total: 1min 59s
Wall time: 1min 59s


In [12]:
# Preview the RAI result: one row per admin unit (ID) with the RAI_POP_1..4
# counts and the total POP.
rai.head()

Unnamed: 0,RAI_POP_1,ID,RAI_POP_2,RAI_POP_3,RAI_POP_4,POP
0,27.885479,22900,1222.407349,4488.111816,5173.705566,10304.847656
1,24220.990234,22901,26490.560547,31181.384766,33628.621094,39414.550781
2,-1.0,22902,0.224405,60.111805,60.316807,2157.651611
3,-1.0,22903,2082.561279,4156.754395,4423.415527,8795.634766
4,901.139893,22904,7711.625,8038.547363,8038.547363,93808.578125


In [13]:
# rai = pd.read_csv(os.path.join(tables_path, "RAI_population.csv"), index_col=0)

In [14]:
# calculate_rai can return negative counts (the preview shows -1.0 entries);
# reset them to zero so the shares computed next cannot be negative.
# presumably -1 marks "no population within reach" -- TODO confirm
for rai_col in ("RAI_POP_1", "RAI_POP_2", "RAI_POP_3", "RAI_POP_4"):
    negative_rows = rai[rai_col] < 0
    rai.loc[negative_rows, rai_col] = 0

In [15]:
# Share of each unit's population covered at every RAI level. Despite the
# "Percent" name the value is a plain ratio (RAI_POP_n / POP), not multiplied by 100.
for level in range(1, 5):
    rai[f'RAI_Percent_{level}'] = rai[f'RAI_POP_{level}'] / rai['POP']

In [16]:
# Rename the key to match the admin layer, then keep only the columns that go
# into the report, in presentation order.
rai_report_cols = (
    ['OBJECTID', 'POP']
    + [f'RAI_POP_{level}' for level in range(1, 5)]
    + [f'RAI_Percent_{level}' for level in range(1, 5)]
)
rai = rai.rename(columns={'ID':'OBJECTID'})[rai_report_cols]

In [17]:
# Attach the RAI figures to the admin attributes (and geometry) through OBJECTID.
# NOTE(review): not re-runnable as-is -- `rai` is overwritten with the merged
# table, so a second run merges `target` with an already-merged frame.
rai = target.merge(rai, on="OBJECTID")
# rai = target.join(rai)

In [18]:
# Strip the geometry and shape-measurement columns so the table exports as
# plain attributes.
# NOTE(review): in-place drop; running this cell a second time fails because
# the columns are already gone.
rai.drop(['geometry','Shape_Leng','Shape_Area'], axis=1, inplace=True)

In [19]:
# Export table 1 (rural access to roads) without the DataFrame index.
rai.to_excel(os.path.join(tables_path, "1Rural Access to Roads.xlsx"), index=False)

### 2. Reload OD Matrix

In [22]:
# Reload the origin-destination matrix saved earlier. header=[0,1] reads the
# first two rows as a two-level column index; the top level is used below to
# select destination groups ('city', 'port', 'border', 'airport').
OD = pd.read_csv(os.path.join(out_folder, 'OD_03_04.csv'), header=[0,1], index_col=0)

  mask |= (ar1 == a)


In [23]:
# Divide every destination group by 3600 (presumably seconds -> hours, matching
# the hour-based thresholds used below -- TODO confirm the stored unit).
# NOTE(review): this rescales OD in place, so running the cell twice divides
# twice; reload OD before re-running.
for dest_group in ('city', 'port', 'border', 'airport'):
    OD[dest_group] = OD[dest_group] / 3600

### 3. District-level summaries

**Percentage of rural population with access to a main city (>50k inhabitants) in XX hours**

In [24]:
# Parameters for the rural-access table. `dest_type` and `rural` were previously
# defined but ignored (the call repeated the literals), so editing them had no
# effect; they now drive the call.
dest_type = 'city'
rural = True
# Travel-time bin edges in hours. The final edge (1000) only closes the last
# bin, which is relabelled "> 10.0" before export.
thresholds = [0,0.5,1,1.5,2,3,4,5,6,7,10,1000] # hours
rural_access = aggregator.calculate_access_percentages(
    OD, target, dest_type, rural, urban_extents, thresholds=thresholds
)

In [25]:
# Present the catch-all last bin as "> 10.0" and remove the columns that only
# make sense in the shapefile.
open_ended_labels = {
    '(10.0, 1000.0]': '> 10.0',
    '(10.0, 1000.0] pop': '> 10.0 pop',
}
shape_only_cols = ['geometry', 'Shape_Leng', 'Shape_Area']
rural_access.rename(columns=open_ended_labels, inplace=True)
rural_access.drop(shape_only_cols, axis=1, inplace=True)

In [26]:
# Export table 2 (rural access to cities) without the DataFrame index.
rural_access.to_excel(os.path.join(tables_path, "2Rural Access to Cities.xlsx"), index=False)

**Travel time to closest city**

In [27]:
# Same aggregation as the rural table, but called without the rural flag and
# urban extents.
city_tt = aggregator.calculate_access_percentages(OD, target, 'city', thresholds=thresholds)

# Relabel the open-ended top bin, strip the shapefile-only columns, then export.
top_bin_labels = {'(10.0, 1000.0]': '> 10.0', '(10.0, 1000.0] pop': '> 10.0 pop'}
city_tt.rename(columns=top_bin_labels, inplace=True)
city_tt.drop(['geometry', 'Shape_Leng', 'Shape_Area'], axis=1, inplace=True)
city_tt_xlsx = os.path.join(tables_path, "3Travel Time to Nearest City.xlsx")
city_tt.to_excel(city_tt_xlsx, index=False)

**Travel time to capital**

In [28]:
# Load the destination table saved earlier, indexed by its first column.
dest_all = pd.read_csv(os.path.join(out_folder, 'destination_all.csv'), index_col=0)

# The capital is taken to be the destination with the largest 'Pop' value.
# idxmax() finds it in a single pass and returns the first maximum on ties,
# whereas sorting the whole frame left the tie-break to an unstable sort.
# NOTE(review): this heuristic fails for countries whose capital is not the
# most populous city -- confirm for each country.
cap_idx = dest_all['Pop'].idxmax()

In [29]:
# Travel-time bins restricted to a single destination: the capital, passed as
# the string form of its index in the destination table.
capital_tt = aggregator.calculate_access_percentages(
    OD, target, 'city', thresholds=thresholds, capital=str(cap_idx)
)

# Relabel the catch-all bin, drop the shapefile-only columns and export.
capital_relabel = {'(10.0, 1000.0]': '> 10.0', '(10.0, 1000.0] pop': '> 10.0 pop'}
capital_tt.rename(columns=capital_relabel, inplace=True)
capital_tt.drop(['geometry', 'Shape_Leng', 'Shape_Area'], axis=1, inplace=True)
capital_xlsx = os.path.join(tables_path, "4Travel Time to Capital.xlsx")
capital_tt.to_excel(capital_xlsx, index=False)

**Time to drive to the closest land border point**

In [30]:
# Travel-time bins for the 'border' destination group of the OD matrix.
border_tt = aggregator.calculate_access_percentages(OD, target, 'border', thresholds=thresholds)

# Tidy for export: friendlier label for the last bin, no shapefile-only columns.
border_relabel = {'(10.0, 1000.0]': '> 10.0', '(10.0, 1000.0] pop': '> 10.0 pop'}
border_tt.rename(columns=border_relabel, inplace=True)
border_tt.drop(['geometry', 'Shape_Leng', 'Shape_Area'], axis=1, inplace=True)
border_xlsx = os.path.join(tables_path, "6Travel Time to Nearest Border.xlsx")
border_tt.to_excel(border_xlsx, index=False)

**Time to drive to the closest airport**

In [31]:
# Travel-time bins for the 'airport' destination group of the OD matrix.
airport_tt = aggregator.calculate_access_percentages(OD, target, 'airport', thresholds=thresholds)

# Tidy for export: friendlier label for the last bin, no shapefile-only columns.
airport_relabel = {'(10.0, 1000.0]': '> 10.0', '(10.0, 1000.0] pop': '> 10.0 pop'}
airport_tt.rename(columns=airport_relabel, inplace=True)
airport_tt.drop(['geometry', 'Shape_Leng', 'Shape_Area'], axis=1, inplace=True)
airport_xlsx = os.path.join(tables_path, "5Travel Time to Nearest Airport.xlsx")
airport_tt.to_excel(airport_xlsx, index=False)

**Time to drive to the closest port**

In [32]:
# Travel-time bins for the 'port' destination group of the OD matrix.
port_tt = aggregator.calculate_access_percentages(OD, target, 'port', thresholds=thresholds)

# Tidy for export: friendlier label for the last bin, no shapefile-only columns.
port_relabel = {'(10.0, 1000.0]': '> 10.0', '(10.0, 1000.0] pop': '> 10.0 pop'}
port_tt.rename(columns=port_relabel, inplace=True)
port_tt.drop(['geometry', 'Shape_Leng', 'Shape_Area'], axis=1, inplace=True)
port_xlsx = os.path.join(tables_path, "7Travel Time to Nearest Port.xlsx")
port_tt.to_excel(port_xlsx, index=False)

#### Aggregate to the country-level

In [33]:
# Collapse each district table into one national row. The first call starts
# the summary; every later call receives the running table and returns it
# extended with its own labelled row.
country_level = aggregator.agregate_to_country(rural_access, "Rural pop. with access to a main city")
for access_table, row_label in [
    (city_tt, "Travel time to closest city"),
    (capital_tt, "Travel time to capital"),
    (border_tt, "Travel time to closest border"),
    (airport_tt, "Travel time to closest airport"),
    (port_tt, "Travel time to closest port"),
]:
    country_level = aggregator.agregate_to_country(access_table, row_label, country_level)

In [34]:
# Display the national summary: one row per indicator, one column per
# travel-time bin.
country_level

Unnamed: 0,"(0.0, 0.5] pop","(0.5, 1.0] pop","(1.0, 1.5] pop","(1.5, 2.0] pop","(2.0, 3.0] pop","(3.0, 4.0] pop","(4.0, 5.0] pop","(5.0, 6.0] pop","(6.0, 7.0] pop","(7.0, 10.0] pop",> 10.0 pop
Rural pop. with access to a main city,0.00826,0.002393,0.000993,0.000659,0.004425,0.012436,0.015904,0.027013,0.037339,0.084529,0.80605
Travel time to closest city,0.338438,0.001467,0.000608,0.000404,0.002711,0.007619,0.009745,0.019024,0.026365,0.053229,0.54039
Travel time to capital,0.300808,0.001323,0.000427,0.000359,0.00267,0.007596,0.009678,0.018971,0.026185,0.053328,0.578654
Travel time to closest border,0.009967,0.004206,0.003634,0.004138,0.006985,0.010286,0.035063,0.295098,0.020523,0.061829,0.548271
Travel time to closest airport,0.345228,0.002627,0.001037,0.000906,0.003804,0.008412,0.012969,0.020006,0.024086,0.065455,0.51547
Travel time to closest port,0.338004,0.001935,0.000568,0.000388,0.002452,0.007374,0.00959,0.019064,0.025739,0.05368,0.541206


#### Population-weighted average at the country level

In [35]:
# The 1 km population raster supplies the weights for all weighted averages below.
raster_path = wp_1km

In [36]:
# Display the population raster path as a sanity check.
raster_path

'/home/wb514197/data/INFRA_SAP/MRT/WP_2020_1km.tif'

In [37]:
# Display the output folder that holds the travel-time rasters read next.
out_folder

'/home/wb514197/data/INFRA_SAP/MRT/output'

In [39]:
# National population-weighted averages, one per travel-time raster. The first
# call creates the table; each later call is given the running table to extend.
travel_time_dir = os.path.join(out_folder, 'travel_time')
national_average = aggregator.pop_weighted_average_national(
    os.path.join(travel_time_dir, 'cities_min_tt.tif'), raster_path, 'Travel time to closest city'
)
for tt_file, row_label in [
    ('capital_tt.tif', 'Travel time to capital'),
    ('borders_min_tt.tif', 'Travel time to closest border'),
    ('airport_min_tt.tif', 'Travel time to closest airport'),
    ('port_min_tt.tif', 'Travel time to closest port'),
]:
    national_average = aggregator.pop_weighted_average_national(
        os.path.join(travel_time_dir, tt_file), raster_path, row_label, national_average
    )

In [40]:
# Append the national averages to the bin shares, matching rows by index.
# presumably the index holds the indicator labels; the rural-access row has no
# counterpart in national_average -- TODO confirm
# NOTE(review): rebinding country_level means a second run joins the same
# columns again.
country_level = country_level.join(national_average)

In [41]:
# At national level the bin columns hold population shares, so the trailing
# " pop" is dropped from each bin label before export.
bin_labels = [
    '(0.0, 0.5]', '(0.5, 1.0]', '(1.0, 1.5]', '(1.5, 2.0]',
    '(2.0, 3.0]', '(3.0, 4.0]', '(4.0, 5.0]', '(5.0, 6.0]',
    '(6.0, 7.0]', '(7.0, 10.0]', '> 10.0',
]
strip_pop_suffix = {f'{label} pop': label for label in bin_labels}
country_level.rename(columns=strip_pop_suffix, inplace=True)

# The index carries the indicator names, so it is written to the sheet.
national_xlsx = os.path.join(tables_path, "9National Level Aggregates.xlsx")
country_level.to_excel(national_xlsx, index=True)

#### Population-weighted average by district

In [42]:
# District-level population-weighted averages. The first call creates the
# table; every later call receives the running table and adds one field.
travel_time_dir = os.path.join(out_folder, 'travel_time')
travel_distance_dir = os.path.join(out_folder, 'travel_distance')

weighted_average = aggregator.pop_weighted_average(
    target, os.path.join(travel_time_dir, 'cities_min_tt.tif'), raster_path, 'tt_min_city'
)

# Remaining travel-time rasters (running table passed as `table=`).
for tif_name, field_name in [
    ('capital_tt.tif', 'tt_capital'),
    ('borders_min_tt.tif', 'tt_border'),
    ('airport_min_tt.tif', 'tt_airport'),
    ('port_min_tt.tif', 'tt_port'),
]:
    weighted_average = aggregator.pop_weighted_average(
        target, os.path.join(travel_time_dir, tif_name), raster_path, field_name, table=weighted_average
    )

# Travel-distance rasters (running table passed positionally, as in the
# original calls).
for tif_name, field_name in [
    ('cities_min_dist.tif', 'dist_cities'),
    ('airport_min_dist.tif', 'dist_airport'),
    ('port_min_dist.tif', 'dist_ports'),
    ('borders_min_dist.tif', 'dist_borders'),
    ('capital_dist.tif', 'dist_capital'),
]:
    weighted_average = aggregator.pop_weighted_average(
        target, os.path.join(travel_distance_dir, tif_name), raster_path, field_name, weighted_average
    )

#### Market Access

In [43]:
# Folder holding the access_cities_d*.tif rasters read in the next cell.
out_access = os.path.join(out_folder, 'access')

In [44]:
# One population-weighted average per market-access raster (file suffixes d5
# to d9), each stored in a field named after its suffix (ma_5 to ma_9).
# The generator is consumed in order, so the calls run d5 -> d9 as before.
ma5, ma6, ma7, ma8, ma9 = (
    aggregator.pop_weighted_average(
        target,
        os.path.join(out_access, f"access_cities_d{suffix}.tif"),
        raster_path,
        f"ma_{suffix}",
    )
    for suffix in range(5, 10)
)

In [45]:
# Put the five market-access fields side by side: ma5 contributes all of its
# columns, the other frames only their own ma_* column. concat(axis=1) matches
# rows by index.
# presumably all five frames share the same row index -- TODO confirm
extra_ma_columns = [
    frame[[field]]
    for frame, field in ((ma6, 'ma_6'), (ma7, 'ma_7'), (ma8, 'ma_8'), (ma9, 'ma_9'))
]
ma = pd.concat([ma5, *extra_ma_columns], axis=1)

#### Combine district-level data

In [46]:
# Start the district summary: travel-time/distance averages plus the five
# market-access fields, matched on OBJECTID.
district_summary = weighted_average.merge(ma[['OBJECTID','ma_5','ma_6','ma_7','ma_8','ma_9']], on="OBJECTID")

In [47]:
# Check which columns the RAI table holds before selecting from it.
rai.columns

Index(['OBJECTID', 'ISO_A2', 'WB_ADM1_CO', 'WB_ADM0_CO', 'WB_ADM0_NA',
       'WB_ADM1_NA', 'WB_ADM2_CO', 'WB_ADM2_NA', 'ISO3', 'POP', 'RAI_POP_1',
       'RAI_POP_2', 'RAI_POP_3', 'RAI_POP_4', 'RAI_Percent_1', 'RAI_Percent_2',
       'RAI_Percent_3', 'RAI_Percent_4'],
      dtype='object')

In [48]:
# Add the four RAI shares to the district summary.
# To start from the saved table instead of the in-memory one:
# rai = pd.read_excel(os.path.join(tables_path, '1Rural Access to Roads.xlsx'))
rai_share_cols = [f'RAI_Percent_{level}' for level in range(1, 5)]
rai = rai[['OBJECTID'] + rai_share_cols]
district_summary = district_summary.merge(rai, on='OBJECTID')

In [49]:
# Regional-hub travel times come from table 8, which this notebook reads but
# does not create. Shorten its column names and keep only the key and the two
# hub fields. (Original note on the hub-name column: Name1 / 'Regional Hub Name'.)
hubs = (
    pd.read_excel(os.path.join(tables_path, '8Travel Time to Regional Hub.xlsx'))
    .rename(columns={'Time (hrs)':'tt_regional', 'Regional Hub Name':'hub_name'})
    [['OBJECTID','tt_regional','hub_name']]
)

# Left merge: districts without a hub entry stay in the summary.
district_summary = district_summary.merge(hubs, on='OBJECTID', how='left')

In [50]:
# Save the district summary, geometry included, as a shapefile.
# NOTE(review): shapefile attribute names are limited to 10 characters, so
# fields such as 'RAI_Percent_1' and 'tt_regional' will presumably be
# truncated or renamed on write -- confirm the field names in the output.
district_summary.to_file(os.path.join(out_folder,'admin_join.shp'), driver='ESRI Shapefile')

In [51]:
# Attribute-only version of the district summary for Excel. drop() returns a
# new object, so district_summary keeps its geometry.
district_table = district_summary.drop(columns=['geometry', 'Shape_Leng', 'Shape_Area'])
district_xlsx = os.path.join(tables_path, "10District Level Summary.xlsx")
district_table.to_excel(district_xlsx, index=False)