## Access Maps for Argentina

Create summary statistics at the admin-2 and country levels.

In [1]:
import os, sys, time, importlib

import geopandas as gpd
import pandas as pd
import networkx as nx
sys.path.append('/home/wb514197/Repos/GOSTnets')

import GOSTnets as gn
import rasterio as rio
from osgeo import gdal
import GOSTnets.calculate_od_raw as calcOD
import numpy as np

%load_ext autoreload
%autoreload 2

sys.path.append('/home/wb514197/Repos/INFRA_SAP')
from infrasap import aggregator
from shapely.wkt import loads

In [33]:
# Input and output locations. Inputs live on the shared drive, keyed by ISO3 code.
iso = 'ARG'
data_dir = f'/home/public/Data/PROJECTS/INFRA_SAP/{iso}'

# raster_path is the raster handed to the pop_weighted_average* helpers below
# (presumably the 2020 1 km population grid, going by the file name);
# urban_extents is passed to calculate_access_percentages for the rural split.
raster_path, urban_extents = (
    os.path.join(data_dir, file_name)
    for file_name in ('WP_2020_1km.tif', 'urban_extents.shp')
)

# Everything this notebook writes goes under the current user's home directory.
output_path = os.path.join(os.path.expanduser("~"), 'data/argentina')

# Alternative inputs, currently unused and kept for reference:
# urban_extents = gpd.read_file('/home/public/Data/PROJECTS/infraSAP_URY/Urban/WP_2020_urban.shp')
# pop_file = os.path.join(data_dir, 'wp_2020_1km.shp')
# G_path = f'/home/public/Data/PROJECTS/LAC_GOSTNets/graphs/{iso}/G_{iso}.pickle'
# global_admin = '/home/public/Data/GLOBAL/ADMIN/g2015_0_simplified.shp'
# osm_pbf = os.path.join(data_dir, 'national_complete.osm.pbf')

In [3]:
# Folder that receives every exported table (xlsx / csv).
tables_path = os.path.join(output_path, 'tables')
# makedirs also creates missing parent folders (os.mkdir raised FileNotFoundError when
# output_path itself did not exist yet) and exist_ok=True makes re-runs a no-op.
os.makedirs(tables_path, exist_ok=True)

In [4]:
# Admin polygons; every per-unit statistic below is computed against this layer.
admin_shp = os.path.join(data_dir, 'admin.shp')
target = gpd.read_file(admin_shp)

In [None]:
# Origin-destination table. The two header rows become a two-level column index;
# later cells select the first level by destination type (e.g. OD['city']) and the
# second by destination id (e.g. '34').
od_csv = os.path.join(output_path, 'OD_06_16_tt.csv')
OD = pd.read_csv(od_csv, header=[0, 1], index_col=0)

**Percentage of rural population with access to a main city (>50k inhabitants), by travel-time band (hours)**

In [8]:
# Parameters for the access-percentage tables.
# NOTE(review): dest_type and rural are not referenced again in the visible cells;
# the call that follows passes the literals 'city' and True instead.
dest_type = 'city'
rural = True

# Minimum population for a destination to count as a "main city".
pop_threshold = 50000

# Bin edges for the travel-time bands. The last band, (10, 50], is relabelled
# '> 10.0' in the rename steps further down.
# Earlier variant of the edges: [0,15,30,60,90,120,180,240,300,3000]
thresholds = [0, 0.5, 1, 1.5, 2, 3, 4, 5, 6, 7, 10, 50]

In [86]:
# Rural-only access to cities above pop_threshold, binned by the travel-time thresholds.
rural_access = aggregator.calculate_access_percentages(
    OD,
    target,
    'city',
    True,
    urban_extents,
    pop_threshold,
    thresholds,
)

In [89]:
# Inspect the column labels of the rural access table.
# NOTE: the recorded output (In [89]) was produced after the rename/drop cell that
# follows (In [88]), so it already shows '> 10.0' and no geometry/shape columns.
rural_access.columns

Index(['OBJECTID', 'ISO_A2', 'WB_ADM1_CO', 'WB_ADM0_CO', 'WB_ADM0_NA',
       'WB_ADM1_NA', 'WB_ADM2_CO', 'WB_ADM2_NA', 'ISO3', '(0.0, 0.5]',
       '(0.5, 1.0]', '(1.0, 1.5]', '(1.5, 2.0]', '(2.0, 3.0]', '(3.0, 4.0]',
       '(4.0, 5.0]', '(5.0, 6.0]', '(6.0, 7.0]', '(7.0, 10.0]', '> 10.0',
       '(0.0, 0.5] pop', '(0.5, 1.0] pop', '(1.0, 1.5] pop', '(1.5, 2.0] pop',
       '(2.0, 3.0] pop', '(3.0, 4.0] pop', '(4.0, 5.0] pop', '(5.0, 6.0] pop',
       '(6.0, 7.0] pop', '(7.0, 10.0] pop', '> 10.0 pop'],
      dtype='object')

In [88]:
# Relabel the last travel-time band (10-50 h, from the final two thresholds) as
# '> 10.0' and strip the geometry/shape columns ahead of the export.
# The frame is rebound rather than mutated in place and missing columns are ignored,
# so re-running this cell no longer raises KeyError on the already-dropped columns.
rural_access = (
    rural_access
    .rename(columns={
        '(10.0, 50.0]': '> 10.0',
        '(10.0, 50.0] pop': '> 10.0 pop',
    })
    .drop(columns=['geometry', 'Shape_Leng', 'Shape_Area'], errors='ignore')
)

In [90]:
# Table 2: admin-level rural access to cities, written without the index column.
rural_access_xlsx = os.path.join(tables_path, "2Rural Access to Cities.xlsx")
rural_access.to_excel(rural_access_xlsx, index=False)

In [95]:
# Start the country-level summary with the rural access row.
# ("agregate_to_country" is the helper's actual spelling.)
country_level = aggregator.agregate_to_country(
    rural_access,
    "Rural pop. with access to a main city",
)

In [96]:
# Display the country-level shares per travel-time band.
# NOTE(review): the recorded output puts ~87% of the rural population in '> 10.0';
# worth confirming this is not an artefact of unreachable or missing origins.
country_level

Unnamed: 0,"(0.0, 0.5] pop","(0.5, 1.0] pop","(1.0, 1.5] pop","(1.5, 2.0] pop","(2.0, 3.0] pop","(3.0, 4.0] pop","(4.0, 5.0] pop","(5.0, 6.0] pop","(6.0, 7.0] pop","(7.0, 10.0] pop",> 10.0 pop
Rural pop. with access to a main city,0.006905,0.003909,0.005783,0.00565,0.014875,0.021787,0.010547,0.005887,0.005814,0.04651,0.872333


**Travel time to closest city**

In [80]:
# Same binning as the rural table, but for the whole population
# (no rural flag, urban extents or population threshold passed).
city_tt = aggregator.calculate_access_percentages(
    OD,
    target,
    'city',
    thresholds=thresholds,
)

In [91]:
# Relabel the last band as '> 10.0', strip geometry/shape columns and export table 3.
# Rebinding the frame with errors='ignore' keeps the cell re-runnable: the in-place
# drop raised KeyError on a second execution because the columns were already gone.
city_tt = (
    city_tt
    .rename(columns={
        '(10.0, 50.0]': '> 10.0',
        '(10.0, 50.0] pop': '> 10.0 pop',
    })
    .drop(columns=['geometry', 'Shape_Leng', 'Shape_Area'], errors='ignore')
)
city_tt.to_excel(os.path.join(tables_path, "3Travel Time to Nearest City.xlsx"), index=False)

In [97]:
# Add the all-population city row to the running country-level table.
country_level = aggregator.agregate_to_country(
    city_tt,
    "Travel time to closest city",
    country_level,
)

In [110]:
# Preview the country-level table after the city row was added.
country_level.head()

Unnamed: 0,"(0.0, 0.5] pop","(0.5, 1.0] pop","(1.0, 1.5] pop","(1.5, 2.0] pop","(2.0, 3.0] pop","(3.0, 4.0] pop","(4.0, 5.0] pop","(5.0, 6.0] pop","(6.0, 7.0] pop","(7.0, 10.0] pop",> 10.0 pop
Rural pop. with access to a main city,0.006905,0.003909,0.005783,0.00565,0.014875,0.021787,0.010547,0.005887,0.005814,0.04651,0.872333
Travel time to closest city,0.287208,0.240211,0.156113,0.091733,0.093961,0.065299,0.031904,0.012385,0.005753,0.012814,0.00262


In [100]:
# Start the admin-level table of population-weighted averages with the
# travel time to the nearest city (column 'tt_min_city').
cities_tt_raster = os.path.join(output_path, 'travel_time', 'cities_min_tt.tif')
weighted_average = aggregator.pop_weighted_average(
    target,
    cities_tt_raster,
    raster_path,
    'tt_min_city',
)

In [103]:
# Start the national table of population-weighted averages with the city travel time.
national_average = aggregator.pop_weighted_average_national(
    os.path.join(output_path, 'travel_time', 'cities_min_tt.tif'),
    raster_path,
    'Travel time to nearest city',
)

In [105]:
# Display the national population-weighted averages collected so far.
national_average

Unnamed: 0,pop weighted average
Travel time to nearest city,1.441339


**Travel time to capital**

In [106]:
# Destination catalogue; only used by the (commented-out) capital lookup in the next cell.
dest_csv = os.path.join(output_path, 'destination_all.csv')
dest_all = pd.read_csv(dest_csv)

In [117]:
# Lookup used to identify the capital (kept for reference, not executed):
# dest_all[dest_all.dest_type=='city'].sort_values('Pop', ascending=False)
# The capital is destination id '34'; that label is what the following cells select.

In [118]:
# Single-column frame with the values for destination '34' (the capital),
# taken from the 'city' group of the OD table.
city_od = OD['city']
capital = city_od[['34']]

In [119]:
# Attach the capital column to the origin geometries (index-aligned join).
# NOTE(review): origins_geom is not defined in any visible cell of this notebook,
# so this fails on a fresh kernel unless it is created elsewhere - TODO confirm source.
origins_tt_cap = origins_geom.join(capital)

In [123]:
# Write the '34' (capital) column of the origins to travel_time/capital_tt.tif.
# raster_path is passed along as well, presumably as the template grid - TODO confirm.
# Disabled leftovers from the Uruguay run:
#   output_path = '/home/wb514197/data/uruguay'
#   origins_tt_cap = origins_tt_cap.to_crs('epsg:32721')
capital_tt_raster = os.path.join(output_path, 'travel_time', 'capital_tt.tif')
aggregator.rasterize_gdf(origins_tt_cap, '34', raster_path, capital_tt_raster)

In [132]:
# Access bands for travel to the capital only (destination id '34').
capital_tt = aggregator.calculate_access_percentages(
    OD,
    target,
    'city',
    thresholds=thresholds,
    capital='34',
)

In [133]:
# Preview the capital table before the last band is renamed
# (the recorded output still shows the '(10.0, 50.0] pop' label).
capital_tt.head()

Unnamed: 0,OBJECTID,ISO_A2,WB_ADM1_CO,WB_ADM0_CO,WB_ADM0_NA,WB_ADM1_NA,WB_ADM2_CO,WB_ADM2_NA,Shape_Leng,Shape_Area,...,"(0.5, 1.0] pop","(1.0, 1.5] pop","(1.5, 2.0] pop","(2.0, 3.0] pop","(3.0, 4.0] pop","(4.0, 5.0] pop","(5.0, 6.0] pop","(6.0, 7.0] pop","(7.0, 10.0] pop","(10.0, 50.0] pop"
0,2517,AR,429,12,Argentina,Buenos Aires,4379,25 De Mayo,507806.080218,7186351000.0,...,0.0,0.0,0.0,3886.43912,25192.279785,3640.174248,3167.9461,1250.299436,2.335726,0.0
1,2518,AR,429,12,Argentina,Buenos Aires,4380,9 De Julio,368311.510262,6476091000.0,...,0.0,0.0,0.0,0.0,25582.954223,16954.93669,6024.639596,1284.218943,0.0,0.0
2,2519,AR,429,12,Argentina,Buenos Aires,4381,Adolfo Alsina,502875.251077,9410333000.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16008.687161,3226.395393
3,2520,AR,429,12,Argentina,Buenos Aires,4382,Adolfo Gonzales Chaves,436878.099149,6126381000.0,...,0.0,0.0,0.0,0.0,0.0,0.0,110.785346,9807.399683,2180.832307,0.0
4,2521,AR,429,12,Argentina,Buenos Aires,4383,Alberti,211023.180553,1659956000.0,...,0.0,0.0,0.0,6785.9107,4312.131714,158.624149,0.0,0.0,0.0,0.0


In [135]:
# Relabel the last band as '> 10.0', strip geometry/shape columns and export table 4.
# Rebinding the frame with errors='ignore' keeps the cell re-runnable: the in-place
# drop raised KeyError on a second execution because the columns were already gone.
capital_tt = (
    capital_tt
    .rename(columns={
        '(10.0, 50.0]': '> 10.0',
        '(10.0, 50.0] pop': '> 10.0 pop',
    })
    .drop(columns=['geometry', 'Shape_Leng', 'Shape_Area'], errors='ignore')
)
capital_tt.to_excel(os.path.join(tables_path, "4Travel Time to Capital.xlsx"), index=False)

In [127]:
# Add 'tt_capital' to the running admin-level table of weighted averages.
weighted_average = aggregator.pop_weighted_average(
    target,
    os.path.join(output_path, 'travel_time', 'capital_tt.tif'),
    raster_path,
    'tt_capital',
    table=weighted_average,
)

In [129]:
# Add the capital row to the running national averages table.
national_average = aggregator.pop_weighted_average_national(
    os.path.join(output_path, 'travel_time', 'capital_tt.tif'),
    raster_path,
    'Travel time to capital',
    national_average,
)

In [137]:
# Add the capital row to the running country-level table.
country_level = aggregator.agregate_to_country(
    capital_tt,
    "Travel time to capital",
    country_level,
)

**Time to drive to the closest land border point**

In [138]:
# Access bands for the 'border' destination group of the OD table.
border_tt = aggregator.calculate_access_percentages(
    OD,
    target,
    'border',
    thresholds=thresholds,
)

In [139]:
# Relabel the last band as '> 10.0', strip geometry/shape columns and export table 6.
# Rebinding the frame with errors='ignore' keeps the cell re-runnable: the in-place
# drop raised KeyError on a second execution because the columns were already gone.
border_tt = (
    border_tt
    .rename(columns={
        '(10.0, 50.0]': '> 10.0',
        '(10.0, 50.0] pop': '> 10.0 pop',
    })
    .drop(columns=['geometry', 'Shape_Leng', 'Shape_Area'], errors='ignore')
)
border_tt.to_excel(os.path.join(tables_path, "6Travel Time to Nearest Border.xlsx"), index=False)

In [142]:
# Add 'tt_border' to the running admin-level table of weighted averages.
weighted_average = aggregator.pop_weighted_average(
    target,
    os.path.join(output_path, 'travel_time', 'borders_min_tt.tif'),
    raster_path,
    'tt_border',
    table=weighted_average,
)

In [143]:
# Add the border row to the running country-level table.
country_level = aggregator.agregate_to_country(
    border_tt,
    "Travel time to closest border",
    country_level,
)

In [144]:
# Add the border row to the running national averages table.
national_average = aggregator.pop_weighted_average_national(
    os.path.join(output_path, 'travel_time', 'borders_min_tt.tif'),
    raster_path,
    'Travel time to nearest border',
    national_average,
)

**Time to drive to the closest airport**

In [37]:
# Access bands for the 'airport' destination group of the OD table.
airport_tt = aggregator.calculate_access_percentages(
    OD,
    target,
    'airport',
    thresholds=thresholds,
)

In [38]:
# Relabel the last band as '> 10.0', strip geometry/shape columns and export table 5.
# Rebinding the frame with errors='ignore' keeps the cell re-runnable: the in-place
# drop raised KeyError on a second execution because the columns were already gone.
airport_tt = (
    airport_tt
    .rename(columns={
        '(10.0, 50.0]': '> 10.0',
        '(10.0, 50.0] pop': '> 10.0 pop',
    })
    .drop(columns=['geometry', 'Shape_Leng', 'Shape_Area'], errors='ignore')
)
airport_tt.to_excel(os.path.join(tables_path, "5Travel Time to Nearest Airport.xlsx"), index=False)

In [39]:
# Add 'tt_airport' (v2 airport raster) to the running admin-level table.
# The table is now passed on via `table=weighted_average`, matching the capital,
# border and port cells. Without it, a top-to-bottom run restarted the table here
# and lost tt_min_city / tt_capital / tt_border, which the district summary further
# down expects to be present.
# NOTE: this requires the earlier weighted-average cells to have been run first.
weighted_average = aggregator.pop_weighted_average(
    target,
    os.path.join(output_path, 'travel_time', 'airport_min_tt_v2.tif'),
    raster_path,
    'tt_airport',
    table=weighted_average,
)

In [40]:
# Add the airport row to the running country-level table.
# country_level is now passed as the third argument, as in the city, capital,
# border and port cells. Previously a top-to-bottom run restarted the table here,
# so the final "country agg.csv" export lost every row added before this point.
# NOTE: this requires the earlier country-level cells to have been run first.
country_level = aggregator.agregate_to_country(
    airport_tt,
    "Travel time to closest airport",
    country_level,
)

In [41]:
# Add the airport row to the running national averages table.
# national_average is now passed as the fourth argument, as in the capital, border
# and port cells. Previously a top-to-bottom run restarted the table here and
# dropped the city, capital and border rows from the final export.
# NOTE: this requires the earlier national-average cells to have been run first.
national_average = aggregator.pop_weighted_average_national(
    os.path.join(output_path, 'travel_time', 'airport_min_tt_v2.tif'),
    raster_path,
    'Travel time to nearest airport',
    national_average,
)

In [50]:
# Strip geometry/shape columns ahead of the CSV export.
# Rebinding with errors='ignore' makes the cell safe to re-run (the in-place drop
# raised KeyError the second time).
# NOTE(review): the later port/distance cells keep appending to this same table,
# which from here on has no geometry column.
weighted_average = weighted_average.drop(
    columns=['geometry', 'Shape_Leng', 'Shape_Area'],
    errors='ignore',
)

In [51]:
# Snapshot the three running tables under "Airport_V2_*" names.
for frame, csv_name in (
    (weighted_average, "Airport_V2_WeightedAvg.csv"),
    (country_level, "Airport_V2_CountryLevel.csv"),
    (national_average, "Airport_V2_NatAvg.csv"),
):
    frame.to_csv(os.path.join(tables_path, csv_name))

**Time to drive to the closest port**

In [152]:
# Access bands for the 'port' destination group of the OD table.
port_tt = aggregator.calculate_access_percentages(
    OD,
    target,
    'port',
    thresholds=thresholds,
)

In [153]:
# Relabel the last band as '> 10.0', strip geometry/shape columns and export table 7.
# Rebinding the frame with errors='ignore' keeps the cell re-runnable: the in-place
# drop raised KeyError on a second execution because the columns were already gone.
port_tt = (
    port_tt
    .rename(columns={
        '(10.0, 50.0]': '> 10.0',
        '(10.0, 50.0] pop': '> 10.0 pop',
    })
    .drop(columns=['geometry', 'Shape_Leng', 'Shape_Area'], errors='ignore')
)
port_tt.to_excel(os.path.join(tables_path, "7Travel Time to Nearest Port.xlsx"), index=False)

In [154]:
# Add 'tt_port' to the running admin-level table of weighted averages.
weighted_average = aggregator.pop_weighted_average(
    target,
    os.path.join(output_path, 'travel_time', 'port_min_tt.tif'),
    raster_path,
    'tt_port',
    table=weighted_average,
)

In [155]:
# Add the port row to the running country-level table.
country_level = aggregator.agregate_to_country(
    port_tt,
    "Travel time to closest port",
    country_level,
)

In [156]:
# Add the port row to the running national averages table.
national_average = aggregator.pop_weighted_average_national(
    os.path.join(output_path, 'travel_time', 'port_min_tt.tif'),
    raster_path,
    'Travel time to nearest port',
    national_average,
)

In [165]:
# Export the accumulated country-level shares per travel-time band.
country_agg_csv = os.path.join(tables_path, "country agg.csv")
country_level.to_csv(country_agg_csv)

In [166]:
# Export the accumulated national population-weighted averages.
country_avg_csv = os.path.join(tables_path, "country agg avg.csv")
national_average.to_csv(country_avg_csv)

**Distances to the nearest city, airport, port, border and capital**

In [44]:
# Point output_path at the travel-distance rasters for the cells that follow.
# NOTE: this rebinds the name used by all earlier cells; re-running those cells
# after this one makes them look in the wrong folder.
output_path = os.path.join(
    os.path.expanduser("~"),
    'data/argentina/travel_distance',
)

In [46]:
# Add 'dist_airport' from the v2 airport distance raster to the running table.
# NOTE(review): the next cell adds 'dist_airport' again from airport_min_dist.tif;
# run top to bottom, both write the same column name - confirm which one is wanted.
airport_dist_v2_raster = os.path.join(output_path, 'airport_min_dist_v2.tif')
weighted_average = aggregator.pop_weighted_average(
    target,
    airport_dist_v2_raster,
    raster_path,
    'dist_airport',
    weighted_average,
)

In [173]:
# Add one distance column per destination type to the running table,
# in the same order as before: cities, airport, ports, borders, capital.
# NOTE(review): 'dist_capital' is read from a file named capital_tt.tif inside the
# travel_distance folder - confirm that this file really holds distances.
for raster_name, column_name in (
    ('cities_min_dist.tif', 'dist_cities'),
    ('airport_min_dist.tif', 'dist_airport'),
    ('port_min_dist.tif', 'dist_ports'),
    ('borders_min_dist.tif', 'dist_borders'),
    ('capital_tt.tif', 'dist_capital'),
):
    weighted_average = aggregator.pop_weighted_average(
        target,
        os.path.join(output_path, raster_name),
        raster_path,
        column_name,
        weighted_average,
    )

In [174]:
# Preview the admin-level table with all tt_* and dist_* columns.
weighted_average.head()

Unnamed: 0,OBJECTID,ISO_A2,WB_ADM1_CO,WB_ADM0_CO,WB_ADM0_NA,WB_ADM1_NA,WB_ADM2_CO,WB_ADM2_NA,Shape_Leng,Shape_Area,...,tt_min_city,tt_capital,tt_border,tt_airport,tt_port,dist_cities,dist_airport,dist_ports,dist_borders,dist_capital
0,2517,AR,429,12,Argentina,Buenos Aires,4379,25 De Mayo,507806.080218,7186351000.0,...,3.655722,3.844321,8.697839,16.947489,4.38651,140.176264,1071.103884,214.612438,464.911083,203.439767
1,2518,AR,429,12,Argentina,Buenos Aires,4380,9 De Julio,368311.510262,6476091000.0,...,3.046394,4.293924,8.757491,17.007141,4.642217,109.642151,1119.41968,267.223236,513.234792,270.85731
2,2519,AR,429,12,Argentina,Buenos Aires,4381,Adolfo Alsina,502875.251077,9410333000.0,...,4.374728,9.59901,14.075741,22.325391,4.843131,180.9457,1413.497784,246.67895,806.186319,530.395344
3,2520,AR,429,12,Argentina,Buenos Aires,4382,Adolfo Gonzales Chaves,436878.099149,6126381000.0,...,3.246187,6.547094,11.815472,20.065121,3.634292,137.368106,1364.892424,178.78126,758.699623,443.002546
4,2521,AR,429,12,Argentina,Buenos Aires,4383,Alberti,211023.180553,1659956000.0,...,2.771077,3.050295,7.522046,15.771696,3.398154,99.877353,1033.013878,184.912156,426.821077,189.30003


In [180]:
# Combine the weighted averages with the ma_5..ma_9 columns, matched on OBJECTID.
# NOTE(review): `ma` is only built in a later cell of this notebook, so this line
# depends on out-of-order execution.
ma_columns = ['OBJECTID','ma_5','ma_6','ma_7','ma_8','ma_9']
district = weighted_average.merge(ma[ma_columns], on="OBJECTID")

In [182]:
# Check which columns ended up in the merged district table.
district.columns

Index(['OBJECTID', 'ISO_A2', 'WB_ADM1_CO', 'WB_ADM0_CO', 'WB_ADM0_NA',
       'WB_ADM1_NA', 'WB_ADM2_CO', 'WB_ADM2_NA', 'Shape_Leng', 'Shape_Area',
       'ISO3', 'geometry', 'tt_min_city', 'tt_capital', 'tt_border',
       'tt_airport', 'tt_port', 'dist_cities', 'dist_airport', 'dist_ports',
       'dist_borders', 'dist_capital', 'ma_5', 'ma_6', 'ma_7', 'ma_8', 'ma_9'],
      dtype='object')

In [183]:
# Strip geometry/shape columns and export table 10 without the index column.
# Rebinding with errors='ignore' keeps the cell re-runnable and tolerant of a table
# whose geometry columns were already removed upstream (the in-place drop raised
# KeyError in both situations).
district = district.drop(
    columns=['geometry', 'Shape_Leng', 'Shape_Area'],
    errors='ignore',
)
district.to_excel(os.path.join(tables_path, "10District Level Summary.xlsx"), index=False)

In [64]:
# Reload the admin polygons before the access (ma_*) aggregation.
admin_shp = os.path.join(data_dir, 'admin.shp')
target = gpd.read_file(admin_shp)

In [66]:
# Point output_path at the access rasters for the ma_* cells that follow.
# NOTE: this is another rebinding of output_path; earlier cells re-run after this
# one will resolve their files against this folder.
output_path = os.path.join(
    os.path.expanduser("~"),
    'data/argentina/access_2',
)

In [67]:
# One population-weighted average per access raster d5..d9, evaluated in that
# order; each result holds its value in the matching 'ma_<d>' column.
ma5, ma6, ma7, ma8, ma9 = (
    aggregator.pop_weighted_average(
        target,
        os.path.join(output_path, f"access_cities_d{d}.tif"),
        raster_path,
        f"ma_{d}",
    )
    for d in (5, 6, 7, 8, 9)
)

In [68]:
# Side-by-side concat: ma5 contributes all of its columns, the other frames only
# their own ma_N column.
ma_parts = [ma5] + [
    frame[[column]]
    for frame, column in ((ma6, 'ma_6'), (ma7, 'ma_7'), (ma8, 'ma_8'), (ma9, 'ma_9'))
]
ma = pd.concat(ma_parts, axis=1)

In [69]:
# Strip the geometry column ahead of the CSV export. Rebinding with errors='ignore'
# makes the cell safe to re-run (the in-place drop raised KeyError the second time).
ma = ma.drop(columns=['geometry'], errors='ignore')

In [70]:
# Export the ma_* table without the index column.
ma_csv = os.path.join(tables_path, "MA_seconds.csv")
ma.to_csv(ma_csv, index=False)

In [432]:
# NOTE(review): from here on the notebook reads and writes Uruguay files, which
# looks like a leftover from another country run - confirm before re-using.
# Previously used layer: '/home/wb514197/data/uruguay/admin/gadm36_URY_2.shp'
admin_path = '/home/wb514197/data/uruguay/weighted/URY_Admin2.shp'

# Replaces the Argentina admin layer held in `target`.
target = gpd.read_file(admin_path)

In [433]:
# Keep every column except 'tt_cities_'.
keep_columns = target.columns != 'tt_cities_'
target = target.loc[:, keep_columns]

In [434]:
# Index-aligned joins onto the admin layer: city average, capital average, then
# every column of `ma` whose name contains 'ma'; finally two columns are renamed.
# NOTE(review): city_tt_average and capital_tt_average are not defined in any
# visible cell of this notebook.
target2 = target.join(city_tt_average[['tt_min_city']])
target2 = target2.join(capital_tt_average[['tt_capital']])
ma_mask = ['ma' in x for x in ma.columns]
target2 = target2.join(ma[ma.columns[ma_mask]])
target2 = target2.rename(columns={'tt_min_city':'tt_cities', 'tt_ports_w':'tt_ports'})

In [435]:
# Rural access table whose RAI_Percent_* columns are joined in the next cell.
rai_csv = '/home/wb514197/data/uruguay/tables/Copy of 1. Rural Access to Roads.csv'
rai = pd.read_csv(rai_csv)

In [436]:
# Index-aligned join of the four RAI percentage columns.
rai_columns = ['RAI_Percent_1','RAI_Percent_2','RAI_Percent_3','RAI_Percent_4']
target2 = target2.join(rai[rai_columns])

In [437]:
# Point output_path at the Uruguay data folder for the regional-hub table.
# NOTE: yet another rebinding of output_path; no Argentina cell should be re-run
# after this one without resetting it.
output_path = os.path.join(
    os.path.expanduser("~"),
    'data/uruguay',
)

In [438]:
# Regional-hub table; its 'time' column is renamed to 'tt_regional'.
hubs_csv = os.path.join(output_path,'tables',"Travel time to regional hub.csv")
hubs = pd.read_csv(hubs_csv)
hubs = hubs.rename(columns={'time':'tt_regional'})

In [439]:
# Preview the regional-hub table (one row per GID_2 unit in the recorded output).
hubs.head()

Unnamed: 0.1,Unnamed: 0,GID_1,NAME_1,GID_2,NAME_2,centroid,ID,D_UID,tt_regional,CITY_NAME,CNTRY_NAME
0,0,URY.1_1,Artigas,URY.1.1_1,n.a2,POINT (-56.89476510388938 -30.23170494834372),0,107,6.778444,Parana,Argentina
1,1,URY.1_1,Artigas,URY.1.2_1,n.a3,POINT (-57.59252121326498 -30.35956950213746),1,107,4.676694,Parana,Argentina
2,2,URY.1_1,Artigas,URY.1.3_1,n.a4,POINT (-57.12652663901931 -30.51186350465093),2,107,5.904194,Parana,Argentina
3,3,URY.1_1,Artigas,URY.1.4_1,n.a5,POINT (-57.41064594134301 -30.48082143064122),3,107,5.152444,Parana,Argentina
4,4,URY.1_1,Artigas,URY.1.5_1,n.a6,POINT (-56.5665962192789 -30.4645331618747),4,107,6.083889,Parana,Argentina


In [441]:
# Attach the hub travel time and hub city/country names, matched on GID_2.
hub_columns = ['GID_2','tt_regional','CITY_NAME','CNTRY_NAME']
target2 = target2.merge(hubs[hub_columns], on='GID_2')

In [442]:
# Write the enriched admin layer as an ESRI Shapefile (v3 of the Uruguay layer).
target2.to_file(
    '/home/wb514197/data/uruguay/weighted/URY_Admin2_v3.shp',
    driver='ESRI Shapefile',
)