## Country-level INFRA-SAP


- Origins: Population grid (WorldPop 2020 at 1 km resolution, `WP_2020_1km.tif`)
- Destinations: Cities, airports, border crossings, and ports

Typical access analysis with two adjustments:

1. Extract different sets of destinations from the OD matrix
2. Join travel time to the origin grid based on the "NN with the fastest route" (not necessarily the closest NN)

In [1]:
import os, sys, time, importlib

import geopandas as gpd
import pandas as pd
import networkx as nx
sys.path.append('/home/wb514197/Repos/GOSTnets')

import GOSTnets as gn
import GOSTnets.calculate_od_raw as calcOD
from GOSTnets.load_osm import *
import rasterio as rio
from osgeo import gdal
import numpy as np
from shapely.geometry import Point

sys.path.append('/home/wb514197/Repos/INFRA_SAP')
from infrasap import aggregator

from shapely.wkt import loads

%load_ext autoreload
%autoreload 2

In [2]:
# Run configuration for a single country.
country = 'djibouti'  # lowercase name; used to build the OSM extract file name
iso3 = 'DJI'          # ISO3 code; used for the data folder and the graph file name
epsg = 32638          # EPSG code of the projected CRS passed as target_crs when snapping

### Load origins and graph

In [3]:
# Everything for this country lives under <base_in>/<iso3>. The shared
# /home/public/Data/PROJECTS/INFRA_SAP location is not used because writing
# there failed with "permission denied".
base_in = "/home/wb514197/data/INFRA_SAP"
in_folder = os.path.join(base_in, iso3)
out_folder = os.path.join(in_folder, "output")


def _input_path(file_name):
    """Return the path of `file_name` inside this country's input folder."""
    return os.path.join(in_folder, file_name)


# Administrative boundaries and the OSM extract
focal_admin2 = _input_path("admin.shp")
focal_osm = _input_path(f"{country}-latest.osm.pbf")

# Population raster
pop_name = "WP_2020_1km"
wp_1km = _input_path(f"{pop_name}.tif")

# Destination layers
urban_extents = _input_path("urban_extents.shp")
airports = _input_path("airports_intl.shp")
ports = _input_path("ports.shp")
borders = _input_path("borders.shp")

Load the point table (XYZ CSV) for **WP_2020_1km.tif** into a point GeoDataFrame

In [5]:
# Read the space-delimited X / Y / Z table of the population raster and call the value column "Pop".
out_pop_csv = os.path.join(out_folder, f"{pop_name}.csv")
wp_df = pd.read_csv(out_pop_csv, sep=' ').rename(columns={"Z": "Pop"})

# Drop cells carrying the -99999.0 sentinel (presumably the raster's nodata value -- confirm).
wp_df = wp_df[wp_df["Pop"] != -99999.0].copy()

# One point per remaining cell; the coordinate columns are not needed once the geometry exists.
geoms = list(map(Point, zip(wp_df.X, wp_df.Y)))
wp_df = wp_df.drop(columns=["X", "Y"])

crs = 'EPSG:4326'
origins = gpd.GeoDataFrame(wp_df, crs=crs, geometry=geoms)
# Keep the row label as an explicit id column; later cells key on 'pointid'.
origins['pointid'] = origins.index

### Prepare Graph

In [6]:
# Load the pickled routing graph for this country.
# nx.read_gpickle was removed in NetworkX 3.0; it was a thin wrapper around
# pickle.load, so reading the file directly works with both old and new versions.
# NOTE: unpickling can execute arbitrary code -- only load graph files you trust.
import pickle

with open(os.path.join(out_folder, 'graph', f'G_{iso3}_Salt.pickle'), 'rb') as graph_file:
    G_time = pickle.load(graph_file)

#### Select largest graph (again)

In [7]:
# Strongly connected components ordered from largest to smallest; each one is
# materialised as an independent copy of the corresponding subgraph of G_time.
components_by_size = sorted(nx.strongly_connected_components(G_time), key=len, reverse=True)
list_of_subgraphs = [G_time.subgraph(nodes).copy() for nodes in components_by_size]

In [8]:
# The list is sorted by component size (descending), so element 0 is the largest
# strongly connected component; all routing below is done on it.
G_largest = list_of_subgraphs[0]

### Prepare destinations

In [9]:
def load_csv(csv_path, geometry = 'geometry', crs = 'epsg:4326'):
    """Read a CSV whose geometry column holds WKT strings into a GeoDataFrame.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file. Its first column is used as the index.
    geometry : str, default 'geometry'
        Name of the column that contains the WKT geometries.
    crs : str, default 'epsg:4326'
        Coordinate reference system assigned to the result.

    Returns
    -------
    geopandas.GeoDataFrame
        The table with the `geometry` column parsed to shapely objects and
        set as the active geometry column.
    """
    df = pd.read_csv(csv_path, index_col=0)
    df[geometry] = df[geometry].apply(loads)
    # Name the geometry column explicitly: without it GeoDataFrame only looks for
    # a column called 'geometry', so a non-default `geometry` argument was ignored.
    return gpd.GeoDataFrame(df, geometry=geometry, crs=crs)

In [10]:
# Load the destination table (WKT geometries) from the output folder.
dest_all = load_csv(os.path.join(out_folder, 'destination_all.csv'))

In [11]:
len(origins), len(dest_all)

(26323, 13)

### Snap origins and destinations

#### Snap origins to 5 nearest nodes

In [12]:
# Projected CRS string used as target_crs in the snapping calls below.
utm = f"EPSG:{epsg}"

In [13]:
%%time
# this function returns a dictionary of origin IDs, with a list of 5 NNs, and a corresponding list of distances
origins_snapped_dict = gn.pandana_snap_to_many(G_largest, origins, source_crs='epsg:4326', target_crs=utm, 
                                               add_dist_to_node_col = True, k_nearest=5, origin_id='pointid')

CPU times: user 3.89 s, sys: 105 ms, total: 3.99 s
Wall time: 3.99 s


In [14]:
# Snap each destination to a graph node; the node id ends up in column 'NN'.
dest_snapped = gn.pandana_snap_c(
    G_largest,
    dest_all,
    source_crs='epsg:4326',
    target_crs=utm,
    add_dist_to_node_col=False,
)

In [15]:
# Destination nodes, one entry per destination row (repeats are kept so the OD
# columns stay aligned with the destination rows).
dest_nn = list(dest_snapped['NN'])

# Every candidate node of every origin, flattened into one list ...
list_origins_NN = [node for snap in origins_snapped_dict.values() for node in snap['NN']]
# ... then de-duplicated: these are the only sources the OD matrix needs.
origins_unique_nn = list(set(list_origins_NN))

In [16]:
%%time
# OD matrix from every candidate origin node (rows) to every destination node (columns).
# weight='length' accumulates the edge 'length' attribute, so the values are network
# distances rather than travel times. fail_value presumably marks pairs with no
# path -- confirm in GOSTnets.calculate_OD.
curOD = gn.calculate_OD(G_largest, origins_unique_nn, dest_nn, fail_value = 999999999, weight='length')

CPU times: user 14.6 ms, sys: 0 ns, total: 14.6 ms
Wall time: 14.6 ms


In [17]:
curOD[curOD==999999999]

array([], dtype=float64)

In [18]:
curOD.shape

(325, 13)

In [19]:
# Label the OD matrix: rows are candidate origin nodes, columns are destination nodes.
# Column labels repeat when several destinations snap to the same node, so columns
# have to be addressed by position rather than by label.
od_df = pd.DataFrame(curOD, index=origins_unique_nn, columns=dest_nn)

In [20]:
od_df.head()

Unnamed: 0,new_obj_128_61_224,260,new_obj_9,516,249_12_36,249_12_36.1,249_12_36.2,249_12_36.3,new_obj_0_39_105,new_obj_0_39_105.1,new_obj_0_39_105.2,new_obj_0_39_105.3,new_obj_86
0,188268.011271,104542.016983,23768.465439,107787.258523,117025.832146,117025.832146,117025.832146,117025.832146,174627.599116,174627.599116,174627.599116,174627.599116,107222.092535
new_obj_94,174577.568845,7768.913805,92037.287233,5246.19275,19709.847653,19709.847653,19709.847653,19709.847653,220807.836573,220807.836573,220807.836573,220807.836573,830.729258
10,166956.789416,414.710928,86312.155642,7918.061462,19814.991802,19814.991802,19814.991802,19814.991802,213187.057143,213187.057143,213187.057143,213187.057143,7352.895475
25,173234.0,340605.500343,337733.545832,347546.005574,358920.155155,358920.155155,358920.155155,358920.155155,464608.447333,464608.447333,464608.447333,464608.447333,346980.839586
26,175295.28521,8486.63017,93370.904258,6617.659657,21081.31456,21081.31456,21081.31456,21081.31456,221525.552937,221525.552937,221525.552937,221525.552937,2202.196166


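For each origin's set of 5 (k) candidate nearest nodes (NN):

- Add the snapping distance to the network distance from that node to a destination (distances in this run, because the OD matrix is built with `weight='length'`; the time-based variant converts the snapping distance to time first).
- Which destination? Taking the minimum over all destinations won't necessarily work, so we first find the closest destination for each origin, and then select the NN which yields the fastest route to that destination.
- `closest_dest.idx` should match the OD column order.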

In [21]:
%%time
closest_dest = gn.pandana_snap_points(origins, dest_all, source_crs='epsg:4326', target_crs=utm,
                                      add_dist_to_node_col=True)

CPU times: user 1.32 s, sys: 9.4 ms, total: 1.33 s
Wall time: 1.33 s


In [22]:
# Index by origin id so rows can be looked up with .loc[pointid].
# Guarded so that re-running this cell is a no-op: once 'pointid' has been moved
# into the index, a second set_index('pointid') would raise KeyError.
if closest_dest.index.name != 'pointid':
    closest_dest = closest_dest.set_index('pointid')

In [23]:
closest_dest.head()

Unnamed: 0_level_0,Pop,geometry,idx,idx_dist
pointid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
160,20.005409,POINT (294635.138 1405028.298),0,104317.192282
161,12.062751,POINT (295540.325 1405021.740),0,104535.480534
162,9.741089,POINT (296445.509 1405015.211),0,104761.162858
163,10.025794,POINT (297350.688 1405008.711),0,104994.191471
355,15.837574,POINT (291912.772 1404126.147),0,102806.303627


In [25]:
%%time
fastest_nn = []
fastest_dist = []
custom_speed = 20 # km/h

for pointid, items in origins_snapped_dict.items():
    dest_index = closest_dest.loc[pointid].idx
    nn_list = items['NN']
    dist_list = items['NN_dist']
    total_dist_list = []
    for i in range(0, len(nn_list)):
        dist_snapping = dist_list[i]
#         time_snapping = ((dist_list[i] / 1000) / custom_speed) * 60 * 60
        dist_to_dest = od_df.loc[nn_list[i]].iloc[dest_index]
#         time_to_dest = od_df.loc[nn_list[i]].iloc[dest_index]
        total_dist = dist_snapping+dist_to_dest
#         total_time = time_snapping+time_to_dest
        total_dist_list.append(total_dist)
#         print(f"id: {nn_list[i]}, snapping dist (km): {dist_list[i]/1000:.2f}, time to dest (min): {(total_time/60)/60:.2f}")
    min_pos = total_dist_list.index(min(total_dist_list))
    fastest_nn.append(nn_list[min_pos])
    fastest_dist.append(dist_list[min_pos])
#     origins_snapped_smart.loc[pointid, "NN"] = nn_list[min_pos]
#     origins_snapped_smart.loc[pointid, "NN_dist"] = dist_list[min_pos]

CPU times: user 17.8 s, sys: 24 ms, total: 17.8 s
Wall time: 17.8 s


In [26]:
# Attach the chosen node and its snapping distance to each origin.
# fastest_nn / fastest_dist were appended in the iteration order of
# origins_snapped_dict, so label them with the dict keys (origin ids) and let pandas
# align on the index instead of assuming that order equals the row order of `origins`.
snapped_ids = list(origins_snapped_dict.keys())
origins_snapped = origins.copy().set_index('pointid')
assert set(snapped_ids) == set(origins_snapped.index), \
    "snapped origin ids do not match the origin grid"
origins_snapped['NN'] = pd.Series(fastest_nn, index=snapped_ids)
origins_snapped['NN_dist'] = pd.Series(fastest_dist, index=snapped_ids)
# set_index dropped the column; restore it (kept in this position so the column
# order of origins_snapped is unchanged).
origins_snapped['pointid'] = origins_snapped.index
# Snapping leg expressed in hours: NN_dist / 1000, divided by custom_speed (km/h).
origins_snapped['NN_dist_hours'] = ((origins_snapped.NN_dist / 1000) / custom_speed)

In [27]:
# Look up each origin's OD row through its chosen node ('NN'); the destination
# columns of od_df are appended after the origin columns.
origins_join = origins_snapped.join(od_df, on='NN')

In [28]:
# Sanity check: the destination columns of origins_join (everything after the first
# 6 columns, i.e. the 6 columns of origins_snapped) are in the same order as dest_snapped.NN.
all(origins_join.columns[6:] == dest_snapped.NN)

True

In [30]:
# Give the joined table two-level column labels:
#   level 0 -- 'origin' for the origin attributes, the destination type for OD columns
#   level 1 -- the origin column name, or the destination's index label
origin_cols = origins_snapped.columns
top_level = ['origin'] * len(origin_cols) + list(dest_snapped.dest_type)
bottom_level = origin_cols.append(dest_snapped.index)

origins_join_rename = origins_join.copy()
origins_join_rename.columns = pd.MultiIndex.from_arrays([top_level, bottom_level])

Add snapping distance

In [31]:
def _add_snapping_km(column):
    """Add the snapping distance to a destination column and divide by 1000.

    Columns whose second-level label is not a destination index label are
    returned unchanged.
    """
    if column.name[1] in dest_snapped.index:
        return (column + origins_join_rename.origin.NN_dist) / 1000
    return column


origins_join2 = origins_join_rename.apply(_add_snapping_km)

In [32]:
out_folder

'/home/wb514197/data/INFRA_SAP/DJI/output'

In [33]:
# Save the OD table; its destination columns include the snapping distance and
# have been divided by 1000.
origins_join2.to_csv(os.path.join(out_folder, 'OD_11_23_Distances.csv'))

### Make rasters of min travel distance to each dest

In [34]:
# The population raster is handed to aggregator.rasterize_gdf below
# (presumably as the template grid -- confirm in infrasap.aggregator).
raster_path = wp_1km

In [35]:
# Folder for the distance rasters. makedirs(exist_ok=True) replaces the
# check-then-mkdir pair: it is safe to re-run and also creates missing parent folders.
output_path = os.path.join(out_folder, "travel_distance")
os.makedirs(output_path, exist_ok=True)

In [36]:
# CHECK THAT MOST POPULATED CITY IS THE CAPITAL
# Index label of the destination with the largest 'Pop'; it is treated as the capital below.
dest_by_pop = dest_all.sort_values('Pop', ascending=False)
cap_idx = dest_by_pop.index[0]

In [37]:
# Per origin: the smallest value across all destinations of each type, each kept
# as a one-column frame so they can be joined onto the origins afterwards.
city_min = pd.DataFrame({"dist_city": origins_join2['city'].min(axis=1)})
ports_min = pd.DataFrame({"dist_port": origins_join2['port'].min(axis=1)})
airports_min = pd.DataFrame({"dist_airport": origins_join2['airport'].min(axis=1)})
borders_min = pd.DataFrame({"dist_border": origins_join2['border'].min(axis=1)})

# The capital is a single city column, selected by its label instead of a minimum.
city_columns = origins_join2['city']
capital_dist = city_columns.loc[:, [cap_idx]].rename(columns={cap_idx: 'dist_capital'})

In [38]:
# Attach the per-origin distance summaries to the snapped origins (joined on the shared index).
origins_dist = origins_snapped.join([city_min, airports_min, borders_min, capital_dist, ports_min])

In [39]:
origins_dist.columns

Index(['Pop', 'geometry', 'NN', 'NN_dist', 'pointid', 'NN_dist_hours',
       'dist_city', 'dist_airport', 'dist_border', 'dist_capital',
       'dist_port'],
      dtype='object')

In [40]:
# One .tif per distance column, written into output_path; raster_path is passed
# to the helper unchanged for every layer.
rasters_to_write = [
    ('dist_city', "cities_min_dist.tif"),
    ('dist_port', "port_min_dist.tif"),
    ('dist_airport', "airport_min_dist.tif"),
    ('dist_border', "borders_min_dist.tif"),
    ('dist_capital', "capital_dist.tif"),
]
for dist_col, tif_name in rasters_to_write:
    aggregator.rasterize_gdf(origins_dist, dist_col, raster_path, os.path.join(output_path, tif_name))