# Launch INFRA SAP
The World Bank's Geospatial Operational Support team, in collaboration with the Infrastructure Chief Economist's office have developed a diagnostic toolkit for assessing the state of infrastucture in a country through an assessment of infrastructure, access, connectivity, and commodity flows.

The purpose of this notebook is to launch the data preparation step of the INFRA SAP toolkit. It is principally designed to integrate with the GOST team's high compute cluster, but has been made as flexible as possible to facilitate replication. The steps in data processing require the following input:
1. Administrative boundaries of interest (defines total extent of analysis and level of aggregation)
2. Country ISO3 code

Based on these basic datasets we will extract the following datasets **these steps are particular to the World Bank's data schema, but can be directly supplied to later functions if necessary**

1. Open Street Map
2. WorldPop 2020 gridded population data
3. International airports (from OSM)
4. Major ports (from OSM)
5. Official Border Crossings (from ???)

With these data either extracted or processed we run the following analyses

1. Calculate urban and rural following the GURBA process - LINK
2. Attempt to identify/name urban areas
3. (optional) Re-sample population to 1km

Following these data preparation steps a sanity check should be performed on the extracted data to ensure major POIs are not missed and that all data have been properly extracted

In [264]:
import sys, os, importlib
import rasterio

import geopandas as gpd
import pandas as pd

sys.path.append("../")

import infrasap.wp_helper as wp
import infrasap.osm_extractor as osm
import infrasap.rai_calculator as rai
import infrasap.gsm_rasterizer as gsm_r
import infrasap.infra_helper as helper

In [269]:
# Define baseline data
iso3 = "URY"
global_boundaries =  "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp"
global_admin2 = r"/home/public/Data/GLOBAL/ADMIN/Admin2_Polys.shp"
global_osm = "/home/public/Data/GLOBAL/OSM/GLOBAL/planet-latest.osm.pbf"
wp_dataset = "/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/MOSAIC_ppp_prj_2020/ppp_prj_2020_%s.tif" % iso3
if not os.path.exists(wp_dataset):
    print("Check to make sure population dataset exists")
border_crossings = "/home/public/Data/GLOBAL/INFRA/BORDERS/border_crossings_phv.shp"#J:\Data\GLOBAL\INFRA
global_power_plants = "/home/public/Data/GLOBAL/ENERGY/global_power_plant_database.csv"
grid_lines = "/home/public/Data/GLOBAL/ENERGY/grid.gpkg"

In [270]:
importlib.reload(helper)

epsg = 32722
base_out = "/home/public/Data/PROJECTS/INFRA_SAP" #r"J:\Data\PROJECTS\INFRA_SAP"
out_folder = os.path.join(base_out, iso3)
if not os.path.exists(out_folder):
    os.makedirs(out_folder)
#define output data
focal_admin2 = os.path.join(out_folder, "admin.shp")
focal_osm = os.path.join(out_folder, "national_complete.osm.pbf")
wp_1km = os.path.join(out_folder, "WP_2020_1km.tif")
urban_extents = os.path.join(out_folder, "urban_extents.shp")
airports = os.path.join(out_folder, "airports.shp")
ports = os.path.join(out_folder, "ports.shp")
borders = os.path.join(out_folder, "borders.shp")
power_plants = os.path.join(out_folder, "power_plants.shp")
transmission = os.path.join(out_folder, "transmission_lines.shp")

global_data = gpd.read_file(global_boundaries)    
# select out admin2 from global boundaries dataset
if not os.path.exists(focal_admin2):
    in_bounds = gpd.read_file(global_admin2)
    out_bounds = in_bounds.loc[in_bounds['ISO3'] == iso3]
    out_bounds = out_bounds.to_crs({'init':'epsg:4326'})
    out_bounds.to_file(focal_admin2)
else:
    out_bounds = gpd.read_file(focal_admin2)
    
# extract national OSM from global OSM PBF
if not os.path.exists(focal_osm):
    ## BEN: Look into using WGET to download from GeoFabrik    
    extractor = osm.osmExtraction(osmosisCmd = "/home/wb411133/Code/Osmosis/bin/osmosis", tempFile = "/home/wb411133/temp/temp_execution.bat")
    print(extractor.extractBoundingBox(global_osm, focal_admin2, focal_osm, execute=False))

# Re-sample WP to 1km
if not os.path.exists(wp_1km):
    inR = rasterio.open(wp_dataset)
    wp.resample_wp(inR, wp_1km, factor=10)
    
# Calculate urban extents from 1km WorldPop
if not os.path.exists(urban_extents):
    urban_shp = wp.calculateUrban(rasterio.open(wp_1km), smooth=False)
    urban_shp.to_file(urban_extents)
    
# Extract airports and ports and rails from OSM
if not os.path.exists(airports):
    pois = osm.load_pois(focal_osm, out_bounds.unary_union)
    for key, value in pois.items():
        value.to_file(os.path.join(out_folder, "%s.shp" % key))

# Extract borders
if not os.path.exists(borders):
    all_borders = gpd.read_file(border_crossings)
    focal_borders = all_borders[all_borders.intersects(out_bounds.unary_union.buffer(0.01))]
    focal_borders.to_file(borders)

# Extract power plants
if not os.path.exists(power_plants):
    pp = helper.extract_power_plants(global_power_plants, out_bounds)
    pp.to_file(power_plants)
    

# Extract transmission lines
if not os.path.exists(transmission):
    lines = helper.extract_transmission_lines(grid_lines, out_bounds)
    lines.to_file(transmission)
    
# Extract and rasterize GSM
gsm_folder = "/home/public/Data/GLOBAL/INFRA/GSMA/"
gsm_shapefiles = []

for root, dirs, files in os.walk(gsm_folder):
    for f in files:
        if (f[-4:] == ".shp") and not ("Inclusions" in f):
            gsm_shapefiles.append(os.path.join(root, f))

gsm_process = gsm_r.gsm_rasterizer(gsm_shapefiles, out_folder)
gsm_process.initial_read_in()
gsm_process.extract_country_vectors(iso3, global_data, out_folder = out_folder)
gsm_process.rasterize_gsm_vectors()

In [337]:
# Extract and rasterize GSM
gsm_folder = "/home/public/Data/GLOBAL/INFRA/GSMA/"
gsm_shapefiles = []

for root, dirs, files in os.walk(gsm_folder):
    for f in files:
        if (f[-4:] == ".shp") and not ("Inclusions" in f) and not ("National_Rasters" in root):
            gsm_shapefiles.append(os.path.join(root, f))
gsm_shapefiles

['/home/public/Data/GLOBAL/INFRA/GSMA/2016/Global_3G_201612.shp',
 '/home/public/Data/GLOBAL/INFRA/GSMA/2016/Global_4G_201612.shp',
 '/home/public/Data/GLOBAL/INFRA/GSMA/2016/Global_GSM_201612.shp',
 '/home/public/Data/GLOBAL/INFRA/GSMA/2018/MCE_201812_3G.shp',
 '/home/public/Data/GLOBAL/INFRA/GSMA/2018/MCE_201812_4G.shp',
 '/home/public/Data/GLOBAL/INFRA/GSMA/2018/MCE_201812_GSM.shp',
 '/home/public/Data/GLOBAL/INFRA/GSMA/2017/Data/Global_3G_201712.shp',
 '/home/public/Data/GLOBAL/INFRA/GSMA/2017/Data/Global_4G_201712.shp',
 '/home/public/Data/GLOBAL/INFRA/GSMA/2017/Data/Global_GSM_201712.shp']

In [343]:
importlib.reload(gsm_r)
iso3 = "URY"
gsm_process = gsm_r.gsm_rasterizer(gsm_shapefiles, out_folder)
gsm_process.initial_read_in()
gsm_process.extract_country_vectors(iso3, global_data, out_folder = out_folder)
gsm_process.rasterize_gsm_vectors()

NameError: name 'gsm_process' is not defined

In [345]:
base_data = gsm_process.gsm_data

In [349]:
importlib.reload(gsm_r)

gsm_process = gsm_r.gsm_rasterizer(gsm_shapefiles, out_folder)
gsm_process.gsm_data = base_data
gsm_process.extract_country_vectors(iso3, global_data, out_folder = out_folder)
gsm_process.rasterize_gsm_vectors()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  select.to_file(out_file)
TopologyException: Input geom 1 is invalid: Ring Self-intersection at or near point -58.44865462599995 -34.039201843999933 at -58.44865462599995 -34.039201843999933


In [348]:
gsm_process.gsm_data.keys()[0]

TypeError: 'dict_keys' object does not support indexing

In [341]:
gsm_process.gsm_data[]

{'Global_3G_201612':                    SOURCE CNTRY_COV  CNTRY_CODE    COUNTRY  CNTRY_ISO  \
 0           Legacy_2009Q1  External    207122.0  Sri Lanka        144   
 1           Legacy_2009Q1  External    207122.0  Sri Lanka        144   
 2           Legacy_2009Q1  External    207122.0  Sri Lanka        144   
 3           Legacy_2009Q1  External    207122.0  Sri Lanka        144   
 4           Legacy_2009Q1  External    207122.0  Sri Lanka        144   
 5           Legacy_2009Q1  External    207122.0  Sri Lanka        144   
 6           Legacy_2009Q1  External    207122.0  Sri Lanka        144   
 7           Legacy_2009Q1  External    207122.0  Sri Lanka        144   
 8           Legacy_2009Q1  External    207122.0  Sri Lanka        144   
 9           Legacy_2009Q1  External    207122.0  Sri Lanka        144   
 10          Legacy_2009Q1  External    207122.0  Sri Lanka        144   
 11          Legacy_2009Q1  External    207122.0  Sri Lanka        144   
 12          Legac

# Calculate RAI

In [238]:
importlib.reload(rai)
rai_roadnetwork = rai.extract_rai_network(focal_osm, epsg=epsg)


NameError: name 'out_folder' is not defined

In [241]:
importlib.reload(rai)
rai = rai.calculate_rai(out_bounds, "GID_2", rasterio.open(wp_dataset), rai_roadnetwork, out_folder)

In [230]:
rai.to_csv(os.path.join(out_folder, "RAI_population.csv"))

# Debugging below

In [298]:
class InfraExtractor(osmium.SimpleHandler):
    """ Extractor for use in osmium SimpleHandler to extract nodes and highways
    """
    def __init__(self, verbose=False):
        ''' Extract nodes representing ports and international airports
        '''
        osmium.SimpleHandler.__init__(self)
        self.verbose = verbose
        self.ports = []
        self.bad_ports = []
        self.airports = []   
        self.bad_airports = []        
        
    def node(self, n):
        if n.tags.get('aeroway') == 'aerodrome':
            wkb = wkbfab.create_point(n)
            shp = wkblib.loads(wkb, hex=True)
            self.airports.append([n.id, shp, shp.x, shp.y, n.tags.get('aerodrome:type'), n.tags.get('name'), n.tags.get('name:en')])
        elif n.tags.get('harbour') == "yes":
            wkb = wkbfab.create_point(n)
            shp = wkblib.loads(wkb, hex=True)
            self.ports.append([n.id, shp, shp.x, shp.y])
    
    def way(self, n):
        if n.tags.get('aeroway') == 'aerodrome':
            try:
                wkb = wkbfab.create_multipolygon(n)
            except:
                wkb = wkbfab.create_linestring(n)
            shp = wkblib.loads(wkb, hex=True)
            self.airports.append([n.id, shp, shp.centroid.x, shp.centroid.y, n.tags.get('aerodrome:type'), n.tags.get('name'), n.tags.get('name:en')])
            #except:
            #    self.bad_airports.append([n.id, n.tags.get('aerodrome:type'), n.tags.get('name'), n.tags.get('name:en')])
        elif n.tags.get('harbour') == "yes":
            try:
                wkb = wkbfab.create_multipolygon(n)
                shp = wkblib.loads(wkb, hex=True)
                self.ports.append([n.id, shp, shp.centroid.x, shp.centroid.y])
            except:
                self.bad_ports.append([n.id])


In [332]:
import osmium
import shapely.wkb as wkblib
wkbfab = osmium.geom.WKBFactory()

importlib.reload(osm)

temp_osm = "/home/public/Data/PROJECTS/INFRA_SAP/URY/test.osm.pbf"
osm_data = osm.load_pois(focal_osm, out_bounds.unary_union)  
#for key, value in osm_data.items():
#    value.to_file(os.path.join(out_folder, "%s.shp" % key))

In [333]:
osm_data['highways'].to_file(os.path.join(out_folder, "highways.shp"))

In [302]:
res = pd.DataFrame(h.airports, columns=['OSM_ID', "geometry", "lon", "lat", "type", "name", "name_en"])

In [316]:
stations = pd.DataFrame(h.rail_stations, columns=['ID','x','y','name'])
stations.to_csv("/home/wb411133/temp/stations.csv")

In [313]:
h.bridges

[]

In [357]:
import json
#Process submarine cables
submarine_cable_file = "/home/public/Data/GLOBAL/INFRA/SUBMARINE_CABLES/all_submarine_cables.json"
inS = pd.read_json(submarine_cable_file)

submarine_cable_file_geo = "/home/public/Data/GLOBAL/INFRA/SUBMARINE_CABLES/all_submarine_cables_geo.json"
inS_geo = gpd.read_file(submarine_cable_file_geo)


In [355]:
inS.head()

Unnamed: 0,cable_id,id,name
0,1571,acs-alaska-oregon-network-akorn,ACS Alaska-Oregon Network (AKORN)
1,1307,aden-djibouti,Aden-Djibouti
2,1317,adria-1,Adria-1
3,1691,aeconnect-1,AEConnect-1
4,1629,africa-coast-to-europe-ace,Africa Coast to Europe (ACE)


In [358]:
inS_geo.head()


Unnamed: 0,color,slug,geometry
0,4d50a2,acs-alaska-oregon-network-akorn,(LINESTRING (-151.291670394858 60.689928950949...
1,09addc,aden-djibouti,(LINESTRING (43.1479928876509 11.5948813293645...
2,66b646,adria-1,(LINESTRING (19.9195913741185 39.6195333476855...
3,933d97,aeconnect-1,(LINESTRING (-9.23214578701738 54.207114982820...
4,8dc740,africa-coast-to-europe-ace,(LINESTRING (6.30006977231538 2.36791255870531...
