In [1]:
import os

import pandas as pd
import geopandas as gpd

import wbgapi as wb  # https://blogs.worldbank.org/opendata/introducing-wbgapi-new-python-package-accessing-world-bank-data

In [2]:
# Define input data
in_admin = "/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp"
inA = gpd.read_file(in_admin)
ssa = inA.loc[inA["Region"] == "Sub-Saharan Africa"]
ssa

Unnamed: 0,OBJECTID,ISO_A2,WB_ADM0_CO,WB_ADM0_NA,Shape_Leng,Shape_Area,ISO3,UN_m49,Region,incomeG,lendingC,FID_100,geometry
4,5,SD,6,Sudan,8852111.0,1844887.0,SDN,736,Sub-Saharan Africa,Lower middle income,IDA,400,"MULTIPOLYGON (((4282246.284 2048756.881, 42821..."
6,7,AO,8,Angola,7684195.0,1246754.0,AGO,24,Sub-Saharan Africa,Upper middle income,Blend,600,"MULTIPOLYGON (((1310361.144 -1897234.549, 1310..."
24,25,BJ,29,Benin,2162187.0,115598.0,BEN,204,Sub-Saharan Africa,Low income,IDA,2400,"POLYGON ((316053.073 1392001.912, 316482.462 1..."
29,30,BW,35,Botswana,4399139.0,578256.1,BWA,72,Sub-Saharan Africa,Upper middle income,IBRD,2900,"POLYGON ((2812242.525 -2013145.254, 2812863.83..."
36,37,BF,42,Burkina Faso,3558434.0,273280.2,BFA,854,Sub-Saharan Africa,Low income,IDA,3600,"POLYGON ((-50724.952 1698516.119, -49209.894 1..."
37,38,BI,43,Burundi,1016007.0,26857.15,BDI,108,Sub-Saharan Africa,Low income,IDA,3700,"POLYGON ((3387552.350 -259257.290, 3388263.005..."
39,40,CM,45,Cameroon,5477015.0,465546.9,CMR,120,Sub-Saharan Africa,Lower middle income,IDA,3900,"MULTIPOLYGON (((1072528.923 401904.202, 107231..."
41,42,CV,47,Cabo Verde,1029808.0,4056.343,CPV,132,Sub-Saharan Africa,Lower middle income,Blend,4100,"MULTIPOLYGON (((-2749534.392 1677117.552, -274..."
43,44,CF,49,Central African Republic,5541381.0,619590.9,CAF,140,Sub-Saharan Africa,Low income,IDA,4300,"POLYGON ((2546490.141 1224324.736, 2546280.824..."
44,45,TD,50,Chad,6342130.0,1264759.0,TCD,148,Sub-Saharan Africa,Low income,IDA,4400,"POLYGON ((2671667.779 2213892.183, 2671667.779..."


In [3]:
# Identify indicators related to GDP
wb.series.info(q="gdp")

id,value
EG.GDP.PUSE.KO.PP,GDP per unit of energy use (PPP $ per kg of oil equivalent)
EG.GDP.PUSE.KO.PP.KD,GDP per unit of energy use (constant 2017 PPP $ per kg of oil equivalent)
EG.USE.COMM.GD.PP.KD,"Energy use (kg of oil equivalent) per $1,000 GDP (constant 2017 PPP)"
NY.GDP.DEFL.KD.ZG,"Inflation, GDP deflator (annual %)"
NY.GDP.DEFL.KD.ZG.AD,"Inflation, GDP deflator: linked series (annual %)"
NY.GDP.DEFL.ZS,GDP deflator (base year varies by country)
NY.GDP.DEFL.ZS.AD,GDP deflator: linked series (base year varies by country)
NY.GDP.DISC.CN,Discrepancy in expenditure estimate of GDP (current LCU)
NY.GDP.DISC.KN,Discrepancy in expenditure estimate of GDP (constant LCU)
NY.GDP.MKTP.CD,GDP (current US$)


In [4]:
wb.series.info(q="urban")

id,value
AG.LND.EL5M.UR.K2,Urban land area where elevation is below 5 meters (sq. km)
AG.LND.EL5M.UR.ZS,Urban land area where elevation is below 5 meters (% of total land area)
AG.LND.TOTL.UR.K2,Urban land area (sq. km)
EG.CFT.ACCS.UR.ZS,"Access to clean fuels and technologies for cooking, urban (% of urban population)"
EG.ELC.ACCS.UR.ZS,"Access to electricity, urban (% of urban population)"
EN.POP.EL5M.UR.ZS,Urban population living in areas where elevation is below 5 meters (% of total population)
EN.URB.MCTY,Population in urban agglomerations of more than 1 million
EN.URB.MCTY.TL.ZS,Population in urban agglomerations of more than 1 million (% of total population)
SH.H2O.BASW.UR.ZS,"People using at least basic drinking water services, urban (% of urban population)"
SH.H2O.SMDW.UR.ZS,"People using safely managed drinking water services, urban (% of urban population)"


In [17]:
# These are the datasets we are interested in extracting
selected_indicators = ["NY.GDP.MKTP.CD", "SP.URB.TOTL", "SP.URB.TOTL.IN.ZS"]
urb_data = wb.data.DataFrame(selected_indicators, economy=ssa["ISO3"].values, mrnev=1)
urb_data = urb_data.reset_index()  # .drop(['index'], axis=1)
urb_data.loc[urb_data["economy"] == "SYC"]

Unnamed: 0,economy,NY.GDP.MKTP.CD,SP.URB.TOTL,SP.URB.TOTL.IN.ZS
39,SYC,1454458000.0,57542.0,57.972


In [18]:
for ind in selected_indicators:
    print(f'{ind}: {wb.series.info(ind).items[0]["value"]}')

NY.GDP.MKTP.CD: GDP (current US$)
SP.URB.TOTL: Urban population
SP.URB.TOTL.IN.ZS: Urban population (% of total population)


In [19]:
urb_data.columns = ["ISO3", "GDP", "UrubPop", "UrbPercent"]
urb_data.to_csv("SSA_countries_GDP_Urbanization.csv")

In [12]:
urb_data.loc[urb_data["economy"] == "SYC"]

Unnamed: 0,economy,NY.GDP.MKTP.CD,SP.URB.TOTL,SP.URB.TOTL.IN.ZS
39,SYC,1454458000.0,57542.0,57.972


In [None]:
wb.data.DataFrame?

# Create geospatial data

In [None]:
pp_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Mapping/URBAN_SUMMARIES"
excel_files = [x for x in os.listdir(pp_folder) if x.endswith("xlsx")]
for excel_file in excel_files:
    curD = pd.read_excel(os.path.join(pp_folder, excel_file))
    cols = list(curD.columns)
    cols[0] = "ISO3"
    cols[1] = "country"
    cols[2] = "Urban_Type"
    curD.columns = cols
    curD["ISO3"] = curD["ISO3"].apply(lambda x: x.upper())
    curG = pd.merge(ssa, curD, on="ISO3")
    curG = pd.merge(curG, urb_data, on="ISO3")
    curG = gpd.GeoDataFrame(curG, geometry="geometry", crs=4326)
    curG.to_file(
        os.path.join(pp_folder, excel_file.replace(".xlsx", ".geojson")),
        driver="GeoJSON",
    )
    # Create point file as well
    curG["geometry"] = curG["geometry"].apply(lambda x: x.centroid)
    curG.to_file(
        os.path.join(pp_folder, excel_file.replace(".xlsx", "_CENTROID.geojson")),
        driver="GeoJSON",
    )

In [None]:
pd.DataFrame(curG).drop(["geometry"], axis=1).to_csv(
    os.path.join(pp_folder, excel_file.replace(".xlsx", "_joined.csv"))
)

In [None]:
curD.head()