# Chart and map urbanization data

In [1]:
import sys
import os
import multiprocessing

import pandas as pd
import geopandas as gpd

from osgeo import gdal

# Import raster helpers
sys.path.insert(0, "/home/wb411133/Code/gostrocks/src")

from GOSTRocks.misc import tPrint

# Import GOST urban functions
sys.path.append("../../../src")

# Import local functions
from novelUrbanization import *

%load_ext autoreload
%autoreload 2

In [2]:
base_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data"
dou_pop = "gpo"
db_pop = "cpo15"
aapc_folder = os.path.join(base_folder, "AAPPC", "Delineations")
agg_folder = f"/home/wb411133/data/Projects/MR_Novel_Urbanization/Aggregate_Data/DOU{dou_pop}_DB{db_pop}"
if not os.path.exists(agg_folder):
    os.makedirs(agg_folder)

# Generate a list of iso3 codes already processed
processed_iso3 = []
for folder in os.listdir(base_folder):
    if "URBAN_DATA_new" in folder:
        processed_iso3.append(folder[:3])
print(processed_iso3)

['BGD', 'AGO', 'COL', 'EGY', 'ETH', 'GHA', 'TZA', 'VNM', 'BDI', 'BEN', 'BFA', 'BWA', 'CAF', 'CIV', 'CMR', 'COD', 'COG', 'COM', 'CPV', 'ERI', 'GAB', 'GIN', 'GMB', 'GNB', 'KEN', 'LBR', 'LSO', 'MDG', 'MLI', 'MOZ', 'MRT', 'MUS', 'MWI', 'NAM', 'NER', 'NGA', 'RWA', 'SDN', 'SEN', 'SLE', 'SOM', 'SSD', 'STP', 'SWZ', 'SYC', 'TCD', 'TGO', 'UGA', 'ZAF', 'ZMB', 'ZWE', 'MAU', 'MAR', 'DZA', 'TUN', 'LBY', 'PAK']


In [7]:
# Generate comparison of DOU gpo and DB cpo15
iso3 = "AGO"
country_folder = os.path.join(base_folder, f"{iso3}_URBAN_DATA_new_naming")
urb = urban_data(iso3, country_folder, aapc_folder)
comboRes = urb.generate_combo_layer(pop_type=[dou_pop, db_pop], debug=True)
if comboRes:
    if not os.path.exists(agg_folder):
        os.makedirs(agg_folder)
    urb.write_results(comboRes, agg_folder, dbhd="co")
tPrint(iso3)

/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/AGO_URBAN_DATA_new_naming/FINAL_STANDARD/ago_gpo_urban.tif
/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/AGO_URBAN_DATA_new_naming/FINAL_STANDARD/ago_gpo_urban_hd.tif
/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/AAPPC/Delineations/ago_cpo15d10b3000_cc.tif
/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/AAPPC/Delineations/ago_cpo15d10b3000_co.tif
/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/AAPPC/Delineations/ago_cpo15d10b3000_ur.tif
08:55:44	AGO


In [None]:
def multiP(iso3):
    dou_pop = "gpo"
    db_pop = "cpo15"
    country_folder = os.path.join(base_folder, f"{iso3}_URBAN_DATA_new_naming")
    urb = urban_data(iso3, country_folder, aapc_folder)
    comboRes = urb.generate_combo_layer(pop_type=[dou_pop, db_pop], debug=True)
    if comboRes:
        agg_folder = f"/home/wb411133/data/Projects/MR_Novel_Urbanization/Aggregate_Data/DOU{dou_pop}_DB{db_pop}"
        urb.write_results(comboRes, agg_folder)
    tPrint(iso3)

In [None]:
with multiprocessing.Pool(len(processed_iso3)) as mp:
    mp.map(multiP, processed_iso3)

In [3]:
sum_files = []
bin_files = []

for root, dirs, files in os.walk(agg_folder):
    for f in files:
        if f.endswith("sum_co.tif"):
            sum_files.append(os.path.join(root, f))
        if f.endswith("binary_co.tif"):
            bin_files.append(os.path.join(root, f))

In [8]:
vrt_options = gdal.BuildVRTOptions(resampleAlg="cubic", addAlpha=True)
pop_layer = "cpo"
my_vrt = gdal.BuildVRT(
    f"DOU{dou_pop}_DB{db_pop}_sum.vrt", sum_files, options=vrt_options
)
my_vrt = None
my_vrt = gdal.BuildVRT(
    f"DOU{dou_pop}_DB{db_pop}_binary.vrt", bin_files, options=vrt_options
)
my_vrt = None

# Kappa comparison

In [None]:
iso3 = "BGD"
country_folder = os.path.join(base_folder, f"{iso3}_URBAN_DATA_new_naming")
urb = urban_data(iso3, country_folder, aapc_folder)
comboRes = urb.generate_combo_layer(pop_type=pop_layer, debug=True)
res = urb.jaccard_index(pop_type=pop_layer)
res

In [19]:
def jaccardP(iso3, debug=False):
    country_folder = os.path.join(base_folder, f"{iso3}_URBAN_DATA_new_naming")
    urb = urban_data(iso3, country_folder, aapc_folder)
    if not debug:
        try:
            comboRes = urb.generate_combo_layer(pop_type=[dou_pop, db_pop])
            res = urb.jaccard_index(pop_type=[dou_pop, db_pop])
        except:
            res = {"urb_jaccard": -1, "hd_jaccard": -1}
    else:
        comboRes = urb.generate_combo_layer(pop_type=[dou_pop, db_pop])
        res = urb.jaccard_index(pop_type=[dou_pop, db_pop])
    tPrint(f"{iso3}: {res}")
    return {iso3: res}


jaccardP("NAM", True)

"""
with multiprocessing.Pool(len(processed_iso3)) as mp:
    all_jaccard = mp.map(jaccardP, processed_iso3)

"""

10:25:01	NAM: {'urb_jaccard': 0.006563989261262187, 'hd_jaccard': 0.050436400057232794}


'\nwith multiprocessing.Pool(len(processed_iso3)) as mp:\n    all_jaccard = mp.map(jaccardP, processed_iso3)\n\n'

In [20]:
res = {}
for cntry in all_jaccard:
    cISO = list(cntry.keys())[0]
    res[cISO] = cntry[cISO]
res = pd.DataFrame(res).transpose()
res = res.sort_values("urb_jaccard", ascending=False)
res

Unnamed: 0,urb_jaccard,hd_jaccard
MUS,0.556007,0.500252
GMB,0.280411,0.626646
GHA,0.259383,0.571611
NGA,0.245156,0.482058
MWI,0.243907,0.148016
ZAF,0.232032,0.481568
KEN,0.226092,0.139112
TGO,0.171249,0.593245
BEN,0.165398,0.480615
RWA,0.141513,0.367058


In [None]:
# Attach geometry to the res
admin0_polys = gpd.read_file("/home/public/Data/GLOBAL/ADMIN/Admin0_Polys.shp")
res["ISO3"] = res.index

In [None]:
temp_res = res.loc[res["hd_jaccard"] > 0]
temp_res = temp_res.merge(admin0_polys, on="ISO3")
temp_res = gpd.GeoDataFrame(temp_res, geometry="geometry", crs=4326)
temp_res.to_file(f"{agg_folder}_national_jaccard.shp")

In [None]:
temp_res["ISO3"].values

In [None]:
# Create subset of GHS UCDB
ucdb_file = "/home/public/Data/GLOBAL/URBAN/GHS/GHS_STAT_UCDB2015MT_GLOBE_R2019A/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg"
ucdb = gpd.read_file(ucdb_file)
ucdb = ucdb.loc[ucdb["CTR_MN_ISO"].isin(temp_res["ISO3"].values)]

In [None]:
ucdb.shape

In [None]:
all_grps = []
cnt_size = 2
for idx, grp in ucdb.groupby("CTR_MN_ISO"):
    grp = grp.sort_values("P15", ascending=False)
    if grp.shape[0] > cnt_size:
        all_grps.append(grp.iloc[0:cnt_size, :])
    else:
        all_grps.append(grp)

In [None]:
sel_res = pd.concat(all_grps)
sel_res.shape

In [None]:
sel_res.to_file(f"{agg_folder}_select_cities.geojson", driver="GeoJSON")

In [None]:
f"{agg_folder}_select_cities.geojson"