# Summarizing Urbanization in Nigeria

For the provided household surveys, summarize urbanization using the cpo15 and cpo20 population layers and two urban definitions: dartboard (DB) and degree of urbanization (DOU).

In [None]:
import sys
import os
import rasterio
import rasterio.warp

import pandas as pd
import geopandas as gpd

from shapely.geometry import Point

from tqdm.notebook import tqdm

# Import raster helpers
sys.path.insert(0, "/home/wb411133/Code/gostrocks/src")

import GOSTRocks.dataMisc as dataMisc
from GOSTRocks.misc import tPrint

# Import GOST urban functions
sys.path.append("../../../src")
import GOST_Urban.urban_helper as helper

# Import local functions
from novelUrbanization import *

%load_ext autoreload
%autoreload 2

In [None]:
# Root folder that is scanned for the urbanization rasters and that also holds
# the boundary shapefile and the zonal-statistics outputs written further down.
urban_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/"
# Sub-folder with the household survey CSVs; the attributed survey outputs are
# written back into it as well.
hh_folder = os.path.join(urban_folder, "HH_Files")

The urbanization files were downloaded from the GOST AWS bucket, but the whole workflow could also run directly against that bucket.

In [None]:
# Collect the urbanization rasters found anywhere under urban_folder, grouped
# by file-name prefix ("nga_" vs "nga1k_") and by product family:
#   DOU (presumably "degree of urbanization"): *_urban.tif, *_urban_hd.tif
#   DB  (presumably "dartboard"):              *_cc.tif, *_co.tif, *_ur.tif
dou_suffixes = ("_urban.tif", "_urban_hd.tif")
db_suffixes = ("_cc.tif", "_co.tif", "_ur.tif")

dou_urban_files = []
db_urban_files = []

dou_urban_1k_files = []
db_urban_1k_files = []

for root, dirs, files in os.walk(urban_folder):
    for f in files:
        if f.startswith("nga_"):  # grab all the 250m resolution files
            # str.endswith accepts a tuple, so one call covers every suffix
            if f.endswith(dou_suffixes):
                dou_urban_files.append(os.path.join(root, f))
            if f.endswith(db_suffixes):
                db_urban_files.append(os.path.join(root, f))
        if f.startswith("nga1k_"):  # grab all the 1km resolution files
            if f.endswith(dou_suffixes):
                dou_urban_1k_files.append(os.path.join(root, f))
            if f.endswith(db_suffixes):
                db_urban_1k_files.append(os.path.join(root, f))

# Derive the population raster names from the DOU file names: the first two
# "_"-separated tokens of e.g. "nga_cpo15_urban.tif" give "nga_cpo15.tif".
# A set removes the duplicates (each population layer has two DOU files);
# sorting makes the processing order reproducible, because the iteration order
# of a set of strings can change from one interpreter run to the next.
pop_files = sorted(
    {"_".join(os.path.basename(x).split("_")[:2]) + ".tif" for x in dou_urban_files}
)
# NOTE(review): the population rasters are looked up directly in urban_folder,
# even if the matching urban raster was found in a sub-folder - confirm layout.
pop_files = [os.path.join(urban_folder, x) for x in pop_files]

In [None]:
# Display the derived population raster paths as a visual check.
pop_files

In [None]:
# The DOU rasters are the layers sampled at the household locations; the first
# one serves as the template whose CRS the survey points are reprojected to.
input_files = dou_urban_files
if not input_files:
    # Fail with an actionable message instead of an opaque IndexError below.
    raise FileNotFoundError(
        f"No DOU rasters (nga_*_urban.tif / nga_*_urban_hd.tif) found under {urban_folder}"
    )
# Left open on purpose: later cells read template_r.crs.
template_r = rasterio.open(input_files[0])

In [None]:
# Paths of the two household survey CSVs (2018 and 2022 rounds) inside hh_folder.
hh_2018 = os.path.join(hh_folder, "NGA_2018_to_GIS.csv")
hh_2022 = os.path.join(hh_folder, "NGA_2022_to_GIS.csv")


def read_file(in_file):
    """Load a household survey CSV as a point GeoDataFrame.

    Parameters
    ----------
    in_file : str
        Path to a CSV containing the columns ``hh_gps_longitude`` and
        ``hh_gps_latitude``.

    Returns
    -------
    geopandas.GeoDataFrame
        All CSV columns plus a point geometry built from (longitude,
        latitude), tagged with CRS 4326.
    """
    table = pd.read_csv(in_file)
    # Pair the coordinates as (lon, lat), i.e. (x, y), for shapely
    lon_lat = zip(table["hh_gps_longitude"], table["hh_gps_latitude"])
    points = list(map(Point, lon_lat))
    return gpd.GeoDataFrame(table, geometry=points, crs=4326)


# Read both surveys and reproject them to the CRS of the template raster so the
# point coordinates can be used to sample the rasters directly.
# NOTE(review): assumes every raster in input_files shares the template's CRS.
hh_2018_data = read_file(hh_2018).to_crs(template_r.crs)
hh_2022_data = read_file(hh_2022).to_crs(template_r.crs)


def get_pair(t):
    """Return the ``[x, y]`` coordinates of a point-like object.

    Parameters
    ----------
    t : object
        Anything exposing ``x`` and ``y`` attributes (e.g. a shapely Point).

    Returns
    -------
    list
        ``[t.x, t.y]``, or ``[0, 0]`` when the coordinates cannot be read
        (for example when ``t`` is ``None``).
    """
    try:
        return [t.x, t.y]
    except Exception:
        # Best-effort fallback kept from the original behaviour, but no longer
        # a bare ``except`` so KeyboardInterrupt / SystemExit still propagate.
        # NOTE(review): [0, 0] is later used as a raster sampling location, so
        # unreadable geometries silently get whatever value sits at the origin.
        return [0, 0]


# Flatten each survey's geometries into [x, y] lists, the coordinate format
# handed to the raster sampling step.
hh_2018_pairs = list(map(get_pair, hh_2018_data["geometry"]))
hh_2022_pairs = list(map(get_pair, hh_2022_data["geometry"]))

In [None]:
# sample values from urban layers
# Each raster adds one column (named after the file without ".tif") to the
# copies of the two surveys, holding the raster value at every household point.
out_hh_2018 = hh_2018_data.copy()
out_hh_2022 = hh_2022_data.copy()

for urban_file in tqdm(input_files):
    cur_name = os.path.basename(urban_file).replace(".tif", "")
    # Open inside a context manager so the dataset handle is released after
    # sampling instead of leaking one open file per raster.
    with rasterio.open(urban_file) as curR:
        # sample() yields one array per point; x[0] keeps the first band value
        cur_res_2018 = [x[0] for x in curR.sample(hh_2018_pairs)]
        cur_res_2022 = [x[0] for x in curR.sample(hh_2022_pairs)]
    out_hh_2018[cur_name] = cur_res_2018
    out_hh_2022[cur_name] = cur_res_2022

In [None]:
# Write each attributed survey twice: as GeoJSON (with geometry, reprojected
# to CRS 4326) and as a plain CSV with the geometry column removed.
out_hh_2018 = out_hh_2018.to_crs(4326)
geojson_2018 = os.path.join(hh_folder, "hh_2018.geojson")
out_hh_2018.to_file(geojson_2018, driver="GeoJSON")
attrs_2018 = pd.DataFrame(out_hh_2018.drop(columns=["geometry"]))
attrs_2018.to_csv(os.path.join(hh_folder, "hh_2018_urban_attributed.csv"))

out_hh_2022 = out_hh_2022.to_crs(4326)
geojson_2022 = os.path.join(hh_folder, "hh_2022.geojson")
out_hh_2022.to_file(geojson_2022, driver="GeoJSON")
attrs_2022 = pd.DataFrame(out_hh_2022.drop(columns=["geometry"]))
attrs_2022.to_csv(os.path.join(hh_folder, "hh_2022_urban_attributed.csv"))

# Zonal stats at administrative level 2

In [None]:
# Load the boundary layers used by the zonal statistics cells below.
# adm2_bounds: Nigeria ADM2 boundaries fetched through the GOSTRocks helper.
adm2_bounds = dataMisc.get_geoboundaries("NGA", "ADM2")
# The equivalent ADM1 download is disabled; adm1_bounds is read from a local
# shapefile in urban_folder instead.
# adm1_bounds = dataMisc.get_geoboundaries('NGA', 'ADM1')
# NOTE(review): the file name suggests LGA units rather than ADM1 units -
# confirm that the "adm1" variable name matches the intended admin level.
adm1_bounds = gpd.read_file(os.path.join(urban_folder, "new_lga_nigeria_2003.shp"))

In [None]:
# Preview the first rows of the downloaded ADM2 boundaries.
adm2_bounds.head()

In [None]:
# Preview the first rows of the boundaries read from the local shapefile.
adm1_bounds.head()

In [None]:
# List the "nga_"-prefixed DOU rasters that were discovered.
dou_urban_files

In [None]:
# List the "nga_"-prefixed DB rasters that were discovered.
db_urban_files

In [None]:
# For every population raster, summarize population against its urban layers
# per polygon of adm1_bounds and collect all "SUM" columns in final_res.
final_res = adm1_bounds.copy()
for pop_layer in pop_files:
    pop_name = os.path.basename(pop_layer)[:-4]  # file name without ".tif"

    # DOU layers for this population raster
    dou_urban_file = os.path.join(urban_folder, f"{pop_name}_urban.tif")
    dou_hd_urban_file = os.path.join(urban_folder, f"{pop_name}_urban_hd.tif")

    # DB layers for this population raster
    db_cc_file, db_co_file, db_ur_file = (
        os.path.join(urban_folder, f"{pop_name}d10b3000_{kind}.tif")
        for kind in ("cc", "co", "ur")
    )

    # DOU is always processed; the two DB pairs only when the cc raster exists.
    # NOTE(review): only the cc file is checked, co/ur are assumed to exist too.
    has_db = os.path.exists(db_cc_file)
    layer_pairs = [(dou_urban_file, dou_hd_urban_file)]
    if has_db:
        layer_pairs += [(db_cc_file, db_co_file), (db_ur_file, db_co_file)]

    for urban_tif, hd_tif in layer_pairs:
        # NOTE(review): the ADM2 dartboard cell further down calls
        # calculate_zonal(convert_urban_binary=True) for the DB layers; here
        # the default is used - confirm which one is intended.
        help_xx = helper.summarize_population(
            pop_layer, adm1_bounds, urban_tif, hd_tif
        )
        zonal_res = help_xx.calculate_zonal()
        sum_cols = [name for name in zonal_res.columns if "SUM" in name]
        zonal_res = zonal_res.loc[:, sum_cols]
        # Column-by-column copy; values are matched to final_res by index
        for col in zonal_res.columns:
            final_res[col] = zonal_res[col]

    if not has_db:
        tPrint(f"Cannot process {pop_name} for DB")

    tPrint(pop_name)

In [None]:
# Persist the zonal results next to the input shapefile, once with geometry
# (.shp) and once as an attribute-only table (.csv).
# NOTE(review): the Shapefile format caps field names at 10 characters, so the
# long zonal column names may be truncated in the .shp; the CSV keeps them.
urban_pop_base = os.path.join(urban_folder, "new_lga_nigeria_2003_URBAN_POP")
final_res.to_file(urban_pop_base + ".shp")
final_attrs = pd.DataFrame(final_res.drop(columns=["geometry"]))
final_attrs.to_csv(urban_pop_base + ".csv")

In [None]:
# Show every column now present in the zonal results.
list(final_res.columns)

In [None]:
# List the discovered "nga_"-prefixed DB rasters again for reference.
db_urban_files

In [None]:
# DOU zonal statistics per ADM2 polygon for the "nga1k" rasters, computed once
# for the cpo15 and once for the cpo20 population layer.
res_prefix = "nga1k"
dou_tables = []
for pop_tag in ("cpo15", "cpo20"):
    pop_layer = os.path.join(urban_folder, f"{res_prefix}_{pop_tag}.tif")
    urban_layer = os.path.join(urban_folder, f"{res_prefix}_{pop_tag}_urban.tif")
    hd_layer = os.path.join(urban_folder, f"{res_prefix}_{pop_tag}_urban_hd.tif")

    help_xx = helper.summarize_population(pop_layer, adm2_bounds, urban_layer, hd_layer)
    cur_sums = help_xx.calculate_zonal()
    # Keep only the "SUM" columns, then tag each row with its boundary id/name
    cur_sums = cur_sums.loc[:, [name for name in cur_sums.columns if "SUM" in name]]
    cur_sums["shapeID"] = adm2_bounds["shapeID"]
    cur_sums["shapeName"] = adm2_bounds["shapeName"]
    dou_tables.append(cur_sums)

# cpo15 results first, cpo20 results second
zonal_res, zonal_res2 = dou_tables

In [None]:
# Join the cpo15 and cpo20 tables and write them to one CSV.
# The join keys are spelled out: a bare merge() joins on every shared column,
# so an accidentally shared statistics column would silently drop rows.
zonal_res.merge(zonal_res2, on=["shapeID", "shapeName"]).to_csv(
    os.path.join(hh_folder, f"DOU_zonal_stats_{res_prefix}.csv")
)

In [None]:
# DB zonal statistics per ADM2 polygon for the "nga" cpo15 rasters.
res_prefix = "nga"
pop_layer = os.path.join(urban_folder, f"{res_prefix}_cpo15.tif")
# Build the DB layer paths from urban_folder, like pop_layer, instead of
# repeating the absolute folder path in each string.
co_layer = os.path.join(urban_folder, f"{res_prefix}_cpo15d10b3000_co.tif")
ur_layer = os.path.join(urban_folder, f"{res_prefix}_cpo15d10b3000_ur.tif")
cc_layer = os.path.join(urban_folder, f"{res_prefix}_cpo15d10b3000_cc.tif")

# First pass: co and ur layers together
help_xx = helper.summarize_population(pop_layer, adm2_bounds, co_layer, ur_layer)
zonal_res = help_xx.calculate_zonal(convert_urban_binary=True)
zonal_res = zonal_res.loc[:, [x for x in zonal_res.columns if "SUM" in x]]

# Second pass: cc layer on its own
help_xx2 = helper.summarize_population(pop_layer, adm2_bounds, cc_layer)
zonal_res2 = help_xx2.calculate_zonal(convert_urban_binary=True)
zonal_res2 = zonal_res2.loc[:, [x for x in zonal_res2.columns if "SUM" in x]]

# Add the cc column to the first table (values are matched by index) and tag
# every row with its boundary id.
zonal_res[f"_{res_prefix}_cpo15d10b3000_cc_SUM"] = zonal_res2[
    f"_{res_prefix}_cpo15d10b3000_cc_SUM"
]
zonal_res["shapeID"] = adm2_bounds["shapeID"]

In [None]:
# Inspect the combined DB zonal statistics table.
zonal_res

In [None]:
# Write the DB zonal statistics to hh_folder; the file name carries res_prefix.
zonal_res.to_csv(os.path.join(hh_folder, f"DB_zonal_stats_{res_prefix}.csv"))

In [None]:
# Save the ADM2 boundaries as GeoJSON so the shapeID values in the zonal CSVs
# can be joined back to their geometries.
adm2_bounds.to_file(os.path.join(hh_folder, "adm2_geobounds.geojson"), driver="GeoJSON")