# Summarizing Urbanization in Nigeria

For the provided household survey, summarize urbanization (cpo15 and cpo20, dartboard and degree of urbanization)

In [1]:
import sys, os, importlib, shutil, pathlib, datetime, math
import requests
import rasterio, elevation, richdem
import rasterio.warp

import pandas as pd
import geopandas as gpd
import numpy as np

from shapely.geometry import MultiPolygon, Polygon, box, Point
from rasterio import features
from datetime import datetime

from tqdm.notebook import tqdm

#Import raster helpers
sys.path.insert(0, "/home/wb411133/Code/gostrocks/src")

import GOSTRocks.rasterMisc as rMisc
import GOSTRocks.dataMisc as dataMisc
import GOSTRocks.metadataMisc as meta
from GOSTRocks.misc import tPrint

#Import GOST urban functions
sys.path.append("../../../src")
import GOST_Urban.UrbanRaster as urban
import GOST_Urban.urban_helper as helper

#Import local functions
import novelUrbanization as nu
from novelUrbanization import *

%load_ext autoreload
%autoreload 2



METADATA Library: Could not import arcgis libraries


In [2]:
urban_folder = "/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/"
hh_folder = os.path.join(urban_folder, "HH_Files")

The urbanization files were downloaded from the GOST AWS bucket; but the whole thing could work directly off that.

In [20]:
# get urban_files
dou_urban_files = []
db_urban_files = []

dou_urban_1k_files = []
db_urban_1k_files = []

for root, dirs, files in os.walk(urban_folder):
    for f in files:
        if f.startswith('nga_'): # grab all the 250m resolution files
            if f.endswith("_urban.tif") or f.endswith("_urban_hd.tif"):
                dou_urban_files.append(os.path.join(root, f))
            if f.endswith("_cc.tif") or f.endswith("_co.tif") or f.endswith("_ur.tif"):
                db_urban_files.append(os.path.join(root, f))
        if f.startswith('nga1k_'): # grab all the 1km resolution files
            if f.endswith("_urban.tif") or f.endswith("_urban_hd.tif"):
                dou_urban_1k_files.append(os.path.join(root, f))
            if f.endswith("_cc.tif") or f.endswith("_co.tif") or f.endswith("_ur.tif"):
                db_urban_1k_files.append(os.path.join(root, f))
            
pop_files = list(set(["_".join(os.path.basename(x).split("_")[:2]) + ".tif" for x in dou_urban_files]))
pop_files = [os.path.join(urban_folder, x) for x in pop_files]

In [21]:
pop_files

['/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_cpo20.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_gpo.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_cpo15.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_upo15.tif']

In [None]:
input_files = dou_urban_files
template_r = rasterio.open(input_files[0])

In [None]:
hh_2018 = os.path.join(hh_folder, 'NGA_2018_to_GIS.csv')
hh_2022 = os.path.join(hh_folder, 'NGA_2022_to_GIS.csv')

def read_file(in_file):
    curD = pd.read_csv(in_file)
    geoms = [Point(x) for x in zip(curD['hh_gps_longitude'], curD['hh_gps_latitude'])]
    curD = gpd.GeoDataFrame(curD, geometry=geoms, crs=4326)
    return(curD)

hh_2018_data = read_file(hh_2018).to_crs(template_r.crs)
hh_2022_data = read_file(hh_2022).to_crs(template_r.crs)

def get_pair(t):
    try:
        return([t.x, t.y])
    except:
        return([0,0])
hh_2018_pairs = [get_pair(t) for t in hh_2018_data['geometry']]
hh_2022_pairs = [get_pair(t) for t in hh_2022_data['geometry']]

In [None]:
# sample values from urban layers
out_hh_2018 = hh_2018_data.copy()
out_hh_2022 = hh_2022_data.copy()

for urban_file in tqdm(input_files):
    curR = rasterio.open(urban_file)
    cur_name = os.path.basename(urban_file).replace(".tif", "")
    cur_res_2018 = [x[0] for x in list(curR.sample(hh_2018_pairs))]
    out_hh_2018[cur_name] = cur_res_2018
    
    cur_res_2022 = [x[0] for x in list(curR.sample(hh_2022_pairs))]
    out_hh_2022[cur_name] = cur_res_2022
    

In [None]:
out_hh_2018 = out_hh_2018.to_crs(4326)
out_hh_2018.to_file(os.path.join(hh_folder, 'hh_2018.geojson'), driver='GeoJSON')
pd.DataFrame(out_hh_2018.drop(['geometry'], axis=1)).to_csv(os.path.join(hh_folder, 'hh_2018_urban_attributed.csv'))

out_hh_2022 = out_hh_2022.to_crs(4326)
out_hh_2022.to_file(os.path.join(hh_folder, 'hh_2022.geojson'), driver='GeoJSON')
pd.DataFrame(out_hh_2022.drop(['geometry'], axis=1)).to_csv(os.path.join(hh_folder, 'hh_2022_urban_attributed.csv'))

# Zonal stats at administrative level 2

In [26]:
# Run zonal stats at admin 2
adm2_bounds = dataMisc.get_geoboundaries('NGA', 'ADM2')
#adm1_bounds = dataMisc.get_geoboundaries('NGA', 'ADM1')
adm1_bounds = gpd.read_file(os.path.join(urban_folder, "new_lga_nigeria_2003.shp"))

In [27]:
adm2_bounds.head()

Unnamed: 0,shapeName,shapeISO,shapeID,shapeGroup,shapeType,geometry
0,Eastern Obolo,,59680162B7891718144591,NGA,ADM2,"POLYGON ((7.56201 4.51388, 7.57134 4.51233, 7...."
1,Ekeremor,,59680162B22876202690460,NGA,ADM2,"POLYGON ((5.99261 4.89302, 5.98824 4.90425, 5...."
2,Degema,,59680162B23543460253472,NGA,ADM2,"POLYGON ((6.85818 4.39824, 6.95485 4.37353, 6...."
3,Andoni,,59680162B90577513466378,NGA,ADM2,"POLYGON ((7.32460 4.43947, 7.33342 4.44372, 7...."
4,Akpabuyo,,59680162B58958286313368,NGA,ADM2,"POLYGON ((8.39659 4.78065, 8.40093 4.78882, 8...."


In [28]:
adm1_bounds.head()

Unnamed: 0,STATE,LGA,AREA,PERIMETER,LONGITUDE,LATITUDE,FULL_NAME,geometry
0,Sokoto,Gada,1193.977,170.095,,,,"POLYGON ((5.53632 13.88793, 5.53480 13.88488, ..."
1,Sokoto,Illela,1298.423,174.726,,,,"POLYGON ((5.53632 13.88793, 5.54517 13.88419, ..."
2,Sokoto,Tangaza,2460.715,209.702,,,,"POLYGON ((4.85548 13.76724, 4.86189 13.78085, ..."
3,Borno,Abadam,2430.515,288.957,,,,"POLYGON ((12.83189 13.39871, 12.83397 13.40439..."
4,Lake,Lake chad,5225.912,497.039,,,,"POLYGON ((13.48608 13.30821, 13.48296 13.31344..."


In [31]:
dou_urban_files

['/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_upo15_urban.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_upo15_urban_hd.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_cpo15_urban.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_gpo_urban.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_cpo15_urban_hd.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_cpo20_urban.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_cpo20_urban_hd.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_gpo_urban_hd.tif']

In [32]:
db_urban_files

['/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_gbud10b3000_ur.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_gbud10b3000_cc.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_gpod10b3000_cc.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_gpod10b3000_co.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_gpod10b3000_ur.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_upo15d10b3000_cc.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_upo15d10b3000_ur.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_upo15d10b3000_co.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_gbud10b3000_co.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_cpo15d10b3000_cc.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization

In [46]:
final_res = adm1_bounds.copy()
for pop_layer in pop_files:
    # zonal stats on DOU filess
    pop_name = os.path.basename(pop_layer)[:-4]    
    dou_urban_file    = os.path.join(urban_folder, f'{pop_name}_urban.tif')
    dou_hd_urban_file = os.path.join(urban_folder, f'{pop_name}_urban_hd.tif')
    
    help_xx = helper.summarize_population(pop_layer, adm1_bounds, dou_urban_file, dou_hd_urban_file)
    zonal_res = help_xx.calculate_zonal()
    zonal_res = zonal_res.loc[:,[x for x in zonal_res.columns if "SUM" in x]]
    for col in zonal_res.columns:
        final_res[col] = zonal_res[col]
    
    # zonal stats on DB files
    db_cc_file = os.path.join(urban_folder, f'{pop_name}d10b3000_cc.tif')
    db_co_file = os.path.join(urban_folder, f'{pop_name}d10b3000_co.tif')
    db_ur_file = os.path.join(urban_folder, f'{pop_name}d10b3000_ur.tif')
    if os.path.exists(db_cc_file):
        help_xx = helper.summarize_population(pop_layer, adm1_bounds, db_cc_file, db_co_file)
        zonal_res = help_xx.calculate_zonal()
        zonal_res = zonal_res.loc[:,[x for x in zonal_res.columns if "SUM" in x]]
        for col in zonal_res.columns:
            final_res[col] = zonal_res[col]

        help_xx = helper.summarize_population(pop_layer, adm1_bounds, db_ur_file, db_co_file)
        zonal_res = help_xx.calculate_zonal()
        zonal_res = zonal_res.loc[:,[x for x in zonal_res.columns if "SUM" in x]]
        for col in zonal_res.columns:
            final_res[col] = zonal_res[col]
    else:
        tPrint(f"Cannot process {pop_name} for DB")
        
    tPrint(pop_name)

10:37:35	Cannot process nga_cpo20 for DB
10:37:35	nga_cpo20
10:37:56	nga_gpo
10:38:18	nga_cpo15
10:38:39	nga_upo15


In [52]:
final_res.to_file(os.path.join(urban_folder, "new_lga_nigeria_2003_URBAN_POP.shp" ))
pd.DataFrame(final_res.drop(["geometry"], axis=1)).to_csv(os.path.join(urban_folder, "new_lga_nigeria_2003_URBAN_POP.csv"))

  final_res.to_file(os.path.join(urban_folder, "new_lga_nigeria_2003_URBAN_POP.shp" ))


In [50]:
[x for x in final_res.columns]attack

['STATE',
 'LGA',
 'AREA',
 'PERIMETER',
 'LONGITUDE',
 'LATITUDE',
 'FULL_NAME',
 'geometry',
 'TOTALPOP_nga_cpo20_SUM',
 '_nga_cpo20_urban_SUM',
 '_nga_cpo20_urban_hd_SUM',
 'TOTALPOP_nga_gpo_SUM',
 '_nga_gpo_urban_SUM',
 '_nga_gpo_urban_hd_SUM',
 '_nga_gpod10b3000_cc_SUM',
 '_nga_gpod10b3000_co_SUM',
 '_nga_gpod10b3000_ur_SUM',
 'TOTALPOP_nga_cpo15_SUM',
 '_nga_cpo15_urban_SUM',
 '_nga_cpo15_urban_hd_SUM',
 '_nga_cpo15d10b3000_cc_SUM',
 '_nga_cpo15d10b3000_co_SUM',
 '_nga_cpo15d10b3000_ur_SUM',
 'TOTALPOP_nga_upo15_SUM',
 '_nga_upo15_urban_SUM',
 '_nga_upo15_urban_hd_SUM',
 '_nga_upo15d10b3000_cc_SUM',
 '_nga_upo15d10b3000_co_SUM',
 '_nga_upo15d10b3000_ur_SUM']

In [45]:
db_urban_files

['/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_gbud10b3000_ur.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_gbud10b3000_cc.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_gpod10b3000_cc.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_gpod10b3000_co.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_gpod10b3000_ur.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_upo15d10b3000_cc.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_upo15d10b3000_ur.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_upo15d10b3000_co.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_gbud10b3000_co.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/nga_cpo15d10b3000_cc.tif',
 '/home/wb411133/data/Projects/MR_Novel_Urbanization

In [None]:
res_prefix = 'nga1k'
pop_layer = os.path.join(urban_folder, f'{res_prefix}_cpo15.tif')
urban_layer = os.path.join(urban_folder, f'{res_prefix}_cpo15_urban.tif')
hd_layer = os.path.join(urban_folder, f'{res_prefix}_cpo15_urban_hd.tif')

help_xx = helper.summarize_population(pop_layer, adm2_bounds, urban_layer, hd_layer)
zonal_res = help_xx.calculate_zonal()
zonal_res = zonal_res.loc[:,[x for x in zonal_res.columns if "SUM" in x]]
zonal_res['shapeID'] = adm2_bounds['shapeID']
zonal_res['shapeName'] = adm2_bounds['shapeName']

pop_layer = os.path.join(urban_folder, f'{res_prefix}_cpo20.tif')
urban_layer = os.path.join(urban_folder, f'{res_prefix}_cpo20_urban.tif')
hd_layer = os.path.join(urban_folder, f'{res_prefix}_cpo20_urban_hd.tif')

help_xx = helper.summarize_population(pop_layer, adm2_bounds, urban_layer, hd_layer)
zonal_res2 = help_xx.calculate_zonal()
zonal_res2 = zonal_res2.loc[:,[x for x in zonal_res2.columns if "SUM" in x]]
zonal_res2['shapeID'] = adm2_bounds['shapeID']
zonal_res2['shapeName'] = adm2_bounds['shapeName']

In [None]:
zonal_res.merge(zonal_res2).to_csv(os.path.join(hh_folder, f'DOU_zonal_stats_{res_prefix}.csv'))

In [None]:
res_prefix = 'nga'
pop_layer = os.path.join(urban_folder, f'{res_prefix}_cpo15.tif')
co_layer = f'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/{res_prefix}_cpo15d10b3000_co.tif'
ur_layer = f'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/{res_prefix}_cpo15d10b3000_ur.tif'
cc_layer = f'/home/wb411133/data/Projects/MR_Novel_Urbanization/Data/NGA_1K_res/{res_prefix}_cpo15d10b3000_cc.tif'

help_xx = helper.summarize_population(pop_layer, adm2_bounds, co_layer, ur_layer)
zonal_res = help_xx.calculate_zonal(convert_urban_binary=True)
zonal_res = zonal_res.loc[:,[x for x in zonal_res.columns if "SUM" in x]]

help_xx2 = helper.summarize_population(pop_layer, adm2_bounds, cc_layer)
zonal_res2 = help_xx2.calculate_zonal(convert_urban_binary=True)
zonal_res2 = zonal_res2.loc[:,[x for x in zonal_res2.columns if "SUM" in x]]

zonal_res[f'_{res_prefix}_cpo15d10b3000_cc_SUM'] = zonal_res2[f'_{res_prefix}_cpo15d10b3000_cc_SUM']
zonal_res['shapeID'] = adm2_bounds['shapeID']


In [None]:
zonal_res

In [None]:
zonal_res.to_csv(os.path.join(hh_folder, f'DB_zonal_stats_{res_prefix}.csv'))

In [None]:
adm2_bounds.to_file(os.path.join(hh_folder, 'adm2_geobounds.geojson'), driver='GeoJSON')