# Benchmarking cities in MENA

In support of an upcoming Urban flagship report, the MENA team is looking for a series of zonal statistics:

- Nighttime Lights, Population, and built-area:  
  - Entire FUA  
  - Its associated urban center / “core”  
  - Associated “periphery”  

The unit of analysis is the Functional Urban Area (FUA) from the [UCDB Database](https://human-settlement.emergency.copernicus.eu/ghs_stat_ucdb2015mt_r2019a.php). For each FUA, we need to grab the associated urban periphery (lower-threshold urban areas).

In [None]:
import sys
import os
import itertools
import rasterio

import geopandas as gpd
import pandas as pd
import numpy as np

from scipy.spatial import cKDTree
from shapely.geometry import Point
from operator import itemgetter

sys.path.append("C:/WBG/Work/Code/GOSTrocks/src")



In [None]:
# Paths to the GHS urban datasets under the local C: drive data folder.
data_folder = "C:/WBG/Work/data"
urban_folder = os.path.join(data_folder, "URBAN")

# Urban centre database (the "cores")
ucdb_file = os.path.join(
    urban_folder,
    "GHS_STAT_UCDB2015MT_GLOBE_R2019A",
    "GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg",
)
# Functional urban areas
fua_file = os.path.join(
    urban_folder, "GHS_FUA_UCDB2015_GLOBE_R2019A_54009_1K_V1_0.gpkg"
)

import GOSTrocks.ntlMisc as ntlMisc
import GOSTrocks.rasterMisc as rMisc
from GOSTrocks.misc import tPrint

In [2]:
# Input locations: the FUA extents and the cached peripheries sit under the
# S3 folder, the UCDB is read from the server's file system, and the flare
# volume estimates are an Excel workbook fetched over HTTPS.
data_folder = "s3://wbg-geography01/URBANIZATION/MENA/Extents/"
fua_peripheries = os.path.join(data_folder, "FUA_peripheries.gpkg")
fua_file = os.path.join(
    data_folder, "GHS_FUA_UCDB2015_GLOBE_R2019A_54009_1K_V1_0.gpkg"
)
ucdb_file = "/home/wb411133/Code/GOSTurban/GHS_STAT_UCDB2015MT_GLOBE_R2019A/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg"
flaring_locations_file = "https://thedocs.worldbank.org/en/doc/d01b4aebd8a10513c0e341de5e1f652e-0400072024/related/2012-2023-individual-flare-volume-estimates.xlsx?_gl=1*19fhic5*_gcl_au*MzM5MTcxNjUwLjE3MTg2NTk5ODU."

In [3]:
# Load the flare records and turn every row into a point (EPSG:4326) built
# from its Longitude / Latitude columns.
flaring_d = pd.read_excel(flaring_locations_file)
flaring_d['ID'] = flaring_d.index  # row number doubles as the flare identifier
flaring_geoms = [
    Point(lon, lat)
    for lon, lat in zip(flaring_d['Longitude'], flaring_d['Latitude'])
]
flaring_d = gpd.GeoDataFrame(flaring_d, geometry=flaring_geoms, crs=4326)
# Single geometry holding every flare location
all_flares = flaring_d.unary_union
flaring_d.head()

Unnamed: 0,COUNTRY,Latitude,Longitude,bcm,MMscfd,Year,Field Type,Location,Flare Level,Flaring Vol (million m3),ID,geometry
0,Albania,40.748105,19.657746,0.0,0.0,2012,OIL,ONSHORE,Small,0.0,0,POINT (19.65775 40.74811)
1,Albania,40.748105,19.657746,0.00012,0.011601,2013,OIL,ONSHORE,Small,0.119908,1,POINT (19.65775 40.74811)
2,Albania,40.748105,19.657746,0.006156,0.595589,2014,OIL,ONSHORE,Small,6.155797,2,POINT (19.65775 40.74811)
3,Albania,40.748105,19.657746,0.018834,1.822192,2015,OIL,ONSHORE,Small,18.833535,3,POINT (19.65775 40.74811)
4,Albania,40.747395,19.655184,0.021211,2.05223,2016,OIL,ONSHORE,Small,21.211124,4,POINT (19.65518 40.74739)


In [4]:
# Load the urban centres (UCDB, the "cores") and the functional urban areas.
inU = gpd.read_file(ucdb_file)
inF = gpd.read_file(fua_file)
# Remember the CRS the FUA file was stored in before reprojecting; it is
# reused further down for the distance and buffer calculations.
m_crs = inF.crs
inF = inF.to_crs(inU.crs)

# If the peripheries exist read them in; if not, create them
try:
    inP = gpd.read_file(fua_peripheries)
except Exception:
    # Any read failure is treated as "not built yet". Exception (rather than
    # a bare except) keeps Ctrl-C / kernel interrupts working.
    inP = inF.copy()
    for idx, row in inF.iterrows():
        # grab the related UCDBs: UC_IDs is split on ";" into integer ids
        # that are matched against the ID_HDC_G0 column of the UCDB layer
        ucdb_ids = [int(x) for x in row["UC_IDs"].split(";")]
        sel_cores = inU.loc[inU["ID_HDC_G0"].isin(ucdb_ids)]
        # periphery = FUA footprint minus the union of its cores
        inP.loc[idx, "geometry"] = row["geometry"].difference(sel_cores.unary_union)

    # Cache at the same location the read above looks in, so the next run
    # finds it. `fua_peripheries` stays a path (it is not rebound to the
    # GeoDataFrame), which keeps this cell safe to re-run.
    inP.to_file(fua_peripheries, driver="GPKG")
inP = inP.to_crs(inU.crs)
# A zero-width buffer is applied to every layer (commonly used to repair
# invalid geometries) before the zonal statistics are run.
inP['geometry'] = inP.buffer(0)
inU['geometry'] = inU.buffer(0)
inF['geometry'] = inF.buffer(0)

# Nighttime-lights zonal results for the full FUA extents, cached as a CSV.
fua_res = "/home/wb411133/temp/fua_ntl_zonal.csv"
try:
    fua_zonal = pd.read_csv(fua_res)
except FileNotFoundError:
    # No cached results yet: run the zonal calculation and cache the output.
    fua_zonal = ntlMisc.run_zonal(inF, verbose=True)
    # NOTE(review): to_csv writes the index too, so a frame read back from the
    # cache carries an extra unnamed index column the fresh result does not.
    fua_zonal.to_csv(fua_res)

In [None]:
# Nighttime-lights zonal results for the urban cores, cached as a CSV.
core_res = "/home/wb411133/temp/cores_ntl_zonal.csv"
try:
    core_zonal = pd.read_csv(core_res)
except FileNotFoundError:
    # Only a missing cache triggers the (slow) recalculation; any other read
    # error now surfaces instead of being silently swallowed.
    core_zonal = ntlMisc.run_zonal(inU, verbose=True)
    # NOTE(review): to_csv writes the index too, so a frame read back from the
    # cache carries an extra unnamed index column the fresh result does not.
    core_zonal.to_csv(core_res)

In [None]:
# NOTE: the periphery zonal run below is deliberately disabled by keeping it
# inside a string literal; nothing in this cell executes, so `per_zonal` is
# never defined.
''' The clipping process produces multi and null geometries, which breaks this process
#### TODO: Determine if this is necessary
per_res = "/home/wb411133/temp/periphary_ntl_zonal.csv"
try:
    per_zonal = pd.read_csv(per_res)
except:
    per_zonal = ntlMisc.run_zonal(inP, verbose=True)
    per_zonal.to_csv(per_res)
'''

# Adjust for flaring

Two steps to adjust the nighttime lights data for flaring:
1. Mute the nighttime lights data within a buffer zone of each flaring location
2. Determine distances between city extents (FUA) and nearest flaring location

In [5]:
#2 Distance calculations

def ckdnearest(gdfA, gdfB, gdfB_cols=None):
    """Attach the nearest ``gdfB`` feature to every row of ``gdfA``.

    A KD-tree is built from every coordinate of every ``gdfB`` geometry and
    queried with the coordinates of ``gdfA``. Distances are Euclidean in the
    coordinate units of the inputs, so both frames should share a CRS.

    Parameters
    ----------
    gdfA : frame with a ``geometry`` column; each geometry must expose
        exactly one coordinate through ``.coords`` (i.e. points).
    gdfB : frame with a ``geometry`` column whose geometries expose
        ``.coords``.
    gdfB_cols : column label(s) of ``gdfB`` to copy onto the result.
        Defaults to ``['ID']``.

    Returns
    -------
    ``gdfA`` with the selected ``gdfB`` columns and a ``dist`` column
    appended, indexed like ``gdfA``.

    Raises
    ------
    ValueError
        If ``gdfA`` does not contribute exactly one coordinate per row.
    """
    if gdfB_cols is None:
        # Fresh list per call instead of a shared mutable default argument.
        gdfB_cols = ['ID']

    a_coords = np.concatenate(
        [np.array(geom.coords) for geom in gdfA.geometry.to_list()])
    b_parts = [np.array(geom.coords) for geom in gdfB.geometry.to_list()]
    # For every vertex stored in the tree, the row position in gdfB that it
    # came from (a geometry with k vertices repeats its position k times).
    b_rows = np.repeat(np.arange(len(b_parts)), [len(part) for part in b_parts])

    tree = cKDTree(np.concatenate(b_parts))
    dist, vertex_idx = tree.query(a_coords, k=1)
    if len(dist) != len(gdfA):
        raise ValueError(
            "gdfA geometries must each have exactly one coordinate (points)")

    # Array indexing keeps the result 1-D even when gdfA has a single row.
    nearest_rows = b_rows[vertex_idx]
    # Positional lookup: works whatever index labels gdfB carries.
    matched = gdfB.iloc[nearest_rows][gdfB_cols].set_axis(gdfA.index, axis=0)
    dist_col = pd.Series(dist, index=gdfA.index, name='dist')
    # All three pieces share gdfA's index, so concat pairs rows one-to-one.
    return pd.concat([gdfA, matched, dist_col], axis=1)

# Represent each FUA by its centroid, computed in m_crs (the CRS the FUA file
# was stored in). inF was reprojected to the UCDB CRS earlier; if that CRS is
# geographic (lat/lon), centroids taken there are unreliable and geopandas
# warns about it, so project first and take the centroid afterwards.
inF_centroid = inF.to_crs(m_crs)  # to_crs returns a new frame; inF is untouched
inF_centroid['geometry'] = inF_centroid['geometry'].centroid
# Flares must share the centroids' CRS so the nearest-neighbour distances are
# expressed in that CRS's units.
flaring_d = flaring_d.to_crs(m_crs)

nearest_calc = ckdnearest(inF_centroid, flaring_d)

# NOTE: disabled alternative kept inside a string literal, so none of it
# runs. It would flag each FUA whose geometry intersects any flare location
# (Inter_Flare = 1).
'''
inF['Inter_Flare'] = 0
all_flares = flaring_d.unary_union
inF = inF.to_crs(flaring_d.crs)
for idx, row in inF.iterrows():
    if row['geometry'].intersects(all_flares):
        inF.loc[idx, 'Inter_Flare'] = 1
'''


  inF_centroid['geometry'] = inF_centroid['geometry'].centroid


"\ninF['Inter_Flare'] = 0\nall_flares = flaring_d.unary_union\ninF = inF.to_crs(flaring_d.crs)\nfor idx, row in inF.iterrows():\n    if row['geometry'].intersects(all_flares):\n        inF.loc[idx, 'Inter_Flare'] = 1\n"

In [8]:
#1 Mute nighttime lights data within a mask

# Grow every flare location into a disc of radius buffer_dist. The buffer is
# applied in flaring_d's current CRS, then the discs are moved to EPSG:4326.
buffer_dist = 5000  # (metres)
buffered_flare = flaring_d.copy()
buffered_flare['geometry'] = buffered_flare.buffer(buffer_dist)
buffered_flare = buffered_flare.to_crs(4326)

# Burn the discs onto the grid of the first nighttime-lights image, then
# invert the result: cells with a non-zero rasterized value become 0 and all
# other cells become 1, so multiplying by the mask zeroes those cells.
ntl_images = ntlMisc.aws_search_ntl()
rasterized_flares = rMisc.rasterizeDataFrame(
    buffered_flare, None, templateRaster=ntl_images[0], nodata=0
)
flare_mask = np.logical_not(rasterized_flares['vals'].astype(bool)).astype(int)

In [9]:
### Use the mask in the zonal calculation
finalF = inF.copy()
ntl_image = ntl_images[0]
date = os.path.basename(ntl_image).split("_")[2][:6]
raw_ntl = rMisc.zonalStats(inF, ntl_image, minVal=0.1, reProj=True)
raw_ntl = pd.DataFrame(raw_ntl, columns = ['SUM','MIN','MAX','MEAN'])
finalF[f'raw_{date}'] = raw_ntl['SUM']

ntl_r = rasterio.open(ntl_images[0])
ntl_data = ntl_r.read()
masked_ntl_data = ntl_data * flare_mask
with rMisc.create_rasterio_inmemory(ntl_r.profile, masked_ntl_data) as masked_ntl_raster:
    masked_ntl = rMisc.zonalStats(inF, masked_ntl_raster, minVal=0.1, reProj=True)
    masked_ntl = pd.DataFrame(masked_ntl, columns = ['SUM','MIN','MAX','MEAN'])
finalF[f'mask_{date}'] = masked_ntl['SUM']

In [12]:
# Save the FUA extents with their raw and masked sums; the file name records
# the image date and the flare buffer distance used.
zonal_gpkg = f'/home/wb411133/temp/{date}_{buffer_dist}_zonal_ntl.gpkg'
finalF.to_file(zonal_gpkg, driver='GPKG')

# Create mapping and debugging data

In [None]:
#Write flare mask to disk
flare_mask_file = os.path.join('/home/wb411133/temp', "flare_mask.tif")
flare_profile = ntl_r.profile.copy()
flare_mask = flare_mask.astype('int16')
flare_profile.update(dtype = flare_mask.dtype)

with rasterio.open(flare_mask_file, 'w', **flare_profile) as out_flare:
    out_flare.write_band(1, flare_mask)

In [None]:
# Combine raw and masked ntl results with urban extents
outF = nearest_calc.copy()
outF['rawNTL'] = raw_ntl_df['SUM']
outF['maskedNTL'] = masked_ntl_df['SUM']
outF.to_file(os.path.join('/home/wb411133/temp', "urban_extents_ntl_flaring.gpkg"), driver="GPKG")

## Combine NTL results
We have monthly zonal results for the entire FUA and for the core; a two-step process creates the final results:
1. Combine monthly results into annual results  
2. Use FUA and cores to generate three stats: FUA SoL, Core SoL, and Periphery SoL

In [None]:
def combine_ntl_annual(curD, years=None):
    """Add one annual column per year to a frame of NTL zonal results.

    For every year, the columns whose name contains ``ntl_<year>`` are summed
    row-wise and divided by the number of matching columns, and the result is
    stored as ``ntl<year>_SoL``. Missing values count as zero in the sum, so
    this is not a NaN-skipping mean.

    Parameters
    ----------
    curD : pandas.DataFrame
        Zonal results; modified in place.
    years : iterable of int, optional
        Years to aggregate. Defaults to 2012 through 2023 inclusive.

    Returns
    -------
    pandas.DataFrame
        The same ``curD`` object, with the annual columns added.
    """
    if years is None:
        years = range(2012, 2024)
    for yr in years:
        # str() keeps the substring test valid for non-string column labels.
        cur_columns = [x for x in curD.columns if f"ntl_{yr}" in str(x)]
        if cur_columns:
            annual = curD.loc[:, cur_columns].sum(axis=1) / len(cur_columns)
        else:
            # No input columns for this year: make the missing value explicit
            # rather than relying on a 0/0 division.
            annual = float("nan")
        curD[f'ntl{yr}_SoL'] = annual
    return curD

# Export the annual results for the FUAs and for the cores to the results folder.
out_folder = "s3://wbg-geography01/URBANIZATION/MENA/ZONAL_RES/NTL/"
for zonal_df, csv_name in (
    (fua_zonal, "fua_VIIRS_SoL.csv"),
    (core_zonal, "core_VIIRS_SoL.csv"),
):
    combine_ntl_annual(zonal_df).to_csv(os.path.join(out_folder, csv_name))

In [None]:
# Re-run the annual aggregation on the core results; as the cell's last
# expression, the returned frame is shown for inspection.
combine_ntl_annual(core_zonal)

# DEBUGGING

In [None]:
# Pull out a single FUA for debugging. `.copy()` makes tempF an independent
# frame, so the assignment below is not a write into a slice of inF.
tempF = inF.loc[inF['eFUA_ID'] == 1281].copy()
# Grow the footprint by 0.001 CRS units (presumably degrees -- confirm inF's CRS).
tempF['geometry'] = tempF.buffer(0.001)

In [None]:
# Debug: zonal statistics for the test FUA against the third image in
# ntl_images, this time with allTouched=True.
rMisc.zonalStats(tempF, ntl_images[2], minVal=0.1, reProj=True, allTouched=True)

In [None]:
# Debug: display the first test geometry buffered by a further 0.001 units.
tempF['geometry'].iloc[0].buffer(0.001)

In [None]:
# Debug: display the test FUA frame.
tempF