In [None]:
import sys
import os
import json
import rasterio

import pandas as pd
import geopandas as gpd

sys.path.insert(0, "../../../../gostrocks/src")

import GOSTRocks.rasterMisc as rMisc
import GOSTRocks.ntlMisc as ntl
from GOSTRocks.misc import tPrint

sys.path.append("../../../src")

import GOST_Urban.UrbanRaster as urban
import GOST_Urban.urban_helper as clippy

%load_ext autoreload
%autoreload 2

# Load machine-specific parameters from a JSON file on the local machine.
local_json = "/home/wb411133/Code/urbanParameters.json"
with open(local_json) as param_file:
    important_vars = json.loads(param_file.read())

In [None]:
# Country being processed; the input and output paths below are derived from it.
iso3 = "UKR"
output_dir = f"/home/wb411133/data/Projects/{iso3}_Urbanization"
# exist_ok removes the check-then-create race and keeps the cell re-runnable.
os.makedirs(output_dir, exist_ok=True)

# Input datasets
population_file = f"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/MOSAIC_ppp_prj_2020/ppp_prj_2020_{iso3}.tif"
# Built from iso3 (rather than a literal country code) so that changing iso3
# above switches every input consistently.
admin_bounds = f"/home/public/Data/COUNTRY/{iso3}/ADMIN/geoBoundaries-{iso3}-ADM3.geojson"
GHSL_file = "/home/public/Data/GLOBAL/GHSL/ghsl.vrt"

# Define output files
urban_extents_file = os.path.join(output_dir, f"{iso3}_urban_extents.geojson")
urban_extents_raster_file = os.path.join(output_dir, f"{iso3}_urban_extents.tif")
urban_extents_hd_file = os.path.join(output_dir, f"{iso3}_urban_extents_hd.geojson")
urban_extents_hd_raster_file = os.path.join(output_dir, f"{iso3}_urban_extents_hd.tif")
admin_urban_summary = os.path.join(output_dir, "adm3_urban_summary.shp")
urban_admin_summary = os.path.join(output_dir, f"{iso3}_ADM3_urban_summary.csv")

# Folder holding the layers that are compiled for mapping at the end of the notebook.
final_folder = os.path.join(output_dir, "Mapping_Data")
os.makedirs(final_folder, exist_ok=True)

admin_final = os.path.join(final_folder, "admin_summarized.shp")
urban_final = os.path.join(final_folder, "urban_summarized.shp")
urban_hd_final = os.path.join(final_folder, "urban_hd_summarized.shp")
# Read (not written) by this notebook when clipping GHSL for the focal cities.
focal_cities = os.path.join(final_folder, "FOCAL_AOIs.shp")

In [None]:
# Admin polygons are loaded into memory; the population and GHSL rasters are
# opened as rasterio datasets and stay open for the cells that follow.
inAdmin = gpd.read_file(admin_bounds)
inP, inG = (rasterio.open(raster_path) for raster_path in (population_file, GHSL_file))

# Run urbanization analysis
1. Create urban extents  
2. Calculate urban population in admin bounds  
3. Summarize nighttime lights in extents and admin
4. Summarize GHSL in extents and admin

In [None]:
# 1. Create urban extents
# Recompute unless every output of this step is already on disk. Checking all
# four files (not just the first GeoJSON) means a partially written or stale
# set of outputs is rebuilt rather than silently reused.
extent_outputs = [
    urban_extents_file,
    urban_extents_raster_file,
    urban_extents_hd_file,
    urban_extents_hd_raster_file,
]
if not all(os.path.exists(out_path) for out_path in extent_outputs):
    urban_calculator = urban.urbanGriddedPop(inP)
    # Standard urban extents
    urban_extents = urban_calculator.calculateUrban(
        densVal=3,
        totalPopThresh=5000,
        smooth=False,
        queen=False,
        verbose=True,
        raster=urban_extents_raster_file,
    )
    # High-density urban extents. These must go to their own raster: writing
    # them to urban_extents_raster_file would overwrite the standard extents
    # and leave urban_extents_hd_raster_file (needed in step 2) missing.
    urban_extents_hd = urban_calculator.calculateUrban(
        densVal=15,
        totalPopThresh=50000,
        smooth=True,
        queen=False,
        verbose=True,
        raster=urban_extents_hd_raster_file,
    )
    urban_extents.to_file(urban_extents_file, driver="GeoJSON")
    urban_extents_hd.to_file(urban_extents_hd_file, driver="GeoJSON")

In [None]:
# 2. Calculate urban population in admin areas
# The worker receives the population raster path, the admin polygons and the
# two urban-extent rasters written in step 1.
pop_worker = clippy.summarize_population(
    population_file,
    gpd.read_file(admin_bounds),
    urban_extents_raster_file,
    urban_extents_hd_raster_file,
)
summarized_urban = pop_worker.calculate_zonal()
# Keep only the SUM statistics. The explicit copy gives urban_res its own data,
# so the column assignments below cannot raise pandas' SettingWithCopyWarning.
sum_columns = [x for x in summarized_urban.columns if "SUM" in x]
urban_res = summarized_urban.loc[:, sum_columns].copy()
# Assumes exactly three SUM columns ordered total / urban / high-density urban
# -- TODO confirm against clippy.summarize_population.
urban_res.columns = ["TOTAL_POP", "URBAN_POP", "URBAN_HD_POP"]
# Identifiers are attached by index alignment with inAdmin; presumably the
# zonal results are in the same row order as the admin file -- verify.
urban_res["shapeID"] = inAdmin["shapeID"]
urban_res["shapeName"] = inAdmin["shapeName"]
urban_res.to_csv(urban_admin_summary)

In [None]:
# 3. Summarize nighttime lights in admin bounds and urban extents
# List the nighttime-lights rasters, then reload both sets of urban extents
# from the GeoJSON files written in step 1.
ntl_files = ntl.find_monthly_ntl()

urbanD, urbanHD = map(gpd.read_file, [urban_extents_file, urban_extents_hd_file])

In [None]:
viirs_folder = os.path.join(output_dir, "NTL_ZONAL_RES")
os.makedirs(viirs_folder, exist_ok=True)

# One entry per geography: (output file prefix, column prefix, zones).
# Note the admin results are stored in ADMIN_*.csv but their columns carry the
# "ADM_URBAN" prefix.
ntl_targets = [
    ("URBAN", "URBAN", urbanD),
    ("HD_URBAN", "HD_URBAN", urbanHD),
    ("ADMIN", "ADM_URBAN", inAdmin),
]
ntl_stats = ["SUM", "MIN", "MAX", "MEAN"]

for ntl_file in ntl_files:
    # The fourth "_"-separated token of the file name labels the outputs;
    # presumably it is the date of the composite -- verify against ntl_files.
    name = os.path.basename(ntl_file).split("_")[3]
    tPrint("Processing %s" % name)

    # Work out which summaries are still missing so the raster is only opened
    # when there is something to compute.
    pending = []
    for file_prefix, col_prefix, zones in ntl_targets:
        res_file = os.path.join(viirs_folder, f"{file_prefix}_{name}.csv")
        if not os.path.exists(res_file):
            pending.append((col_prefix, zones, res_file))
    if not pending:
        continue

    # The context manager closes the dataset after each file; opening one
    # handle per raster without closing it leaks file descriptors.
    with rasterio.open(ntl_file) as inR:
        for col_prefix, zones, res_file in pending:
            zonal_res = rMisc.zonalStats(zones, inR, minVal=0.1)
            col_names = [f"{col_prefix}_{name}_{x}" for x in ntl_stats]
            pd.DataFrame(zonal_res, columns=col_names).to_csv(res_file)

In [None]:
# Compile VIIRS results
# Output tables with one nighttime-lights column per processed raster. These
# names are read again when the final mapping layers are compiled, so they
# have to be defined before that cell runs.
urban_viirs_summary = os.path.join(output_dir, f"{iso3}_urban_viirs_summary.csv")
urban_hd_viirs_summary = os.path.join(output_dir, f"{iso3}_urban_hd_viirs_summary.csv")
admin_viirs_summary = os.path.join(output_dir, f"{iso3}_admin_viirs_summary.csv")

for file_prefix, zones in [("URBAN", urbanD), ("HD_URBAN", urbanHD), ("ADMIN", inAdmin)]:
    # os.listdir returns entries in arbitrary order; sorting fixes the column
    # order, which matters because later cells pick columns by position.
    csv_names = sorted(
        x
        for x in os.listdir(viirs_folder)
        if x.startswith(file_prefix) and x.endswith(".csv")
    )
    for csv_name in csv_names:
        tempD = pd.read_csv(os.path.join(viirs_folder, csv_name), index_col=0)
        # First data column of each per-raster table is its SUM statistic;
        # the new column is named after the file, without the extension.
        zones[os.path.splitext(csv_name)[0]] = tempD.iloc[:, 0]

urbanD.to_csv(urban_viirs_summary)
urbanHD.to_csv(urban_hd_viirs_summary)
inAdmin.to_csv(admin_viirs_summary)

In [None]:
# 4. Summarize GHSL in extents and admin
# One count column per GHSL class value 1..6.
ghsl_cols = ["c_%d" % class_val for class_val in range(1, 7)]
admin_ghsl_summary = os.path.join(output_dir, "admin_GHSL_summary.csv")
urban_ghsl_summary = os.path.join(output_dir, "urban_GHSL_summary.csv")
urbanHD_ghsl_summary = os.path.join(output_dir, "urbanhd_GHSL_summary.csv")

# (zones, column holding the zone identifier, output CSV); each summary is
# computed only when its CSV is not already on disk.
for zones, id_column, summary_path in (
    (inAdmin, "shapeID", admin_ghsl_summary),
    (urbanD, "ID", urban_ghsl_summary),
    (urbanHD, "ID", urbanHD_ghsl_summary),
):
    if os.path.exists(summary_path):
        continue
    res = rMisc.zonalStats(
        zones, inG, rastType="C", unqVals=[1, 2, 3, 4, 5, 6], reProj=True
    )
    res = pd.DataFrame(res, columns=ghsl_cols)
    res["gID"] = zones[id_column]
    res.to_csv(summary_path)

def get_built(x):
    """Relative change in built-up area between the baseline and b2014.

    Parameters
    ----------
    x : mapping or pandas.Series
        Must provide the keys "b2014", "b2000" and "b1990".

    Returns
    -------
    number
        (b2014 - base) / base, where base is b1990, or b2000 when b1990 is 0.
        Returns -1 when both candidate baselines are 0, i.e. growth is undefined.
    """
    cur_built = x["b2014"]
    base_built = x["b1990"]
    if base_built == 0:
        base_built = x["b2000"]
    # Explicit zero check instead of a bare except: NumPy scalars do not raise
    # on division by zero (they yield inf/nan), so the sentinel would otherwise
    # depend on the scalar type, and unrelated errors would be hidden.
    if base_built == 0:
        return -1
    return (cur_built - base_built) / base_built


# Add cumulative built-up totals and the growth rate to each GHSL summary and
# write the table back to the same CSV.
for ghsl_file in [admin_ghsl_summary, urban_ghsl_summary, urbanHD_ghsl_summary]:
    adm_ghsl = pd.read_csv(ghsl_file, index_col=0)
    # Cumulative sums over class columns: each later year adds one more class.
    adm_ghsl["b2014"] = (
        adm_ghsl["c_3"] + adm_ghsl["c_4"] + adm_ghsl["c_5"] + adm_ghsl["c_6"]
    )
    adm_ghsl["b2000"] = adm_ghsl["c_4"] + adm_ghsl["c_5"] + adm_ghsl["c_6"]
    adm_ghsl["b1990"] = adm_ghsl["c_5"] + adm_ghsl["c_6"]

    adm_ghsl["g_14_90"] = adm_ghsl.apply(get_built, axis=1)
    adm_ghsl.to_csv(ghsl_file)

# Compile all results for admin divisions and urban extents

In [None]:
# Compile data
def _safe_ratio(numerator, denominator):
    """Element-wise numerator / denominator with NaN wherever denominator is 0.

    Both arguments are pandas Series; the result is aligned on their index.
    Masking zeros avoids a ZeroDivisionError or inf for zones with no
    population or no recorded lights.
    """
    return numerator / denominator.where(denominator != 0)


# Each entry: (geometry file, population summary or "" when there is none,
#              VIIRS summary, GHSL summary, output shapefile)
compile_defs = [
    (
        admin_bounds,
        urban_admin_summary,
        admin_viirs_summary,
        admin_ghsl_summary,
        admin_final,
    ),
    (urban_extents_file, "", urban_viirs_summary, urban_ghsl_summary, urban_final),
    (
        urban_extents_hd_file,
        "",
        urban_hd_viirs_summary,
        urbanHD_ghsl_summary,
        urban_hd_final,
    ),
]
for in_shapes, pop_csv, viirs_csv, ghsl_csv, out_shp in compile_defs:
    curD = gpd.read_file(in_shapes)
    if pop_csv != "":
        curPop = pd.read_csv(pop_csv, index_col=0)
        curD["Pop"] = curPop["TOTAL_POP"]
        # Shares of the total population living in (high-density) urban areas.
        curD["urbanPop"] = _safe_ratio(curPop["URBAN_POP"], curPop["TOTAL_POP"])
        curD["urbanPopHD"] = _safe_ratio(curPop["URBAN_HD_POP"], curPop["TOTAL_POP"])
    viirsD = pd.read_csv(viirs_csv, index_col=0)
    # Columns are picked by position: this assumes the nighttime-lights columns
    # are the trailing columns in chronological order, with 2013 eighth from
    # the end and 2020 last -- TODO confirm against the VIIRS summary files.
    curD["NTL2013"] = viirsD.iloc[:, -8]
    curD["NTL2020"] = viirsD.iloc[:, -1]
    curD["NTL_g"] = _safe_ratio(curD["NTL2020"] - curD["NTL2013"], curD["NTL2013"])
    ghslD = pd.read_csv(ghsl_csv, index_col=0)
    curD["b2014"] = ghslD["b2014"]
    curD["g_14_90"] = ghslD["g_14_90"]
    curD.to_file(out_shp)

In [None]:
# Preview the first rows of the compiled admin layer as read back from disk.
gpd.read_file(admin_final).head()

In [None]:
# Preview the first rows of the compiled urban-extent layer as read back from disk.
gpd.read_file(urban_final).head()

In [None]:
# Preview the first rows of the compiled high-density urban layer as read back from disk.
gpd.read_file(urban_hd_final).head()

# Extract sample data for mapping

In [None]:
# Destination rasters for the clipped nighttime-lights layers, one per year.
out_ntl_2013, out_ntl_2014, out_ntl_2020 = (
    os.path.join(final_folder, f"VIIRS_{ntl_year}.tif")
    for ntl_year in (2013, 2014, 2020)
)

In [None]:
# List the available nighttime-lights rasters; the next cell clips the 2013,
# 2014 and 2020 layers by their position in this list, so the listing is
# displayed here to check the order.
ntl_files = ntl.find_monthly_ntl()
ntl_files

In [None]:
# Clip selected nighttime-lights rasters to the admin boundaries, skipping any
# output that already exists. The list positions are assumed to match the
# years in the output names -- TODO confirm against the ntl_files listing.
for ntl_idx, out_ntl in [(1, out_ntl_2013), (2, out_ntl_2014), (-1, out_ntl_2020)]:
    if not os.path.exists(out_ntl):
        # Context manager closes the source raster once it has been clipped.
        with rasterio.open(ntl_files[ntl_idx]) as ntl_src:
            rMisc.clipRaster(ntl_src, inAdmin, out_ntl)

In [None]:
# Extract ghsl for the 9 focal cities
# The focal AOI shapefile is read from final_folder; nothing in this notebook
# writes it, so it must already exist there before this cell is run.
in_cities = gpd.read_file(focal_cities)
in_cities.head()

In [None]:
# Clip the GHSL raster to each focal AOI, one GeoTIFF per city named after the
# AOI's "Name" attribute. Existing outputs are left untouched.
max_cnt = 100  # upper bound on the number of AOIs processed
for pos, (idx, row) in enumerate(in_cities.head(max_cnt).iterrows()):
    out_file = os.path.join(final_folder, f"ghsl_{row['Name']}.tif")
    if not os.path.exists(out_file):
        # iloc[[pos]] yields a one-row GeoDataFrame that keeps the column
        # dtypes and CRS; rebuilding it from a transposed row would turn every
        # column into object dtype.
        rMisc.clipRaster(inG, in_cities.iloc[[pos]], out_file)