In [None]:
import sys
import os
import rasterio

import pandas as pd
import geopandas as gpd

from tqdm.notebook import tqdm

sys.path.append(r"C:\WBG\Work\Code\GOSTrocks\src")
import GOSTrocks.rasterMisc as rMisc

%load_ext autoreload
%autoreload 2

In [None]:
# --- Analysis parameters --------------------------------------------------
sel_year = 2017  # year used to filter the GDP table
gdp_col = "cell_GDPC_const_2017_PPP"  # GDP column carried through the analysis
bad_cols = ["method", "cell_size"]  # columns dropped from the GDP table on load

# --- Input locations ------------------------------------------------------
gdp_folder = r"C:\WBG\Work\data\GDP\CHICAGO"
gdp_file = os.path.join(
    gdp_folder,
    "final_GDP_0_25deg_postadjust_pop_dens_0_01_adjust.csv",
)
gdp_shp_file = os.path.join(gdp_folder, "shapefile", "geom_0_25deg.shp")
ucdb_file = "C:/WBG/Work/data/URBAN/GHS_STAT_UCDB2015MT_GLOBE_R2019A/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg"
pop_file = "C:/WBG/Work/data/POP/ppp_2020_1km_Aggregated.tif"

# --- Load inputs ----------------------------------------------------------
# Urban centres: only the identifier and the geometry are kept.
in_ucdb = gpd.read_file(ucdb_file)
in_ucdb = in_ucdb.loc[:, ["ID_HDC_G0", "geometry"]]

# Population raster: the handle is left open because the zonal-statistics
# loop further down reads from it.
inPop = rasterio.open(pop_file)

# GDP attribute table (minus the unwanted columns) and its grid geometries.
gdp_df = pd.read_csv(gdp_file)
gdp_df = gdp_df.drop(columns=bad_cols)
gdp_gdf = gpd.read_file(gdp_shp_file)

In [None]:
# Preview the GDP attribute table read from gdp_file (bad_cols already dropped).
gdp_df.head()

In [None]:
# Preview the GDP grid geometries read from gdp_shp_file.
gdp_gdf.head()

## Join the selected GDP data to the shapefile


In [None]:
# Keep only the rows for the selected year and the columns needed downstream.
# The explicit .copy() makes sel_gdp an independent frame, so adding the gID
# key column to it later does not raise pandas' SettingWithCopyWarning.
sel_gdp = gdp_df.loc[
    gdp_df.year == sel_year,
    ["cell_id", "iso", gdp_col, "pop_cell", "subcell_id", "subcell_id_0_25"],
].copy()
sel_gdp.head()

In [None]:
def _compose_gid(frame, key_cols):
    """Return the columns in ``key_cols`` cast to str and joined with "_".

    The parts are concatenated left to right, one separator between each
    pair, so the result has one string key per row of ``frame``.
    """
    pieces = [frame[col].astype(str) for col in key_cols]
    gid = pieces[0]
    for piece in pieces[1:]:
        gid = gid + "_" + piece
    return gid


# Give both tables the same single-column key. The shapefile uses different
# names for its three id fields than the CSV does (presumably truncated on
# export — confirm), so each table passes its own column names.
gdp_gdf["gID"] = _compose_gid(gdp_gdf, ["cell_id", "sbcll_d", "s__0_25"])
sel_gdp["gID"] = _compose_gid(
    sel_gdp, ["cell_id", "subcell_id", "subcell_id_0_25"]
)

In [None]:
# Attribute-join the year-filtered GDP table onto the grid geometries via gID,
# then trim the result to the columns used downstream.
# "iso_x" is the left-hand copy of a column pandas suffixed during the merge
# (presumably both tables carry an "iso" column — confirm).
keep_cols = ["gID", "iso_x", gdp_col, "pop_cell", "geometry"]
merged_gdp = pd.merge(gdp_gdf, sel_gdp, on="gID", how="inner")
combo_gdp = merged_gdp.loc[:, keep_cols]
combo_gdp.head()

In [None]:
# Inspect the CRS of the joined GDP grid. It is spatially joined against
# in_ucdb below, so the two presumably need to match — confirm.
combo_gdp.crs

In [None]:
# Estimate a GDP total for every urban centre in in_ucdb.
# out_res maps each ID_HDC_G0 to {"TOTAL_GDP": ..., "GDP_CELLS": ...}.
out_res = {}
for idx, row in tqdm(in_ucdb.iterrows(), total=in_ucdb.shape[0]):
    # Identify the GDP grid cells that intersect this city.
    # The row is wrapped in a one-row GeoDataFrame so it can be passed to
    # sjoin/overlay.
    sel_city = gpd.GeoDataFrame([row], crs=in_ucdb.crs)
    sel_overlay = gpd.sjoin(sel_city, combo_gdp, how="inner", predicate="intersects")
    sel_grids = combo_gdp.loc[combo_gdp["gID"].isin(sel_overlay.gID)]

    # Clip the selected grid cells to the city footprint: one row per
    # city/grid-cell piece.
    city_grids = gpd.overlay(sel_city, sel_grids, how="intersection")
    # Population-raster statistics for each clipped piece.
    # NOTE(review): assumes zonalStats returns one [sum, min, max, mean]
    # record per input row, in row order — confirm against GOSTrocks.rasterMisc,
    # including what it reports for pieces it cannot process.
    res = rMisc.zonalStats(city_grids, inPop, minVal=0, verbose=False)
    res = pd.DataFrame(res, columns=["sum", "min", "max", "mean"])
    # Column assignment aligns on index labels, so this relies on city_grids
    # and res sharing the same index — presumably both 0..n-1; confirm.
    city_grids["wpPOP"] = res["sum"]
    # Piece GDP = gdp_col value x raster population sum (gdp_col looks like a
    # per-capita figure judging by its name — confirm).
    city_grids["GDP_city"] = city_grids[gdp_col] * city_grids["wpPOP"]
    out_res[row["ID_HDC_G0"]] = {
        "TOTAL_GDP": city_grids["GDP_city"].sum(),
        "GDP_CELLS": city_grids.shape[0],
    }

In [None]:
# One row per urban centre (index = ID_HDC_G0), columns TOTAL_GDP and GDP_CELLS.
# from_dict(orient="index") lets each column keep its own dtype; building the
# frame column-per-city and transposing it would upcast the integer GDP_CELLS
# counts to float before they are written out.
city_gdp_res = pd.DataFrame.from_dict(out_res, orient="index")
city_gdp_res.to_csv(os.path.join(gdp_folder, f"city_{gdp_col}_res.csv"))

In [None]:
# Echo the folder the results CSV was written to.
gdp_folder

# DEBUGGING

In [None]:
# Debugging aid: reload the UCDB layer without the column selection applied
# to in_ucdb (which keeps only ID_HDC_G0 and geometry) and preview it.
inD = gpd.read_file(ucdb_file)
inD.head()