In [1]:
import sys, os
import rasterio

import pandas as pd
import geopandas as gpd

from tqdm.notebook import tqdm

sys.path.append(r"C:\WBG\Work\Code\GOSTrocks\src")
import GOSTrocks.shapeMisc as shapeMisc
import GOSTrocks.dataMisc as dMisc
import GOSTrocks.rasterMisc as rMisc
from GOSTrocks.misc import tPrint

%load_ext autoreload
%autoreload 2

In [2]:
gdp_folder = r"C:\WBG\Work\data\GDP\CHICAGO"
gdp_file = os.path.join(gdp_folder, "final_GDP_0_25deg_postadjust_pop_dens_0_01_adjust.csv")
gdp_shp_file = os.path.join(gdp_folder, "shapefile", "geom_0_25deg.shp")
gdp_col = "cell_GDPC_const_2017_PPP"
sel_year = 2017

ucdb_file = "C:/WBG/Work/data/URBAN/GHS_STAT_UCDB2015MT_GLOBE_R2019A/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg"
in_ucdb = gpd.read_file(ucdb_file).loc[:, ['ID_HDC_G0', 'geometry']]

pop_file = "C:/WBG/Work/data/POP/ppp_2020_1km_Aggregated.tif"
inPop = rasterio.open(pop_file)

bad_cols = ['method', 'cell_size']
gdp_df = pd.read_csv(gdp_file).drop(columns=bad_cols)
gdp_gdf = gpd.read_file(gdp_shp_file)

In [3]:
gdp_df.head()

Unnamed: 0,cell_id,iso,year,predicted_GCP_const_2017_USD,predicted_GCP_current_USD,predicted_GCP_const_2017_PPP,predicted_GCP_current_PPP,pop_cell,cell_GDPC_const_2017_USD,cell_GDPC_current_USD,cell_GDPC_const_2017_PPP,cell_GDPC_current_PPP,subcell_id,subcell_id_0_25,is_cell_censored,national_population,longitude,latitude
0,42801,AUS,2012,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,4,1,22928000.0,140.75,-29.0
1,42802,AUS,2012,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,2,1,22928000.0,141.0,-29.0
2,42802,AUS,2012,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2,4,1,22928000.0,141.25,-29.0
3,42802,AUS,2012,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,2,1,22928000.0,141.5,-29.0
4,42802,AUS,2012,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,4,1,22928000.0,141.75,-29.0


In [4]:
gdp_gdf.head()

Unnamed: 0,iso,fid_2,cell_id,sbcll_d,s__0_25,geometry
0,AFG,297768.0,18611,2,4,"MULTIPOLYGON (((70.33517 38.01025, 70.33531 38..."
1,AFG,297773.0,18611,4,1,"POLYGON ((70.557 38.25525, 70.55699 38.25541, ..."
2,AFG,297774.0,18611,4,2,"POLYGON ((70.50496 38.12181, 70.50502 38.12186..."
3,AFG,297775.0,18611,4,3,"POLYGON ((70.81003 38.44411, 70.81168 38.4442,..."
4,AFG,297776.0,18611,4,4,"POLYGON ((71 38, 70.75 38, 70.75 38.25, 71 38...."


## Join the selected GDP data to the shapefile


In [5]:
sel_gdp = gdp_df.loc[gdp_df.year == sel_year, ['cell_id', 'iso', gdp_col, 'pop_cell', 'subcell_id', 'subcell_id_0_25']]
sel_gdp.head()

Unnamed: 0,cell_id,iso,cell_GDPC_const_2017_PPP,pop_cell,subcell_id,subcell_id_0_25
1358460,42801,AUS,0.0,0.0,4,4
1358461,42802,AUS,0.0,0.0,2,2
1358462,42802,AUS,0.0,0.0,2,4
1358463,42802,AUS,0.0,0.0,4,2
1358464,42802,AUS,0.0,0.0,4,4


In [6]:
# Create consistent, single column keys
gdp_gdf['gID'] = gdp_gdf['cell_id'].astype(str) + "_" + gdp_gdf['sbcll_d'].astype(str) + "_" + gdp_gdf['s__0_25'].astype(str)
sel_gdp['gID'] = sel_gdp['cell_id'].astype(str) + "_" + sel_gdp['subcell_id'].astype(str) + "_" + sel_gdp['subcell_id_0_25'].astype(str)

In [7]:
combo_gdp = pd.merge(gdp_gdf, sel_gdp, on='gID', how='inner').loc[:, ['gID', 'iso_x', gdp_col, 'pop_cell', 'geometry']]
combo_gdp.head()

Unnamed: 0,gID,iso_x,cell_GDPC_const_2017_PPP,pop_cell,geometry
0,18611_2_4,AFG,3e-06,6966.0,"MULTIPOLYGON (((70.33517 38.01025, 70.33531 38..."
1,18611_2_4,AFG,4e-06,2587.0,"MULTIPOLYGON (((70.33517 38.01025, 70.33531 38..."
2,18611_4_1,AFG,3e-06,8340.0,"POLYGON ((70.557 38.25525, 70.55699 38.25541, ..."
3,18611_4_1,AFG,7e-06,5505.0,"POLYGON ((70.557 38.25525, 70.55699 38.25541, ..."
4,18611_4_2,AFG,1e-06,18136.0,"POLYGON ((70.50496 38.12181, 70.50502 38.12186..."


In [8]:
combo_gdp.crs

<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

In [9]:
out_res = {}
for idx, row in tqdm(in_ucdb.iterrows(), total=in_ucdb.shape[0]):
    # Identify the cells that intersect this city and union together
    sel_city = gpd.GeoDataFrame([row], crs=in_ucdb.crs)
    sel_overlay = gpd.sjoin(sel_city, combo_gdp, how='inner', predicate='intersects')
    sel_grids = combo_gdp.loc[combo_gdp['gID'].isin(sel_overlay.gID)]


    city_grids = gpd.overlay(sel_city, sel_grids, how='intersection')
    res = rMisc.zonalStats(city_grids, inPop, minVal=0, verbose=False)
    res = pd.DataFrame(res, columns=['sum', 'min', 'max', 'mean'])
    city_grids['wpPOP'] = res['sum']
    city_grids['GDP_city'] = city_grids[gdp_col] * city_grids['wpPOP']
    out_res[row['ID_HDC_G0']] = {
                                    'TOTAL_GDP': city_grids['GDP_city'].sum(),
                                    'GDP_CELLS': city_grids.shape[0],
                                }   

  0%|          | 0/13135 [00:00<?, ?it/s]

  city_grids = gpd.overlay(sel_city, sel_grids, how='intersection')
  city_grids = gpd.overlay(sel_city, sel_grids, how='intersection')
  city_grids = gpd.overlay(sel_city, sel_grids, how='intersection')
  city_grids = gpd.overlay(sel_city, sel_grids, how='intersection')
  city_grids = gpd.overlay(sel_city, sel_grids, how='intersection')
  city_grids = gpd.overlay(sel_city, sel_grids, how='intersection')
  city_grids = gpd.overlay(sel_city, sel_grids, how='intersection')
  city_grids = gpd.overlay(sel_city, sel_grids, how='intersection')


In [12]:
city_gdp_res = pd.DataFrame(out_res).T
city_gdp_res.to_csv(os.path.join(gdp_folder, f"city_{gdp_col}_res.csv"))

In [13]:
gdp_folder

'C:\\WBG\\Work\\data\\GDP\\CHICAGO'

# DEBUGGING

In [11]:
inD = gpd.read_file(ucdb_file)
inD.head()

Unnamed: 0,ID_HDC_G0,QA2_1V,AREA,BBX_LATMN,BBX_LONMN,BBX_LATMX,BBX_LONMX,GCPNT_LAT,GCPNT_LON,CTR_MN_NM,...,EX_SS_P00,EX_SS_P15,EX_EQ19PGA,EX_EQ19MMI,EX_EQ19_Q,EX_HW_IDX,SDG_LUE9015,SDG_A2G14,SDG_OS15MX,geometry
0,1.0,1.0,185.0,21.247683,-158.043016,21.422193,-157.730529,21.340678,-157.893497,United States,...,397443.031445,444041.529529,,,missing,,0.074385,0.226415,56.41,"MULTIPOLYGON (((-158.01244 21.42219, -157.9915..."
1,2.0,2.0,42.0,-17.641184,-149.628088,-17.517631,-149.508018,-17.534103,-149.568053,French Polynesia,...,0.0,0.0,,,missing,,0.128,0.284119,,"MULTIPOLYGON (((-149.56967 -17.51763, -149.508..."
2,3.0,1.0,55.0,34.858517,-120.475511,34.989334,-120.389183,34.923123,-120.434372,United States,...,0.0,0.0,0.0,0.0,available,2.79174,0.48114,0.040129,23.64,"MULTIPOLYGON (((-120.46375 34.98933, -120.4411..."
3,4.0,1.0,48.0,36.582997,-121.952215,36.635743,-121.811816,36.60772,-121.882378,United States,...,0.0,0.0,0.0,0.0,available,,0.44484,0.138683,42.17,"MULTIPOLYGON (((-121.95221 36.63574, -121.9179..."
4,5.0,1.0,60.0,34.38822,-119.853855,34.457831,-119.658413,34.427664,-119.743693,United States,...,0.0,0.0,0.0,0.0,available,4.25502,0.55676,0.061348,36.5,"MULTIPOLYGON (((-119.82444 34.45783, -119.8131..."
