# Run zonal stats on H3 cells

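This notebook runs zonal statistics using AWS S3 as both the input and the output location: the per-country H3 grids are read from S3, and the resulting statistics tables are written back to S3.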

In [1]:
import sys, os, importlib, math, multiprocessing, boto3
import rasterio, geojson

import pandas as pd
import geopandas as gpd
import numpy as np

from h3 import h3
from tqdm.notebook import tqdm
from shapely.geometry import Polygon

# GOSTRocks is imported from a local source checkout rather than an installed package.
# NOTE(review): this absolute path is user-specific — adjust it for your environment.
sys.path.insert(0, "/home/wb411133/Code/gostrocks/src")
import GOSTRocks.rasterMisc as rMisc
import GOSTRocks.ntlMisc as ntl
import GOSTRocks.mapMisc as mapMisc
from GOSTRocks.misc import tPrint

# Project helper modules are loaded from ../src, relative to the notebook's working directory.
sys.path.append("../src")
import h3_helper
import country_zonal

# Re-import edited modules automatically before each cell runs.
%load_ext autoreload
%autoreload 2



In [2]:
# --- S3 configuration -------------------------------------------------------
# `geo_prefix` is the key prefix the H3 grids are read from;
# `stats_prefix` is the key prefix the zonal-statistics CSVs are written to.
region = "us-east-1"
bucket = "wbg-geography01"
geo_prefix = "Space2Stats/h3_spatial_data"
stats_prefix = "Space2Stats/h3_stats_data"
s3client = boto3.client("s3", region_name=region)

# Local folder (not referenced by the other cells visible in this notebook).
out_folder = "/home/wb411133/projects/Space2Stats/"

# H3 level used in the grid file names and in the output file names.
h3_lvl = 5

In [3]:
# Read the admin-0 boundaries, tag every feature with its original row index,
# then keep only the countries whose WB_REGION is "LCR".
admin_bounds = "/home/public/Data/GLOBAL/ADMIN/ADMIN2/HighRes_20230328/shp/WB_GAD_ADM0.shp"
admin_all = gpd.read_file(admin_bounds)
admin_all['ID'] = admin_all.index  # assigned before filtering, so IDs match the source file's index
in_lcr = admin_all['WB_REGION'] == "LCR"
inA = admin_all.loc[in_lcr]
inA.head()

Unnamed: 0,ISO_A3,ISO_A2,WB_A3,HASC_0,GAUL_0,WB_REGION,WB_STATUS,SOVEREIGN,NAM_0,geometry,ID
4,ARG,AR,ARG,AR,12,LCR,Member State,ARG,Argentina,"MULTIPOLYGON (((-58.43833 -34.00238, -58.44420...",4
6,ATG,AG,ATG,AG,11,LCR,Member State,ATG,Antigua and Barbuda,"MULTIPOLYGON (((-61.84439 17.72899, -61.84423 ...",6
22,BHS,BS,BHS,BS,20,LCR,Member State,BHS,The Bahamas,"MULTIPOLYGON (((-73.31020 20.96701, -73.31006 ...",22
25,BLZ,BZ,BLZ,BZ,28,LCR,Member State,BLZ,Belize,"MULTIPOLYGON (((-87.61940 17.17505, -87.61993 ...",25
26,BOL,BO,BOL,BO,33,LCR,Member State,BOL,Bolivia,"POLYGON ((-65.39157 -10.37980, -65.39104 -10.3...",26


In [4]:
# Raster layers used as zonal-statistics inputs.
## TODO - this section should pull from Benny's config files

# Night-time lights layers, as returned by GOSTRocks' AWS search helper.
ntl_layers = ntl.aws_search_ntl()

# Local GHSL rasters (paths are specific to this machine).
global_urban = "/home/public/Data/GLOBAL/GHSL/SMOD/GHS_SMOD_E2020_GLOBE_R2023A_54009_1000_V1_0.tif"
global_pop_layer = "/home/public/Data/GLOBAL/GHSL/Pop/GHS_POP_E2020_GLOBE_R2023A_54009_100_V1_0.tif"

In [5]:
# Build one argument list per selected country: [ISO3 code, S3 URI of its H3 grid, admin row].
# These lists are later unpacked as the positional arguments of run_ntl_zonal.
all_args = []
for idx, row in inA.iterrows():
    # S3 URIs always use "/" separators, so the URI is formatted explicitly
    # instead of using os.path.join, whose separator depends on the host OS.
    h3_path = f"s3://{bucket}/{geo_prefix}/{row['WB_REGION']}/{row['ISO_A3']}/h3_level_{h3_lvl}.geojson"
    all_args.append([row['ISO_A3'], h3_path, row])
    print(h3_path)

s3://wbg-geography01/Space2Stats/h3_spatial_data/LCR/ARG/h3_level_5.geojson
s3://wbg-geography01/Space2Stats/h3_spatial_data/LCR/ATG/h3_level_5.geojson
s3://wbg-geography01/Space2Stats/h3_spatial_data/LCR/BHS/h3_level_5.geojson
s3://wbg-geography01/Space2Stats/h3_spatial_data/LCR/BLZ/h3_level_5.geojson
s3://wbg-geography01/Space2Stats/h3_spatial_data/LCR/BOL/h3_level_5.geojson
s3://wbg-geography01/Space2Stats/h3_spatial_data/LCR/BRA/h3_level_5.geojson
s3://wbg-geography01/Space2Stats/h3_spatial_data/LCR/BRB/h3_level_5.geojson
s3://wbg-geography01/Space2Stats/h3_spatial_data/LCR/CHL/h3_level_5.geojson
s3://wbg-geography01/Space2Stats/h3_spatial_data/LCR/CRI/h3_level_5.geojson
s3://wbg-geography01/Space2Stats/h3_spatial_data/LCR/DMA/h3_level_5.geojson
s3://wbg-geography01/Space2Stats/h3_spatial_data/LCR/DOM/h3_level_5.geojson
s3://wbg-geography01/Space2Stats/h3_spatial_data/LCR/ECU/h3_level_5.geojson
s3://wbg-geography01/Space2Stats/h3_spatial_data/LCR/GRD/h3_level_5.geojson
s3://wbg-geo

In [7]:
def run_ntl_zonal(iso3, h3_cells, row):
    """Run night-time lights zonal statistics for one country and store them on S3.

    For every raster in the notebook-level ``ntl_layers`` list, the result of
    ``zonal_raster_urban(ntl_file, global_urban)`` is written to
    ``s3://{bucket}/{stats_prefix}/{WB_REGION}/{ISO_A3}/NTL_{date}_{h3_lvl}.csv``.
    A layer is skipped when that CSV can already be read, so an interrupted run
    can be resumed without recomputing finished layers.

    Parameters
    ----------
    iso3 : str
        Country code forwarded to ``country_zonal.country_h3_zonal``.
    h3_cells
        Forwarded as the ``h3_grid`` argument of ``country_h3_zonal``; the
        notebook passes the S3 URI of the country's H3 GeoJSON.
    row : pandas.Series
        One row of ``inA``; ``WB_REGION``, ``ISO_A3`` and ``geometry`` are read.

    Returns
    -------
    None
        Results are only written to S3.
    """
    gpd_row = gpd.GeoDataFrame([row], crs=inA.crs, geometry='geometry')
    # Use the shared h3_lvl setting (previously a hard-coded 5) so the grid level
    # stays in sync with the level embedded in the output file name below.
    # NOTE(review): assumes the 4th positional argument is the H3 level — confirm.
    zonalC = country_zonal.country_h3_zonal(iso3, gpd_row, "ID", h3_lvl, '', h3_grid=h3_cells)
    for ntl_file in tqdm(ntl_layers):
        # The third "_"-separated token of the file name is used as the date label.
        cDate = os.path.basename(ntl_file).split("_")[2]
        # S3 URIs always use "/" separators; do not rely on os.path.join here.
        out_file = f"s3://{bucket}/{stats_prefix}/{row['WB_REGION']}/{row['ISO_A3']}/NTL_{cDate}_{h3_lvl}.csv"

        # Best-effort existence check: any failure to read the CSV means it has
        # to be (re)generated. `Exception` rather than a bare `except` so that
        # KeyboardInterrupt / SystemExit still stop the worker.
        try:
            pd.read_csv(out_file, index_col=0, nrows=2)
            already_done = True
        except Exception:
            already_done = False
        if already_done:
            continue

        # Computed outside the handler so a failure here surfaces with a clean traceback.
        zonal_res = zonalC.zonal_raster_urban(ntl_file, global_urban)
        zonal_res.to_csv(out_file)

In [None]:
# Run one task per country across worker processes, capped at 70 workers.
# max(1, ...) keeps Pool() from raising ValueError ("Number of processes must be
# at least 1") when all_args is empty; starmap then simply returns [].
n_workers = max(1, min(70, len(all_args)))
with multiprocessing.Pool(processes=n_workers) as pool:
    # Each entry of all_args is unpacked into run_ntl_zonal(iso3, h3_cells, row).
    results = pool.starmap(run_ntl_zonal, all_args)


Generating h3 grid level 5: 100%|██████████| 4/4 [00:00<00:00, 237.59it/s]

Generating h3 grid level 5: 100%|██████████| 58/58 [00:00<00:00, 867.93it/s]

Generating h3 grid level 5:   0%|          | 0/3 [00:00<?, ?it/s][A
Generating h3 grid level 5:   0%|          | 0/21 [00:00<?, ?it/s][A
Generating h3 grid level 5:   0%|          | 0/6 [00:00<?, ?it/s][A
Generating h3 grid level 5: 100%|██████████| 42/42 [00:00<00:00, 538.95it/s]

Generating h3 grid level 5:   0%|          | 0/933 [00:00<?, ?it/s][A
Generating h3 grid level 5:   0%|          | 0/28 [00:00<?, ?it/s]
Generating h3 grid level 5: 100%|██████████| 3/3 [00:00<00:00,  8.12it/s][A

Generating h3 grid level 5: 100%|██████████| 61/61 [00:00<00:00, 930.78it/s]

Generating h3 grid level 5:   0%|          | 0/17 [00:00<?, ?it/s][A
Generating h3 grid level 5: 100%|██████████| 21/21 [00:00<00:00, 48.27it/s][A

Generating h3 grid level 5:  39%|███▉      | 11/28 [00:00<00:00, 70.75it/s][A
Generating h3 grid level 5:   0%|    

Generating h3 grid level 5:  89%|████████▊ | 7024/7916 [00:03<00:00, 1891.20it/s][A
Generating h3 grid level 5:  91%|█████████▏| 7237/7916 [00:03<00:00, 1897.99it/s][A
Generating h3 grid level 5:  95%|█████████▌| 7537/7916 [00:03<00:00, 2174.50it/s][A
Generating h3 grid level 5: 100%|██████████| 7916/7916 [00:03<00:00, 2062.04it/s][A

Generating h3 grid level 5: 100%|██████████| 775/775 [00:09<00:00, 78.51it/s]  [A

Generating h3 grid level 5:  36%|███▌      | 252/704 [00:09<00:21, 21.50it/s]  [A
Generating h3 grid level 5:  53%|█████▎    | 370/704 [00:10<00:08, 37.72it/s][A
Generating h3 grid level 5:  63%|██████▎   | 441/704 [00:10<00:05, 50.12it/s][A
Generating h3 grid level 5:  75%|███████▌  | 530/704 [00:10<00:02, 72.73it/s][A
Generating h3 grid level 5: 100%|██████████| 704/704 [00:10<00:00, 66.47it/s][A

Generating h3 grid level 5:  25%|██▌       | 424/1671 [00:22<00:00, 1351.27it/s][A
Generating h3 grid level 5:  39%|███▊      | 645/1671 [00:22<00:48, 21.29it/s]  [A

Generating h3 grid level 5:  40%|███▉      | 17687/44283 [00:08<00:10, 2431.51it/s][A
Generating h3 grid level 5:  41%|████      | 17949/44283 [00:08<00:10, 2431.01it/s][A
Generating h3 grid level 5:  41%|████      | 18205/44283 [00:08<00:13, 1891.36it/s][A
Generating h3 grid level 5:  42%|████▏     | 18421/44283 [00:08<00:13, 1908.86it/s][A
Generating h3 grid level 5:  42%|████▏     | 18769/44283 [00:08<00:11, 2287.85it/s][A
Generating h3 grid level 5:  43%|████▎     | 19022/44283 [00:09<00:10, 2301.03it/s][A
Generating h3 grid level 5:  44%|████▎     | 19268/44283 [00:09<00:12, 2036.26it/s][A
Generating h3 grid level 5:  44%|████▍     | 19488/44283 [00:09<00:13, 1780.51it/s][A
Generating h3 grid level 5:  44%|████▍     | 19681/44283 [00:09<00:14, 1663.04it/s][A
Generating h3 grid level 5:  45%|████▌     | 19955/44283 [00:09<00:12, 1913.74it/s][A
Generating h3 grid level 5:  46%|████▌     | 20185/44283 [00:09<00:11, 2009.50it/s][A
Generating h3 grid level 5:  46%|████▌     

Generating h3 grid level 5:  31%|███       | 739/2382 [02:11<00:07, 221.23it/s][A
Generating h3 grid level 5:  32%|███▏      | 774/2382 [02:11<00:06, 249.49it/s][A
Generating h3 grid level 5:  34%|███▍      | 805/2382 [02:11<00:05, 263.73it/s][A
Generating h3 grid level 5:  35%|███▌      | 840/2382 [02:11<00:05, 286.04it/s][A
Generating h3 grid level 5:  37%|███▋      | 872/2382 [02:11<00:05, 291.28it/s][A
Generating h3 grid level 5:  38%|███▊      | 904/2382 [02:11<00:05, 295.48it/s][A
Generating h3 grid level 5:  40%|███▉      | 941/2382 [02:12<00:04, 314.60it/s][A
Generating h3 grid level 5:  41%|████      | 974/2382 [02:12<00:04, 302.49it/s][A
Generating h3 grid level 5:  42%|████▏     | 1006/2382 [02:12<00:05, 244.37it/s][A
Generating h3 grid level 5:  43%|████▎     | 1033/2382 [02:12<00:05, 240.61it/s][A
Generating h3 grid level 5:  44%|████▍     | 1059/2382 [02:12<00:05, 231.51it/s][A
Generating h3 grid level 5:  46%|████▌     | 1084/2382 [02:12<00:05, 230.24it/s][A


In [None]:
# Scratch cell: selects the first night-time lights layer.
# NOTE(review): presumably left over from interactive debugging — consider removing.
ntl_file = ntl_layers[0]


In [None]:
# Scratch cell: displays `row`, which is only defined as the loop variable of the
# argument-building cell, so this shows the last country iterated there.
# NOTE(review): relies on leaked kernel state — consider removing.
row