# Generate H3 geospatial data and write it to an AWS S3 bucket

In [1]:
import sys, os, importlib, math, multiprocessing, boto3
import rasterio, geojson

import pandas as pd
import geopandas as gpd
import numpy as np

from h3 import h3
from tqdm import tqdm
from shapely.geometry import Polygon

sys.path.insert(0, "/home/wb411133/Code/gostrocks/src")
import GOSTRocks.rasterMisc as rMisc
import GOSTRocks.ntlMisc as ntl
import GOSTRocks.mapMisc as mapMisc
from GOSTRocks.misc import tPrint

sys.path.append("../src")
import h3_helper
import country_zonal

%load_ext autoreload
%autoreload 2



In [2]:
# S3 destination (bucket, key prefix, region) and the local output folder.
region = "us-east-1"
bucket = "wbg-geography01"
prefix = "Space2Stats/h3_spatial_data"

# Client is created against the region declared above.
s3client = boto3.client("s3", region_name=region)

# Local folder handed to country_zonal.country_h3_zonal in generate_grid.
out_folder = "/home/wb411133/projects/Space2Stats/"

In [3]:
# Admin-0 boundaries shapefile; each feature gets an 'ID' column copied from
# the positional index assigned by read_file.
admin_bounds = "/home/public/Data/GLOBAL/ADMIN/ADMIN2/HighRes_20230328/shp/WB_GAD_ADM0.shp"
inA = gpd.read_file(admin_bounds).assign(ID=lambda frame: frame.index)
inA.head()

Unnamed: 0,ISO_A3,ISO_A2,WB_A3,HASC_0,GAUL_0,WB_REGION,WB_STATUS,SOVEREIGN,NAM_0,geometry,ID
0,AFG,AF,AFG,AF,1,SAR,Member State,AFG,Afghanistan,"POLYGON ((70.04663 37.54360, 70.04676 37.54356...",0
1,ALB,AL,ALB,AL,3,ECA,Member State,ALB,Albania,"MULTIPOLYGON (((20.46186 41.55588, 20.46177 41...",1
2,AND,AD,ADO,AD,7,Other,Member State,AND,Andorra,"POLYGON ((1.46171 42.50602, 1.46176 42.50605, ...",2
3,ARE,AE,ARE,AE,255,MENA,Member State,ARE,United Arab Emirates,"MULTIPOLYGON (((53.83064 24.06414, 53.82975 24...",3
4,ARG,AR,ARG,AR,12,LCR,Member State,ARG,Argentina,"MULTIPOLYGON (((-58.43833 -34.00238, -58.44420...",4


In [4]:
# Build one [row, h3_level] argument pair per admin feature for the
# multiprocessing run, printing the S3 key prefix each feature maps to.
# NOTE(review): the printed prefixes can repeat (e.g. Other/AUS appears twice),
# so several rows may target the same S3 object — confirm whether such rows
# should be merged before generating grids.
h3_level = 6
all_args = []

for _, row in inA.iterrows():
    cur_prefix = os.path.join(prefix, row['WB_REGION'], row['ISO_A3'])
    print(cur_prefix)
    all_args.append([row, h3_level])
    

Space2Stats/h3_spatial_data/SAR/AFG
Space2Stats/h3_spatial_data/ECA/ALB
Space2Stats/h3_spatial_data/Other/AND
Space2Stats/h3_spatial_data/MENA/ARE
Space2Stats/h3_spatial_data/LCR/ARG
Space2Stats/h3_spatial_data/ECA/ARM
Space2Stats/h3_spatial_data/LCR/ATG
Space2Stats/h3_spatial_data/Other/AUS
Space2Stats/h3_spatial_data/Other/AUS
Space2Stats/h3_spatial_data/Other/CXR
Space2Stats/h3_spatial_data/Other/CCK
Space2Stats/h3_spatial_data/Other/HMD
Space2Stats/h3_spatial_data/Other/NFK
Space2Stats/h3_spatial_data/Other/AUT
Space2Stats/h3_spatial_data/ECA/AZE
Space2Stats/h3_spatial_data/AFR/BDI
Space2Stats/h3_spatial_data/Other/BEL
Space2Stats/h3_spatial_data/AFR/BEN
Space2Stats/h3_spatial_data/AFR/BFA
Space2Stats/h3_spatial_data/SAR/BGD
Space2Stats/h3_spatial_data/ECA/BGR
Space2Stats/h3_spatial_data/MENA/BHR
Space2Stats/h3_spatial_data/LCR/BHS
Space2Stats/h3_spatial_data/ECA/BIH
Space2Stats/h3_spatial_data/ECA/BLR
Space2Stats/h3_spatial_data/LCR/BLZ
Space2Stats/h3_spatial_data/LCR/BOL
Space2St

In [5]:
def generate_grid(row, lvl):
    """Generate the H3 grid for one admin feature and write it to S3 as GeoJSON.

    Args:
        row: a single row of ``inA`` (as produced by ``inA.iterrows()``); its
            'WB_REGION' and 'ISO_A3' values build the S3 key and its
            'geometry' value becomes the one-feature GeoDataFrame.
        lvl: H3 level passed to ``country_zonal.country_h3_zonal`` and used in
            the output file name.

    Returns:
        The object returned by ``generate_h3_grid()`` on success, or ``None``
        when grid generation or the write to S3 raised an exception.
    """
    cur_prefix = os.path.join(prefix, row['WB_REGION'], row['ISO_A3'])
    out_file = f's3://{bucket}/{cur_prefix}/h3_level_{lvl}.geojson'
    tPrint(f"Starting {cur_prefix}")
    # Re-wrap the single row as a one-feature GeoDataFrame in the source CRS.
    cur_gpd = gpd.GeoDataFrame(pd.DataFrame(row).transpose(), geometry='geometry', crs=inA.crs)
    zonalC = country_zonal.country_h3_zonal(row['ISO_A3'], cur_gpd, "ID", lvl, out_folder)
    try:
        h3_grid = zonalC.generate_h3_grid()
        h3_grid.to_file(out_file, driver="GeoJSON")
    except Exception as err:
        # Best-effort per feature: report the failure (with its cause) and keep
        # the remaining pool tasks running. Catching Exception rather than using
        # a bare except lets KeyboardInterrupt/SystemExit propagate.
        tPrint(f"Error processing lvl {lvl} for {row['ISO_A3']}: {err!r}")
        return None
    tPrint(f"Completed {cur_prefix}")
    return h3_grid



In [None]:
# Run generate_grid over every [row, level] pair in parallel, capped at 70
# worker processes. Pool(processes=0) raises ValueError, so an empty argument
# list is handled without creating a pool.
n_workers = min(70, len(all_args))
if n_workers > 0:
    with multiprocessing.Pool(processes=n_workers) as pool:
        results = pool.starmap(generate_grid, all_args)
else:
    results = []

14:19:51	Starting Space2Stats/h3_spatial_data/ECA/ALB14:19:51	Starting Space2Stats/h3_spatial_data/Other/AND

14:19:51	Starting Space2Stats/h3_spatial_data/SAR/AFG
14:19:51	Starting Space2Stats/h3_spatial_data/MENA/ARE
14:19:51	Starting Space2Stats/h3_spatial_data/Other/AUS14:19:51	Starting Space2Stats/h3_spatial_data/LCR/ATG

14:19:51	Starting Space2Stats/h3_spatial_data/ECA/ARM


Generating h3 grid level 5: 100%|██████████| 4/4 [00:00<00:00, 2587.88it/s]


14:19:51	Starting Space2Stats/h3_spatial_data/LCR/ARG


Generating h3 grid level 5: 100%|██████████| 58/58 [00:00<00:00, 823.74it/s]
Generating h3 grid level 5:   0%|          | 0/19 [00:00<?, ?it/s]

14:19:51	Starting Space2Stats/h3_spatial_data/Other/HMD14:19:51	Starting Space2Stats/h3_spatial_data/Other/CXR14:19:51	Starting Space2Stats/h3_spatial_data/Other/CCK

14:19:51	Starting Space2Stats/h3_spatial_data/Other/NFK


Generating h3 grid level 5: 100%|██████████| 16/16 [00:00<00:00, 1909.32it/s]


14:19:51	Error processing lvl 5 for AUS


Generating h3 grid level 5:   0%|          | 0/42 [00:00<?, ?it/s]

14:19:51	Starting Space2Stats/h3_spatial_data/Other/AUT


Generating h3 grid level 5: 100%|██████████| 42/42 [00:00<00:00, 2342.41it/s]
Generating h3 grid level 5:   0%|          | 0/74 [00:00<?, ?it/s]

14:19:51	Starting Space2Stats/h3_spatial_data/ECA/AZE
14:19:51	Starting Space2Stats/h3_spatial_data/AFR/BDI
14:19:51	Starting Space2Stats/h3_spatial_data/Other/BEL
14:19:52	Starting Space2Stats/h3_spatial_data/AFR/BEN
14:19:52	Starting Space2Stats/h3_spatial_data/Other/AUS


Generating h3 grid level 5: 100%|██████████| 74/74 [00:00<00:00, 1108.39it/s]


14:19:52	Starting Space2Stats/h3_spatial_data/AFR/BFA
14:19:52	Starting Space2Stats/h3_spatial_data/MENA/BHR
14:19:52	Starting Space2Stats/h3_spatial_data/SAR/BGD
14:19:52	Starting Space2Stats/h3_spatial_data/ECA/BGR
14:19:52	Starting Space2Stats/h3_spatial_data/ECA/BIH
14:19:52	Starting Space2Stats/h3_spatial_data/LCR/BHS
14:19:52	Error processing lvl 5 for NFK14:19:52	Starting Space2Stats/h3_spatial_data/LCR/BLZ

14:19:52	Starting Space2Stats/h3_spatial_data/ECA/BLR
14:19:52	Starting Space2Stats/h3_spatial_data/LCR/BOL
14:19:52	Error processing lvl 5 for CCK


Generating h3 grid level 5:   0%|          | 0/97 [00:00<?, ?it/s]

14:19:52	Starting Space2Stats/h3_spatial_data/LCR/BRB
14:19:52	Starting Space2Stats/h3_spatial_data/Other/BRN


Generating h3 grid level 5:   0%|          | 0/4 [00:00<?, ?it/s]

14:19:52	Starting Space2Stats/h3_spatial_data/SAR/BTN


Generating h3 grid level 5: 100%|██████████| 19/19 [00:00<00:00, 36.12it/s]
Generating h3 grid level 5: 100%|██████████| 4/4 [00:00<00:00, 194.60it/s]
Generating h3 grid level 5:   0%|          | 0/24 [00:00<?, ?it/s]

14:19:52	Starting Space2Stats/h3_spatial_data/AFR/BWA


Generating h3 grid level 5:  39%|███▉      | 38/97 [00:00<00:00, 379.23it/s]

14:19:52	Starting Space2Stats/h3_spatial_data/LCR/BRA

Generating h3 grid level 5:   0%|          | 0/21 [00:00<?, ?it/s]


14:19:52	Starting Space2Stats/h3_spatial_data/AFR/CAF


Generating h3 grid level 5: 100%|██████████| 97/97 [00:00<00:00, 498.38it/s]
Generating h3 grid level 5: 100%|██████████| 2/2 [00:00<00:00,  2.13it/s]s]
Generating h3 grid level 5: 100%|██████████| 21/21 [00:00<00:00, 74.76it/s]
Generating h3 grid level 5: 100%|██████████| 7/7 [00:00<00:00,  8.58it/s]


14:19:53	Completed Space2Stats/h3_spatial_data/Other/CXR
14:19:53	Completed Space2Stats/h3_spatial_data/Other/AND
14:19:53	Completed Space2Stats/h3_spatial_data/LCR/ATG
14:19:53	Completed Space2Stats/h3_spatial_data/Other/HMD


Generating h3 grid level 5:   0%|          | 0/933 [00:00<?, ?it/s]

14:19:53	Completed Space2Stats/h3_spatial_data/ECA/ALB


Generating h3 grid level 5:   4%|▍         | 1/24 [00:01<00:23,  1.03s/it]

14:19:53	Completed Space2Stats/h3_spatial_data/LCR/BRB


Generating h3 grid level 5: 100%|██████████| 24/24 [00:01<00:00, 23.03it/s]


14:19:53	Completed Space2Stats/h3_spatial_data/MENA/BHR


Generating h3 grid level 5:  20%|██        | 188/933 [00:00<00:01, 551.09it/s]

14:19:53	Completed Space2Stats/h3_spatial_data/Other/BRN


Generating h3 grid level 5:   3%|▎         | 3/98 [00:00<00:12,  7.42it/s]it/s]

14:19:53	Completed Space2Stats/h3_spatial_data/ECA/ARM


Generating h3 grid level 5: 100%|██████████| 933/933 [00:00<00:00, 1457.21it/s]
Generating h3 grid level 5:  31%|███       | 76/249 [00:00<00:00, 747.61it/s]

14:19:54	Starting Space2Stats/h3_spatial_data/Other/CHE
14:19:54	Completed Space2Stats/h3_spatial_data/ECA/BIH
14:19:54	Completed Space2Stats/h3_spatial_data/Other/BEL
14:19:54	Starting Space2Stats/h3_spatial_data/EAP/MAC14:19:54	Starting Space2Stats/h3_spatial_data/EAP/HKG



Generating h3 grid level 5: 100%|██████████| 5/5 [00:00<00:00, 268.36it/s]
Generating h3 grid level 5: 100%|██████████| 13/13 [00:02<00:00,  5.80it/s]/s] 
Generating h3 grid level 5:  64%|██████▍   | 631/989 [00:01<00:00, 491.66it/s]

14:19:54	Completed Space2Stats/h3_spatial_data/SAR/BTN
14:19:54	Completed Space2Stats/h3_spatial_data/LCR/BLZ


Generating h3 grid level 5:  84%|████████▍ | 829/989 [00:01<00:00, 720.99it/s]

14:19:55	Starting Space2Stats/h3_spatial_data/EAP/TWN


Generating h3 grid level 5: 100%|██████████| 989/989 [00:01<00:00, 718.49it/s]


14:19:55	Starting Space2Stats/h3_spatial_data/AFR/CIV
14:19:55	Starting Space2Stats/h3_spatial_data/AFR/CMR
14:19:55	Starting Space2Stats/h3_spatial_data/LCR/CHL
14:19:55	Starting Space2Stats/h3_spatial_data/AFR/COM
14:19:55	Starting Space2Stats/h3_spatial_data/AFR/CPV
14:19:55	Starting Space2Stats/h3_spatial_data/LCR/CRI
14:19:55	Starting Space2Stats/h3_spatial_data/AFR/COD
14:19:55	Starting Space2Stats/h3_spatial_data/AFR/COG
14:19:55	Starting Space2Stats/h3_spatial_data/ECA/CYP


Generating h3 grid level 5:   0%|          | 0/1671 [00:00<?, ?it/s]

14:19:55	Starting Space2Stats/h3_spatial_data/Other/CUB
14:19:55	Starting Space2Stats/h3_spatial_data/ECA/CZE
14:19:55	Starting Space2Stats/h3_spatial_data/LCR/DMA

Generating h3 grid level 5:   0%|          | 0/426 [00:00<?, ?it/s]




Generating h3 grid level 5: 100%|██████████| 22/22 [00:00<00:00, 423.11it/s]


14:19:55	Starting Space2Stats/h3_spatial_data/Other/DEU
14:19:55	Starting Space2Stats/h3_spatial_data/Other/FRO
14:19:55	Starting Space2Stats/h3_spatial_data/Other/DNK14:19:55	Starting Space2Stats/h3_spatial_data/EAP/CHN
14:19:55	Completed Space2Stats/h3_spatial_data/AFR/BDI



Generating h3 grid level 5:   0%|          | 0/176 [00:00<?, ?it/s]72.59it/s]

14:19:55	Starting Space2Stats/h3_spatial_data/LCR/DOM


Generating h3 grid level 5:  11%|█         | 185/1671 [00:00<00:01, 783.12it/s]

14:19:55	Starting Space2Stats/h3_spatial_data/MENA/EGY14:19:55	Starting Space2Stats/h3_spatial_data/MENA/DZA
