In [2]:
import sys, os, importlib, math, multiprocessing, boto3, pickle
import rasterio, geojson

import pandas as pd
import geopandas as gpd
import numpy as np

from h3 import h3
from tqdm import tqdm
from shapely.geometry import Polygon

sys.path.insert(0, "/home/wb411133/Code/gostrocks/src")
import GOSTrocks.rasterMisc as rMisc
import GOSTrocks.ntlMisc as ntl
import GOSTrocks.mapMisc as mapMisc
from GOSTrocks.misc import tPrint

sys.path.append("../../../src")
import h3_helper

%load_ext autoreload
%autoreload 2

In [54]:
# Define the AWS variables
# S3 location that is searched for zonal result files
bucket = 'wbg-geography01'
prefix = 'Space2Stats/h3_stats_data/ADM_GLOBAL/VIIRS_ANNUAL_EOG'
region = 'us-east-1'
s3client = boto3.client('s3', region_name=region)

# Define local variables
# NOTE(review): absolute, user-specific paths; adjust before running elsewhere
admin_bounds = "/home/wb411133/data/Global/ADMIN/Admin2_Polys.shp"
admin0_bounds = "/home/wb411133/data/Global/ADMIN/Admin0_Polys.shp"
out_folder = "/home/wb411133/projects/Space2Stats/VIIRS_ADMIN"
# exist_ok=True makes the cell safely re-runnable and avoids the
# check-then-create race of a separate os.path.exists() test
os.makedirs(out_folder, exist_ok=True)


In [None]:
# Read the admin-2 polygons, tag every row with its positional index as `id`,
# and keep only the naming columns, the id and the geometry.
admin_columns = ['WB_ADM0_NA', 'WB_ADM1_NA', 'WB_ADM2_NA', 'WB_REGION', 'id', 'geometry']
inA = gpd.read_file(admin_bounds)
inA['id'] = inA.index.tolist()
inA = inA.loc[:, admin_columns]

In [9]:
# Search for zonal results
# Page through the S3 listing under `prefix` and collect the keys of all CSV files
verbose = True
good_res = []  # initialised here so the cell also works on a fresh kernel
paginator = s3client.get_paginator('list_objects_v2')
for loops, page in enumerate(paginator.paginate(Bucket=bucket, Prefix=prefix)):
    if verbose:
        print(f"Completed loop: {loops}")
    # A page carries no 'Contents' entry when nothing matches the prefix
    for s3_object in page.get('Contents', []):
        if s3_object['Key'].endswith('.csv'):
            good_res.append(s3_object['Key'])

# Group the full s3:// paths by file name, so that files sharing a base name
# under different sub-folders end up in one list
res = {}
for res_file in good_res:
    final_file = f's3://{bucket}/{res_file}'
    def_file = os.path.basename(final_file)
    res.setdefault(def_file, []).append(final_file)

Completed loop: 0


In [41]:
# For every result file name: stack its CSV parts, attach the admin-2 attributes,
# write the admin-2 table, then write a per-country roll-up.
for csv_name, s3_paths in res.items():
    key = csv_name.split(".")[0]
    all_res_file = os.path.join(out_folder, f'ZONAL_ADMIN2_{key}.csv')
    national_res_file = os.path.join(out_folder, f'ZONAL_National_{key}.csv')

    # Admin-2 level table (geometry column dropped before writing)
    combo_res = pd.concat([pd.read_csv(path) for path in s3_paths])
    all_res = pd.merge(inA, combo_res, on='id')
    pd.DataFrame(all_res).drop(columns=['geometry']).to_csv(all_res_file)

    # Country level: total of SUM and number of admin-2 rows per WB_ADM0_NA
    adm0_res = (
        all_res
        .groupby(['WB_ADM0_NA'])
        .agg({"SUM": 'sum', 'WB_ADM2_NA': 'count'})
        .reset_index()
        .set_axis(['WB_ADM0_NA', 'SUM_of_Lights', 'ADM2_Count'], axis=1)
    )
    adm0_res.to_csv(national_res_file)

    tPrint(key)

12:15:51	VNL_npp_2023_global_vcmslcfg_v2_c202402081600
12:15:53	VNL_v21_npp_2013_global_vcmcfg_c202205302300
12:15:55	VNL_v21_npp_2014_global_vcmslcfg_c202205302300
12:15:56	VNL_v21_npp_2015_global_vcmslcfg_c202205302300
12:15:57	VNL_v21_npp_2016_global_vcmslcfg_c202205302300
12:15:59	VNL_v21_npp_2017_global_vcmslcfg_c202205302300
12:16:00	VNL_v21_npp_2018_global_vcmslcfg_c202205302300
12:16:01	VNL_v21_npp_2019_global_vcmslcfg_c202205302300
12:16:03	VNL_v21_npp_2020_global_vcmslcfg_c202205302300
12:16:04	VNL_v21_npp_2021_global_vcmslcfg_c202205302300
12:16:06	VNL_v22_npp-j01_2022_global_vcmslcfg_c202303062300


In [51]:
# Combine the per-year national CSVs into one wide table with a SoL_<year>
# column per file, aligned on country name.
# Sorted so the result does not depend on the arbitrary order of os.listdir()
national_res_files = sorted(
    os.path.join(out_folder, x) for x in os.listdir(out_folder) if "National" in x
)
if not national_res_files:
    raise FileNotFoundError(f"No national zonal summaries found in {out_folder}")

# NOTE(review): not used by any visible cell; kept in case later cells rely on it
national_summary = inA.copy()


def _year_from_filename(path):
    """Return the first 4-digit, all-numeric `_`-separated token of the file name as an int.

    Raises:
        ValueError: if the file name contains no such token.
    """
    for token in os.path.basename(path).split("_"):
        if len(token) == 4 and token.isdigit():
            return int(token)
    raise ValueError(f"Could not find a 4-digit year in file name: {path}")


yearly_sol = {}
for nfile in national_res_files:
    year = _year_from_filename(nfile)
    curD = pd.read_csv(nfile, index_col=0)
    # Index by country so the years are aligned on WB_ADM0_NA rather than on
    # row position, which would mis-assign values if a country were missing
    yearly_sol[f'SoL_{year}'] = curD.set_index('WB_ADM0_NA')['SUM_of_Lights']

# Columns ordered by year; countries missing from a given year get NaN
final = (
    pd.concat(dict(sorted(yearly_sol.items())), axis=1)
    .sort_index()
    .rename_axis('WB_ADM0_NA')
    .reset_index()
)

In [53]:
# Write the combined national table; the file name is built by suffixing
# `out_folder`, so the file is created next to that folder, not inside it.
national_summary_csv = f'{out_folder}_national_summaries.csv'
final.to_csv(national_summary_csv)

In [60]:
# Dissolve the admin-2 polygons of each country (WB_ADM0_NA) into a single row;
# the result is a list with one single-row GeoDataFrame per country.
country_groups = inA.groupby('WB_ADM0_NA')
adm0 = [country_gdf.dissolve() for _, country_gdf in country_groups]
adm0

[                                            geometry   WB_ADM0_NA  WB_ADM1_NA  \
 0  POLYGON ((63.31910 29.45670, 63.30590 29.45889...  Afghanistan  Badakhshan   
 
   WB_ADM2_NA WB_REGION  id  
 0    Baharak       SAR   0  ,
                                             geometry WB_ADM0_NA WB_ADM1_NA  \
 0  MULTIPOLYGON (((19.83331 40.05114, 19.82395 40...    Albania      Berat   
 
   WB_ADM2_NA WB_REGION   id  
 0      Berat       ECA  329  ,
                                             geometry WB_ADM0_NA WB_ADM1_NA  \
 0  MULTIPOLYGON (((1.28911 20.72614, 1.27914 20.7...    Algeria      Adrar   
 
   WB_ADM2_NA WB_REGION   id  
 0      Adrar      MENA  703  ,
                                             geometry             WB_ADM0_NA  \
 0  MULTIPOLYGON (((-170.66429 -14.25196, -170.660...  American Samoa (U.S.)   
 
               WB_ADM1_NA             WB_ADM2_NA WB_REGION    id  
 0  American Samoa (U.S.)  American Samoa (U.S.)       EAP  2244  ,
                              

In [67]:
# Stack the per-country dissolved rows, drop the sub-national attributes and
# write the result as GeoJSON alongside `out_folder`.
adm0_bounds_file = f'{out_folder}_ADM0_bounds.geojson'
adm0_full = gpd.GeoDataFrame(pd.concat(adm0), geometry='geometry', crs=4326)
adm0_full = adm0_full.drop(columns=['WB_ADM1_NA', 'WB_ADM2_NA', 'id'])
adm0_full.to_file(adm0_bounds_file, driver='GeoJSON')