In [1]:
import sys, os, importlib, math, multiprocessing, boto3, pickle
import rasterio, geojson

import pandas as pd
import geopandas as gpd
import numpy as np

from h3 import h3
from tqdm import tqdm
from shapely.geometry import Polygon

sys.path.insert(0, "/home/wb411133/Code/gostrocks/src")
import GOSTrocks.rasterMisc as rMisc
import GOSTrocks.ntlMisc as ntl
import GOSTrocks.mapMisc as mapMisc
from GOSTrocks.misc import tPrint

sys.path.append("../../../src")
import h3_helper

%load_ext autoreload
%autoreload 2



In [15]:
# Define the AWS variables
# Define S3 parameters
bucket = 'wbg-geography01' 
prefix = 'Space2Stats/h3_stats_data/ADM_GLOBAL/VIIRS_Monthly_LEN'
region = 'us-east-1'
s3client = boto3.client('s3', region_name=region)

#Define local variables
admin_bounds = "/home/wb411133/data/Global/ADMIN/Admin2_Polys.shp"
admin0_bounds = "/home/wb411133/data/Global/ADMIN/Admin0_Polys.shp"
out_folder = "/home/wb411133/projects/Space2Stats/VIIRS_ADMIN"
if not os.path.exists(out_folder):
    os.makedirs(out_folder)


In [11]:
inA = gpd.read_file(admin_bounds)
inA['id'] = list(inA.index)
inA = inA.loc[:,['WB_ADM0_NA','WB_ADM1_NA','WB_ADM2_NA','WB_REGION','id','geometry']]

In [16]:
# Search for zonal results
# Loop through the S3 bucket and get all the keys for files that are .tif 
more_results = True
loops = 0
verbose=True
good_res = []
while more_results:
    if verbose:
        print(f"Completed loop: {loops}")
    if loops > 0:
        objects = s3client.list_objects_v2(Bucket=bucket, Prefix=prefix, ContinuationToken=token)
    else:
        objects = s3client.list_objects_v2(Bucket=bucket, Prefix=prefix)
    more_results = objects['IsTruncated']
    if more_results:
        token = objects['NextContinuationToken']
    loops += 1
    for res in objects['Contents']:
        if res['Key'].endswith('csv'):            
            cur_variable = res['Key'].split("/")[-3]
            good_res.append(res['Key'])
            
res = {}
for res_file in good_res:
    final_file = f's3://{bucket}/{res_file}'
    def_file = os.path.basename(final_file)
    try:
        res[def_file].append(final_file)
    except:
        res[def_file] = [final_file]

Completed loop: 0


In [17]:
for key, items in res.items():
    key = key.split(".")[0]
    all_res_file = os.path.join(out_folder, f'ZONAL_ADMIN2_{key}.csv')
    national_res_file = os.path.join(out_folder, f'ZONAL_National_{key}.csv')
    
    all_dfs = [pd.read_csv(x) for x in items]
    combo_res = pd.concat(all_dfs)
    all_res = pd.merge(inA, combo_res, on='id')
    pd.DataFrame(all_res).drop(['geometry'], axis=1).to_csv(all_res_file)
    
    adm0_res = all_res.groupby(['WB_ADM0_NA']).agg({"SUM":'sum','WB_ADM2_NA':'count'}).reset_index()
    adm0_res.columns = ['WB_ADM0_NA', 'SUM_of_Lights', 'ADM2_Count']
    adm0_res.to_csv(national_res_file)
    
    tPrint(key)

16:26:55	DNB_j01_20230601-20230630_global_ecm-slcorr_v10_ops
16:26:57	DNB_j01_20230701-20230731_global_ecm-slcorr_v10_ops
16:26:57	DNB_j01_20230901-20230930_global_ecm-slcorr_v10_ops
16:26:59	DNB_j01_20231001-20231031_global_ecm-slcorr_v10_ops
16:26:59	DNB_j01_20231101-20231130_global_ecm-slcorr_v10_ops
16:26:59	DNB_j01_20231201-20231231_global_ecm-slcorr_v10_ops
16:27:01	DNB_j01_20240101-20240131_global_ecm-slcorr_v10_ops
16:27:02	DNB_j01_20240201-20240229_global_ecm-slcorr_v10_ops
16:27:02	DNB_j01_20240301-20240331_global_ecm-slcorr_v10_ops
16:27:04	DNB_j01_20240401-20240430_global_ecm-slcorr_v10_ops
16:27:04	DNB_j01_20240501-20240531_global_ecm-slcorr_v10_ops
16:27:06	DNB_npp_20120119-20120131_global_vcm-slcorr_v10_rp2
16:27:07	DNB_npp_20120201-20120229_global_vcm-slcorr_v10_rp2
16:27:07	DNB_npp_20120301-20120331_global_vcm-slcorr_v10_rp2
16:27:09	DNB_npp_20120401-20120430_global_vcm-slcorr_v10_rp2
16:27:09	DNB_npp_20120501-20120531_global_vcm-slcorr_v10_rp2
16:27:09	DNB_npp_2012060

16:28:49	DNB_npp_20220901-20220930_global_ecm-slcorr_v10_ops
16:28:51	DNB_npp_20221001-20221031_global_ecm-slcorr_v10_ops
16:28:52	DNB_npp_20221101-20221130_global_ecm-slcorr_v10_ops
16:28:52	DNB_npp_20221201-20221231_global_ecm-slcorr_v10_ops
16:28:54	DNB_npp_20230101-20230131_global_ecm-slcorr_v10_ops
16:28:54	DNB_npp_20230201-20230228_global_ecm-slcorr_v10_ops
16:28:54	DNB_npp_20230301-20230331_global_ecm-slcorr_v10_ops
16:28:56	DNB_npp_20230401-20230430_global_ecm-slcorr_v10_ops
16:28:56	DNB_npp_20230501-20230531_global_ecm-slcorr_v10_ops
16:28:57	DNB_npp_20230601-20230630_global_ecm-slcorr_v10_ops
16:28:59	DNB_npp_20230701-20230731_global_ecm-slcorr_v10_ops
16:28:59	DNB_npp_20230801-20230831_global_ecm-slcorr_v10_ops
16:29:00	DNB_npp_20230901-20230930_global_ecm-slcorr_v10_ops
16:29:02	DNB_npp_20231001-20231031_global_ecm-slcorr_v10_ops
16:29:02	DNB_npp_20231101-20231130_global_ecm-slcorr_v10_ops
16:29:02	DNB_npp_20231201-20231231_global_ecm-slcorr_v10_ops
16:29:04	DNB_npp_2024010

In [38]:
national_res_files = [os.path.join(out_folder, x) for x in os.listdir(out_folder) if "National" in x]
national_summary = inA.copy()
good_months = ['01','04','07','10']
try:
    del(final)
except:
    pass
for nfile in national_res_files:
    date = os.path.basename(nfile).split("_")[4][:8]
    try:
        month = date[4:6]
        date = int(date)
        if month in good_months:
            curD = pd.read_csv(nfile, index_col=0)
            try:
                final[f'SoL_{date}'] = curD['SUM_of_Lights']
            except:
                curD = curD.drop(['ADM2_Count'], axis=1)
                curD.columns = ['AAA_WB_ADM0_NA',f'SoL_{date}']
                final = curD                
    except:
        pass

In [41]:
final = final.reindex(sorted(final.columns), axis=1)
final.to_csv("Quarterly_NTL_zonal_sums.csv")

In [18]:
national_res_files = [os.path.join(out_folder, x) for x in os.listdir(out_folder) if "National" in x]
national_summary = inA.copy()
for nfile in national_res_files:
    year = os.path.basename(nfile).split("_")[5]
    try:
        year = (int(year))
    except:
        year = os.path.basename(nfile).split("_")[4]
        year = (int(year))
    curD = pd.read_csv(nfile, index_col=0)
    if nfile == national_res_files[0]:
        curD = curD.drop(['ADM2_Count'], axis=1)
        curD.columns = ['WB_ADM0_NA',f'SoL_{year}']
        final = curD
    else:
        final[f'SoL_{year}'] = curD['SUM_of_Lights']

ValueError: invalid literal for int() with base 10: '20230601-20230630'

In [None]:
final.head()

In [None]:
final.to_csv(f'{out_folder}_national_summaries.csv')

In [None]:
adm0 = [curD.dissolve() for lbl, curD in inA.groupby('WB_ADM0_NA')]
adm0

In [None]:
adm0_full = pd.concat(adm0)
adm0_full = gpd.GeoDataFrame(adm0_full, geometry='geometry', crs=4326)
adm0_full = adm0_full.drop(['WB_ADM1_NA','WB_ADM2_NA','id'], axis=1)
adm0_full.to_file(f'{out_folder}_ADM0_bounds.geojson', driver='GeoJSON')