# Inspect Generated Zonal Stats
This notebook will run through a checks to see if the generated h3 zonal stats have been calculated correctly. This will include checking at the following steps:

1. H1 CSV files on S3
2. Aggregated parquet files on S3
3. S2S database

In [1]:
import os, sys, time
import requests
import json
import h3

import geopandas as gpd
import pandas as pd
import folium as flm

from dotenv import load_dotenv
from geojson_pydantic import Feature, Polygon
from lonboard import Map, ScatterplotLayer
from shapely import from_geojson
from space2stats import StatsTable
from shapely.geometry import shape, Point, Polygon
from typing import Dict


sys.path.append("../../src")

def tPrint(s):
    """prints the time along with the message"""
    print("%s\t%s" % (time.strftime("%H:%M:%S"), s))

In [2]:
# Define input variables
iso3 = 'KEN'
ADM = "ADM0"
s3_csv_base = 's3://wbg-geography01/Space2Stats/h3_stats_data/GLOBAL/VIIRS_Monthly_LEN/{h1}/DNB_npp_20220101-20220131_global_ecm-slcorr_v10_ops.avg_rade9_zonal.csv'
s3_parquet_file = 's3://wbg-geography01/Space2Stats/parquet/GLOBAL/NTL_VIIRS_LEN/NTL_VIIRS_LEN_2012_combined.parquet'
s2s_field = ['sum_viirs_ntl_2023', 'sum_viirs_ntl_2013']

# Fetch the admin boundaries and convert to geojson
def fetch_admin_boundaries(iso3: str, adm: str) -> gpd.GeoDataFrame:
    """Fetch administrative boundaries from GeoBoundaries API."""
    url = f"https://www.geoboundaries.org/api/current/gbOpen/{iso3}/{adm}/"
    res = requests.get(url).json()
    return gpd.read_file(res["gjDownloadURL"])

adm_boundaries = fetch_admin_boundaries(iso3, ADM)
geojson_str = adm_boundaries.to_json()
adm_geojson = json.loads(geojson_str)
adm_features = adm_geojson["features"]
feature = adm_features[0]

# Read in the environment variables 
load_dotenv("../../../dev_db.env")

True

In [3]:
with StatsTable.connect() as stats_table:
    xx = stats_table.fields()
[print(x) for x in xx if "viirs" in x]

sum_viirs_ntl_2024
sum_viirs_ntl_201201
sum_viirs_ntl_201202
sum_viirs_ntl_201203
sum_viirs_ntl_201204
sum_viirs_ntl_201205
sum_viirs_ntl_201206
sum_viirs_ntl_201207
sum_viirs_ntl_201208
sum_viirs_ntl_201209
sum_viirs_ntl_201210
sum_viirs_ntl_201211
sum_viirs_ntl_201212
sum_viirs_ntl_201301
sum_viirs_ntl_201302
sum_viirs_ntl_201303
sum_viirs_ntl_201304
sum_viirs_ntl_201305
sum_viirs_ntl_201306
sum_viirs_ntl_201307
sum_viirs_ntl_201308
sum_viirs_ntl_201309
sum_viirs_ntl_201310
sum_viirs_ntl_201311
sum_viirs_ntl_201312
sum_viirs_ntl_201501
sum_viirs_ntl_201502
sum_viirs_ntl_201503
sum_viirs_ntl_201504
sum_viirs_ntl_201505
sum_viirs_ntl_201506
sum_viirs_ntl_201507
sum_viirs_ntl_201508
sum_viirs_ntl_201509
sum_viirs_ntl_201510
sum_viirs_ntl_201511
sum_viirs_ntl_201512
sum_viirs_ntl_201601
sum_viirs_ntl_201602
sum_viirs_ntl_201603
sum_viirs_ntl_201604
sum_viirs_ntl_201605
sum_viirs_ntl_201606
sum_viirs_ntl_201607
sum_viirs_ntl_201608
sum_viirs_ntl_201609
sum_viirs_ntl_201610
sum_viirs_ntl_2

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [None]:
with StatsTable.connect() as stats_table:
    data = stats_table.summaries(
        aoi=feature,
        spatial_join_method="touches",
        #fields=[s2s_field],
        fields=stats_table.fields(),
        geometry="polygon",
    )
    s2s_df = pd.DataFrame(data)
    s2s_gdf = s2s_df.copy()
    s2s_gdf['geometry'] = s2s_gdf['geometry'].apply(lambda x: from_geojson(x))
    s2s_gdf = gpd.GeoDataFrame(s2s_gdf, geometry='geometry', crs=4326)
    all_fields = stats_table.fields()

s2s_df.head()

In [None]:
m = s2s_gdf.explore(
    column=s2s_field,
    tooltip=s2s_field,
    cmap='YlGnBu',
    legend=True,
    scheme='naturalbreaks',
    legend_kwds=dict(colorbar=True, caption='Population', interval=False),
    style_kwds=dict(weight=0, fillOpacity=0.8),
    name='Population by Hexagon'
)
flm.LayerControl('topright', collapsed = False).add_to(m)
m

## Assess s3 csv file

In [None]:
s2s_df['h1'] = s2s_df['hex_id'].apply(lambda x: h3.cell_to_parent(x, 0))
for unq_h1 in s2s_df['h1'].unique():
    s3_file = s3_csv_base.format(h1=unq_h1)
    curD = pd.read_csv(s3_file, index_col=0)
curD.head()

In [None]:
def get_geom(x):
    xx = h3.cell_to_latlng(x)
    return(Point([xx[1], xx[0]]))
curD = curD.reset_index()
curD = curD.merge(s2s_gdf, left_on="id", right_on="hex_id", how='right')
curD = gpd.GeoDataFrame(curD, geometry='geometry', crs=4326).reset_index()

In [None]:
m = curD.explore(
    column=s2s_field,
    tooltip=s2s_field,
    cmap='YlGnBu',
    legend=True,
    scheme='naturalbreaks',
    legend_kwds=dict(colorbar=True, caption='Population', interval=False),
    style_kwds=dict(weight=0, fillOpacity=0.8),
    name='Population by Hexagon'
)
flm.LayerControl('topright', collapsed = False).add_to(m)
m

## From S3 Parquet

In [None]:
gdf = pd.read_parquet(s3_parquet_file)
gdf = gdf.reset_index()
gdf_columns = list(gdf.columns)
gdf_columns

In [None]:
gdf_columns[0] = 'hex_id'
gdf.columns = gdf_columns
gdf.to_parquet(s3_parquet_file, index=False)

In [None]:
gdf_s3 = gdf.merge(curD, left_on='hex_id', right_on='id', how='right')
gdf_s3 = gpd.GeoDataFrame(gdf_s3, geometry='geometry', crs=4326)
gdf_s3.head()

In [None]:
m = gdf_s3.explore(
    column=s2s_field,
    tooltip=s2s_field,
    cmap='YlGnBu',
    legend=True,
    scheme='naturalbreaks',
    legend_kwds=dict(colorbar=True, caption='Population', interval=False),
    style_kwds=dict(weight=0, fillOpacity=0.8),
    name='Population by Hexagon'
)
flm.LayerControl('topright', collapsed = False).add_to(m)
m