In [None]:
import os
import boto3

import rasterio

import pandas as pd
import numpy as np

from GOSTrocks.misc import tPrint

In [None]:
bucket = "wbg-geography01"
prefix = "URBANIZATION/MR_Novel_Poverty/"
file_ends_with = "_wat.tif"

# NOTE(review): verify=False turns off TLS certificate verification for every
# S3 request made through this client. Kept as in the original; confirm it is
# really required in the environment where this notebook runs.
s3client = boto3.client("s3", verify=False)

# list_objects_v2 returns the listing one page at a time. Keep requesting
# pages, passing the continuation token of the previous page, until S3 reports
# that the listing is no longer truncated. Collect every key that ends with
# `file_ends_with`.
list_kwargs = {"Bucket": bucket, "Prefix": prefix}
more_results = True
loops = 0

all_res = []
while more_results:
    print(f"Completed loop: {loops}")
    objects = s3client.list_objects_v2(**list_kwargs)
    more_results = objects.get("IsTruncated", False)
    if more_results:
        # Only the follow-up requests carry a continuation token.
        list_kwargs["ContinuationToken"] = objects["NextContinuationToken"]
    loops += 1
    # "Contents" is missing from the response when a page holds no objects
    # (for example when nothing matches the prefix), so default to an empty list.
    for res in objects.get("Contents", []):
        if res["Key"].endswith(file_ends_with):
            all_res.append(res["Key"])

In [None]:
# Turn the list of S3 keys into a catalogue with one row per water raster.
all_res = pd.DataFrame(all_res, columns=["Key"])
# The first three characters of the file name are used as the ISO3 code.
all_res["ISO3"] = all_res["Key"].apply(lambda x: x.split("/")[-1][:3])
all_res["file"] = all_res["Key"].apply(os.path.basename)
# Files with "1k" in their name are tagged as 1000; everything else as 250.
all_res["res"] = "250"
all_res.loc[all_res["file"].str.contains("1k"), "res"] = "1000"
# Drop files whose three-character prefix is "NO_".
# NOTE(review): presumably these are not country rasters - confirm.
# The explicit copy makes the filtered frame independent, so the column
# assignments done in later cells do not raise SettingWithCopyWarning.
all_res = all_res.loc[all_res["ISO3"] != "NO_"].copy()
all_res

In [None]:
def summarize_water(water_key, verbose=False):
    """Multiply a water raster by its admin raster and count the resulting values.

    The admin raster key is derived from ``water_key`` by replacing ``"_wat"``
    with ``"_adm"``. Both rasters are read from the module-level ``bucket`` on
    S3 (band 1 only), their nodata cells are set to NaN, and the two arrays are
    multiplied cell by cell.

    Parameters
    ----------
    water_key : str
        S3 key of the water raster inside ``bucket``.
    verbose : bool, optional
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    dict
        ``"water"`` and ``"admin"``: the two float arrays with nodata as NaN;
        ``"water_admin"``: their element-wise product;
        ``"water_res"``: the ``(values, counts)`` tuple returned by
        ``np.unique(water_admin, return_counts=True)``.

    Raises
    ------
    Whatever ``rasterio.open`` / ``read`` raise when a raster cannot be read,
    and ``ValueError`` from numpy when the two arrays cannot be multiplied
    because their shapes differ.
    """
    admin_bounds = water_key.replace("_wat", "_adm")
    # NOTE(review): GDAL_HTTP_UNSAFESSL="YES" disables SSL certificate checks
    # for GDAL's HTTP requests; kept as in the original.
    with rasterio.Env(GDAL_HTTP_UNSAFESSL="YES"):
        water = _read_band_with_nan(f"s3://{bucket}/{water_key}")
        admin = _read_band_with_nan(f"s3://{bucket}/{admin_bounds}")

    water_admin = water * admin
    water_res = np.unique(water_admin, return_counts=True)
    return {
        "water": water,
        "admin": admin,
        "water_admin": water_admin,
        "water_res": water_res,
    }


def _read_band_with_nan(raster_path):
    """Read band 1 of ``raster_path`` as a float array with nodata set to NaN."""
    # The context manager closes the dataset even if the read fails.
    with rasterio.open(raster_path) as src:
        band = src.read(1)
        nodata = src.nodata

    # Locate nodata cells on the raw values, before any dtype change.
    nodata_mask = None if nodata is None else band == nodata

    # NaN cannot be stored in an integer array, so promote to a float dtype
    # first. Float bands keep their dtype; small integer types become float32
    # and wider ones float64, following numpy's promotion rules.
    band = band.astype(np.result_type(band.dtype, np.float32), copy=False)
    if nodata_mask is not None:
        band[nodata_mask] = np.nan
    return band


# xx = summarize_water(all_res['Key'].iloc[0])

In [None]:
# Spot-check the summary on the first raster whose ISO3 code is "stp".
is_stp = all_res["ISO3"] == "stp"
summarize_water(all_res.loc[is_stp, "Key"].iloc[0])

In [None]:
# Resolution tag (see the "res" column) of the rasters to summarise in this run.
sel_res = "1000"

all_res["non_water"] = 0.0
all_res["water"] = 0.0

focal_set = all_res.loc[(all_res["res"] == sel_res)].copy()
for idx, rows in focal_set.iterrows():
    tPrint("Processing: %s" % rows["ISO3"])
    try:
        xx = summarize_water(rows["Key"])
        values, counts = xx["water_res"]
        # Select the counts by value, not by position: np.unique only
        # lists values that occur, so position 0 is not guaranteed to be the
        # zero class and position 1 may be the NaN bucket.
        # NOTE(review): assumes 0 marks non-water and any other non-NaN value
        # marks water inside an admin area - confirm against the raster coding.
        is_zero = values == 0
        is_water = ~is_zero & ~np.isnan(values)
        non_water_count = float(counts[is_zero].sum())
        water_count = float(counts[is_water].sum())
        # Write both columns only after both counts are known, so a failure
        # never leaves a half-updated row.
        all_res.loc[idx, "non_water"] = non_water_count
        all_res.loc[idx, "water"] = water_count
    except Exception as e:
        # Best effort: report the failing raster and continue with the next
        # one. The row keeps its 0.0 placeholders.
        print(f"Error with {rows['Key']}")
        print(e)

all_res.loc[focal_set.index].to_csv(f"C:/Temp/water_summary_{sel_res}.csv", index=False)