In [1]:
import os
# Default AWS configuration that the Analysis Sandbox sets in the
# environment variables.
aws_default_config = {
    # "AWS_NO_SIGN_REQUEST": "YES",
    "AWS_SECRET_ACCESS_KEY": "fake",
    "AWS_ACCESS_KEY_ID": "fake",
}

# Accessing a public bucket with these credentials still present in the
# environment fails with:
#   PermissionError: The AWS Access Key Id you provided does not exist in our records.
# so remove every one of them that is currently set.
for key in aws_default_config:
    os.environ.pop(key, None)

In [2]:
import json
import logging

import click
import fsspec

from deafrica_conflux.cli.logs import logging_setup
from deafrica_conflux.io import check_dir_exists, check_file_exists, check_if_s3_uri
from deafrica_conflux.stack import stack_polygon_timeseries_to_csv

In [3]:
# --- Notebook parameters -------------------------------------------------
# Verbosity level handed to the logging set-up.
verbose = 1

# Comma-separated unique ID(s) of the polygon(s) to stack the timeseries for.
polygon_uids = "krvt190zcz"

# JSON file that is loaded into `polygon_stringids_to_tileids`
# (presumably a mapping from polygon string IDs to tile IDs -- confirm).
polygon_stringids_to_tileids_file = "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/conflux/historical_extent_rasters/polygons_stringids_to_tileids.json"

# Directory containing the parquet files output during the polygon drill.
drill_output_directory = "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/conflux/drill_outputs_6M/"

# Directory to write the CSV files to.
output_directory = "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/senegal_basin/surface_area_change"

In [4]:
# Set up logging: configure it through the project's `logging_setup` helper
# using the `verbose` level chosen in the parameters cell, then fetch the
# logger used by the remaining cells of this notebook.
logging_setup(verbose)
_log = logging.getLogger(__name__)

In [5]:
# Coerce every location to a plain string so that pathlib Paths are
# supported as well as strings.
output_directory, drill_output_directory, polygon_stringids_to_tileids_file = (
    str(location)
    for location in (
        output_directory,
        drill_output_directory,
        polygon_stringids_to_tileids_file,
    )
)

In [6]:
# Fail fast when the directory holding the polygon drill outputs is missing.
if not check_dir_exists(drill_output_directory):
    # Build the message once so the log entry and the raised exception always
    # carry exactly the same text (the exception previously ended in a stray ")").
    error_msg = f"Directory {drill_output_directory} does not exist!"
    _log.error(error_msg)
    raise FileNotFoundError(error_msg)

[2024-01-29 07:20:37,675] {credentials.py:611} INFO - Found credentials in shared credentials file: ~/.aws/credentials


In [7]:
# Ensure the output directory exists, creating it on the matching
# filesystem (S3 or local) when it is missing.
if not check_dir_exists(output_directory):
    output_protocol = "s3" if check_if_s3_uri(output_directory) else "file"
    fsspec.filesystem(output_protocol).makedirs(output_directory, exist_ok=True)
    _log.info(f"Created directory {output_directory}")

In [8]:
# Fail fast when the polygon-IDs-to-tile-IDs file is missing.
if not check_file_exists(polygon_stringids_to_tileids_file):
    # Build the message once so the log entry and the raised exception always
    # carry exactly the same text (the exception previously ended in a stray ")").
    error_msg = f"File {polygon_stringids_to_tileids_file} does not exist!"
    _log.error(error_msg)
    raise FileNotFoundError(error_msg)

In [9]:
# Pick the fsspec filesystem that matches where the file lives (S3 or local).
fs = fsspec.filesystem(
    "s3" if check_if_s3_uri(polygon_stringids_to_tileids_file) else "file"
)

# Parse the JSON file into `polygon_stringids_to_tileids`.
with fs.open(polygon_stringids_to_tileids_file) as f:
    polygon_stringids_to_tileids = json.load(f)

In [10]:
# Turn the comma-separated string of polygon UIDs into a list of clean UIDs.
# An already-split list is accepted as-is so that re-running this cell does
# not raise AttributeError (a list has no `.split`).
if isinstance(polygon_uids, str):
    polygon_uids = polygon_uids.split(",")
# Strip surrounding whitespace and drop empty entries (e.g. from a trailing
# or doubled comma) so no blank UID is passed on to the stacking step.
polygon_uids = [uid.strip() for uid in polygon_uids if uid.strip()]

In [11]:
# Stack the timeseries for each requested polygon and write it to CSV.
# The keyword arguments that are the same for every polygon are gathered once.
stack_kwargs = dict(
    polygon_stringids_to_tileids=polygon_stringids_to_tileids,
    drill_output_directory=drill_output_directory,
    output_directory=output_directory,
)
for polygon_uid in polygon_uids:
    stack_polygon_timeseries_to_csv(polygon_uid=polygon_uid, **stack_kwargs)

[2024-01-29 07:20:39,635] {stack.py:89} INFO - Stacking timeseries for the polygon krvt190zcz
[2024-01-29 07:20:41,126] {io.py:347} INFO - Found 1401 parquet files.
[2024-01-29 07:21:04,130] {stack.py:133} INFO - CSV file written to s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/senegal_basin/surface_area_change/krvt/krvt190zcz.csv
