In [1]:
import os
# Default AWS configuration that the Analysis Sandbox sets through
# environment variables.
aws_default_config = {
    # "AWS_NO_SIGN_REQUEST": "YES",  (deliberately left out of the cleanup below)
    "AWS_SECRET_ACCESS_KEY": "fake",
    "AWS_ACCESS_KEY_ID": "fake",
}

# A public bucket can only be read once these credentials are gone from the
# environment; with them still set, requests fail with:
# PermissionError: The AWS Access Key Id you provided does not exist in our records.
for credential_var in aws_default_config:
    # pop() with a default is a no-op when the variable is not set.
    os.environ.pop(credential_var, None)

In [2]:
import json
import logging

import click
import fsspec

from deafrica_conflux.cli.logs import logging_setup
from deafrica_conflux.io import check_dir_exists, check_file_exists, check_if_s3_uri
from deafrica_conflux.stack import stack_polygon_timeseries_to_csv

In [3]:
# Value handed to logging_setup() when the logger is configured.
verbose = 1

# Directory containing the parquet files output during the polygon drill.
drill_output_directory = (
    "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/senegal_basin"
    "/conflux/drill_outputs_2Y/"
)

# Directory the stacked CSV files are written to.
output_directory = (
    "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/senegal_basin"
    "/surface_area_change_2Y"
)

# JSON file that is loaded into `polygon_stringids_to_tileids`.
polygon_stringids_to_tileids_file = (
    "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/senegal_basin"
    "/conflux/historical_extent_rasters/polygons_stringids_to_tileids.json"
)

# Comma-separated polygon UIDs to stack (split on "," in a later cell).
polygon_uids = "edwzn0km4p"

# Passed through unchanged to stack_polygon_timeseries_to_csv().
temporal_range = None
overwrite = True

In [4]:
# Set up logger.
# NOTE(review): `logging_setup` is imported from deafrica_conflux.cli.logs; it
# presumably configures log handlers/levels from `verbose` — confirm against
# that module.
logging_setup(verbose)
# Logger that the validation and directory-creation cells write to.
_log = logging.getLogger(__name__)

In [5]:
# Accept pathlib.Path values for any of the three locations by converting
# each of them to a plain string.
output_directory, drill_output_directory, polygon_stringids_to_tileids_file = (
    str(location)
    for location in (
        output_directory,
        drill_output_directory,
        polygon_stringids_to_tileids_file,
    )
)

In [6]:
# Stop immediately when the drill output directory is missing: log the problem
# and raise FileNotFoundError.
if not check_dir_exists(drill_output_directory):
    # Build the message once so the log entry and the exception agree exactly
    # (the exception text previously carried a stray trailing ")").
    missing_dir_message = f"Directory {drill_output_directory} does not exist!"
    _log.error(missing_dir_message)
    raise FileNotFoundError(missing_dir_message)

[2024-01-30 06:39:01,710] {credentials.py:611} INFO - Found credentials in shared credentials file: ~/.aws/credentials


In [7]:
# Make sure the destination directory for the CSVs exists, creating it on the
# matching filesystem (S3 or local) when it does not.
if not check_dir_exists(output_directory):
    protocol = "s3" if check_if_s3_uri(output_directory) else "file"
    fsspec.filesystem(protocol).makedirs(output_directory, exist_ok=True)
    _log.info(f"Created directory {output_directory}")

In [8]:
# Stop immediately when the polygon-to-tile mapping file is missing: log the
# problem and raise FileNotFoundError.
if not check_file_exists(polygon_stringids_to_tileids_file):
    # Build the message once so the log entry and the exception agree exactly
    # (the exception text previously carried a stray trailing ")").
    missing_file_message = f"File {polygon_stringids_to_tileids_file} does not exist!"
    _log.error(missing_file_message)
    raise FileNotFoundError(missing_file_message)

In [9]:
# Choose the S3 or local filesystem depending on where the mapping file lives.
fs = fsspec.filesystem(
    "s3" if check_if_s3_uri(polygon_stringids_to_tileids_file) else "file"
)

# Parse the JSON mapping file into `polygon_stringids_to_tileids`.
with fs.open(polygon_stringids_to_tileids_file) as mapping_file:
    polygon_stringids_to_tileids = json.loads(mapping_file.read())

In [10]:
# Turn the comma-separated UID string into a list of whitespace-stripped UIDs.
# The isinstance guard keeps this cell re-runnable: on a second execution
# `polygon_uids` is already a list, and calling .split() on it would raise
# AttributeError.
if isinstance(polygon_uids, str):
    polygon_uids = polygon_uids.split(",")
polygon_uids = [uid.strip() for uid in polygon_uids]

In [11]:
%%time
# Keyword arguments that are the same for every polygon.
stack_options = {
    "polygon_stringids_to_tileids": polygon_stringids_to_tileids,
    "drill_output_directory": drill_output_directory,
    "output_directory": output_directory,
    "temporal_range": temporal_range,
    "overwrite": overwrite,
}

# Stack the drill outputs of each requested polygon, one call per UID.
for polygon_uid in polygon_uids:
    stack_polygon_timeseries_to_csv(polygon_uid=polygon_uid, **stack_options)

[2024-01-29 19:13:34,707] {stack.py:170} INFO - Stacking timeseries for the polygon edwzn0km4p
[2024-01-29 19:13:37,521] {io.py:347} INFO - Found 1137 parquet files.
[2024-01-29 19:13:54,200] {stack.py:235} INFO - CSV file written to s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/senegal_basin/surface_area_change_2Y/edwz/edwzn0km4p.csv
CPU times: user 3.81 s, sys: 460 ms, total: 4.27 s
Wall time: 19.5 s


In [12]:
import pandas as pd

# Read back the CSV that the stacking step logged as written, to inspect it.
pd.read_csv(
    "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/senegal_basin"
    "/surface_area_change_2Y/edwz/edwzn0km4p.csv"
)

Unnamed: 0,date,px_total,px_wet,area_wet_m2,px_dry,area_dry_m2,px_invalid,area_invalid_m2,pc_wet,pc_dry,pc_invalid
0,2022-01-04,88223,501.0,450900.0,535.0,481500.0,87187.0,78468300.0,,,98.825703
1,2022-01-05,169310,67615.0,60853500.0,57321.0,51588900.0,44374.0,39936600.0,,,26.208730
2,2022-01-06,111524,46145.0,41530500.0,32678.0,29410200.0,32701.0,29430900.0,,,29.321940
3,2022-01-07,30437,1865.0,1678500.0,268.0,241200.0,28304.0,25473600.0,,,92.992082
4,2022-01-12,88223,350.0,315000.0,677.0,609300.0,87196.0,78476400.0,,,98.835904
...,...,...,...,...,...,...,...,...,...,...,...
347,2023-09-06,111524,24846.0,22361400.0,1652.0,1486800.0,85026.0,76523400.0,,,76.240092
348,2023-09-07,30437,0.0,0.0,0.0,0.0,30437.0,27393300.0,,,100.000000
349,2023-09-12,88223,0.0,0.0,0.0,0.0,88223.0,79400700.0,,,100.000000
350,2023-09-13,169310,116226.0,104603400.0,7402.0,6661800.0,45682.0,41113800.0,,,26.981277
