In [None]:
import os 

# Drop any AWS credentials inherited from the kernel's environment before the
# env file is loaded.
# pop() with a default is used instead of `del` so this cell neither raises
# KeyError when a variable is unset nor fails when the cell is re-run.
os.environ.pop("AWS_ACCESS_KEY_ID", None)
os.environ.pop("AWS_SECRET_ACCESS_KEY", None)

from dotenv import load_dotenv
# Load the env file that holds the waterbodies database credentials.
# This step is only required when running on the Sandbox.
env_path = "/home/jovyan/.env"
load_dotenv(dotenv_path=env_path)

In [None]:
import logging
from datetime import datetime

from datacube import Datacube
from odc.stats.model import DateTimeRange
from waterbodies.db import get_waterbodies_engine
from waterbodies.hopper import create_tasks_from_scenes
from waterbodies.io import check_directory_exists, find_geotiff_files
from waterbodies.logs import logging_setup
from waterbodies.surface_area_change import get_last_waterbody_observation_date
from waterbodies.text import parse_tile_id_from_filename

In [None]:
# Notebook parameters.
# verbose: value handed to logging_setup when logging is configured.
verbose = 1
# run_type: any value other than "regular-update" triggers parsing of an
# explicit temporal range further down.
run_type = "regular-update"
# Directory that is checked for existence and searched for GeoTIFF files.
historical_extent_rasters_directory = (
    "s3://deafrica-services/waterbodies/v0.0.2/conflux/historical_extent_rasters/"
)

In [None]:
# Set up logging.
# `verbose` comes from the parameters cell; how it maps to a log level is
# decided inside logging_setup (not visible here).
logging_setup(verbose)
# Logger used by the later cells, e.g. to report a missing rasters directory.
_log = logging.getLogger(__name__)

In [None]:
# Parse the temporal range.
# Only run types other than "regular-update" go through this branch.
# NOTE(review): `temporal_range` is not assigned in any visible cell --
# presumably it is supplied as a notebook parameter for those run types;
# confirm, otherwise this line raises NameError.
if run_type != "regular-update":
    temporal_range_ = DateTimeRange(temporal_range)

In [None]:
# Collect the GeoTIFF files in the historical extent rasters directory.
# If the directory does not exist, log the error and abort the notebook.
if check_directory_exists(path=historical_extent_rasters_directory):
    historical_extent_rasters = find_geotiff_files(
        directory_path=historical_extent_rasters_directory
    )
else:
    missing_directory_error = FileNotFoundError(
        f"Directory {historical_extent_rasters_directory} does not exist!"
    )
    _log.error(missing_directory_error)
    raise missing_directory_error

In [None]:
# Get the tile ids of the tiles that actually contain waterbodies: one id is
# parsed from the file name of each historical extent raster.
tile_ids_of_interest = [
    parse_tile_id_from_filename(file_path=geotiff_path)
    for geotiff_path in historical_extent_rasters
]

In [None]:
# Name of the datacube product whose datasets are searched for in the query
# built further down.
product = "wofs_ls"

In [None]:
# Connect to the waterbodies engine
# NOTE(review): presumably a database engine that relies on the credentials
# loaded from the env file at the top of the notebook -- confirm in
# waterbodies.db.
engine = get_waterbodies_engine()

In [None]:
%%time
# TODO: Check if this should be done here or should the time range be defined outside
# this step then passed as the temporal-range parameter for this step
# Get the date of the most recent waterbody observation
last_observation_date = get_last_waterbody_observation_date(engine=engine)
# Current time; used as the upper bound of the datacube query's time range.
# NOTE(review): datetime.now() returns a naive local-time value -- confirm it
# matches the timezone convention of last_observation_date and of the
# datacube query.
today = datetime.now()

In [None]:
# Connect to the datacube
# The app name is the same string as the "regular-update" run type.
dc = Datacube(app="regular-update")

In [None]:
# Define the datacube query: the product to search for and a time window that
# runs from the most recent waterbody observation up to the current time.
dc_query = {"product": product, "time": (last_observation_date, today)}
# Display the query for inspection.
dc_query

In [None]:
%%time
# Query the datacube for all wofs_ls datasets whose acquisition times fall within
# the temporal range specified.
# dc_query is unpacked into keyword arguments (product and time).
scenes = dc.find_datasets(**dc_query)

In [None]:
%%time
# Build the tasks from the scenes found by the datacube query.
# NOTE(review): presumably only tiles listed in tile_ids_of_interest are kept
# -- confirm in waterbodies.hopper.
tasks = create_tasks_from_scenes(scenes=scenes, tile_ids_of_interest=tile_ids_of_interest)

In [None]:
# View the first 100 tasks
# (a slice is displayed rather than the complete collection).
tasks[:100]