In [1]:
import logging
from datetime import datetime

from datacube import Datacube
from odc.stats.model import DateTimeRange
from waterbodies.db import get_waterbodies_engine
from waterbodies.hopper import create_tasks_from_scenes
from waterbodies.io import check_directory_exists, find_geotiff_files
from waterbodies.logs import logging_setup
from waterbodies.surface_area_change import get_last_waterbody_observation_date
from waterbodies.text import parse_tile_id_from_filename

In [2]:
from dotenv import load_dotenv
# Path to the env file containing the waterbodies database credentials.
# Only necessary on the Sandbox.
env_path = "/home/jovyan/.env"
# Load the env file. As the last expression in the cell, the return value of
# load_dotenv is what gets displayed as the cell output.
load_dotenv(env_path)

True

In [3]:
# Run parameters for this notebook.
# Any run_type other than "regular-update" makes the notebook parse an explicit
# temporal range.
run_type = "regular-update"
# Verbosity level handed to logging_setup.
verbose = 1
# Directory searched for the historical extent GeoTIFF rasters.
historical_extent_rasters_directory = "s3://deafrica-services/waterbodies/v0.0.2/conflux/historical_extent_rasters/"

In [4]:
# Set up logging at the configured verbosity level.
logging_setup(verbose)
# Logger for this notebook, named after the current module (__name__).
_log = logging.getLogger(__name__)

In [5]:
# Parse the temporal range.
# A "regular-update" run derives its time window from the last waterbody
# observation date instead, so an explicit range is only parsed for the other
# run types.
if run_type != "regular-update":
    # `temporal_range` is not assigned in the cells above. Fail with a clear,
    # logged error instead of a bare NameError when it has not been supplied.
    if "temporal_range" not in globals():
        e = ValueError(
            f"run_type {run_type!r} requires `temporal_range` to be defined "
            "before the temporal range can be parsed."
        )
        _log.error(e)
        raise e
    temporal_range_ = DateTimeRange(temporal_range)

In [6]:
# Abort early (logging the error first) when the rasters directory is missing.
if not check_directory_exists(path=historical_extent_rasters_directory):
    e = FileNotFoundError(f"Directory {historical_extent_rasters_directory} does not exist!")
    _log.error(e)
    raise e

# The directory exists: collect the GeoTIFF files found in it.
historical_extent_rasters = find_geotiff_files(
    directory_path=historical_extent_rasters_directory
)

In [7]:
# Get the tile_ids for tiles that actually contain waterbodies.
tile_ids_of_interest = [
    parse_tile_id_from_filename(file_path=raster_file)
    for raster_file in historical_extent_rasters
]

In [8]:
# Name of the datacube product to search for scenes.
product = "wofs_ls"

In [9]:
# Connect to the waterbodies engine.
# NOTE(review): presumably this relies on the database credentials loaded from
# the env file earlier in the notebook — confirm.
engine = get_waterbodies_engine()

In [10]:
%%time
# TODO: Decide whether the time range should be computed here, or defined outside
# this step and passed in as the temporal-range parameter for this step.
# Get the date of the most recent waterbody observation; it is the start of the
# time window, and the current time is its end.
last_observation_date = get_last_waterbody_observation_date(engine=engine)
# NOTE(review): datetime.now() returns a naive local-time value — confirm this
# matches the time zone the datacube query expects.
today = datetime.now()

CPU times: user 43.7 ms, sys: 5.73 ms, total: 49.5 ms
Wall time: 1min 20s


In [11]:
# Connect to the datacube. The `app` name labels this connection; here it is
# the same string as the "regular-update" run type.
dc = Datacube(app="regular-update")

In [12]:
# Build the datacube query: the product to search and the time window running
# from the last recorded waterbody observation up to now.
dc_query = {
    "product": product,
    "time": (last_observation_date, today),
}
dc_query

{'product': 'wofs_ls',
 'time': (datetime.datetime(2023, 12, 22, 0, 0),
  datetime.datetime(2024, 3, 28, 18, 36, 27, 939339))}

In [13]:
%%time
# Query the datacube for all datasets of the chosen product (wofs_ls) whose
# acquisition times fall within the time window given in dc_query.
scenes = dc.find_datasets(**dc_query)

CPU times: user 1.46 s, sys: 141 ms, total: 1.6 s
Wall time: 57.6 s


In [14]:
%%time
# Build the tasks from the scenes found, restricted to the tile ids of interest.
tasks = create_tasks_from_scenes(scenes=scenes, tile_ids_of_interest=tile_ids_of_interest)

Processing   14,149 scenes: 100%|██████████| 14149/14149 [00:23<00:00, 612.53it/s]

[2024-03-28 18:37:48,733] {hopper.py:148} INFO - Filter the 4452 cells to keep only the cells containing the 2783 tile ids of interest.





[2024-03-28 18:37:49,004] {hopper.py:155} INFO - Total number of cells after filtering: 2783
[2024-03-28 18:37:49,005] {hopper.py:159} INFO - For each cell group the datasets by solar day
[2024-03-28 18:37:49,568] {hopper.py:172} INFO - Total of 12,703 unique dataset UUIDs.
[2024-03-28 18:37:49,569] {hopper.py:173} INFO - Total number of tasks: 53143
CPU times: user 23.8 s, sys: 282 ms, total: 24 s
Wall time: 24 s


In [15]:
# View the first 100 tasks.
# In the displayed output each task is a single-entry dict: the key is a
# (date string, int, int) tuple and the value is a list of dataset UUIDs.
# NOTE(review): the two integers are presumably the tile indices — confirm.
tasks[:100]

[{('2024-01-11', 199, 66): [UUID('eb5981ab-8338-5737-931c-02fea677f994'),
   UUID('1831d50f-543d-52c5-a526-7e8a8d140f51'),
   UUID('7152eed5-8dc0-5fb7-a793-37eea054169d'),
   UUID('5868aadd-d085-504f-9e7c-0c170fcdd124')]},
 {('2024-01-27', 199, 66): [UUID('7680033a-c8e9-5eda-8f0a-3e40f3bd2e8f'),
   UUID('e47e38cd-10b7-540e-bcc6-f28c0101ddfc')]},
 {('2024-02-03', 199, 66): [UUID('e98a0f0c-8ed3-5352-9b9a-648ebcd9e08e'),
   UUID('38db8165-11c5-5365-b825-f64aaa04f936')]},
 {('2024-02-04', 199, 66): [UUID('cb9c0ec1-b04a-5548-8a4d-d49660c65058'),
   UUID('5ca8089d-2256-59e6-86c9-7ea44f3dbca4')]},
 {('2024-01-16', 199, 66): [UUID('3109e19d-23a0-56fa-8bbc-cc7c2fe1fc4f'),
   UUID('0285b297-d8ee-5b5b-9255-38829f27d8db')]},
 {('2024-02-11', 199, 66): [UUID('21d78e36-94a1-525b-b603-4ea92351cfec'),
   UUID('de797454-6372-5c5d-81ef-093106de8aaf')]},
 {('2024-01-26', 199, 66): [UUID('de21b012-4dc5-565f-aa3b-6a339b2b3f57'),
   UUID('e88707cd-9e31-5035-80d1-f66355c98c7f')]},
 {('2024-01-19', 199, 66): 