In [None]:
from dotenv import load_dotenv

# Path to the env file containing the waterbodies database credentials.
# Only necessary on the Sandbox.
dotenv_path = "/home/jovyan/.env"
# Load the variables defined in that file into the process environment.
# NOTE(review): per python-dotenv, override=True should let values from the
# file replace variables that are already set — confirm this is intended.
load_dotenv(dotenv_path=dotenv_path, verbose=True, override=True)

In [None]:
import logging

import click
from datacube import Datacube

from waterbodies.db import get_waterbodies_engine
from waterbodies.hopper import find_task_datasets_ids
from waterbodies.io import check_directory_exists
from waterbodies.logs import logging_setup
from waterbodies.surface_area_change import (
    add_waterbody_observations_to_db,
    check_task_exists,
    get_waterbody_observations,
)
from waterbodies.text import get_task_id_str_from_tuple

In [None]:
# Run parameters for this notebook.

# Verbosity level handed to `logging_setup`.
verbose = 3
# Label for this run; it is also used as the Datacube `app` name.
run_type = "backlog-processing"
# A task is identified by its solar day plus the x/y ids of its tile.
solar_day = "2016-04-05"
tile_id_x = 229
tile_id_y = 63
# Dataset ids for the task, given as ONE bracketed, comma-separated string.
# A later cell splits this string into a list.
task_datasets_ids = '[4e0999a2-183b-5c71-a7e5-da6696dcad49]'
# Directory whose existence is verified before processing starts; it is then
# passed on to `get_waterbody_observations`.
historical_extent_rasters_directory = "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/historical_extent_rasters/"
# When True, the "task already in the database" check is skipped and the task
# is always processed.
overwrite = True

In [None]:
# Set up logging.
# `verbose` is handed to the project's `logging_setup` helper; the rest of the
# notebook then logs through a logger named after the current module.
logging_setup(verbose)
_log = logging.getLogger(__name__)

In [None]:
# Fail fast when the historical extent rasters directory cannot be found:
# the error is logged first and then raised, so it shows up both in the log
# output and as the cell's exception.
if not check_directory_exists(path=historical_extent_rasters_directory):
    e = FileNotFoundError(f"Directory {historical_extent_rasters_directory} does not exist!")
    _log.error(e)
    raise e

In [None]:
# NOTE(review): presumably the name of the datacube product the task datasets
# belong to — TODO confirm. `product` is not referenced by any other cell
# visible in this notebook; check whether it is still needed.
product = "wofs_ls"

In [None]:
# Create the Datacube handle that is later passed to
# `get_waterbody_observations`; the run type doubles as the application name.
dc = Datacube(app=run_type)

In [None]:
# Connect to the database
engine = get_waterbodies_engine()
# Display the engine as the cell output to confirm what was returned.
# NOTE(review): the rendered repr may expose connection details (host, user,
# database name) — consider clearing this output before sharing the notebook.
engine

In [None]:
# Build the task id: a (solar_day, tile_id_x, tile_id_y) tuple and its string
# form. The string form is what the database lookup and the log messages use.
task_id_tuple = (solar_day, tile_id_x, tile_id_y)
task_id_str = get_task_id_str_from_tuple(task_id_tuple)

In [None]:
# Get task datasets ids as list.
#
# The ids arrive as a single bracketed, comma-separated string such as
# "[id-1, id-2]". Parsing only happens while the value is still a string, so
# re-running this cell after it has already produced a list is a no-op
# instead of an AttributeError.
if isinstance(task_datasets_ids, str):
    task_datasets_ids = [
        dataset_id
        for dataset_id in (
            # Drop whitespace and quote characters around each id so that
            # "[a, b]" and "['a', 'b']" both yield ["a", "b"].
            part.strip(" \t\r\n'\"")
            for part in task_datasets_ids.strip().lstrip("[").rstrip("]").split(",")
        )
        # Skip empty entries so that "[]" gives [] rather than [""].
        if dataset_id
    ]
task_datasets_ids

In [None]:
# The database lookup is only needed when existing results must be preserved.
# Note that `exists` is bound ONLY when `overwrite` is falsy; the next cell
# only reads `exists` in that same case, so the missing name is never hit.
if not overwrite:
    # Check if there are waterbody observations with the task's task id
    # already in the database.
    exists = check_task_exists(task_id_str=task_id_str, engine=engine)

In [None]:
# Process the task unless overwriting is disabled and the task is already in
# the database. `exists` is only evaluated when `overwrite` is falsy.
if not overwrite and exists:
    _log.info(f"Task {task_id_str} already exists, skipping")
else:
    # Compute the waterbody observations for this task's tile and solar day.
    waterbody_observations = get_waterbody_observations(
        solar_day=solar_day,
        tile_id_x=tile_id_x,
        tile_id_y=tile_id_y,
        task_datasets_ids=task_datasets_ids,
        historical_extent_rasters_directory=historical_extent_rasters_directory,
        dc=dc,
    )
    if waterbody_observations is not None:
        # Write the observations to the database, with update_rows enabled.
        add_waterbody_observations_to_db(
            waterbody_observations=waterbody_observations,
            engine=engine,
            update_rows=True,
        )
        _log.info(f"Task {task_id_str} complete")
    else:
        # Nothing to write for this task.
        _log.info(f"Task {task_id_str} has no waterbody observations.")

In [None]:
import pandas as pd

# Sanity check: peek at up to 5 rows of the observations table.
# The query has no ORDER BY, so which rows come back is arbitrary.
# NOTE(review): this reads `waterbodies_observations_test` — confirm that it
# is the table the processing cell writes to.
sql_query = "SELECT * FROM waterbodies_observations_test LIMIT 5;"

# Execute the query and fetch the results into a pandas DataFrame.
df = pd.read_sql_query(sql_query, con=engine)

# Show the fetched rows as the cell output.
df