In [None]:
import os

from dotenv import load_dotenv

# Remove any AWS credentials already present in this process's environment.
# pop() with a default is used instead of `del` so that the cell does not raise
# KeyError when a variable is not set (e.g. on a fresh kernel without the
# credentials exported, or when the cell is run a second time).
for aws_variable in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"):
    os.environ.pop(aws_variable, None)

# Path to env file containing the waterbodies database credentials.
# Only necessary on the Sandbox.
env_path = "/home/jovyan/.env"
load_dotenv(env_path)

In [None]:
import logging

import click
from datacube import Datacube

from waterbodies.db import get_waterbodies_engine
from waterbodies.io import check_directory_exists
from waterbodies.logs import logging_setup
from waterbodies.surface_area_change import (
    add_waterbody_observations_to_db,
    check_if_task_exists,
    get_waterbody_observations,
)

In [None]:
from uuid import UUID

# Run options for this notebook.
# `verbose` is handed to logging_setup() further down.
verbose = 1
# When False, the task is first looked up with check_if_task_exists() and is
# skipped if it is already present; when True, it is always processed and the
# flag is forwarded as `update_rows` when writing to the database.
overwrite = False
historical_extent_rasters_directory = (
    "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/historical_extent_rasters/"
)

# The task to process: a mapping from a (date string, int, int) key to a list
# of UUIDs.
# NOTE(review): presumably the key is (solar day, tile x, tile y) and the
# values are dataset ids -- confirm against waterbodies.surface_area_change.
task = {
    ("2016-04-05", 199, 34): [UUID("9b916e21-2229-5121-8333-0a8b3736d440")],
}
# Alternative task with two UUIDs, kept for manual testing:
# task = {('2019-01-05', 214, 83): [UUID('5eccabe0-64d1-5b20-ad87-4e73505996cf'), UUID('b3f720d6-4bae-5f33-85a4-39516f3e4c0b')]}
task

In [None]:
# Set up logging using the `verbose` level chosen in the configuration cell,
# then create the logger used by the remaining cells of this notebook.
logging_setup(verbose)
_log = logging.getLogger(__name__)

In [None]:
# Connect to the dev database.
# NOTE(review): presumably the connection settings are read from the
# environment variables loaded from the .env file -- confirm in waterbodies.db.
engine = get_waterbodies_engine()
# Show the engine as the cell output.
engine

In [None]:
# Create the Datacube handle that is passed to get_waterbody_observations();
# "ProcessTask" is the application name given to Datacube.
dc = Datacube(app="ProcessTask")

In [None]:
# Stop early, logging the problem first, when the historical extent rasters
# directory cannot be found.
rasters_directory_found = check_directory_exists(path=historical_extent_rasters_directory)
if not rasters_directory_found:
    missing_directory_error = FileNotFoundError(
        f"Directory {historical_extent_rasters_directory} does not exist!"
    )
    _log.error(missing_directory_error)
    raise missing_directory_error

In [None]:
# Determine whether the task has already been processed.
# `exists` is bound on every run: when overwriting, the database lookup is
# skipped and the task is treated as not yet existing. Previously `exists` was
# left undefined (or stale from an earlier run) whenever `overwrite` was True,
# so the processing cell only worked thanks to short-circuit evaluation.
exists = False if overwrite else check_if_task_exists(task=task, engine=engine)

In [None]:
# Skip the task when it is already in the database and overwriting is off;
# otherwise compute its waterbody observations and write them to the database.
# `overwrite` is tested first so `exists` is only consulted when it is needed.
if not overwrite and exists:
    _log.info(f"Task {task} already exists, skipping")
else:
    waterbody_observations = get_waterbody_observations(
        task=task,
        historical_extent_rasters_directory=historical_extent_rasters_directory,
        dc=dc,
    )
    # With overwrite enabled, `update_rows` is passed as True.
    add_waterbody_observations_to_db(
        waterbody_observations=waterbody_observations,
        engine=engine,
        update_rows=overwrite,
    )

In [None]:
import pandas as pd

# SQL query selecting every row of the waterbody_observations_2 table
# (there is no LIMIT clause, so the whole table is fetched).
sql_query = "SELECT * FROM waterbody_observations_2;"

# Run the query through the database engine and load the result into a
# pandas DataFrame.
df = pd.read_sql_query(sql=sql_query, con=engine)

# Display the DataFrame as the cell output.
df