In [5]:
from dotenv import load_dotenv
# Path to the env file containing the waterbodies database credentials.
# Loading it is only necessary on the Sandbox.
dotenv_path = "/home/jovyan/.env"
# The call is the cell's last expression, so its return value is displayed.
# NOTE(review): override=True presumably lets values from the file replace
# variables already set in the environment — confirm against python-dotenv.
load_dotenv(dotenv_path=dotenv_path, verbose=True, override=True)

True

In [6]:
import logging

import click
from datacube import Datacube

from waterbodies.db import get_waterbodies_engine
from waterbodies.hopper import find_task_datasets_ids
from waterbodies.io import check_directory_exists
from waterbodies.logs import logging_setup
from waterbodies.surface_area_change import (
    add_waterbody_observations_to_db,
    check_task_exists,
    get_waterbody_observations,
)
from waterbodies.text import get_task_id_str_from_tuple

In [7]:
# Logging verbosity handed to logging_setup, and the run type used as the
# Datacube application name.
verbose = 1
run_type = "backlog-processing"

# Components of the task id: the solar day plus the tile's x and y indices.
solar_day = "2016-04-05"
tile_id_x = 199
tile_id_y = 35

# IDs of the datasets that belong to this task.
task_datasets_ids = [
    "9b916e21-2229-5121-8333-0a8b3736d440",
    "180340f9-b365-506b-b561-153af5b1490d",
]

# Directory holding the historical extent rasters; its existence is verified
# before any processing starts.
historical_extent_rasters_directory = (
    "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/historical_extent_rasters/"
)

# When True, the task is processed without first checking whether its
# observations are already in the database.
overwrite = True

In [8]:
# Set up logging at the verbosity chosen in the parameters cell, then create
# the logger used by the remaining cells of this notebook.
logging_setup(verbose)
_log = logging.getLogger(__name__)

In [9]:
# Fail fast when the historical extent rasters directory is missing: the
# error is logged first and then raised so the notebook stops here.
if not check_directory_exists(path=historical_extent_rasters_directory):
    missing_directory_error = FileNotFoundError(
        f"Directory {historical_extent_rasters_directory} does not exist!"
    )
    _log.error(missing_directory_error)
    raise missing_directory_error

[2024-04-05 17:45:52,467] {credentials.py:557} INFO - Found credentials in environment variables.


In [10]:
# Name of the datacube product; the task datasets looked up in this notebook
# report this product.
# NOTE(review): not referenced by any other cell visible here.
product = "wofs_ls"

In [11]:
# Create the Datacube instance, passing the run type as the application name.
dc = Datacube(app=run_type)

In [17]:
# Look up each task dataset id in the datacube index and display the results
# as a list, in the same order as task_datasets_ids.
list(map(dc.index.datasets.get, task_datasets_ids))

[Dataset <id=9b916e21-2229-5121-8333-0a8b3736d440 product=wofs_ls location=s3://deafrica-services/wofs_ls/1-0-0/176/083/2016/04/05/wofs_ls_176083_2016-04-05.stac-item.json>,
 Dataset <id=180340f9-b365-506b-b561-153af5b1490d product=wofs_ls location=s3://deafrica-services/wofs_ls/1-0-0/176/082/2016/04/05/wofs_ls_176082_2016-04-05.stac-item.json>]

In [12]:
# Get the engine for the waterbodies database and display it.
# NOTE(review): the credentials presumably come from the environment variables
# loaded from the .env file — confirm in waterbodies.db.
engine = get_waterbodies_engine()
engine

Engine(postgresql+psycopg2://waterbodies_writer:***@db-writer:5432/waterbodies)

In [13]:
# Build the task id from its (solar day, tile x, tile y) components and
# convert it to the string form used in log messages and in the database
# existence check.
task_id_tuple = (solar_day, tile_id_x, tile_id_y)
task_id_str = get_task_id_str_from_tuple(task_id_tuple)

In [14]:
# Decide whether the task's observations are already stored.
# `exists` is assigned on every path so that later cells never rely on an
# undefined name or on a stale value left in the kernel by an earlier run.
if overwrite:
    # The task is processed regardless, so the database lookup is skipped.
    exists = False
else:
    # Check if there are waterbody observations with the task's task id
    # already in the database.
    exists = check_task_exists(task_id_str=task_id_str, engine=engine)

In [15]:
%%time
# Skip the task only when overwriting is disabled and its observations are
# already in the database; in every other case (re)process it.
if not overwrite and exists:
    _log.info(f"Task {task_id_str} already exists, skipping")
else:
    # Get the waterbody observations for the task.
    waterbody_observations = get_waterbody_observations(
        solar_day=solar_day,
        tile_id_x=tile_id_x,
        tile_id_y=tile_id_y,
        task_datasets_ids=task_datasets_ids,
        historical_extent_rasters_directory=historical_extent_rasters_directory,
        dc=dc,
    )
    # Add the waterbody observations to the database.
    # NOTE(review): update_rows=True presumably updates observations that are
    # already present instead of skipping them — confirm in
    # waterbodies.surface_area_change.
    add_waterbody_observations_to_db(
        waterbody_observations=waterbody_observations,
        engine=engine,
        update_rows=True,
    )
    _log.info(f"Task {task_id_str} complete")

[2024-04-05 17:46:18,172] {surface_area_change.py:266} INFO - Found 0 out of 717 waterbody observations already in the waterbody_observations_2 table
[2024-04-05 17:46:18,176] {surface_area_change.py:322} INFO - No waterbody observations to update in the waterbody_observations_2 table
[2024-04-05 17:46:18,177] {surface_area_change.py:325} INFO - Inerting 717 waterbody observations in the waterbody_observations_2 table
[2024-04-05 17:46:18,361] {<timed exec>:11} INFO - Task 2016-04-05/x199/y035 complete
CPU times: user 2.03 s, sys: 247 ms, total: 2.28 s
Wall time: 7.04 s


In [16]:
# Display the waterbody observations computed for this task.
waterbody_observations

Unnamed: 0,obs_id,task_id,uid,date,px_total,px_wet,area_wet_m2,px_dry,area_dry_m2,px_invalid,area_invalid_m2
0,2016-04-05/x199/y035_k6hczzz21q,2016-04-05/x199/y035,k6hczzz21q,2016-04-05,7,,,7.0,6300.0,,
1,2016-04-05/x199/y035_k6hfme87k3,2016-04-05/x199/y035,k6hfme87k3,2016-04-05,16,,,16.0,14400.0,,
2,2016-04-05/x199/y035_k6hfmtz2mh,2016-04-05/x199/y035,k6hfmtz2mh,2016-04-05,1398,188.0,169200.0,1210.0,1089000.0,,
3,2016-04-05/x199/y035_k6hfmxr5sr,2016-04-05/x199/y035,k6hfmxr5sr,2016-04-05,13,,,13.0,11700.0,,
4,2016-04-05/x199/y035_k6hfp9sd14,2016-04-05/x199/y035,k6hfp9sd14,2016-04-05,10,,,10.0,9000.0,,
...,...,...,...,...,...,...,...,...,...,...,...
712,2016-04-05/x199/y035_k6jqf1xk7x,2016-04-05/x199/y035,k6jqf1xk7x,2016-04-05,30,,,,,30.0,27000.0
713,2016-04-05/x199/y035_k6jqh8prt1,2016-04-05/x199/y035,k6jqh8prt1,2016-04-05,7,,,,,7.0,6300.0
714,2016-04-05/x199/y035_k6jqs047vv,2016-04-05/x199/y035,k6jqs047vv,2016-04-05,30,,,,,30.0,27000.0
715,2016-04-05/x199/y035_k6jqvtj959,2016-04-05/x199/y035,k6jqvtj959,2016-04-05,9,,,,,9.0,8100.0


In [12]:
import pandas as pd
# SQL query selecting every row of the waterbody_observations_2 table.
# NOTE(review): there is no LIMIT or WHERE clause, so the whole table is read
# into memory — consider restricting the query if only a sample is needed.
sql_query = "SELECT * FROM waterbody_observations_2;"

# Execute the query and fetch the results into a pandas DataFrame.
df = pd.read_sql_query(sql_query, con=engine)

df

Unnamed: 0,obs_id,uid,px_total,px_wet,area_wet_m2,px_dry,area_dry_m2,px_invalid,area_invalid_m2,date,task_id
0,2024-01-11/x199/y066_kqsckdzu93,kqsckdzu93,7,,,,,7.0,6300.0,2024-01-11,2024-01-11/x199/y066
1,2024-01-11/x199/y066_kqsckeettf,kqsckeettf,21,,,,,21.0,18900.0,2024-01-11,2024-01-11/x199/y066
2,2024-01-11/x199/y066_kqsckt2pgg,kqsckt2pgg,6,,,,,6.0,5400.0,2024-01-11,2024-01-11/x199/y066
3,2024-01-11/x199/y066_kqsckx3pvx,kqsckx3pvx,6,,,,,6.0,5400.0,2024-01-11,2024-01-11/x199/y066
4,2024-01-11/x199/y066_kqsckxd6z6,kqsckxd6z6,10,,,,,10.0,9000.0,2024-01-11,2024-01-11/x199/y066
...,...,...,...,...,...,...,...,...,...,...,...
1441,2024-01-11/x199/y066_kqtkcztffq,kqtkcztffq,47,,,1.0,900.0,46.0,41400.0,2024-01-11,2024-01-11/x199/y066
1442,2024-01-11/x199/y066_kqtkf403j4,kqtkf403j4,12,,,,,12.0,10800.0,2024-01-11,2024-01-11/x199/y066
1443,2024-01-11/x199/y066_kqtm1bnh8x,kqtm1bnh8x,9,,,,,9.0,8100.0,2024-01-11,2024-01-11/x199/y066
1444,2024-01-11/x199/y066_kqtm1bqgmz,kqtm1bqgmz,6,,,,,6.0,5400.0,2024-01-11,2024-01-11/x199/y066


In [None]:
w