In [1]:
from dotenv import load_dotenv

# The waterbodies database credentials live in an env file; loading it is
# only required when running on the Sandbox.
dotenv_path = "/home/jovyan/.env"
# override=True: values from the file replace variables already present in
# the environment. The call is left as the cell's last expression so its
# boolean result is displayed.
load_dotenv(dotenv_path, override=True, verbose=True)

True

In [2]:
import logging
import os
import rioxarray
from datacube import Datacube
from tqdm import tqdm
from waterbodies.logs import logging_setup
from waterbodies.hopper import create_tasks_from_datasets
from waterbodies.grid import WaterbodiesGrid
from waterbodies.utils import rio_slurp_xarray
from waterbodies.text import get_tile_id_str_from_tuple
from waterbodies.io import is_s3_path

In [3]:
# Location the per-tile rasters are written to (local directory or S3 prefix).
output_directory = "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/goas_v01"
# Source raster that is read once per tile and reprojected onto the tile's geobox.
goas_raster_file = "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/goas_v01/goas_v01.tif"

In [4]:
# Set up logging.
# NOTE(review): the argument 3 is presumably a verbosity level — confirm
# against waterbodies.logs.logging_setup.
logging_setup(3)
# Notebook-level logger; later cells use it to report progress.
_log = logging.getLogger(__name__)

In [5]:
# rioxarray's rio.to_raster opens its target in "w+" mode, which GDAL rejects
# for /vsis3 paths with:
#   GDAL signalled an error: err_no=1, msg='w+ not supported for /vsis3,
#   unless CPL_VSIL_USE_TEMP_FILE_FOR_RANDOM_WRITE is set to YES'
# Setting the option up front avoids that error when the output is on S3.
writing_to_s3 = is_s3_path(output_directory)
if writing_to_s3:
    os.environ.update(CPL_VSIL_USE_TEMP_FILE_FOR_RANDOM_WRITE="YES")

In [6]:
%%time
# Find all the WOfS All Time Summaries geoboxes
dc = Datacube(app="tiles")
gridspec = WaterbodiesGrid().gridspec

dc_query = dict(product="wofs_ls_summary_alltime")
datasets = dc.find_datasets(**dc_query)

tasks = create_tasks_from_datasets(datasets=datasets, tile_ids_of_interest=None, bin_solar_day=False)
tile_indices = [k for task in tasks for k,v in task.items()]
tile_geoboxes = [gridspec.tile_geobox(tile_index=tile_index) for tile_index in tile_indices]
tiles = list(zip(tile_indices, tile_geoboxes))
_log.info(f"Found {len(tiles)} tiles")

Processing 4461 datasets: 100%|██████████| 4461/4461 [00:02<00:00, 1729.08it/s]

[2024-04-24 21:03:22,243] {<timed exec>:12} INFO - Found 4456 tiles
CPU times: user 3.27 s, sys: 89.3 ms, total: 3.36 s
Wall time: 3.7 s





In [7]:
# Reproject the GOAS raster onto each tile's geobox and write one GeoTIFF per
# tile into `output_directory`.
# The progress bar gets its own name: binding it to `tiles` would replace the
# list of (tile index, geobox) pairs with a closed tqdm object once this cell
# finishes, so `tiles` could no longer be indexed and a re-run of this cell
# would wrap a tqdm inside another tqdm.
with tqdm(iterable=tiles, desc="Rasterizing tiles", total=len(tiles)) as progress_bar:
    for tile_index, tile_geobox in progress_bar:
        tile_index_str = get_tile_id_str_from_tuple(tile_index)
        tile_raster_fp = os.path.join(output_directory, f"goas_v01_{tile_index_str}.tif")
        # NOTE(review): bilinear resampling interpolates between source pixels;
        # if the GOAS raster holds categorical/mask values, "nearest" may be
        # the intended method — confirm.
        tile_raster = rio_slurp_xarray(fname=goas_raster_file, gbox=tile_geobox, resampling="bilinear")
        # compute=True writes the raster immediately rather than deferring it.
        tile_raster.rio.to_raster(raster_path=tile_raster_fp, compute=True)

Rasterizing tiles: 100%|██████████| 4456/4456 [56:22<00:00,  1.32it/s]
