# Split Global Oceans and Seas v01 (2021-12-14) Mask into Tiles

In [1]:
from dotenv import load_dotenv

# The waterbodies database credentials are kept in this env file.
# Loading it is only required when running on the Sandbox.
dotenv_path = "/home/jovyan/.env"
load_dotenv(dotenv_path, verbose=True, override=True)

True

In [2]:
import logging
import os

import numpy as np
import rioxarray
from datacube import Datacube
from odc.geo.xr import to_cog
from tqdm import tqdm
from waterbodies.grid import WaterbodiesGrid
from waterbodies.hopper import create_tasks_from_datasets
from waterbodies.io import get_filesystem, is_s3_path
from waterbodies.logs import logging_setup
from waterbodies.text import get_tile_index_str_from_tuple
from waterbodies.utils import rio_slurp_xarray

In [3]:
# Source GOAS v01 raster; the rasterizing loop reads it once per tile.
goas_raster_file = "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/land_sea_masks/goas_v01.tif"
# Destination directory for the per-tile "goas_v01_<tile index>.tif" outputs.
output_directory = "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/goas_v01/"

In [4]:
# Set up logging and create the logger used by the cells in this notebook.
# NOTE(review): the argument 3 is presumably a verbosity level — confirm against
# waterbodies.logs.logging_setup.
logging_setup(3)
_log = logging.getLogger(__name__)

In [5]:
# When writing to S3 with rioxarray's rio.to_raster, GDAL signals
# err_no=1, msg='w+ not supported for /vsis3, unless
# CPL_VSIL_USE_TEMP_FILE_FOR_RANDOM_WRITE is set to YES'.
# Setting the variable up front avoids that error.
if is_s3_path(output_directory):
    os.environ.update({"CPL_VSIL_USE_TEMP_FILE_FOR_RANDOM_WRITE": "YES"})

In [6]:
%%time
# Pair every WOfS All Time Summary tile index with its geobox on the waterbodies grid.
dc = Datacube(app="tiles")
gridspec = WaterbodiesGrid().gridspec

dc_query = {"product": "wofs_ls_summary_alltime"}
datasets = dc.find_datasets(**dc_query)

tasks = create_tasks_from_datasets(datasets=datasets, tile_index_filter=None, bin_solar_day=False)
tile_indices = [index for task in tasks for index in task.keys()]
tile_geoboxes = [gridspec.tile_geobox(tile_index=index) for index in tile_indices]
tiles = list(zip(tile_indices, tile_geoboxes))
_log.info(f"Found {len(tiles)} tiles")

Processing 4461 datasets: 100%|██████████| 4461/4461 [00:02<00:00, 1681.73it/s]


[2024-05-15 11:16:18,772] {<timed exec>:12} INFO - Found 4456 tiles
CPU times: user 3.4 s, sys: 91.8 ms, total: 3.49 s
Wall time: 6.31 s


In [7]:
# Cut the GOAS raster into one land/sea mask per tile and write each as a COG.
fs = get_filesystem(output_directory)
# Bind the progress bar to its own name: reusing `tiles` would leave that variable
# pointing at a closed progress bar instead of the tile list once the cell finishes,
# so the cell could not be re-run reliably.
with tqdm(iterable=tiles, desc="Rasterizing tiles", total=len(tiles)) as progress:
    for tile_index, tile_geobox in progress:
        tile_index_str = get_tile_index_str_from_tuple(tile_index)
        tile_raster_fp = os.path.join(output_directory, f"goas_v01_{tile_index_str}.tif")
        # The mask is categorical (0/1), so read it with nearest-neighbour resampling;
        # bilinear would interpolate between the two classes along coastlines before
        # the mask is inverted below.
        tile_raster = rio_slurp_xarray(fname=goas_raster_file, gbox=tile_geobox, resampling="nearest")
        # Convert the oceans/seas pixels from 1 to 0 and the land pixels from 0 to 1.
        tile_raster = np.logical_not(tile_raster).astype(int)
        # Encode as a COG in memory, then write the bytes through the filesystem object.
        cog_bytes = to_cog(geo_im=tile_raster)
        with fs.open(tile_raster_fp, "wb") as f:
            f.write(cog_bytes)

Rasterizing tiles:   0%|          | 0/4456 [00:00<?, ?it/s]

[2024-05-15 11:16:20,796] {credentials.py:557} INFO - Found credentials in environment variables.


Rasterizing tiles: 100%|██████████| 4456/4456 [1:28:22<00:00,  1.19s/it]
