# Rasterize the Global Oceans and Seas v01 (2021-12-14) dataset

Flanders Marine Institute (2021). Global Oceans and Seas, version 1. Available online at https://www.marineregions.org/. https://doi.org/10.14284/542.

This dataset is licensed under a [Creative Commons Attribution 4.0 International License](https://creativecommons.org/licenses/by/4.0/).

[Disclaimer](https://www.marineregions.org/disclaimer.php)

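> **Note:** Run this notebook on the 8XL Analysis Sandbox or the 16XL Analysis Sandbox. Rasterizing the full product footprint and encoding it as a COG is compute-intensive (about 18 minutes in the recorded run below).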

In [1]:
from dotenv import load_dotenv

# Location of the .env file that holds the waterbodies database credentials.
# This step is only required when running on the Sandbox.
dotenv_path = "/home/jovyan/.env"
# override=True lets values from the file replace variables that are already
# set in the environment; verbose=True makes python-dotenv warn when the file
# is missing. The call stays last so its boolean result is displayed.
load_dotenv(
    dotenv_path=dotenv_path,
    verbose=True,
    override=True,
)

True

In [2]:
import logging
import os

import boto3
import geopandas as gpd
from datacube.utils.cog import to_cog
from odc.geo.geobox import GeoBox
from odc.geo.geom import Geometry
from odc.geo.xr import wrap_xr
from rasterio.features import rasterize
from waterbodies.grid import WaterbodiesGrid
from waterbodies.logs import logging_setup

In [3]:
# Notebook configuration: where the raster is uploaded and how chatty logging is.
bucket = "deafrica-waterbodies-dev"  # destination S3 bucket
object_key = "waterbodies/v0.0.2/goas_v01/goas_v01.tif"  # S3 key of the output GeoTIFF
verbose = 3  # verbosity level passed to logging_setup()

In [4]:
# Set up logging at the verbosity chosen in the configuration cell, then
# create a logger named after this module for use in the notebook.
logging_setup(verbose)
_log = logging.getLogger(__name__)

In [5]:
# Grid specification of the waterbodies grid. Its CRS and resolution are used
# to reproject the input vectors and to build the output GeoBox.
gridspec = WaterbodiesGrid().gridspec

In [6]:
# Fetch the footprint of the wofs_ls_summary_alltime product from the
# Digital Earth Africa Explorer API and reproject it to the grid's CRS.
product_footprint = gpd.read_file(
    "https://explorer.digitalearth.africa/api/footprint/wofs_ls_summary_alltime"
).to_crs(gridspec.crs)

# Build a GeoBox covering the footprint at the grid's resolution.
# Only the first geometry of the footprint is used.
product_footprint_geobox = GeoBox.from_geopolygon(
    geopolygon=Geometry(
        geom=product_footprint.geometry.iloc[0],
        crs=product_footprint.crs,
    ),
    resolution=gridspec.resolution,
    crs=gridspec.crs,
)

In [7]:
%%time
# Read the Global Oceans and Seas geopackage and reproject it to the grid's CRS.
goas_v01_gdf = gpd.read_file(
    "~/dev/waterbodies/notebooks/GlobalOceansAndSeasv01/goas_v01.gpkg"
).to_crs(gridspec.crs)

CPU times: user 17.3 s, sys: 814 ms, total: 18.1 s
Wall time: 18.5 s


In [8]:
%%time
# Keep only the parts of the oceans and seas polygons that fall inside the
# product footprint.
goas_v01_gdf_clipped = gpd.clip(
    gdf=goas_v01_gdf,
    mask=product_footprint,
)

CPU times: user 2.51 s, sys: 56.9 ms, total: 2.56 s
Wall time: 2.56 s


In [9]:
%%time
# Burn the clipped polygons into an array whose shape and affine transform
# come from the footprint GeoBox. No burn values are supplied, so rasterio's
# defaults apply (covered pixels get 1, all other pixels get 0).
goas_v01_gdf_clipped_np = rasterize(
    shapes=goas_v01_gdf_clipped.geometry,
    out_shape=product_footprint_geobox.shape,
    transform=product_footprint_geobox.transform,
)

CPU times: user 3min, sys: 34 s, total: 3min 34s
Wall time: 3min 33s


In [10]:
%%time
# Wrap the rasterized array in an xarray object georeferenced by the
# footprint GeoBox.
goas_v01_gdf_clipped_xr = wrap_xr(
    im=goas_v01_gdf_clipped_np,
    gbox=product_footprint_geobox,
)

CPU times: user 7.96 ms, sys: 0 ns, total: 7.96 ms
Wall time: 6.77 ms


In [11]:
%%time
# Encode the georeferenced raster as a Cloud Optimized GeoTIFF. The result is
# passed as the upload body in the S3 cell. This is the slowest step of the
# notebook (roughly 14 minutes in the recorded run).
cog_bytes = to_cog(geo_im=goas_v01_gdf_clipped_xr)

CPU times: user 12min 31s, sys: 1min 53s, total: 14min 25s
Wall time: 14min 24s


In [12]:
%%time
# Upload the encoded COG to the bucket and key set in the configuration cell.
# The put_object call stays the cell's last expression so the S3 response is
# displayed.
s3_client = boto3.client("s3")
s3_client.put_object(
    Bucket=bucket,
    Key=object_key,
    Body=cog_bytes,
)

[2024-04-24 20:49:15,552] {credentials.py:1180} INFO - Found credentials in environment variables.
CPU times: user 705 ms, sys: 79.3 ms, total: 784 ms
Wall time: 1.95 s


{'ResponseMetadata': {'RequestId': '1PD5KKD2SB98SKF2',
  'HostId': 'vuMHBkfiBvikOpZy8mYN6dNDKIUuelCxzwxy6O7ii5ONGRvg0bje4+B80QZKkVN82nGvbzeTIaXfR9IDI7mKF9iLR18jDq7p',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'vuMHBkfiBvikOpZy8mYN6dNDKIUuelCxzwxy6O7ii5ONGRvg0bje4+B80QZKkVN82nGvbzeTIaXfR9IDI7mKF9iLR18jDq7p',
   'x-amz-request-id': '1PD5KKD2SB98SKF2',
   'date': 'Wed, 24 Apr 2024 20:49:17 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"a092aa0193a24e3bf6dffbe5d4ba3cf8"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"a092aa0193a24e3bf6dffbe5d4ba3cf8"',
 'ServerSideEncryption': 'AES256'}