# Rasterize the Global Oceans and Seas v01 (2021-12-14) dataset

Flanders Marine Institute (2021). Global Oceans and Seas, version 1. Available online at https://www.marineregions.org/. https://doi.org/10.14284/542.

This dataset is licensed under a [Creative Commons Attribution 4.0 International License](https://creativecommons.org/licenses/by/4.0/).

[Disclaimer](https://www.marineregions.org/disclaimer.php)

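> **Note:** Run this notebook on the 8XL Analysis Sandbox. In the recorded run, rasterizing took about 3 minutes and encoding the Cloud Optimized GeoTIFF took about 14 minutes.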

In [1]:
from dotenv import load_dotenv

# Location of the env file that holds the waterbodies database credentials.
# Only necessary on the Sandbox.
dotenv_path = "/home/jovyan/.env"

# Kept as the last expression so the call's return value is shown as the
# cell output (True in the recorded run).
load_dotenv(
    override=True,
    verbose=True,
    dotenv_path=dotenv_path,
)

True

In [2]:
import logging
import os

import boto3
import geopandas as gpd
import rioxarray  # noqa F401
from datacube.utils.cog import to_cog
from odc.geo.geobox import GeoBox
from odc.geo.geom import Geometry
from odc.geo.xr import wrap_xr
from rasterio.features import rasterize
from waterbodies.grid import WaterbodiesGrid
from waterbodies.logs import logging_setup

In [3]:
# Verbosity level passed to logging_setup() in the logging cell.
verbose = 3
# Destination of the rasterized output: the S3 bucket and the object key
# used by the put_object call at the end of the notebook.
bucket = "deafrica-waterbodies-dev"
object_key = "waterbodies/v0.0.2/goas_v01/goas_v01.tif"

In [4]:
# Set up logging.
# logging_setup comes from waterbodies.logs and is driven by `verbose`.
logging_setup(verbose)
# Module-level logger for this notebook.
_log = logging.getLogger(__name__)

In [5]:
# Grid specification of the waterbodies grid; its `crs` and `resolution`
# are used for every reprojection and for the output GeoBox in later cells.
gridspec = WaterbodiesGrid().gridspec

In [6]:
# Read the Digital Earth Africa extent and reproject it to the grid CRS.
africa_extent = gpd.read_file(
    "https://raw.githubusercontent.com/digitalearthafrica/deafrica-extent/master/africa-extent.json"
).to_crs(gridspec.crs)

# Build the GeoBox from the FIRST geometry of the extent only, at the grid
# resolution and CRS.
# NOTE(review): assumes the extent file holds a single feature - confirm.
africa_extent_geobox = GeoBox.from_geopolygon(
    geopolygon=Geometry(
        geom=africa_extent.geometry.iloc[0],
        crs=africa_extent.crs,
    ),
    resolution=gridspec.resolution,
    crs=gridspec.crs,
)

In [7]:
%%time
# Read the Global Oceans and Seas geopackage from the working directory
# and reproject it to the grid CRS.
goas_v01_gdf = (
    gpd.read_file("goas_v01.gpkg")
    .to_crs(gridspec.crs)
)

CPU times: user 17.6 s, sys: 682 ms, total: 18.3 s
Wall time: 18.3 s


In [8]:
%%time
# Keep only the parts of the oceans/seas polygons that fall inside the
# Africa extent (both frames are already in the grid CRS).
goas_v01_gdf_clipped = gpd.clip(
    gdf=goas_v01_gdf,
    mask=africa_extent,
)

CPU times: user 1.76 s, sys: 0 ns, total: 1.76 s
Wall time: 1.76 s


In [9]:
%%time
# Burn the clipped geometries into an array that matches the Africa GeoBox
# (same shape and affine transform). No per-shape values, fill or dtype are
# given, so rasterio's defaults apply.
goas_v01_gdf_clipped_np = rasterize(
    shapes=goas_v01_gdf_clipped.geometry,
    transform=africa_extent_geobox.transform,
    out_shape=africa_extent_geobox.shape,
)

CPU times: user 2min 52s, sys: 23.2 s, total: 3min 16s
Wall time: 3min 15s


In [10]:
%%time
# Attach the GeoBox to the raw raster array so it becomes a georeferenced
# xarray object that to_cog can consume.
goas_v01_gdf_clipped_xr = wrap_xr(
    gbox=africa_extent_geobox,
    im=goas_v01_gdf_clipped_np,
)

CPU times: user 6.15 ms, sys: 3.74 ms, total: 9.89 ms
Wall time: 7.18 ms


In [11]:
# Drop the names of intermediates that are no longer needed before the COG
# encoding step; only goas_v01_gdf_clipped_xr is used from here on.
del (
    africa_extent,
    africa_extent_geobox,
    goas_v01_gdf,
    goas_v01_gdf_clipped,
    goas_v01_gdf_clipped_np,
)

In [12]:
%%time
# Encode the georeferenced raster as a Cloud Optimized GeoTIFF held in
# memory; the result is passed as the upload Body in the next cell.
# This is the slowest step of the notebook (about 14 minutes in the recorded run).
cog_bytes = to_cog(geo_im=goas_v01_gdf_clipped_xr)

CPU times: user 12min 15s, sys: 1min 27s, total: 13min 42s
Wall time: 13min 42s


In [13]:
%%time
# Upload the encoded raster to the configured bucket/key. The client picks
# up credentials from the environment (see the log line in the output).
s3_client = boto3.client("s3")
# Kept as the last expression so the S3 response is shown as the cell output.
s3_client.put_object(
    Bucket=bucket,
    Key=object_key,
    Body=cog_bytes,
)

[2024-04-24 15:10:53,302] {credentials.py:1180} INFO - Found credentials in environment variables.
CPU times: user 782 ms, sys: 71.8 ms, total: 854 ms
Wall time: 2.66 s


{'ResponseMetadata': {'RequestId': 'WS4E3Y4NDDHWV2JM',
  'HostId': '4G4VtZqePh+fjCa8I3bksowWpwdr8mmV6wuvCRmhj5QpbOzerrcBchQDn5YJfeHWQhH9J2XsyaY=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': '4G4VtZqePh+fjCa8I3bksowWpwdr8mmV6wuvCRmhj5QpbOzerrcBchQDn5YJfeHWQhH9J2XsyaY=',
   'x-amz-request-id': 'WS4E3Y4NDDHWV2JM',
   'date': 'Wed, 24 Apr 2024 15:10:54 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"2a078f548845c93c6971cc408c912335"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"2a078f548845c93c6971cc408c912335"',
 'ServerSideEncryption': 'AES256'}