# Rasterize the Global Oceans and Seas v01 (2021-12-14) dataset

Flanders Marine Institute (2021). Global Oceans and Seas, version 1. Available online at https://www.marineregions.org/. https://doi.org/10.14284/542.

This dataset is licensed under a [Creative Commons Attribution 4.0 International License](https://creativecommons.org/licenses/by/4.0/).

[Disclaimer](https://www.marineregions.org/disclaimer.php)

> **Note: Run this notebook on the 8XL Analysis Sandbox or 16XL Analysis Sandbox**

In [None]:
from dotenv import load_dotenv

# Location of the env file that holds the waterbodies database credentials.
# Loading it is only required when running on the Sandbox.
dotenv_path = "/home/jovyan/.env"
load_dotenv(
    dotenv_path=dotenv_path,
    verbose=True,
    # Let values from the file replace variables already set in the environment.
    override=True,
)

In [None]:
import logging
import os

import boto3
import geopandas as gpd
from datacube.utils.cog import to_cog
from odc.geo.geobox import GeoBox
from odc.geo.geom import Geometry
from odc.geo.xr import wrap_xr
from rasterio.features import rasterize
from waterbodies.grid import WaterbodiesGrid
from waterbodies.logs import logging_setup
from waterbodies.io import get_filesystem, load_vector_file

In [None]:
# Verbosity level handed to `logging_setup` in the next cell.
verbose = 3
# S3 bucket that receives the rasterized land/sea mask.
bucket = "deafrica-waterbodies-dev"
# Key (path inside `bucket`) under which the raster bytes are uploaded.
object_key = "waterbodies/v0.0.2/land_sea_masks/goas_v01.tif"

In [None]:
# Set up logging.
# Configure logging through the project helper using the `verbose` level
# chosen in the configuration cell, then create this notebook's logger.
logging_setup(verbose)
_log = logging.getLogger(__name__)

In [None]:
# Grid specification of the waterbodies grid. Its CRS is the target of every
# reprojection in this notebook, and its resolution sets the pixel size of
# the output raster.
gridspec = WaterbodiesGrid().gridspec

In [None]:
# Download the `wofs_ls_summary_alltime` product footprint from the Digital
# Earth Africa explorer and reproject it into the waterbodies grid CRS.
product_footprint = gpd.read_file(
    "https://explorer.digitalearth.africa/api/footprint/wofs_ls_summary_alltime"
).to_crs(gridspec.crs)

# Build a GeoBox over the footprint at the grid resolution; its shape and
# transform define the output raster.
# NOTE(review): only the first footprint geometry is used here -- confirm the
# footprint is always a single feature.
product_footprint_geobox = GeoBox.from_geopolygon(
    geopolygon=Geometry(
        geom=product_footprint.geometry.iloc[0],
        crs=product_footprint.crs,
    ),
    resolution=gridspec.resolution,
    crs=gridspec.crs,
)

In [None]:
%%time
# Read the Global Oceans and Seas v01 vector dataset from S3 ...
url = "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/land_sea_masks/goas_v1/goas_v01.parquet"
goas_v01_gdf = load_vector_file(url)
# ... and reproject it into the waterbodies grid CRS so that it lines up
# with the product footprint.
goas_v01_gdf = goas_v01_gdf.to_crs(gridspec.crs)

In [None]:
%%time
# Keep only the parts of the oceans/seas polygons that fall inside the
# product footprint (both layers are in the grid CRS at this point).
goas_v01_gdf_clipped = gpd.clip(gdf=goas_v01_gdf, mask=product_footprint)

In [None]:
# Persist the clipped polygons as a parquet file on S3, stored in
# geographic coordinates (EPSG:4326) rather than the grid CRS.
url = "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/land_sea_masks/goas_v1/goas_v01_africa.parquet"
(
    goas_v01_gdf_clipped
    .to_crs(crs="EPSG:4326")
    .to_parquet(path=url)
)

In [None]:
%%time
# Burn the clipped polygons onto the footprint's pixel grid.
# No burn values are supplied, so rasterio's defaults apply: covered pixels
# get `default_value` (1) and all other pixels get `fill` (0).
goas_v01_gdf_clipped_np = rasterize(
    shapes=goas_v01_gdf_clipped.geometry,
    out_shape=product_footprint_geobox.shape,
    transform=product_footprint_geobox.transform,
)

In [None]:
%%time
# Attach the footprint GeoBox to the raw array so the result carries its
# georeferencing as an xarray object.
goas_v01_gdf_clipped_xr = wrap_xr(
    im=goas_v01_gdf_clipped_np,
    gbox=product_footprint_geobox,
)

In [None]:
%%time
# Encode the georeferenced array as a Cloud Optimized GeoTIFF; the result is
# what gets uploaded to S3 in the next cell.
cog_bytes = to_cog(geo_im=goas_v01_gdf_clipped_xr)

In [None]:
%%time
# Upload the encoded raster to the bucket/key chosen in the configuration
# cell.
s3_client = boto3.client("s3")
s3_client.put_object(
    Bucket=bucket,
    Key=object_key,
    Body=cog_bytes,
)