# Identify Coastal Tiles using the Global Oceans and Seas v01 (2021-12-14) dataset

Flanders Marine Institute (2021). Global Oceans and Seas, version 1. Available online at https://www.marineregions.org/. https://doi.org/10.14284/542.

This dataset is licensed under a [Creative Commons Attribution 4.0 International License](https://creativecommons.org/licenses/by/4.0/).

[Disclaimer](https://www.marineregions.org/disclaimer.php)


> **Prerequisites**: The Global Oceans and Seas v01 shapefile was downloaded from the [VLIZ IMIS dataset page](https://www.vliz.be/en/imis?dasid=7842&doiid=613) and converted to a GeoParquet file using QGIS before running this notebook.

In [1]:
from dotenv import load_dotenv

# The waterbodies database credentials are kept in an env file.
# Loading it is only required when running on the Sandbox.
dotenv_path = "/home/jovyan/.env"
load_dotenv(dotenv_path, verbose=True, override=True)

True

In [2]:
import logging

import geopandas as gpd
from datacube import Datacube
from waterbodies.grid import WaterbodiesGrid
from waterbodies.hopper import create_tasks_from_datasets
from waterbodies.io import load_vector_file
from waterbodies.logs import logging_setup

In [3]:
# Verbosity level handed to logging_setup().
verbose = 3

# Input: footprint of the wofs_ls_summary_alltime product.
product_footprint_url = (
    "https://explorer.digitalearth.africa/api/footprint/wofs_ls_summary_alltime"
)

# Input: Global Oceans and Seas v01 polygons as GeoParquet.
goas_v01_url = (
    "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/land_sea_masks/goas_v1/goas_v01.parquet"
)

# Outputs: the polygons clipped to the product footprint, and the coastal tiles.
goas_v01_clipped_url = (
    "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/land_sea_masks/goas_v1/goas_v01_clipped.parquet"
)
coastal_tiles_url = (
    "s3://deafrica-waterbodies-dev/waterbodies/v0.0.2/land_sea_masks/goas_v1/waterbodies_coastal_tiles.parquet"
)

In [4]:
# Set up logging through the project's logging_setup helper, passing the
# verbosity chosen in the configuration cell.
logging_setup(verbose)
# Logger used by the cells below to report tile counts.
_log = logging.getLogger(__name__)

In [5]:
# Datacube handle; used further down to look up the product's datasets.
dc = Datacube(app="coastal-tiles")
# Grid specification taken from WaterbodiesGrid; its CRS and tile geoboxes
# are used by the cells below.
gridspec = WaterbodiesGrid().gridspec

In [6]:
# Load the product footprint and reproject it to the CRS of the grid.
product_footprint_gdf = (
    load_vector_file(product_footprint_url)
    .to_crs(gridspec.crs)
)
product_footprint_gdf

Unnamed: 0,dataset_count,product_name,time_spec,geometry
0,4461,wofs_ls_summary_alltime,"[ null, null, null ]","MULTIPOLYGON (((-2112000.000 2399999.998, -211..."


In [7]:
# Load the Global Oceans and Seas polygons and reproject them to the CRS
# of the grid.
goas_v01_gdf = (
    load_vector_file(goas_v01_url)
    .to_crs(gridspec.crs)
)
goas_v01_gdf

Unnamed: 0,name,latitude,longitude,min_Y,min_X,max_Y,max_X,area_km2,geometry
0,Southern Ocean,-68.03985,-26.63275,-85.5625,-180.0,-60.0,180.0,6793589,"MULTIPOLYGON (((17367530.445 -6351419.997, 141..."
1,South Atlantic Ocean,-33.73758,-18.83411,-60.0,-69.60084,0.07511,20.0,42815540,"MULTIPOLYGON (((-4889932.764 0.000, -4898670.8..."
2,South Pacific Ocean,-30.09612,-143.06088,-60.0,130.11129,3.39114,-67.26667,90147400,"MULTIPOLYGON (((-6843963.864 -5987437.671, -68..."
3,North Pacific Ocean,26.95013,-169.38334,0.0,117.51622,66.56286,-76.98544,77124830,"MULTIPOLYGON (((15162668.182 6202856.081, 1516..."
4,South China and Easter Archipelagic Seas,5.62943,115.46548,-10.92259,95.43328,25.56728,134.03155,6822162,"MULTIPOLYGON (((11509135.769 3148036.787, 1150..."
5,Indian Ocean,-27.27272,79.60241,-60.0,20.0,31.18586,146.91671,78162363,"MULTIPOLYGON (((5411625.915 3333798.547, 54114..."
6,Mediterranean Region,38.13065,19.70067,30.06809,-6.03255,47.3764,42.35496,2988248,"MULTIPOLYGON (((3142866.248 3666334.372, 31428..."
7,Baltic Sea,58.78478,19.22115,52.65352,9.3656,67.08059,30.3471,415600,"MULTIPOLYGON (((2639423.703 6382686.936, 26395..."
8,North Atlantic Ocean,31.77621,-40.24758,-0.93603,-98.05392,68.63872,12.00594,41741693,"MULTIPOLYGON (((-2839226.735 6814318.697, -283..."
9,Arctic Ocean,79.14792,-3.28568,51.14359,-180.0,90.0,180.0,15571669,"MULTIPOLYGON (((17203129.832 6870188.502, 1720..."


In [8]:
# Keep only the parts of the ocean/sea polygons that fall inside the
# product footprint.
goas_v01_gdf_clipped = gpd.clip(gdf=goas_v01_gdf, mask=product_footprint_gdf)

# Save the clipped polygons, reprojected to EPSG:4326.
goas_v01_gdf_clipped.to_crs(crs="EPSG:4326").to_parquet(path=goas_v01_clipped_url)

goas_v01_gdf_clipped

Unnamed: 0,name,latitude,longitude,min_Y,min_X,max_Y,max_X,area_km2,geometry
1,South Atlantic Ocean,-33.73758,-18.83411,-60.0,-69.60084,0.07511,20.0,42815540,"POLYGON ((899352.608 -248048.382, 899273.347 -..."
5,Indian Ocean,-27.27272,79.60241,-60.0,20.0,31.18586,146.91671,78162363,"MULTIPOLYGON (((3158299.965 -3184931.808, 3158..."
8,North Atlantic Ocean,31.77621,-40.24758,-0.93603,-98.05392,68.63872,12.00594,41741693,"MULTIPOLYGON (((-1322294.947 1208979.004, -132..."
6,Mediterranean Region,38.13065,19.70067,30.06809,-6.03255,47.3764,42.35496,2988248,"MULTIPOLYGON (((3446541.900 4126666.361, 34462..."


In [9]:
# Get all the tiles used to generate the Waterbodies Historical Extent.

# The datacube query is defined once and reused, so the product name is not
# repeated as a second hard-coded literal in the find_datasets() call.
dc_query = dict(product="wofs_ls_summary_alltime")
datasets = dc.find_datasets(**dc_query)

# Turn the datasets into tasks; no tile filter is applied.
tasks = create_tasks_from_datasets(
    datasets=datasets, tile_index_filter=None, bin_solar_day=False
)

# The keys of each task are used as tile indices; the values are not needed.
tile_indices = [tile_index for task in tasks for tile_index in task.keys()]

# Geometry of each tile's geobox extent, in the same order as tile_indices.
tile_extents = [
    gridspec.tile_geobox(tile_index=tile_index).extent.geom
    for tile_index in tile_indices
]

# One row per tile, indexed by tile index. set_index() is chained rather than
# applied in place so the frame is built in a single expression.
tile_extents_gdf = gpd.GeoDataFrame(
    data={"tile_index": tile_indices, "geometry": tile_extents}, crs=gridspec.crs
).set_index("tile_index")

_log.info(f"Found {len(tile_extents_gdf)} tiles")
tile_extents_gdf.head()

Processing 4461 datasets: 100%|██████████| 4461/4461 [00:02<00:00, 1644.43it/s]


[2024-06-13 19:48:00,410] {3850540251.py:16} INFO - Found 4456 tiles


Unnamed: 0_level_0,geometry
tile_index,Unnamed: 1_level_1
"(171, 99)","POLYGON ((-960000.000 2208000.000, -960000.000..."
"(171, 105)","POLYGON ((-960000.000 2784000.000, -960000.000..."
"(171, 113)","POLYGON ((-960000.000 3552000.000, -960000.000..."
"(172, 85)","POLYGON ((-864000.000 864000.000, -864000.000 ..."
"(172, 86)","POLYGON ((-864000.000 960000.000, -864000.000 ..."


In [10]:
%%time
# Identify all tiles that intersect with goas_v01_gdf_clipped
# This will be the coastal tiles.

coastal_tiles_index = tile_extents_gdf.sjoin(goas_v01_gdf_clipped, predicate="intersects", how="inner").index.to_list()

coastal_tiles = tile_extents_gdf[tile_extents_gdf.index.isin(coastal_tiles_index)]

coastal_tiles.to_crs("EPSG:4326").to_parquet(coastal_tiles_url)

coastal_tiles.reset_index()[["tile_index"]].to_parquet("../../data/coastal_tiles.parquet")

_log.info(f"Found {len(coastal_tiles)} coastal tiles")

[2024-06-13 19:48:22,486] {<timed exec>:10} INFO - Found 1402 coastal tiles
CPU times: user 21.9 s, sys: 6.22 ms, total: 21.9 s
Wall time: 22.1 s
