In [1]:
import os
import geopandas as gpd 
import pandas as pd
from shapely import box

In [2]:
# Directory holding the input datasets and receiving the exported result.
# The path is relative, so it resolves against the kernel's working directory.
data_dir = "../../data/"

In [3]:
# Read the DE Africa Waterbodies Historical Extent Product from its
# parquet file inside the data directory.
deafrica_waterbodies_fp = os.path.join(data_dir, "waterbodies.parquet")
deafrica_waterbodies = gpd.read_parquet(path=deafrica_waterbodies_fp)
print(f"Loaded {len(deafrica_waterbodies)} DE Africa waterbodies")

Loaded 700712 DE Africa waterbodies


In [4]:
# Per the original note, the ReaLSAT base shapefile contains the reference
# shape of all the reservoirs.
realsat_fp = os.path.join(data_dir, "ReaLSAT-2-0/ReaLSAT.shp")

realsat = (
    # Limit the read to the bounding box of the DE Africa waterbodies ...
    gpd.read_file(realsat_fp, bbox=box(*deafrica_waterbodies.total_bounds))
    # ... then keep only rows whose CONTINENT code is 4
    # (presumably ReaLSAT's code for Africa -- TODO confirm against the dataset docs).
    .loc[lambda gdf: gdf["CONTINENT"] == 4]
    .reset_index(drop=True)
)
print(f"Loaded {len(realsat)} waterbodies")

Loaded 34311 waterbodies


In [5]:
# Both layers are spatially joined further down, which is only meaningful when
# they share a coordinate reference system. Fail early, and say which CRSs
# were found, instead of raising a bare AssertionError.
assert deafrica_waterbodies.crs.equals(realsat.crs), (
    f"CRS mismatch: DE Africa waterbodies use {deafrica_waterbodies.crs}, "
    f"ReaLSAT uses {realsat.crs}"
)

In [6]:
# ReaLSAT attribute notes:
#   ID        - unique ID of a water body. ID values are also used as the names
#               of the shapefiles that contain the monthly shapes.
#   RESERVOIR - splits water bodies into two sets: 1 marks reservoirs that were
#               manually verified by visual validation, 0 marks all other
#               water bodies. The reservoir list is not exhaustive, so a water
#               body with value 0 could still be a reservoir.

# LakeATLAS attributes to keep.
# NOTE(review): this mentions LakeATLAS although the notebook loads ReaLSAT, and
# sel_cols is not referenced in the cells visible here -- confirm it is still needed.
sel_cols = [
    "Lake_name",
    "Lake_type",
    "geometry",
]

In [7]:
# Pick out the reservoirs: the ReaLSAT rows whose RESERVOIR flag equals 1.
is_flagged_reservoir = realsat["RESERVOIR"] == 1
reservoirs = realsat.loc[is_flagged_reservoir]
print(f"Found {len(reservoirs)} artificial waterbodies")

Found 1942 artificial waterbodies


In [8]:
# Collect the uid of every DE Africa waterbody whose geometry intersects at
# least one ReaLSAT reservoir polygon. A waterbody touching several reservoirs
# appears several times in the join, hence the de-duplication.
reservoir_uids = (
    deafrica_waterbodies
    .sjoin(reservoirs, how="inner", predicate="intersects")["uid"]
    .unique()
)

# Flag column: 1 when the waterbody's uid is in the matched set, otherwise 0
# (0 means "not identified as a reservoir").
deafrica_waterbodies["ReaLSAT_Reservoir"] = (
    deafrica_waterbodies["uid"].isin(reservoir_uids).astype("int64")
)

print(f"{len(deafrica_waterbodies[deafrica_waterbodies['ReaLSAT_Reservoir'] == 1])} DE Africa waterbodies identified as reservoirs")

2250 DE Africa waterbodies identified as reservoirs


In [9]:
# Write the waterbodies, including the ReaLSAT_Reservoir column, to a new
# parquet file in the data directory.
deafrica_waterbodies.to_parquet(
    os.path.join(data_dir, "deafrica_waterbodies_realsat_update.parquet")
)