Notebook collates Antarctic active subglacial lakes from past inventories that included polygons (Smith and others, 2009; Siegfried & Fricker, 2018), the latest inventory, which includes point lake locations (Livingstone and others, 2022), and individual studies that are not included in past inventories, to generate the most recent active subglacial lake inventory.

Written 2023-01-17 by W. Sauthoff (sauthoff@mines.edu)

# Setup computing environment

In [None]:
# Install dependencies not pre-installed
%pip install openpyxl --quiet

In [None]:
# Import packages
import fiona
import geopandas as gpd
import glob
import h5py
import math
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from shapely.geometry import Point, Polygon, MultiPolygon
from shapely.ops import unary_union
import pyproj
from pyproj import CRS, Geod, Transformer

# Magic functions
%matplotlib widget

# Define utility functions
def ll2ps(lon, lat):
    """
    Project geodetic coordinates (EPSG:4326) onto the
    Antarctic Polar Stereographic grid (EPSG:3031)

    Arguments are given longitude first, then latitude.

    x, y = ll2ps(lon, lat)
    """
    # always_xy=True pins the axis order to (lon, lat) in, (x, y) out
    projector = Transformer.from_crs(CRS("EPSG:4326"), CRS("EPSG:3031"), always_xy=True)
    easting, northing = projector.transform(lon, lat)
    return easting, northing

def ps2ll(x, y):
    """
    Convert Antarctic Polar Stereographic coordinates (EPSG:3031)
    back to geodetic coordinates (EPSG:4326)

    The result is returned longitude first, then latitude.

    lon, lat = ps2ll(x, y)
    """
    # always_xy=True pins the axis order to (x, y) in, (lon, lat) out
    unprojector = Transformer.from_crs(CRS("EPSG:3031"), CRS("EPSG:4326"), always_xy=True)
    longitude, latitude = unprojector.transform(x, y)
    return longitude, latitude

def find_intersections(gdf1, gdf2):
    """
    List every pair of geometries, one from each input, that intersect

    Every geometry of gdf1 is compared against every geometry of gdf2
    (brute force, no spatial index).

    Returns a list of (index1, index2) tuples holding the index labels of the
    intersecting geometries, ordered by gdf1 first and gdf2 second.
    """
    return [
        (label1, label2)
        for label1, shape1 in gdf1.geometry.items()
        for label2, shape2 in gdf2.geometry.items()
        if shape1.intersects(shape2)
    ]

# Create a Geod object for calculating area on the WGS84 ellipsoid
# NOTE: this module-level name is rebound in the Siegfried & Fricker (2018) import cell
# (via CRS("EPSG:4326").get_geod()); calculate_area and the per-cell area lambdas
# all read whichever binding is current when they run
geod = Geod(ellps="WGS84")

# Define functions

In [None]:
def calculate_area(geometry, crs=None):
    '''
    Calculate geodesic area of polygon or multipolygon
    Can accept either a geometry with associated CRS or a geometry and separate CRS

    Interior rings (holes) are subtracted from the area of their polygon.
    For a GeoDataFrame/GeoSeries only the first geometry is measured.

    Parameters:
    -----------
    geometry : shapely.geometry.Polygon/MultiPolygon or GeoDataFrame/GeoSeries
        Input geometry
    crs : pyproj.CRS or str, optional
        Coordinate reference system. Required if geometry doesn't have CRS.
        A CRS carried by the geometry itself takes precedence over this argument.

    Returns:
    --------
    float or None
        Area in square meters, or None if geometry is None or invalid

    Raises:
    -------
    ValueError
        If no CRS is available, if the CRS is not EPSG:4326, or if the geometry
        is neither a Polygon nor a MultiPolygon
    '''
    # Handle GeoDataFrame/GeoSeries input
    if hasattr(geometry, 'crs'):
        # Only let the container's CRS win when it actually has one; a container
        # without a CRS must not wipe out the CRS supplied by the caller
        if geometry.crs is not None:
            crs = geometry.crs
        if isinstance(geometry, (gpd.GeoDataFrame, gpd.GeoSeries)):
            geometry = geometry.geometry.iloc[0]

    # Validate CRS
    if crs is None:
        raise ValueError("No CRS provided. Input must either have CRS or CRS must be provided separately")

    # Convert CRS to string for comparison if it isn't already
    if not isinstance(crs, str):
        crs = str(crs)

    # Check if geometry is in EPSG:4326 (geodesic area needs lon/lat coordinates)
    if 'EPSG:4326' not in crs.upper():
        raise ValueError(f"CRS must be EPSG:4326, got {crs}")

    # Check if geometry is None or invalid
    if geometry is None or not geometry.is_valid:
        return None

    def ring_area(ring):
        # abs() because the sign of the geodesic area depends on ring orientation
        lons, lats = ring.coords.xy
        return abs(geod.polygon_area_perimeter(lons, lats)[0])

    def polygon_area(poly):
        # Exterior area minus the area of every hole
        return ring_area(poly.exterior) - sum(ring_area(hole) for hole in poly.interiors)

    # Calculate area based on geometry type
    if isinstance(geometry, Polygon):
        return polygon_area(geometry)
    elif isinstance(geometry, MultiPolygon):
        return sum(polygon_area(part) for part in geometry.geoms)
    else:
        raise ValueError("Input must be either Polygon or MultiPolygon")

# Import past inventories

## Smith and others, 2009

In [None]:
# Load the Smith and others (2009) (S09) active subglacial lake outlines
# As released in Smith and others, 2012 dataset (https://doi.org/10.15784/601439)
fiona.drvsupport.supported_drivers['KML'] = 'rw'  # enable fiona's KML driver for read/write
S09_outlines_lonlat = gpd.read_file('input/lake_outlines/Smith2009_lakes/Antarctic_lakes.kml', driver='KML')

# Geodesic area is computed from lon/lat, so reproject unless already in EPSG:4326
if S09_outlines_lonlat.crs != 'EPSG:4326':
    S09_outlines_lonlat = S09_outlines_lonlat.to_crs('EPSG:4326')


def _S09_geodesic_area(poly):
    # Geodesic area (m^2) of the exterior ring; None for missing or invalid polygons
    if poly is not None and poly.is_valid:
        ring_lon, ring_lat = poly.exterior.coords.xy
        return abs(geod.polygon_area_perimeter(ring_lon, ring_lat)[0])
    return None


# Store the geodesic area of each outline
S09_outlines_lonlat['area (m^2)'] = S09_outlines_lonlat['geometry'].apply(_S09_geodesic_area)

# Work in Antarctic Polar Stereographic (EPSG:3031) from here on and drop the lon/lat copy
S09_outlines = S09_outlines_lonlat.to_crs(3031)
del S09_outlines_lonlat

# Lake names carry surrounding whitespace/newline characters in the KML
S09_outlines['Name'] = S09_outlines['Name'].str.strip()

# Keep only the columns used downstream
columns_to_keep = ['Name', 'geometry', 'area (m^2)']
S09_outlines = S09_outlines[columns_to_keep]

# Display the modified geodataframe
S09_outlines

In [None]:
# Write Smith and others, 2009 outlines (EPSG:3031 geometries plus name and geodesic area) to GeoJSON
S09_outlines.to_file('output/lake_outlines/stationary_outlines/Smith2009_outlines.geojson', driver='GeoJSON')

## Siegfried and Fricker, 2018

In [None]:
# Import active subglacial lake outlines from Siegfried & Fricker (2018) (SF18)
# Original pub: https://doi.org/10.1017/aog.2017.36
# Code for loading lake outlines available in code bank associated with Siegfried & Fricker (2021), https://doi.org/10.1029/2020GL091089:
# https://github.com/mrsiegfried/Siegfried2021-GRL/blob/main/data/outlines/load_lakes.ipynb

SF18_lake_names = []  # HDF5 group name of every lake
outline_geometries = []  # store polygons
citations = []  # store citation information

# we're going to calculate geodesic lake area because that is often screwed up
# and occasionally incorrect in the literature
areas = []  # m^2 for every lake, single- and multi-part alike

# we're going to need to do some coordinate transforms for the geodesic area
CRS_LL = "EPSG:4326"  # wgs84 in lon,lat

# The context manager closes the HDF5 file even if a lake fails to load
with h5py.File('input/lake_outlines/SiegfriedFricker2018_lakes/SiegfriedFricker2018-outlines.h5', 'r') as h5f:
    CRS_XY = h5f.attrs.get('proj_crs')  # get projection from hdf5 file
    XY_TO_LL = Transformer.from_crs(CRS_XY, CRS_LL, always_xy=True)  # make coord transformer
    geod = CRS(CRS_LL).get_geod()  # geod object for calculating geodesic area on defined ellipsoid

    # look through each lake and load all of its info
    for lake in h5f.keys():
        outline_x = h5f[lake]['x'][:]
        outline_y = h5f[lake]['y'][:]
        # presumably the datasets are shaped (1, N); the reshape yields N rows of (x, y)
        outlines_xy = np.stack((outline_x, outline_y), axis=2).reshape(outline_x.shape[1], 2)

        # A single lake with multiple polygons is NaN broken: each NaN row separates
        # two rings. With no NaN rows this produces exactly one ring.
        nan_rows = np.where(np.isnan(outlines_xy[:, 0]))[0]
        ring_starts = np.concatenate(([0], nan_rows + 1))
        ring_stops = np.concatenate((nan_rows, [len(outlines_xy)]))
        rings = [outlines_xy[start:stop] for start, stop in zip(ring_starts, ring_stops)]

        # Sum the geodesic area of every ring. The result stays in m^2 to match the
        # 'area (m^2)' column; multi-part lakes were previously divided by 1e6 (km^2)
        # while single-part lakes were not.
        this_area = 0
        for ring in rings:
            lon, lat = XY_TO_LL.transform(ring[:, 0], ring[:, 1])
            this_area += abs(geod.polygon_area_perimeter(lon, lat)[0])

        if len(rings) == 1:
            geometry = Polygon(rings[0])
        else:
            geometry = MultiPolygon([Polygon(ring) for ring in rings])

        # append all the results in the right place
        SF18_lake_names.append(lake)
        outline_geometries.append(geometry)
        citations.append(h5f[lake].attrs.get('citation')[0].decode('UTF-8'))
        areas.append(this_area)

# make a pandas dataframe with all the necessary info
df = pd.DataFrame(zip(SF18_lake_names, outline_geometries, areas, citations),
                  columns=['name', 'geometry', 'area (m^2)', 'cite'])
# convert to geopandas geodataframe
SF18_outlines = gpd.GeoDataFrame(df, crs=CRS_XY, geometry=outline_geometries)

# Display geodataframe
SF18_outlines

In [None]:
# Write Siegfried & Fricker, 2018 outlines (name, geometry, geodesic area, citation) to GeoJSON
SF18_outlines.to_file('output/lake_outlines/stationary_outlines/SiegfriedFricker2018_outlines.geojson', driver='GeoJSON')

In [None]:
# Isolate the outlines that SF18 added beyond Smith and others (2009) to view the added lakes
S09_citation = 'Smith and others, 2009, J. Glac., doi:10.3189/002214309789470879'

# Keep every row whose citation is anything other than the S09 citation, as an independent copy
cited_elsewhere = SF18_outlines['cite'] != S09_citation
SF18_outlines_SF18only = SF18_outlines[cited_elsewhere].copy(deep=True)

# View dataframe
SF18_outlines_SF18only

## Livingstone and others, 2022

In [None]:
# Read subglacial lake point data from Livingstone and others (2022) (L22), https://doi.org/10.1038/s43017-021-00246-9
url = 'https://static-content.springer.com/esm/art%3A10.1038%2Fs43017-021-00246-9/MediaObjects/43017_2021_246_MOESM1_ESM.xlsx'
use_cols = ['Name', 'Lat.  oN', 'Lon. oE', 'Lake Type', 'References', 'Prior Inventory']

# Only spreadsheet rows 0-675 are read; every later row is skipped
import_rows = np.arange(0,676)
L22_points = pd.read_excel(
    url,
    sheet_name='Antarctica',
    usecols=use_cols,
    skiprows=lambda row_number: row_number not in import_rows,
)

# Keep just the active lakes (lake type compared after stripping whitespace) and renumber the rows
is_active = L22_points['Lake Type'].str.strip() == 'Active'
L22_activelake_points = L22_points[is_active].reset_index(drop=True)

# Display dataframe
L22_activelake_points

In [None]:
# Isolate lakes not previously included in Smith and others (2009; 2017) or Siegfried and Fricker (2018)

# Flag rows whose references cite either 'Smith et al. (2009)' or 'Siegfried & Fricker (2018)'
# (parentheses and period escaped so they match literally). na=False treats a missing
# 'References' entry as "not previously inventoried" instead of yielding a NaN mask,
# which boolean indexing rejects.
in_prior_inventory = L22_activelake_points['References'].str.contains(
    r'Smith et al\. \(2009\)|Siegfried & Fricker \(2018\)', regex=True, na=False)

# Keep the remaining rows as an independent copy with a fresh index
L22_activelake_points_new = L22_activelake_points[~in_prior_inventory].copy(deep=True).reset_index(drop=True)

# Display the updated dataframe
L22_activelake_points_new

In [None]:
# Byrd1 (DL) and Byrd2 (UL) were included in Smith and others, 2009 and thus Siegfried & Fricker, 2018 inventories
# Let's confirm this

In [None]:
# Search for rows where the S09 'Name' column contains Byrd_1 or Byrd_2
# (case-insensitive; missing names count as no match)
S09_outlines[S09_outlines['Name'].str.contains('Byrd_1|Byrd_2', case=False, na=False)]

In [None]:
# Search for rows where the SF18 'name' column contains Byrd_1 or Byrd_2 or one of the Thwaites lakes
# (case-insensitive; missing names count as no match)
SF18_outlines[SF18_outlines['name'].str.contains('Byrd_1|Byrd_2|Thw_70|Thw_124|Thw_142|Thw_170', case=False, na=False)]

In [None]:
# Drop the L22 point entries whose 'Name' is one of the values listed here
names_to_remove = ['Byrd1 (DL)', 'Byrd2 (UL)', 'THW70', 'THW170', 'THW142', 'THW124']

# Keep everything else and renumber the rows
keep_row = ~L22_activelake_points_new['Name'].isin(names_to_remove)
L22_activelake_points_new = L22_activelake_points_new[keep_row].reset_index(drop=True)

# Display the updated dataframe
L22_activelake_points_new

# Generate Sauthoff and others (2025) inventory of previously identified lakes

## Add lakes from past inventories

In [None]:
# Copy SF18 outline inventory to add entries from more recent publications
# Starting with the most recent active subglacial lake inventory that includes stationary lake outlines
# Deep copy so that later additions and edits leave SF18_outlines itself untouched
stationary_outlines_gdf = SF18_outlines.copy(deep=True)

In [None]:
# Examine the distinct citation strings present in the inventory
stationary_outlines_gdf['cite'].unique()

In [None]:
# Change journal abbreviations to ISO4 standard
# Each pair is (citation as stored in SF18, citation with the ISO4 journal abbreviation):
# TC -> Cryosphere, GRL -> Geophys. Res. Lett.
iso4_citation_fixes = [
    ('Kim and others, 2016, TC, doi:10.5194/tc-10-2971-2016',
     'Kim and others, 2016, Cryosphere, doi:10.5194/tc-10-2971-2016'),
    ('Smith and others, 2017, TC, doi:10.5194/tc-11-451-2017',
     'Smith and others, 2017, Cryosphere, doi:10.5194/tc-11-451-2017'),
    ('McMillan and others, 2013, GRL, doi:10.1002/grl.50689',
     'McMillan and others, 2013, Geophys. Res. Lett., doi:10.1002/grl.50689'),
]

# Whole-value replacement applied across the geodataframe, one citation at a time
for old_citation, new_citation in iso4_citation_fixes:
    stationary_outlines_gdf = stationary_outlines_gdf.replace(old_citation, new_citation)

# Ensure replacements worked as expected
stationary_outlines_gdf['cite'].unique()

In [None]:
# Mean area of the lakes inventoried in SF18, used as a guesstimate for lakes that
# lack this information or figures allowing for closer guesstimate
mean_area_m2 = np.mean(SF18_outlines['area (m^2)'])
mean_area_km2 = np.round(mean_area_m2/1e6, 1)  # m^2 -> km^2, one decimal place
print('mean lake area: ', mean_area_km2, ' km^2')

In [None]:
# Add entries for previously identified lakes not included in SF18 inventory
# Used approximated centroid point when lake outline was unavailable

# Smith 2009 Recovery_8 was argued to not be an active subglacial lake in Fricker and others (2010),
# so not included in Siegfried & Fricker (2018) outlines; including here to determine if there's been activity since
lake_gdf = S09_outlines[S09_outlines['Name'] == 'Recovery_8']
name = lake_gdf['Name'].values[0]

# S09 outline inventory uses 3D polygons with z dimension vs. 2D polygons in SF18 inventory
# Keep only the first two ordinates (X, Y) of every perimeter vertex; slicing rather than
# unpacking (x, y, z) also works if the ring turns out to be 2D already
xy_coords = [tuple(coord[:2]) for coord in lake_gdf.geometry.values[0].exterior.coords]

# Create a new 2D polygon from these coordinates
lake_poly_2d = Polygon(xy_coords)
geometry = lake_poly_2d

# Store area that was previously calculated to be geodesic area
area = lake_gdf['area (m^2)'].values[0]

# Store citation info from another lake from the same S09 study in the SF18 citation format
cite = SF18_outlines[SF18_outlines['name'] == 'Bindschadler_1'].cite.values[0]

# Make single-row geodataframe to concatenate to inventory
gdf = gpd.GeoDataFrame([[name, geometry, area, cite]], columns=stationary_outlines_gdf.columns)

# Set CRS
gdf.crs = stationary_outlines_gdf.crs

# Ensure that new entry isn't already in inventory before adding (keeps the cell safe to re-run)
gdf_diff = gdf[~gdf['name'].isin(stationary_outlines_gdf['name'])]

# Add entry to inventory
stationary_outlines_gdf = pd.concat([stationary_outlines_gdf, gdf_diff], ignore_index=True)

In [None]:
# Livingstone and others, 2022 active lakes points (no outlines available):
# every lake below is represented by a circle centred on its published point,
# with the radius chosen so that the circle has the area assigned to the lake


def _append_circular_lake(inventory, name, lon, lat, area, cite):
    # Return the inventory with a circular lake of the given area (m^2) appended,
    # unless a lake with the same name is already present (avoids duplicate entries)
    circle = Point(ll2ps(lon, lat)).buffer(math.sqrt(area / math.pi))
    entry = gpd.GeoDataFrame([[name, circle, area, cite]], columns=inventory.columns,
                             geometry=[circle], crs=inventory.crs)
    entry = entry[~entry['name'].isin(inventory['name'])]
    return gpd.GeoDataFrame(pd.concat([inventory, entry], ignore_index=True), crs=inventory.crs)


def _L22_rows_named(lake_name):
    # Rows of the new-lakes point table whose 'Name' equals lake_name exactly
    return L22_activelake_points_new.loc[L22_activelake_points_new['Name'] == lake_name]


# Wingham and others, 2006 (within Livingstone and others, 2022 inventory)
# Lakes L1, U1, U2, U3
lake_names = ['L1', 'U1', 'U2', 'U3']
areas = [600e6, 200e6, 200e6, 400e6]  # Lake L1 area estimated in paper, U1-3 guestimated from their Fig. 1

for lake_name, lake_area in zip(lake_names, areas):
    lake_row = _L22_rows_named(lake_name)
    if lake_row.empty:  # Skip lakes that are not found in the dataframe
        continue
    record = lake_row.iloc[0]
    stationary_outlines_gdf = _append_circular_lake(
        stationary_outlines_gdf, record['Name'], record['Lon. oE'], record['Lat.  oN'], lake_area,
        'Wingham and others, 2006, Nature, doi:10.1038/nature04660')

# N. Young personal comm. (within Wright & Siegert, 2012 and Livingstone and others, 2022 inventories)
# Lakes Site A, Site B, Site C
lake_names = ['“Site A”', '“Site B”', '“Site C”']
area = 200e6  # Estimate based on average of previously identified lakes

for lake_name in lake_names:
    lake_row = _L22_rows_named(lake_name)
    if lake_row.empty:  # Skip lakes that are not found in the dataframe
        continue
    record = lake_row.iloc[0]
    # Strip curly double quotation marks from names and replace spaces with underscores
    clean_name = record['Name'].strip('“”').replace(" ", "_")
    stationary_outlines_gdf = _append_circular_lake(
        stationary_outlines_gdf, clean_name, record['Lon. oE'], record['Lat.  oN'], area,
        'Wright & Siegert, 2012, Antarct. Sci., doi:10.1017/S095410201200048X')

# Lacked sufficient data density in altimetry datasets data_counts to include:
# Scambos and others, 2011, Ann. Glac.
# https://doi.org/10.3189/172756411799096204
# (within Livingstone and others, 2022 inventory)

# Crane Glacier lake

area = 4.5e6  # Area reported in original publication

# NOTE(review): the lake is picked by position (row 7 of the new-lakes table), not by name,
# so this depends on the exact row order left by the filtering cells above; verify after
# any change to the spreadsheet or to the filters
record = L22_activelake_points_new.iloc[7]
# Use all words of the name connected with underscores
clean_name = '_'.join(record['Name'].split())
stationary_outlines_gdf = _append_circular_lake(
    stationary_outlines_gdf, clean_name, record['Lon. oE'], record['Lat.  oN'], area,
    'Scambos and others, 2011, Ann. Glaciol., doi:10.3189/172756411799096204')

In [None]:
# Display newly added lakes (every row appended after the original SF18 entries)
stationary_outlines_gdf.iloc[len(SF18_outlines):]

In [None]:
# Plot newly added lake outlines
fig, ax = plt.subplots()

# Lakes to draw, selected by their inventory 'name'
# NOTE(review): 'L1' is added alongside U1-U3 in the cell above but is not listed here; confirm whether that is intentional
names_to_plot = ['Recovery_8', 'U1', 'U2', 'U3', 'Site_A', 'Site_B', 'Site_C', 'Crane_Glacier']
filtered_gdf = stationary_outlines_gdf[stationary_outlines_gdf['name'].isin(names_to_plot)]

filtered_gdf.boundary.plot(ax=ax, color='blue')

# Label each outline, anchoring the text just off the polygon centroid
for lake_label, lake_shape in zip(filtered_gdf['name'], filtered_gdf['geometry']):
    label_anchor = lake_shape.centroid
    ax.annotate(text=lake_label, xy=(label_anchor.x, label_anchor.y), xytext=(3, 3), textcoords="offset points")
plt.show()

### Hoffman and others, 2020

In [None]:
# Hoffman and others, 2020, Cryosphere
# https://doi.org/10.5194/tc-14-4603-2020

# Redelineations of two Thwaites lakes (Thw124 and Thw142)

# Obtained lake outlines via email from corresponding author, Hoffman

# Read lake outline geojsons into geodataframes, one geodataframe per lake
file_path = 'input/lake_outlines/Hoffman2020_subglacial_lakes/Thw124.geojson'
Hoffman2020_Thw124 = gpd.read_file(file_path)

file_path = 'input/lake_outlines/Hoffman2020_subglacial_lakes/Thw142.geojson'
Hoffman2020_Thw142 = gpd.read_file(file_path)

In [None]:
# Compare the Thwaites outlines already in the inventory (default colour)
# with the Hoffman and others (2020) redelineations (red)
fig, ax = plt.subplots()

thwaites_names = ['Thw_70', 'Thw_124', 'Thw_142', 'Thw_170']
inventory_thwaites = stationary_outlines_gdf[stationary_outlines_gdf['name'].isin(thwaites_names)]
inventory_thwaites.boundary.plot(ax=ax)

for redelineation in (Hoffman2020_Thw124, Hoffman2020_Thw142):
    redelineation.boundary.plot(ax=ax, color='red')
plt.show()

Hoffman and others redelineated Thw124 and Thw142; however, the outlines are largely similar to the Smith and others, 2017 delineations and were not publicly available at the time of investigation, so we use the Smith and others, 2017 delineations contained in the SF18 inventory.

In [None]:
# Hoffman and others, 2020 (within Livingstone and others, 2022 inventory as lake points)
# but multiple lakes aggregated into single Haynes Glacier (HG) point

# Takahe lakes (TL; named after nearby Mt. Takahe) on Haynes Glacier (HG) & Western Thwaites (WT) lake
# detailed in Hoffman and others, 2020 supplement
# https://tc.copernicus.org/articles/14/4603/2020/tc-14-4603-2020-supplement.pdf

# Make list of lake names (each has a matching <name>.geojson file)
lake_names = ['TL96', 'TL108', 'TL115', 'TL122', 'WT']

for lake_name in lake_names:
    # Load geojson into geodataframe
    lake_geojson_path = f'input/lake_outlines/Hoffman2020_subglacial_lakes/{lake_name}.geojson'
    lake_gdf = gpd.read_file(lake_geojson_path)

    # Extract name
    name = lake_gdf['id'].values[0]

    # Geodesic area is computed from a lon/lat (EPSG:4326) copy of the outline
    poly_lonlat = lake_gdf.to_crs('EPSG:4326')['geometry'].iloc[0]
    if poly_lonlat is not None and poly_lonlat.is_valid:
        area = abs(geod.polygon_area_perimeter(
            poly_lonlat.exterior.coords.xy[0], poly_lonlat.exterior.coords.xy[1])[0])
    else:
        area = None

    # Reproject to the inventory CRS *before* extracting the geometry, so its coordinates
    # match the CRS assigned to the new entry whatever CRS the GeoJSON was written in
    lake_gdf = lake_gdf.to_crs(stationary_outlines_gdf.crs)
    geometry = lake_gdf['geometry'].iloc[0]
    cite = 'Hoffman and others, 2020, Cryosphere, doi:10.5194/tc-14-4603-2020'

    # Create a GeoDataFrame for the current lake and append it if it's not already present
    gdf = gpd.GeoDataFrame([[name, geometry, area, cite]], columns=stationary_outlines_gdf.columns)
    gdf.crs = stationary_outlines_gdf.crs
    gdf_diff = gdf[~gdf['name'].isin(stationary_outlines_gdf['name'])]
    stationary_outlines_gdf = pd.concat([stationary_outlines_gdf, gdf_diff], ignore_index=True)

In [None]:
# Plot the lakes named in lake_names as stored in the inventory
fig, ax = plt.subplots()

# Select the inventory rows by 'name'
filtered_gdf = stationary_outlines_gdf[stationary_outlines_gdf['name'].isin(lake_names)]

filtered_gdf.boundary.plot(ax=ax, color='blue')

# Label each outline, anchoring the text just off the polygon centroid
for lake_label, lake_shape in zip(filtered_gdf['name'], filtered_gdf['geometry']):
    label_anchor = lake_shape.centroid
    ax.annotate(text=lake_label, xy=(label_anchor.x, label_anchor.y), xytext=(3, 3), textcoords="offset points")
plt.show()

### Neckel and others, 2021

In [None]:
# Neckel and others, 2021, Geophys. Res. Lett.
# (publication not included in Livingstone and others, 2022 inventory)
# https://doi.org/10.1029/2021GL094472

# New active lakes on Jutulstraumen Glacier (Antarctica)

# Data set
# https://doi.org/10.1594/PANGAEA.927120

# Read shapefile of interferometry-derived lake outlines into geodataframe
file_path = 'input/lake_outlines/Neckel2021_Jutulstraumen_lakes/JG_interferometry_lake_outlines.shp'
Neckel2021_outlines = gpd.read_file(file_path)

# Display geodataframe
Neckel2021_outlines

In [None]:
# Drop the columns unnecessary to our analysis, then rename the 'Feature' column
# to 'name' to match stationary_outlines_gdf
Neckel2021_outlines = (
    Neckel2021_outlines
    .drop(columns=['Date', 'Year', 'Month', 'Movement', 'Lon', 'Lat'])
    .rename(columns={'Feature': 'name'})
)

# Display geodataframe
Neckel2021_outlines

In [None]:
# Plot the Neckel and others (2021) outlines as read from file
fig, ax = plt.subplots()

Neckel2021_outlines.boundary.plot(ax=ax)

# Write each lake's name centred on the centroid of its polygon
for lake_label, lake_shape in zip(Neckel2021_outlines['name'], Neckel2021_outlines.geometry):
    label_anchor = lake_shape.centroid
    ax.text(label_anchor.x, label_anchor.y, lake_label, fontsize=12, ha='center', va='center')

In [None]:
# This is the only set of previously identified lakes that has spatiotemporally evolving outlines available as a data set
# For simplicity, we collapse this spatiotemporal variability into a stationary outline that is
# the unary union of all outlines of each lake

# Each inner list names the features whose outlines are merged into one lake
criterias = [['D2_b', 'E1'], ['E2', 'F2']]

for criteria in criterias:
    # Filter rows where 'name' matches the current set of criteria
    rows_to_combine = Neckel2021_outlines[Neckel2021_outlines['name'].isin(criteria)]

    # Check if we have at least two polygons to combine for the current criteria
    if len(rows_to_combine) >= 2:
        # Perform the union of the polygons
        combined_polygon = rows_to_combine['geometry'].union_all()

        # Create a new row with the combined geometry and a name built from the merged features
        new_row_data = {
            'geometry': combined_polygon,
            'name': 'Combined_' + '_'.join(criteria),
        }
        # Use the CRS of the frame the union came from, not a variable left over from an earlier cell
        new_row = gpd.GeoDataFrame([new_row_data], crs=Neckel2021_outlines.crs)

        Neckel2021_outlines = pd.concat([Neckel2021_outlines, new_row], ignore_index=True)

    # Remove the individual features now represented by the combined row
    # NOTE(review): this also runs when fewer than two rows matched, in which case the
    # feature is dropped without a combined replacement; confirm that is intended
    Neckel2021_outlines = Neckel2021_outlines[~Neckel2021_outlines['name'].isin(criteria)]

# Reset the index first (this returns a new frame), then add the citation column,
# so the column is not assigned onto a filtered slice
Neckel2021_outlines = Neckel2021_outlines.reset_index(drop=True)
Neckel2021_outlines['cite'] = 'Neckel and others, 2021, Geophys. Res. Lett., doi:10.1029/2021GL094472'

# Ensure GeoDataFrame is in EPSG:4326 for geodesic area calculation
if Neckel2021_outlines.crs != 'EPSG:4326':
    Neckel2021_outlines = Neckel2021_outlines.to_crs("EPSG:4326")


def _Neckel2021_geodesic_area(geom):
    # Geodesic area (m^2) from exterior rings; None for missing or invalid geometry.
    # A union of non-overlapping outlines is a MultiPolygon, so sum over its parts.
    if geom is None or not geom.is_valid:
        return None
    parts = geom.geoms if isinstance(geom, MultiPolygon) else [geom]
    return sum(
        abs(geod.polygon_area_perimeter(part.exterior.coords.xy[0], part.exterior.coords.xy[1])[0])
        for part in parts)


# Calculate the geodesic area for each outline
Neckel2021_outlines['area (m^2)'] = Neckel2021_outlines.geometry.apply(_Neckel2021_geodesic_area)

# Convert crs to epsg:3031
Neckel2021_outlines = Neckel2021_outlines.to_crs(3031)

# Add a prefix to all the names to indicate their locations on Jutulstraumen Glacier
Neckel2021_outlines['name'] = 'JG_' + Neckel2021_outlines['name']

# Add to stationary_outlines_gdf, skipping any names already present
gdf_diff = Neckel2021_outlines[~Neckel2021_outlines['name'].isin(stationary_outlines_gdf['name'])]
stationary_outlines_gdf = pd.concat([stationary_outlines_gdf, gdf_diff], ignore_index=True)

In [None]:
# Display the combined, renamed Neckel and others (2021) outlines
Neckel2021_outlines

In [None]:
# Plot the Neckel and others (2021) lakes as stored in the inventory
fig, ax = plt.subplots()

# Select the inventory rows whose 'name' appears in Neckel2021_outlines
neckel_names = Neckel2021_outlines['name'].tolist()
filtered_gdf = stationary_outlines_gdf[stationary_outlines_gdf['name'].isin(neckel_names)]

filtered_gdf.boundary.plot(ax=ax, color='blue')

# Label each outline, anchoring the text just off the polygon centroid
for lake_label, lake_shape in zip(filtered_gdf['name'], filtered_gdf['geometry']):
    label_anchor = lake_shape.centroid
    ax.annotate(text=lake_label, xy=(label_anchor.x, label_anchor.y), xytext=(3, 3), textcoords="offset points")
plt.show()

### Siegfried and Fricker, 2021

In [None]:
# Siegfried and Fricker, 2021, Geophys. Res. Lett.
# (publication not included in Livingstone and others, 2022 inventory)
# https://doi.org/10.1029/2020GL091089

# New active lakes near previously identified lakes on Mercer and Whillans Ice Streams

# These are discussed but no figures contain outlines; Lead author, Siegfried, confirmed no outlines were generated
# Guestimated centroid point and approximate area: each lake becomes a circle of the
# assigned area centred on the guessed point, given in the inventory's projected coordinates

area = 10e6
cite = 'Siegfried and Fricker, 2021, Geophys. Res. Lett., doi:10.1029/2020GL091089'

# (inventory name, guessed centre) for Lower Conway Subglacial Lake and Lower Subglacial Lake Mercer
guessed_centres = [
    ('LowerConwaySubglacialLake', (-308000, -509000)),
    ('LowerMercerSubglacialLake', (-282000, -502000)),
]

for name, centre_xy in guessed_centres:
    geometry = Point(*centre_xy).buffer(math.sqrt(area / math.pi))
    gdf = gpd.GeoDataFrame([[name, geometry, area, cite]], columns=stationary_outlines_gdf.columns, geometry=[geometry])
    gdf.crs = stationary_outlines_gdf.crs

    # Skip the lake if its name is already in the inventory
    gdf_diff = gdf[~gdf['name'].isin(stationary_outlines_gdf['name'])]
    stationary_outlines_gdf = gpd.GeoDataFrame(
        pd.concat([stationary_outlines_gdf, gdf_diff], ignore_index=True), crs=stationary_outlines_gdf.crs)

In [None]:
# Plot the Mercer and Conway lakes together with their newly added "Lower" neighbours
fig, ax = plt.subplots()

# Select the inventory rows by 'name'
names_to_plot = ['MercerSubglacialLake', 'LowerMercerSubglacialLake', 'ConwaySubglacialLake', 'LowerConwaySubglacialLake']
filtered_gdf = stationary_outlines_gdf[stationary_outlines_gdf['name'].isin(names_to_plot)]

filtered_gdf.boundary.plot(ax=ax, color='blue')

# Label each outline, anchoring the text just off the polygon centroid
for lake_label, lake_shape in zip(filtered_gdf['name'], filtered_gdf['geometry']):
    label_anchor = lake_shape.centroid
    ax.annotate(text=lake_label, xy=(label_anchor.x, label_anchor.y), xytext=(3, 3), textcoords="offset points")
plt.show()

### Freer and others, 2024

In [None]:
# Freer and others, 2024, J. Geophys. Res. Earth Surf.
# https://doi.org/10.1029/2024JF007724
# (study published after Livingstone and others, 2022 inventory)

# New active lakes near previously identified lakes on Whillans ice plain

# Lake outline files obtained via email from corresponding author, Freer

# Read the two shapefiles (SLE outline and feeder-lake outline) into separate geodataframes
sle_outline = gpd.read_file('input/lake_outlines/Freer2024_lakes/SLE-outline-BF.shp')
feeder_outline = gpd.read_file('input/lake_outlines/Freer2024_lakes/feeder-lake-outline-BF.shp')

# Stack them (SLE first, feeder lake second) and convert the result to EPSG:3031
Freer2024_outlines = pd.concat([sle_outline, feeder_outline], ignore_index=True).to_crs('EPSG:3031')

# Display geodataframe
Freer2024_outlines

In [None]:
# Compare the Freer and others (2024) outlines (blue) with the SF18 outline
# of Engelhardt Subglacial Lake (black)
fig, ax = plt.subplots()

Freer2024_outlines.boundary.plot(ax=ax, color='blue')
SF18_engelhardt = SF18_outlines[SF18_outlines['name'] == 'EngelhardtSubglacialLake']
SF18_engelhardt.boundary.plot(ax=ax, color='k')

# Write each Freer outline's 'names' value centred on the centroid of its polygon
for lake_label, lake_shape in zip(Freer2024_outlines['names'], Freer2024_outlines.geometry):
    label_anchor = lake_shape.centroid
    ax.text(label_anchor.x, label_anchor.y, lake_label, fontsize=12, ha='center', va='center')

In [None]:
# View current SLE lake (the inventory row named 'EngelhardtSubglacialLake')
stationary_outlines_gdf[stationary_outlines_gdf['name'] == 'EngelhardtSubglacialLake']

In [None]:
# Remember the name and citation of the existing SLE entry before replacing its outline
is_SLE = stationary_outlines_gdf['name'] == 'EngelhardtSubglacialLake'
SLE_rows = stationary_outlines_gdf[is_SLE]
SLE_original_name = SLE_rows['name'].iloc[0]
SLE_original_cite = SLE_rows['cite'].iloc[0]

# Remove older SLE delineation from stationary_outlines_gdf,
# printing the number of matching rows before and after as a check
print(len(SLE_rows))
stationary_outlines_gdf = stationary_outlines_gdf.drop(SLE_rows.index)
print(len(stationary_outlines_gdf[stationary_outlines_gdf['name'] == 'EngelhardtSubglacialLake']))

In [None]:
# Add each Freer and others (2024) outline to the stationary lakes inventory
for i, row in Freer2024_outlines.iterrows():
    # Row 0 is the redelineated SLE (first in the concatenation that built
    # Freer2024_outlines): it keeps the original SLE name. Any other row is
    # named as the newly identified upstream lake.
    redelineates_sle = (i == 0)
    name = SLE_original_name if redelineates_sle else 'UpperEngelhardtSubglacialLake'
    geometry = row['geometry']

    # Single-row GeoDataFrame for this outline, reprojected to EPSG:4326
    # because the geodesic area routine works on lon/lat coordinates
    lake_gdf = gpd.GeoDataFrame([[geometry]], columns=['geometry'], geometry='geometry', crs=Freer2024_outlines.crs)
    lake_gdf = lake_gdf.to_crs('EPSG:4326')

    # Geodesic area enclosed by the exterior ring (None if the geometry is
    # missing or invalid); `geod` is defined earlier in the notebook
    poly = lake_gdf['geometry'].iloc[0]
    if poly is not None and poly.is_valid:
        ring_lon, ring_lat = poly.exterior.coords.xy
        area = abs(geod.polygon_area_perimeter(ring_lon, ring_lat)[0])
    else:
        area = None

    # Return the temporary GeoDataFrame to EPSG:3031; its CRS labels the new row
    lake_gdf = lake_gdf.to_crs('EPSG:3031')

    # Citation: the redelineated SLE keeps its original citation with Freer appended
    cite = (
        SLE_original_cite + '; Freer and others, 2024, J. Geophys. Res. Earth Surf., doi:10.1029/2024JF007724'
        if redelineates_sle
        else 'Freer and others, 2024, J. Geophys. Res. Earth Surf., doi:10.1029/2024JF007724'
    )

    # One-row GeoDataFrame for the new entry; values are matched by position to
    # the inventory's columns (assumes the order name, geometry, area, cite)
    gdf = gpd.GeoDataFrame([[name, geometry, area, cite]], columns=stationary_outlines_gdf.columns, geometry=[geometry], crs=lake_gdf.crs)

    # Keep the entry only if its name is not already in the inventory (avoids duplicates)
    gdf_diff = gdf[~gdf['name'].isin(stationary_outlines_gdf['name'])]

    # Append to the inventory, renumbering the index
    stationary_outlines_gdf = gpd.GeoDataFrame(
        pd.concat([stationary_outlines_gdf, gdf_diff], ignore_index=True),
        crs=stationary_outlines_gdf.crs,
    )

# Display the updated stationary_outlines_gdf
stationary_outlines_gdf

### Arthur and others, 2025

In [None]:
# Arthur and others, 2025, Active subglacial lakes along Dronning Maud Land
# https://doi.org/10.5194/tc-19-375-2025
# (study published after the Livingstone and others, 2022 inventory)
#
# Lake outline files:
# Arthur and others, 2024, Active subglacial lakes along Dronning Maud Land
# derived from ICESat-2 and ICESat
# https://doi.org/10.21334/npolar.2024.ab777130

# Load the combined lake outline shapefile into a geodataframe
file_path = 'input/lake_outlines/Arthur2025_lakes/DML_Lakes_Combined.shp'
Arthur2025_outlines = gpd.read_file(file_path)

# Display geodataframe
Arthur2025_outlines

In [None]:
# Keep only the 'Name' and 'geometry' columns, then rename 'Name' to the
# lowercase 'name' used by the stationary lakes inventory
Arthur2025_outlines = Arthur2025_outlines[['Name', 'geometry']].rename(columns={'Name': 'name'})

# Display the updated GeoDataFrame
Arthur2025_outlines

In [None]:
# Plot the Arthur and others (2025) outlines with their names
fig, ax = plt.subplots()

Arthur2025_outlines.boundary.plot(ax=ax)

# Write each lake's name at the centroid of its polygon
for lake_geom, lake_label in zip(Arthur2025_outlines.geometry, Arthur2025_outlines['name']):
    label_point = lake_geom.centroid
    ax.text(label_point.x, label_point.y, lake_label, fontsize=12, ha='center', va='center')

In [None]:
# Add each Arthur and others (2025) outline to the stationary lakes inventory.
#
# The outlines are reprojected to EPSG:3031 first: each new inventory row is
# labelled EPSG:3031, so the stored geometry must actually be in that CRS.
# If the shapefile is already in EPSG:3031 the reprojection changes nothing.
Arthur2025_outlines_3031 = Arthur2025_outlines.to_crs('EPSG:3031')

for index, row in Arthur2025_outlines_3031.iterrows():
    # Extract name and (EPSG:3031) geometry for each row
    name = row['name']
    geometry = row['geometry']

    # Create a temporary GeoDataFrame for the current lake geometry
    lake_gdf = gpd.GeoDataFrame([[geometry]], columns=['geometry'], geometry='geometry', crs=Arthur2025_outlines_3031.crs)

    # Convert CRS to 4326 because the geodesic area routine works on lon/lat
    lake_gdf = lake_gdf.to_crs('EPSG:4326')

    # Geodesic area enclosed by the exterior ring (None if the geometry is
    # missing or invalid); `geod` is defined earlier in the notebook
    area = lake_gdf['geometry'].apply(
        lambda poly: abs(geod.polygon_area_perimeter(
            poly.exterior.coords.xy[0], poly.exterior.coords.xy[1])[0]) if poly is not None and poly.is_valid else None
    )[0]

    # Convert CRS back to 3031; this CRS labels the new inventory row
    lake_gdf = lake_gdf.to_crs('EPSG:3031')

    # Set citation information
    cite = 'Arthur and others, 2025, Cryosphere, doi:10.5194/tc-19-375-2025'

    # One-row GeoDataFrame for the new entry; values are matched by position to
    # the inventory's columns (assumes the order name, geometry, area, cite)
    gdf = gpd.GeoDataFrame([[name, geometry, area, cite]], columns=stationary_outlines_gdf.columns, geometry=[geometry], crs=lake_gdf.crs)

    # Keep the entry only if its name is not already in the inventory (avoids duplicates)
    gdf_diff = gdf[~gdf['name'].isin(stationary_outlines_gdf['name'])]

    # Concatenate the new entry to the stationary_outlines_gdf, renumbering the index
    stationary_outlines_gdf = gpd.GeoDataFrame(pd.concat([stationary_outlines_gdf, gdf_diff], ignore_index=True), crs=stationary_outlines_gdf.crs)

# Display the updated stationary_outlines_gdf
stationary_outlines_gdf

# Finalize Sauthoff and others (2025) inventory of re-examined lakes

In [None]:
# Collect the inventory rows whose geometry fails the validity check
invalid_mask = ~stationary_outlines_gdf.is_valid
invalid_rows = stationary_outlines_gdf[invalid_mask]

# Print the invalid rows (an empty frame means every geometry is valid)
print(invalid_rows)

In [None]:
# Repair invalid geometries by replacing each with its zero-width buffer
needs_repair = ~stationary_outlines_gdf.is_valid
stationary_outlines_gdf.loc[needs_repair, 'geometry'] = stationary_outlines_gdf[needs_repair].geometry.buffer(0)

# Re-run the validity check; an empty frame means all geometries are now valid
still_invalid = stationary_outlines_gdf[~stationary_outlines_gdf.is_valid]
print(still_invalid)

In [None]:
# Determine if there are any duplicates
def find_overlapping_geometries(gdf):
    """
    Find pairs of intersecting geometries in a GeoDataFrame.

    Candidate pairs are narrowed with the spatial index (bounding-box test)
    and then confirmed with ``intersects``, so geometries that merely touch
    are reported as well as those that share area.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Input GeoDataFrame with an active geometry column. Index labels are
        compared with ``>=`` to report each pair once, so they must be
        mutually comparable.

    Returns
    -------
    list of tuples
        Each tuple is (index1, index2), a pair of index labels with
        index1 < index2 whose geometries intersect. Rows with a missing or
        empty geometry are never reported.
    """
    overlaps = []
    sindex = gdf.sindex  # spatial index for speed
    geometries = gdf.geometry
    labels = gdf.index

    for idx, geom in geometries.items():
        # A missing or empty geometry cannot intersect anything and has no
        # usable bounding box to query the spatial index with
        if geom is None or geom.is_empty:
            continue

        # Candidate matches by bounding box (integer positions into gdf)
        for pos in sindex.intersection(geom.bounds):
            jdx = labels[pos]
            if idx >= jdx:  # avoid self & duplicate pairs
                continue
            if geom.intersects(geometries.iloc[pos]):
                overlaps.append((idx, jdx))

    return overlaps

# Run the overlap check on the inventory and report every flagged pair
overlaps = find_overlapping_geometries(stationary_outlines_gdf)

if not overlaps:
    print("No overlaps found.")
else:
    print("Overlapping geometry pairs found:")
    for first_row, second_row in overlaps:
        print(f"Row {first_row} overlaps with Row {second_row}")


In [None]:
# View the names of the lakes determined to have overlap.
# NOTE(review): the row positions are hard-coded, presumably copied from the
# overlap report printed by the previous cell; update them if that report changes.
for first_pos, second_pos in ((0, 90), (1, 91)):
    print(
        stationary_outlines_gdf.iloc[first_pos]['name'],
        'overlaps with',
        stationary_outlines_gdf.iloc[second_pos]['name'],
    )

In [None]:
# Print the citation of each lake determined to have overlap, pair by pair
# (same hard-coded row positions as the name printout)
for row_pos in (0, 90, 1, 91):
    print(stationary_outlines_gdf.iloc[row_pos]['cite'])

In [None]:
# View S09_outlines rows whose 'Name' contains 'Mac' (case-insensitive; missing names excluded)
S09_outlines.loc[S09_outlines['Name'].str.contains('Mac', case=False, na=False)]

In [None]:
# Select the four rows flagged as overlapping (hard-coded row positions)
rows_to_plot = stationary_outlines_gdf.iloc[[0, 90, 1, 91]]

# All six Bindschadler outlines from S09_outlines
bindschadler_names = ['Bindschadler_1', 'Bindschadler_2', 'Bindschadler_3',
                      'Bindschadler_4', 'Bindschadler_5', 'Bindschadler_6']
bindschadlers = S09_outlines[S09_outlines['Name'].isin(bindschadler_names)]

# Individual outlines from S09_outlines
bindschadler1 = S09_outlines[S09_outlines['Name'] == 'Bindschadler_1']
bindschadler2 = S09_outlines[S09_outlines['Name'] == 'Bindschadler_2']
bindschadler5 = S09_outlines[S09_outlines['Name'] == 'Bindschadler_5']

# Individual outlines from SF18_outlines
Bindschadler_1 = SF18_outlines[SF18_outlines['name'] == 'Bindschadler_1']
Bindschadler_2 = SF18_outlines[SF18_outlines['name'] == 'Bindschadler_2']
Mac7 = SF18_outlines[SF18_outlines['name'] == 'Mac7']
Mac8 = SF18_outlines[SF18_outlines['name'] == 'Mac8']

# Create a figure
fig, ax = plt.subplots()

# Background: every S09 Bindschadler outline as a solid blue fill
bindschadlers.plot(ax=ax, facecolor='blue', edgecolor='none', linewidth=2)

# S09 outlines drawn as unfilled polygons, one edge color each
for s09_outline, edge_color in (
    (bindschadler1, 'red'),
    (bindschadler2, 'green'),
    (bindschadler5, 'purple'),
):
    s09_outline.plot(ax=ax, facecolor='none', edgecolor=edge_color, lw=2)

# SF18 Bindschadler outlines drawn as solid fills in the matching color
for sf18_outline, fill_color in ((Bindschadler_1, 'red'), (Bindschadler_2, 'green')):
    sf18_outline.plot(ax=ax, facecolor=fill_color, edgecolor='none', lw=2, ls='--')

# SF18 Mac7/Mac8 outlines drawn as dashed, unfilled blue polygons
for sf18_outline in (Mac7, Mac8):
    sf18_outline.plot(ax=ax, facecolor='none', edgecolor='blue', lw=2, ls='--')

ax.set_title('Duplicated Stationary Outlines')
plt.show()

Two lakes (Bindschadler_1 and Bindschadler_2) were renamed to Mac7 and Mac8 in [Carter and others (2011)](https://doi.org/10.3189/002214311798843421), likely under the assumption that these two lakes are within the subglacial watershed of MacAyeal Ice Stream instead of Bindschadler Ice Stream; however, the renaming was not documented in that paper, and so the Mac7 and Mac8 outlines duplicate the Bindschadler_1 and _2 outlines in the SF18 inventory.

Watershed delineation indicates that one of these lakes is likely in the MacAyeal subglacial watershed; however, we feel this renaming is unnecessary and confusing, so we opt to use the original names for these two lakes (Bindschadler_1 and _2).

In [None]:
# Drop the Mac7 and Mac8 outlines from stationary_outlines_gdf: they duplicate
# Bindschadler_1 and _2, which are kept under their original names.
# The inventory length is printed before and after as a sanity check.
print(len(stationary_outlines_gdf))
is_renamed_duplicate = stationary_outlines_gdf['name'].isin(['Mac7', 'Mac8'])
stationary_outlines_gdf = stationary_outlines_gdf[~is_renamed_duplicate]
print(len(stationary_outlines_gdf))

[Carter and others (2013)](https://doi.org/10.3189/2013JoG13J085) defined Lake7 and Lake8 (originally from [Fricker and Scambos (2009)](https://doi.org/10.3189/002214309788608813)) as a single lake system, Lake78.

In [None]:
# Check that the citation for this lake reflects that: list the SF18_outlines
# rows whose 'cite' mentions Carter and others, 2013 (case-insensitive)
cites_carter2013 = SF18_outlines['cite'].str.contains('Carter and others, 2013', case=False, na=False)
SF18_outlines[cites_carter2013]

## Double check area calculations

In [None]:
# Cross-check every stored area against a freshly calculated one and collect
# the lakes whose relative difference exceeds the tolerance
discrepancies = []

# Convert GeoDataFrame to EPSG:4326 for geodesic area calculation
# (the inventory stays in EPSG:4326 after this cell)
stationary_outlines_gdf = stationary_outlines_gdf.to_crs('EPSG:4326')

# Relative tolerance above which a stored area is flagged
tolerance = 1e-5

for idx, row in stationary_outlines_gdf.iterrows():
    # calculate_area is defined earlier in the notebook
    calculated_area = calculate_area(row.geometry, stationary_outlines_gdf.crs)
    stored_area = row['area (m^2)']

    # Skip if either value is missing. pandas stores missing numbers as NaN,
    # which an `is None` test would not catch, so use pd.isna for both.
    if pd.isna(calculated_area) or pd.isna(stored_area):
        continue

    # Compare areas with relative tolerance. A stored area of zero has no
    # meaningful relative difference, so flag it unless the calculated area
    # is zero too; abs() keeps a negative stored area from hiding a mismatch.
    if stored_area == 0:
        relative_diff = 0.0 if calculated_area == 0 else float('inf')
    else:
        relative_diff = abs(calculated_area - stored_area) / abs(stored_area)

    if relative_diff > tolerance:
        discrepancies.append({
            'name': row['name'],
            'stored_area_m2': stored_area,
            'calculated_area_m2': calculated_area,
            'relative_difference': relative_diff,
            'difference_percentage': relative_diff * 100
        })

# Create DataFrame from discrepancies
discrepancies_df = pd.DataFrame(discrepancies)

# Display the DataFrame with better formatting
if not discrepancies_df.empty:
    # Round numerical columns for better readability
    numeric_cols = ['stored_area_m2', 'calculated_area_m2',
                    'relative_difference', 'difference_percentage']
    discrepancies_df[numeric_cols] = discrepancies_df[numeric_cols].round(2)

    print(f"Found {len(discrepancies_df)} discrepancies:")

    # Suppress scientific notation only while printing; the context manager
    # restores the previous display option even if printing raises
    with pd.option_context('display.float_format', '{:.4f}'.format):
        print(discrepancies_df.to_string(index=False))
else:
    print("No discrepancies found above the tolerance threshold.")

The discrepancy at Lake78 appears to be a unit conversion issue (m^2 vs. km^2) because the relative difference is nearly 1e6. We will replace the stored value with a new calculation of geodesic area.

Discrepancies at Mac3, Mac4, and Rec4 are small (around 1% or less), but still large enough to replace with a new calculation of geodesic area.

Discrepancies at L1 and other lakes arise because we used a planar area to construct a perfectly circular outline around the reported point location of the lake, sized to a reported or estimated area, so that we had a stationary outline to use as a target for the evolving outlines algorithm. These outlines do not need to be altered, as they were mere estimates used only in the interim as a target for finding evolving outlines.

In [None]:
# Lakes whose stored area is replaced with a fresh calculation
lakes_to_update = ['Lake78', 'Mac3', 'Mac4', 'Rec4']

# The area calculation is run with the inventory in EPSG:4326; reproject if needed
if stationary_outlines_gdf.crs.to_string().upper() != 'EPSG:4326':
    stationary_outlines_gdf = stationary_outlines_gdf.to_crs('4326')

# Recalculate and overwrite the area of each listed lake
# (calculate_area is defined earlier in the notebook)
needs_update = stationary_outlines_gdf['name'].isin(lakes_to_update)
for idx, row in stationary_outlines_gdf[needs_update].iterrows():
    stationary_outlines_gdf.at[idx, 'area (m^2)'] = calculate_area(row.geometry, stationary_outlines_gdf.crs)

# Reproject to Antarctic polar stereographic projection
stationary_outlines_gdf = stationary_outlines_gdf.to_crs('3031')

## Determine CryoSat-2 SARIn coverage

In [None]:
# Import the CryoSat-2 SARIn mode masks
# (see 0_preprocess_data.ipynb for data source and pre-processing steps)
gdf_SARIn_3_1, gdf_SARIn_3_1_3_6_diff = (
    gpd.read_file(mask_path)
    for mask_path in (
        'output/CryoSat2_SARIn_mode_masks/gdf_SARIn_3_1.geojson',
        'output/CryoSat2_SARIn_mode_masks/gdf_SARIn_3_1_3_6_diff.geojson',
    )
)

In [None]:
# Plot the stationary outlines (cyan) on top of the CS2 SARIn mode masks (red)
fig, ax = plt.subplots(figsize=(10,10))

for mask_gdf, mask_label in (
    (gdf_SARIn_3_1, 'SARIn mode mask 3.1'),
    (gdf_SARIn_3_1_3_6_diff, 'SARIn mode mask 3.6'),
):
    mask_gdf.plot(ax=ax, edgecolor='red', facecolor='red', label=mask_label, alpha=0.5)

stationary_outlines_gdf.plot(ax=ax, edgecolor='cyan', facecolor='cyan')
plt.show()

In [None]:
# Create a column to indicate the time period each lake has CS2 SARIn coverage, if any

# Combine the SARIn mode masks that occur during the CS2-IS2 analysis time period (2010-2023).
# unary_union operates on geometries, so pass the geometries held by each mask
# GeoDataFrame rather than the GeoDataFrames themselves.
mask_geometries = list(gdf_SARIn_3_1.geometry) + list(gdf_SARIn_3_1_3_6_diff.geometry)
combined_geom = unary_union(mask_geometries)

# Create a new GeoDataFrame holding the combined geometry (EPSG:3031)
gdf_SARIn_3_1_3_6 = gpd.GeoDataFrame(geometry=[combined_geom], crs='EPSG:3031')

# Use the 'within' method to find lakes that have CS2 SARIn coverage during the different SARIn mode mask versions
# See 0_preprocess_data.ipynb for more info
# NOTE(review): only the first row of gdf_SARIn_3_1 is tested here - assumes that mask is a single-row GeoDataFrame
CS2_SARIn_3_1_col = stationary_outlines_gdf.geometry.within(gdf_SARIn_3_1.geometry.iloc[0])
CS2_SARIn_3_1_3_6_col = stationary_outlines_gdf.geometry.within(gdf_SARIn_3_1_3_6.geometry.iloc[0])

# Conditions: inside the 3.1 mask (and hence the combined mask), or inside the combined mask only
condition1 = CS2_SARIn_3_1_col & CS2_SARIn_3_1_3_6_col
condition2 = ~CS2_SARIn_3_1_col & CS2_SARIn_3_1_3_6_col

# Value assigned for each condition, in the same order
# (presumably the decimal year SARIn coverage begins - confirm against 0_preprocess_data.ipynb)
choices = ['2010.5', '2013.75']

# Use np.select to apply conditions and choices; lakes meeting neither condition get pd.NA
stationary_outlines_gdf['CS2_SARIn_start'] = np.select([condition1, condition2], choices, default=pd.NA)

## TODO

In [None]:
# View lakes added to Siegfried & Fricker, 2018 inventory to create revised inventory

# Keep an independent copy of everything after the first 132 rows, which are
# taken to be the SF18_outlines entries (indexes 0:130) plus Recovery_8
# (idx 131), originally from S09_outlines.
# NOTE(review): the cutoff is positional and hard-coded; rows dropped from the
# inventory earlier in the notebook shift these positions, so confirm that the
# first 132 rows still match that description.
stationary_outlines_notinSF18_gdf = stationary_outlines_gdf.iloc[132:].copy(deep=True)

# View dataframe
stationary_outlines_notinSF18_gdf

In [None]:
# Order the inventory alphabetically by lake name and renumber the index 0..n-1
stationary_outlines_gdf = (
    stationary_outlines_gdf
    .sort_values(by='name')
    .reset_index(drop=True)
)

In [None]:
# Display the inventory geodataframe as it will be exported
# (sorted by name with a fresh index, per the preceding cell)
stationary_outlines_gdf

## Export

In [None]:
# Export the finalized stationary outlines inventory as GeoJSON
stationary_outlines_gdf.to_file(
    'output/lake_outlines/stationary_outlines/stationary_outlines_gdf.geojson',
    driver='GeoJSON',
)