This notebook generates the statistics used in the article text, the data in Table S2, and Figs. 2, 3, and S2–S5.

Written 2023-11-11 by W. Sauthoff (wsauthoff.github.io)

# Set up computing environment

In [None]:
# Import libraries
import datetime
import fiona
import functools
import geopandas as gpd
import glob
import holoviews as hv
hv.extension('bokeh')
import hvplot.pandas
from IPython.display import clear_output
import math
from math import radians
import matplotlib
from matplotlib.cm import ScalarMappable
from matplotlib.collections import LineCollection
import matplotlib.colors as mcolors
from matplotlib.colors import Normalize
from matplotlib.colors import ListedColormap
import matplotlib.dates as mdates
from matplotlib.gridspec import GridSpec
from matplotlib.legend_handler import HandlerPatch, HandlerTuple
from matplotlib.lines import Line2D
import matplotlib.patches as mpatches
from matplotlib.patches import Rectangle
import matplotlib.patheffects as PathEffects
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np
import os
import pandas as pd
from pathlib import Path
from pyproj import CRS, Geod, Transformer
import rioxarray
from scipy import odr
from scipy.stats import percentileofscore
from shapely.geometry import MultiPolygon, Point, Polygon
from shapely.ops import transform, unary_union
import xarray as xr

# Magic functions
%matplotlib widget

# Pick data and output locations based on the machine running the notebook.
# Swap in your own directory paths here.
# NOTE: DATA_DIR and OUTPUT_DIR are only assigned when HOME matches the value below.
if os.environ.get('HOME') == '/home/jovyan':
    DATA_DIR = '/home/jovyan/data'
    OUTPUT_DIR = '/home/jovyan/1_evolving_lakes/output/Figs23_S23_lake_reexamination_results'
    os.makedirs(OUTPUT_DIR, exist_ok=True)

# Coordinate reference systems and transforms for the geodesic area calculation
CRS_LL = "EPSG:4326"  # WGS84 geographic coordinates (lon, lat)
CRS_XY = "EPSG:3031"  # Antarctic Polar Stereographic (x, y)
GEOD = CRS(CRS_LL).get_geod()  # geod object derived from the ellipsoid of CRS_LL
XY_TO_LL = Transformer.from_crs(CRS_XY, CRS_LL, always_xy=True)  # x, y -> lon, lat transformer
geod = Geod(ellps="WGS84")  # Geod object for area calculations on the WGS84 ellipsoid

# Define utility functions
def ll2ps(lon, lat):
    """
    Convert geodetic coordinates (lon, lat; EPSG:4326) to
    Antarctic Polar Stereographic coordinates (x, y; EPSG:3031).

    Inputs are passed to pyproj in (lon, lat) order because the
    transformer is built with always_xy=True.

    Usage:
        x, y = ll2ps(lon, lat)
    """
    source_crs, target_crs = CRS("EPSG:4326"), CRS("EPSG:3031")
    to_polar_stereo = Transformer.from_crs(source_crs, target_crs, always_xy=True)
    return to_polar_stereo.transform(lon, lat)

# Functions

In [None]:
def quantify_lake_extensions(lakes_gdf, evolving_outlines_union_gdf, area_threshold=0.25):
    """
    Quantify lakes with extensions beyond their original stationary outlines using geodesic area calculations.
    
    Geometries are expected in EPSG:3031; they are transformed to EPSG:4326 and
    their areas are computed on the WGS84 ellipsoid (square meters).
    
    Parameters:
    -----------
    lakes_gdf : GeoDataFrame
        GeoDataFrame containing stationary lake outlines with 'name' column
    evolving_outlines_union_gdf : GeoDataFrame
        GeoDataFrame containing evolving union outlines with 'name' column
    area_threshold : float, default=0.25
        Minimum fraction of area increase to consider as a reportable extension
        (e.g., 0.25 means 25% area increase relative to the stationary outline)
    
    Returns:
    --------
    tuple
        (lakes_with_extensions, extension_results)
        - lakes_with_extensions: count of lakes with reportable extensions
        - extension_results: DataFrame with detailed extension metrics for each lake
          (columns: lake_name, stationary_area, evolving_area, extension_area,
          relative_extension, has_reportable_extension); it has these columns
          even when no lake could be processed
    """
    # Define the geodesic object for Earth calculations
    geod = Geod(ellps="WGS84")
    
    # Coordinate function converting EPSG:3031 (x, y) to EPSG:4326 (lon, lat)
    project = Transformer.from_crs("EPSG:3031", "EPSG:4326", always_xy=True).transform

    def transform_to_4326(geometry):
        '''
        Transform geometry from EPSG:3031 to EPSG:4326 (None if it cannot be transformed)
        '''
        if geometry is None or not geometry.is_valid:
            return None
        
        try:
            return transform(project, geometry)
        except Exception as e:
            print(f"Error transforming geometry: {e}")
            return None

    def ring_area_and_perimeter(ring):
        '''
        Unsigned geodesic area and perimeter of a single linear ring.
        '''
        lons, lats = ring.coords.xy
        # pyproj returns a signed area whose sign depends on the ring's winding
        # direction, so take magnitudes before combining shells and holes
        area, perimeter = geod.polygon_area_perimeter(lons, lats)
        return abs(area), abs(perimeter)

    def polygon_area_and_perimeter(polygon):
        '''
        Geodesic area (exterior minus holes) and exterior perimeter of one polygon.
        '''
        area, perimeter = ring_area_and_perimeter(polygon.exterior)
        for interior in polygon.interiors:
            hole_area, _ = ring_area_and_perimeter(interior)
            area -= hole_area
        return area, perimeter

    def calculate_geodesic_area_and_perimeter(geometry):
        '''
        Calculate geodesic area and perimeter of a polygon or multipolygon.
        First transforms geometry from EPSG:3031 to EPSG:4326, then performs calculations.
        Returns (None, None) for missing, invalid or non-polygonal geometry and
        (0.0, 0.0) for an empty geometry.
        '''
        # Ensure geometry exists and is valid
        if geometry is None or not geometry.is_valid:
            return None, None
        
        # An empty geometry (e.g. an empty difference) covers no area
        if geometry.is_empty:
            return 0.0, 0.0
        
        # Transform geometry to EPSG:4326
        geom_4326 = transform_to_4326(geometry)
        if geom_4326 is None:
            return None, None
            
        if isinstance(geom_4326, Polygon):
            return polygon_area_and_perimeter(geom_4326)
            
        if isinstance(geom_4326, MultiPolygon):
            # Combine area and perimeter over all parts
            total_area = 0
            total_perimeter = 0
            for part in geom_4326.geoms:
                part_area, part_perimeter = polygon_area_and_perimeter(part)
                total_area += part_area
                total_perimeter += part_perimeter
            return total_area, total_perimeter
        
        return None, None
            
    results = []
    
    # Process each lake
    for lake_name in evolving_outlines_union_gdf['name'].unique():
        try:
            # Get stationary outline
            stationary_matches = lakes_gdf[lakes_gdf['name'] == lake_name]['geometry']
            if stationary_matches.empty:
                print(f"Warning: No stationary outline found for {lake_name}, skipping.")
                continue
            stationary = stationary_matches.iloc[0]
            
            # Get evolving union outline
            evolving = evolving_outlines_union_gdf[
                evolving_outlines_union_gdf['name'] == lake_name]['geometry'].iloc[0]
            
            # Skip if either geometry is missing
            if stationary is None or evolving is None:
                continue
                
            # Calculate geodesic areas
            stationary_area, _ = calculate_geodesic_area_and_perimeter(stationary)
            evolving_area, _ = calculate_geodesic_area_and_perimeter(evolving)
            
            # Extension is the part of the evolving union outside the stationary outline
            # (the same operation applies whether or not evolving contains stationary)
            extension = evolving.difference(stationary)
            
            # Get geodesic area of the extension
            extension_area, _ = calculate_geodesic_area_and_perimeter(extension)
            
            # If any area calculation failed, skip this lake
            if stationary_area is None or evolving_area is None or extension_area is None:
                print(f"Warning: Could not calculate geodesic area for {lake_name}, skipping.")
                continue
            
            # Calculate relative extension (as fraction of original area)
            relative_extension = extension_area / stationary_area if stationary_area > 0 else 0
            
            # Determine if this is a reportable extension
            has_reportable_extension = relative_extension >= area_threshold
            
            results.append({
                'lake_name': lake_name,
                'stationary_area': stationary_area,
                'evolving_area': evolving_area,
                'extension_area': extension_area,
                'relative_extension': relative_extension,
                'has_reportable_extension': has_reportable_extension
            })

            # Clear output
            clear_output(wait=True)
            
        except Exception as e:
            # Best effort: report the failure and carry on with the remaining lakes
            print(f"Error processing {lake_name}: {e}")
            continue
    
    # Create DataFrame with results; explicit columns keep the schema when results is empty
    result_columns = [
        'lake_name',
        'stationary_area',
        'evolving_area',
        'extension_area',
        'relative_extension',
        'has_reportable_extension',
    ]
    extension_df = pd.DataFrame(results, columns=result_columns)
    
    # Count lakes with extensions
    lakes_with_extensions = extension_df['has_reportable_extension'].sum()
    
    return lakes_with_extensions, extension_df

def generate_extension_summary(extension_df, km2=True):
    """
    Generate a summary report about lake extensions.
    
    Parameters:
    -----------
    extension_df : DataFrame
        DataFrame with lake extension results from quantify_lake_extensions
        (uses columns 'lake_name', 'extension_area', 'relative_extension' and
        'has_reportable_extension'). An empty DataFrame yields an all-zero summary.
    km2 : bool, default=True
        If True, displays area in square kilometers, otherwise in square meters
        
    Returns:
    --------
    str
        Summary text with key findings
    """
    total_lakes = len(extension_df)
    
    if total_lakes > 0:
        reportable = extension_df['has_reportable_extension']
        lakes_with_extensions = int(reportable.sum())
        extended_lakes = extension_df[reportable]
        pct_with_extensions = lakes_with_extensions / total_lakes * 100
    else:
        # Nothing was analyzed: avoid column lookups and a division by zero
        lakes_with_extensions = 0
        extended_lakes = extension_df
        pct_with_extensions = 0.0
    
    area_unit = "km²" if km2 else "m²"
    area_divisor = 1_000_000 if km2 else 1  # Convert to km² if requested
    
    if len(extended_lakes) > 0:
        # Average and total extension over lakes that have reportable extensions
        avg_extension = extended_lakes['relative_extension'].mean()
        total_extension_area = extended_lakes['extension_area'].sum()
        
        # Find lake with maximum relative extension
        max_extension_lake = extended_lakes.loc[extended_lakes['relative_extension'].idxmax()]
        max_extension_pct = max_extension_lake['relative_extension'] * 100
        max_lake_name = max_extension_lake['lake_name']
        
        # Find lake with largest absolute extension area
        max_area_lake = extended_lakes.loc[extended_lakes['extension_area'].idxmax()]
        max_area_value = max_area_lake['extension_area'] / area_divisor
        max_area_lake_name = max_area_lake['lake_name']
    else:
        avg_extension = 0
        total_extension_area = 0
        max_extension_pct = 0
        max_lake_name = "None"
        max_area_value = 0
        max_area_lake_name = "None"
    
    # Generate summary
    # relative_extension is already the fraction beyond the original outline,
    # so the average is reported as avg_extension * 100 (same basis as the maximum)
    summary = f"""Lake Extension Analysis Summary:
    ---------------------------------
    We found {lakes_with_extensions} lakes ({pct_with_extensions:.1f}% of {total_lakes} analyzed) 
    with previously unidentified lake extensions beyond their original stationary outlines.
    
    For lakes with extensions:
    - Total extension area: {total_extension_area/area_divisor:.2f} {area_unit}
    - Average extension: {avg_extension*100:.1f}% beyond original outline
    - Largest relative extension: {max_extension_pct:.1f}% beyond original outline (Lake {max_lake_name})
    - Largest absolute extension: {max_area_value:.2f} {area_unit} (Lake {max_area_lake_name})
    """
    
    return summary

# Example usage:
# lakes_with_extensions, extension_results = quantify_lake_extensions(
#     lakes_gdf, 
#     evolving_outlines_union_gdf, 
# )
# summary = generate_extension_summary(extension_results, km2=True)
# print(summary)

In [None]:
def muliple_area_buffer(polygon, area_multiple, precision=100):
    """
    This function takes a polygon and returns a polygon with a buffer such that the area of the buffered polygon
    is approximately the specified multiple the area of the original polygon.

    The buffer distance starts at 0 and grows in steps of `precision` until the buffered
    area first reaches or exceeds the target, so the result can overshoot the target by
    up to one step. `precision` is in the polygon's coordinate units.

    :param polygon: Shapely Polygon object
    :param area_multiple: the multiple of the original polygon area you wish the buffered polygon to be
    :param precision: Step size for the iterative process to find the buffer distance (must be positive
        whenever the polygon has to grow)
    :return: Buffered Polygon
    :raises ValueError: if the polygon must grow to reach the target but precision is not positive

    # Example usage
    # Define a simple square polygon
    square = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
    # Find the buffered polygon with roughly twice the area (small step for a unit square)
    buffered_poly = muliple_area_buffer(square, 2, precision=0.01)
    """
    target_area = area_multiple * polygon.area
    buffer_distance = 0
    buffered_polygon = polygon.buffer(buffer_distance)

    # A non-positive step would never reach the target and loop forever
    if buffered_polygon.area < target_area and precision <= 0:
        raise ValueError("precision must be positive to grow the polygon to the target area")

    # Grow the buffer one step at a time until the target area is reached
    while buffered_polygon.area < target_area:
        buffer_distance += precision
        buffered_polygon = polygon.buffer(buffer_distance)

    return buffered_polygon

In [None]:
# Ensure the folder that receives the lake-group dV figures exists
os.makedirs(f'{OUTPUT_DIR}/lake_group_dV_plots', exist_ok=True)

def plot_lake_groups_dV(lake_groups):
    """
    Create and save one multi-panel figure per group of lakes, showing a spatial
    overview and cumulative volume change (dV) time series.
    
    Each figure has a full-width map panel on top (MOA background, stationary outlines,
    evolving-outline unions and time-colored evolving outlines), followed by one dV panel
    per lake plus a final 'Integrated' panel that sums all lakes, arranged three per row.
    Lakes with missing outline or geometric-calculation files are skipped; a group with
    no usable lakes produces no figure. Figures are written to
    OUTPUT_DIR/lake_group_dV_plots/<group_name>.jpg.
    
    Relies on notebook-level names that must exist before calling:
    stationary_outlines_gdf, evolving_outlines_union_gdf, moa_highres_da,
    cyc_start_datetimes, cyc_end_datetimes, year_interval_formatter,
    get_overall_y_limits and OUTPUT_DIR.
    
    Parameters:
    -----------
    lake_groups : list of tuples
        Each tuple contains (group_name, lake_list) where:
        - group_name: str, name of the lake group for file naming and identification
        - lake_list: list of str, names of lakes to be analyzed together
    """
    
    for group_name, lake_list in lake_groups:
        print(f"\nProcessing lake group: {group_name}")
        
        # Lists to store valid lake data (kept index-aligned with valid_lakes)
        valid_lakes = []
        evolving_outlines_gdfs = []
        evolving_geom_calcs_dfs = []
        stationary_geom_calcs_dfs = []
        evolving_union_geom_calcs_dfs = []
        lake_gdfs = []
        
        # First pass: collect all valid lake data
        for lake_name in lake_list:
            print(f"Checking data for {lake_name}...")
            
            # Get lake data from stationary outlines
            lake_gdf = stationary_outlines_gdf[stationary_outlines_gdf['name'] == lake_name]
            if lake_gdf.empty:
                print(f"Skipping {lake_name}: not found in stationary outlines")
                continue
                
            # Try loading evolving outlines gdf
            try:
                evolving_outlines_gdf = gpd.read_file(os.path.join(
                    'output/lake_outlines/evolving_outlines',
                    f'{lake_name}.geojson'))
            except Exception:
                # Best effort: any read failure is treated as "no usable file"
                print(f"  Skipping {lake_name}: no evolving outlines file")
                continue

            # Attempt to open the geometric calculations CSV files
            try:
                evolving_geom_calcs_df = pd.read_csv(
                    f'output/geometric_calcs/evolving_outlines_geom_calc/forward_fill/{lake_name}.csv')
                evolving_union_geom_calcs_df = pd.read_csv(
                    f'output/geometric_calcs/stationary_outline_geom_calc/evolving_union_at_evolving_lakes/{lake_name}.csv')
                stationary_geom_calcs_df = pd.read_csv(
                    f'output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes/{lake_name}.csv')
            except FileNotFoundError:
                print(f"At least one of the geometric calculations CSV files for {lake_name} not found. Skipping...")
                continue

            # Convert strings to datetime
            for calcs_df in (evolving_geom_calcs_df, evolving_union_geom_calcs_df, stationary_geom_calcs_df):
                calcs_df['mid_pt_datetime'] = pd.to_datetime(calcs_df['mid_pt_datetime'])

            # If we got here, all data is valid
            print(f"Valid data found for {lake_name}")
            valid_lakes.append(lake_name)
            lake_gdfs.append(lake_gdf)
            evolving_outlines_gdfs.append(evolving_outlines_gdf)
            evolving_geom_calcs_dfs.append(evolving_geom_calcs_df)
            stationary_geom_calcs_dfs.append(stationary_geom_calcs_df)
            evolving_union_geom_calcs_dfs.append(evolving_union_geom_calcs_df)
        
        # Skip this group if no valid lakes found
        if not valid_lakes:
            print(f"Skipping group {group_name}: no valid lakes found")
            continue
            
        print(f"\nCreating plots for valid lakes in group {group_name}: {valid_lakes}")

        # Calculate plot layout (including space for combined plot)
        n_lakes = len(valid_lakes)
        n_plots = n_lakes + 1  # Add 1 for the combined plot
        n_rows = (n_plots + 2) // 3  # Integer division rounded up
        
        # Create figure: one overview row plus n_rows rows of dV panels
        fig = plt.figure(figsize=(15, 5*n_rows + 3))
        gs = fig.add_gridspec(n_rows + 1, 3, height_ratios=[1] + [1]*n_rows)
        
        # Main spatial overview panel
        ax_main = fig.add_subplot(gs[0, :])
        
        # Get combined extent for all valid lakes
        x_mins, x_maxs, y_mins, y_maxs = [], [], [], []
        
        for lake_gdf, evolving_outlines_gdf in zip(lake_gdfs, evolving_outlines_gdfs):
            # Find evolving and stationary outlines union for plotting extent
            evolving_stationary_union_gdf = gpd.GeoDataFrame(
                geometry=[lake_gdf.geometry.iloc[0].union(evolving_outlines_gdf.geometry.union_all())],
                crs=lake_gdf.crs)

            # Get extent, padded by 5% of the larger dimension
            x_min, y_min, x_max, y_max = evolving_stationary_union_gdf['geometry'].bounds.iloc[0]
            buffer_dist = max(x_max - x_min, y_max - y_min) * 0.05
            x_mins.append(x_min - buffer_dist)
            x_maxs.append(x_max + buffer_dist)
            y_mins.append(y_min - buffer_dist)
            y_maxs.append(y_max + buffer_dist)
        
        # Set plot extent
        x_min, x_max = min(x_mins), max(x_maxs)
        y_min, y_max = min(y_mins), max(y_maxs)
        
        # Plot MOA background
        mask_x = (moa_highres_da.x >= x_min) & (moa_highres_da.x <= x_max)
        mask_y = (moa_highres_da.y >= y_min) & (moa_highres_da.y <= y_max)
        moa_subset = moa_highres_da.where(mask_x & mask_y, drop=True)
        ax_main.imshow(moa_subset[0,:,:], cmap='gray', clim=[14000, 17000],
                      extent=[x_min, x_max, y_min, y_max])
        
        # Plot stationary outlines
        stationary_color = 'darkturquoise'
        for lake_gdf in lake_gdfs:
            lake_gdf.boundary.plot(ax=ax_main, color=stationary_color, linewidth=2)

        # Plot evolving outlines union
        for lake_gdf in lake_gdfs:
            lake_name = lake_gdf['name'].iloc[0]
            evolving_union_gdf = evolving_outlines_union_gdf[evolving_outlines_union_gdf['name'] == lake_name]
            evolving_union_gdf.boundary.plot(ax=ax_main, color='k', linestyle='dotted', linewidth=2)
        
        # Plot evolving outlines with time-based coloring (continuous colormap)
        outline_cmap = plt.get_cmap('plasma')
        outline_norm = plt.Normalize(mdates.date2num(cyc_start_datetimes[1]), 
                                     mdates.date2num(cyc_start_datetimes[-1]))
        
        for evolving_outlines_gdf in evolving_outlines_gdfs:
            for _, outline_row in evolving_outlines_gdf.iterrows():
                color = outline_cmap(outline_norm(mdates.date2num(pd.to_datetime(outline_row['mid_pt_datetime']))))
                gpd.GeoSeries(outline_row['geometry']).boundary.plot(
                    ax=ax_main, color=color, linewidth=1)

        # Format overview axes (tick labels in km)
        km_scale = 1e3
        ticks_x = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x/km_scale))
        ticks_y = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x/km_scale))
        ax_main.xaxis.set_major_formatter(ticks_x)
        ax_main.yaxis.set_major_formatter(ticks_y)
        ax_main.set_xlabel('x [km]')
        ax_main.set_ylabel('y [km]')

        # Set up discrete colormap used by the colorbar and the dV panels
        min_date = pd.to_datetime(cyc_start_datetimes[1])
        max_date = pd.to_datetime(cyc_start_datetimes[-1])
        n_dates = len(cyc_start_datetimes[1:])
        cmap = plt.get_cmap('plasma', n_dates)
        norm = plt.Normalize(mdates.date2num(min_date), mdates.date2num(max_date))
        m = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
        m.set_array(np.linspace(mdates.date2num(min_date), mdates.date2num(max_date), n_dates))
        
        # Add colorbar
        divider = make_axes_locatable(ax_main)
        cax = divider.append_axes('bottom', size='2.5%', pad=0.5)
        cbar = fig.colorbar(m, cax=cax, orientation='horizontal')

        # Set colorbar ticks
        cbar.ax.xaxis.set_major_formatter(year_interval_formatter(interval=4))
        cbar.ax.xaxis.set_major_locator(mdates.YearLocator())  # Every year
        cbar.ax.xaxis.set_minor_locator(mdates.MonthLocator([1, 4, 7, 10]))  # Quarter year ticks
        cbar.set_label('Year')

        # Get shared y axis limits for all volume plots (applied as returned)
        dv_min, dv_max = get_overall_y_limits(evolving_geom_calcs_dfs, 
                                              stationary_geom_calcs_dfs,
                                              evolving_union_geom_calcs_dfs)
        
        # Create axes for all plots, filling the rows below the overview three at a time
        axes = []
        for idx in range(n_plots):
            grid_row = (idx // 3) + 1
            grid_col = idx % 3
            axes.append(fig.add_subplot(gs[grid_row, grid_col]))
        
        # Plot individual lakes
        for idx, (lake_name, evolving_df, stationary_df, union_df) in enumerate(zip(
                valid_lakes, evolving_geom_calcs_dfs, stationary_geom_calcs_dfs, evolving_union_geom_calcs_dfs)):
            ax = axes[idx]
            ax.axhline(0, color='k', linestyle='--')
            
            dates = mdates.date2num(evolving_df['mid_pt_datetime'])
            
            # Plot stationary outline (m^3 -> km^3, cumulative)
            stationary_cumsum = np.cumsum(np.divide(stationary_df['stationary_outline_dV_corr (m^3)'], 1e9))
            ax.plot(dates, stationary_cumsum, color=stationary_color, label='Stationary', linewidth=2)
            ax.scatter(dates, stationary_cumsum, color=stationary_color, s=5)

            # Plot evolving outlines union
            union_cumsum = np.cumsum(np.divide(union_df['stationary_outline_dV_corr (m^3)'], 1e9))
            ax.plot(dates, union_cumsum, color='k', linestyle='dotted', label='Union', linewidth=2)
            ax.scatter(dates, union_cumsum, color='k', s=5)

            # Plot evolving outlines (multi-colored line)
            evolving_cumsum = np.cumsum(np.divide(evolving_df['evolving_outlines_dV_corr (m^3)'], 1e9))
            points = np.array([dates, evolving_cumsum]).T.reshape(-1, 1, 2)
            segments = np.concatenate([points[:-1], points[1:]], axis=1)
            lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
            lc.set_array(dates)
            lc.set_linewidth(2)
            ax.add_collection(lc)
            ax.scatter(dates, evolving_cumsum, c=dates, cmap=cmap, norm=norm, s=9)

            # Plot bias
            bias = np.cumsum(np.divide(evolving_df['evolving_outlines_dV_corr (m^3)'] - 
                                     stationary_df['stationary_outline_dV_corr (m^3)'], 1e9))
            ax.plot(dates, bias, color='r', label='Bias', linewidth=2)
            ax.scatter(dates, bias, color='r', linewidth=2, s=5)

            # Add legend only to the first plot
            if idx == 0:
                # Proxy artists (never added to the axes) stand in for the plotted styles;
                # the tuple of colored proxies renders as one multi-colored legend handle
                evolving_lines = tuple(
                    plt.Line2D([], [], color=cmap(norm(mdates.date2num(cyc_start_datetimes[i]))), linewidth=2)
                    for i in range(len(dates)))
                stationary_line = plt.Line2D([], [], color=stationary_color, linestyle='solid', linewidth=2)
                evolving_union_line = plt.Line2D([], [], color='k', linestyle='dotted', linewidth=2)
                bias_line = plt.Line2D([], [], color='red', linestyle='solid', linewidth=2)
                ax.legend(
                    [stationary_line,
                     evolving_lines, 
                     evolving_union_line,
                     bias_line],
                    ['stationary outline',
                     'evolving outlines',
                     'updated stationary outline',
                     'bias (evolving − stationary)'],
                    handlelength=3,
                    handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
                    fontsize=12,
                    loc='upper center')

            # Format axes
            ax.xaxis.set_major_formatter(year_interval_formatter())
            ax.xaxis.set_major_locator(mdates.YearLocator())
            ax.xaxis.set_minor_locator(mdates.MonthLocator([1, 4, 7, 10]))
            ax.set_title(lake_name)

            # Set x and y axes limit
            ax.set_xlim(cyc_start_datetimes[0], cyc_end_datetimes[-1])
            ax.set_ylim(dv_min, dv_max)

            # Handle y-axis labels and ticks
            if idx % 3 == 0:  # Leftmost column
                ax.set_ylabel('cumulative dV [km$^3$]')
            else:  # Middle and right columns
                ax.set_yticklabels([])
            
            # Handle x-axis labels: only the bottom panel of each column keeps them.
            # The next panel in the same column is three positions further on.
            is_last_in_column = idx + 3 >= n_plots
            if is_last_in_column:
                ax.set_xlabel('Year')
            else:
                ax.set_xticklabels([])
                ax.set_xlabel('')
        
        # Format last plot (combined data)
        last_ax = axes[-1]
        last_col = (n_plots - 1) % 3

        # Plot combined data
        # Combine all dataframes by summing values for each timestamp
        combined_evolving = pd.concat(evolving_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()
        combined_stationary = pd.concat(stationary_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()
        combined_union = pd.concat(evolving_union_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()
        
        dates = mdates.date2num(combined_evolving['mid_pt_datetime'])
        
        # Plot stationary outline
        stationary_cumsum = np.cumsum(np.divide(combined_stationary['stationary_outline_dV_corr (m^3)'], 1e9))
        last_ax.plot(dates, stationary_cumsum, color=stationary_color, label='Stationary', linewidth=2)
        last_ax.scatter(dates, stationary_cumsum, color=stationary_color, s=5)

        # Plot evolving outlines union
        union_cumsum = np.cumsum(np.divide(combined_union['stationary_outline_dV_corr (m^3)'], 1e9))
        last_ax.plot(dates, union_cumsum, color='k', linestyle='dotted', label='Union', linewidth=2)
        last_ax.scatter(dates, union_cumsum, color='k', s=5)

        # Plot evolving outlines (multi-colored line)
        evolving_cumsum = np.cumsum(np.divide(combined_evolving['evolving_outlines_dV_corr (m^3)'], 1e9))
        points = np.array([dates, evolving_cumsum]).T.reshape(-1, 1, 2)
        segments = np.concatenate([points[:-1], points[1:]], axis=1)
        lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
        lc.set_array(dates)
        lc.set_linewidth(2)
        last_ax.add_collection(lc)
        last_ax.scatter(dates, evolving_cumsum, c=dates, cmap=cmap, norm=norm, s=9)

        # Plot bias
        bias_cumsum = np.cumsum(np.divide(
            combined_evolving['evolving_outlines_dV_corr (m^3)'] - 
            combined_stationary['stationary_outline_dV_corr (m^3)'], 1e9))
        last_ax.plot(dates, bias_cumsum, color='r', label='Bias', linewidth=2)
        last_ax.scatter(dates, bias_cumsum, color='r', s=5)

        # Set axes limits for combined plot
        last_ax.set_xlim(cyc_start_datetimes[0], cyc_end_datetimes[-1])
        last_ax.set_ylim(dv_min, dv_max)
        last_ax.axhline(0, color='k', linestyle='--')
        
        # Set y-axis formatting for combined plot
        if last_col == 0:  # Leftmost column
            last_ax.set_ylabel('Cumulative dV [km$^3$]')
        else:
            last_ax.set_yticklabels([])
        
        # Always show x-axis labels for the combined plot as it's the last one
        last_ax.set_xlabel('Year')
        last_ax.xaxis.set_major_formatter(year_interval_formatter())
        last_ax.xaxis.set_major_locator(mdates.YearLocator())
        last_ax.xaxis.set_minor_locator(mdates.MonthLocator([1, 4, 7, 10]))
        last_ax.set_title('Integrated')

        # Save the figure using the group name; act on this figure explicitly
        # rather than on whichever figure pyplot currently considers active
        sanitized_group_name = group_name.replace(' ', '_').replace('/', '_')
        fig.savefig(f'{OUTPUT_DIR}/lake_group_dV_plots/{sanitized_group_name}.jpg', 
                    dpi=300, bbox_inches='tight')
        plt.close(fig)

        # Clear output
        clear_output(wait=True)

def _cumulative_dV_series_km3(evolving_df, stationary_df, union_df):
    """
    Build the four cumulative dV series (converted from m^3 to km^3) for one set of dataframes.

    Returns:
    --------
    list of numpy.ndarray
        Cumulative sums for the stationary outline, evolving outlines, evolving union,
        and bias (evolving minus stationary), in that order
    """
    evolving_dV = evolving_df['evolving_outlines_dV_corr (m^3)']
    stationary_dV = stationary_df['stationary_outline_dV_corr (m^3)']
    # The evolving-union dataframes are read through the stationary-outline column name
    union_dV = union_df['stationary_outline_dV_corr (m^3)']

    series = (stationary_dV, evolving_dV, union_dV, evolving_dV - stationary_dV)
    return [np.asarray(np.cumsum(np.divide(s, 1e9)), dtype=float) for s in series]


def get_overall_y_limits(evolving_geom_calcs_dfs, stationary_geom_calcs_dfs, evolving_union_geom_calcs_dfs):
    """
    Calculate overall y-axis limits for all lake volume plots based on three types of geometric calculations.

    The limits span every per-lake cumulative dV series as well as the series obtained by
    summing all lakes per 'mid_pt_datetime'. NaN entries are ignored when locating the
    extremes; Python's builtin min()/max() give order-dependent results when NaN is present.

    Parameters:
    -----------
    evolving_geom_calcs_dfs : list of pandas.DataFrame
        List of dataframes containing evolving outline calculations
    stationary_geom_calcs_dfs : list of pandas.DataFrame
        List of dataframes containing stationary outline calculations
    evolving_union_geom_calcs_dfs : list of pandas.DataFrame
        List of dataframes containing evolving union calculations

    Returns:
    --------
    tuple : (y_min, y_max)
        The minimum and maximum y-axis values with a 5% buffer.
        (nan, nan) is returned when every value is NaN.

    Raises:
    -------
    ValueError
        If there are no values at all to derive limits from (e.g., empty input lists)
    """
    all_series = []

    # Process each lake's data
    for evolving_df, stationary_df, union_df in zip(evolving_geom_calcs_dfs,
                                                   stationary_geom_calcs_dfs,
                                                   evolving_union_geom_calcs_dfs):
        all_series.extend(_cumulative_dV_series_km3(evolving_df, stationary_df, union_df))

    # Also include the combined plot values if there are any lakes
    if evolving_geom_calcs_dfs:
        # Combine all dataframes by summing values for each timestamp
        combined_evolving = pd.concat(evolving_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()
        combined_stationary = pd.concat(stationary_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()
        combined_union = pd.concat(evolving_union_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()

        all_series.extend(_cumulative_dV_series_km3(combined_evolving, combined_stationary, combined_union))

    all_y_values = np.concatenate(all_series) if all_series else np.array([], dtype=float)
    if all_y_values.size == 0:
        raise ValueError('No dV values available to compute y-axis limits')

    # Drop NaN so that missing time steps cannot mask the true extremes
    valid_y_values = all_y_values[~np.isnan(all_y_values)]
    if valid_y_values.size == 0:
        return float('nan'), float('nan')

    # Calculate limits with a small buffer (5% of range)
    y_min = valid_y_values.min()
    y_max = valid_y_values.max()
    y_range = y_max - y_min
    buffer = y_range * 0.05

    return y_min - buffer, y_max + buffer

def year_interval_formatter(interval=2, start_year=2012):
    '''
    Build a tick-formatter callable that prints the year only at regular intervals

    Parameters:
    -----------
    interval : int, default=2
        Spacing between labeled years (e.g., 2 labels every second year)
    start_year : int or None, default=2012
        Anchor year for the labeling. A year is labeled when it is not earlier than
        start_year and (year - start_year) is a multiple of interval.
        Pass start_year=None to instead label every year divisible by interval,
        including years before 2012.

    Returns:
    --------
    function : callable taking (x, pos), where x is a matplotlib date number,
        and returning the 4-digit year string or '' for unlabeled ticks

    Examples:
    ---------
    # Defaults: 2012, 2014, 2016, ... (nothing before 2012 is labeled)
    formatter = year_interval_formatter()

    # Every 4 years anchored at 2012: 2012, 2016, 2020, 2024
    formatter = year_interval_formatter(interval=4, start_year=2012)

    # Every year divisible by 4, with no lower bound: ..., 2008, 2012, 2016, ...
    formatter = year_interval_formatter(interval=4, start_year=None)
    '''
    def _label_for_tick(x, pos):
        tick_date = mdates.num2date(x)
        tick_year = tick_date.year

        if start_year is None:
            # Modulo mode
            show_label = tick_year % interval == 0
        else:
            # Anchored mode
            show_label = (tick_year - start_year) % interval == 0 and tick_year >= start_year

        return tick_date.strftime('%Y') if show_label else ''

    return _label_for_tick

In [None]:
def filter_gdf_by_folder_contents(gdf, folder_path, exclude=True, prefix=None, suffix=None, suffix_pattern=None, file_extension=None):
    '''
    Filter the GeoDataFrame based on processed lake names from the folder contents.

    Each entry of folder_path is reduced to a lake name by dropping its file extension,
    the optional prefix, and the optional suffix (or regex suffix_pattern), then lowercasing
    and stripping whitespace. The gdf 'name' column is normalized the same way (lowercase,
    stripped) before comparison.

    Args:
    gdf: GeoDataFrame to be filtered; must have a 'name' column.
    folder_path: Path to the directory containing files and/or subdirectories.
    exclude: If True, excludes gdf rows where the 'name' is in the folder_path directories or files.
             If False, includes only gdf rows where the 'name' is in the folder_path directories or files.
    prefix: Optional string to remove from the beginning of filenames.
    suffix: Optional string to remove from the end of filenames (ignored when suffix_pattern is given).
    suffix_pattern: Optional regex pattern to remove from the end of filenames.
    file_extension: Optional string specifying the file extension to filter (e.g., 'png', 'txt').

    Returns:
    GeoDataFrame filtered based on the presence of 'name' in folder_path, with a fresh index.
    If gdf is None or empty it is returned unchanged.

    # Example usage:
    remaining_lakes = filter_gdf_by_folder_contents(
        stationary_outlines_gdf, 
        folder_path,
        # prefix='plot_evolving_outlines_time_series_', 
        suffix_pattern=r'\d+\.\d+m-level_\d+x-with',
        file_extension='txt'
    )
    '''
    # Imported locally because `re` is not among the notebook's top-level imports
    import re

    # Nothing to filter: hand back the input (None or the empty frame) as-is
    if gdf is None or gdf.empty:
        return gdf

    # Anchor the pattern at the end of the name and compile it once for all files
    suffix_regex = re.compile(suffix_pattern + '$') if suffix_pattern else None

    def process_name(name):
        '''Helper function to remove extension, prefix and suffix from a name'''
        # First strip the file extension if it exists
        processed_name = os.path.splitext(name)[0]

        if prefix and processed_name.startswith(prefix):
            processed_name = processed_name[len(prefix):]

        if suffix_regex is not None:
            processed_name = suffix_regex.sub('', processed_name)
        elif suffix and processed_name.endswith(suffix):
            processed_name = processed_name[:-len(suffix)]

        return processed_name.lower().strip()

    # Get all files and filter by extension if specified
    all_files = os.listdir(folder_path)
    if file_extension:
        clean_extension = file_extension.lstrip('.')
        all_files = [f for f in all_files if f.lower().endswith(f'.{clean_extension.lower()}')]

    # Process filenames to get lake names
    names_in_folder = {process_name(name) for name in all_files}

    # Vectorized membership test on the normalized names (missing names never match)
    in_folder = gdf['name'].str.lower().str.strip().isin(names_in_folder)
    gdf_filtered = gdf[~in_folder] if exclude else gdf[in_folder]

    return gdf_filtered.reset_index(drop=True)

In [None]:
def count_files_by_suffix(directory_path, suffix=".csv"):
    """
    Count the directory entries whose names end with a given suffix.

    Entries are matched by name only, so a subdirectory whose name ends with
    the suffix is counted as well.

    Args:
        directory_path (str): Directory to inspect (not searched recursively).
        suffix (str): Name ending to match (e.g., ".csv", ".txt"). Defaults to ".csv".

    Returns:
        int: Number of matching entries; 0 (after printing an error message)
        when directory_path is not an existing directory.
    """
    if os.path.isdir(directory_path):
        matching_entries = [entry for entry in os.listdir(directory_path) if entry.endswith(suffix)]
        return len(matching_entries)

    print(f"Error: Directory '{directory_path}' does not exist.")
    return 0

In [None]:
def find_csvs_with_values_in_range(directory, col="evolving_outlines_area (m^2)",
                                   date_col="mid_pt_datetime",
                                   start="2019-01-01 00:00:00",
                                   end="2021-07-02 09:00:00"):
    """
    Returns a DataFrame of CSV files in a directory that contain at least one row
    where `col` is not null and `date_col` is within the given date range
    (both endpoints inclusive).
    The DataFrame has one column: 'name' (the filename without extension),
    sorted by filename so the result is reproducible.

    Files that cannot be read are reported with a printed message and skipped;
    files lacking `col` or `date_col` are skipped. Date values that cannot be
    parsed are treated as missing rather than aborting the scan.
    """
    start = pd.to_datetime(start)
    end = pd.to_datetime(end)

    matching_files = []

    # os.listdir returns entries in arbitrary order; sort for a deterministic result
    for fname in sorted(os.listdir(directory)):
        if not fname.endswith(".csv"):
            continue

        fpath = os.path.join(directory, fname)
        try:
            df = pd.read_csv(fpath, parse_dates=[date_col])
        except Exception as e:
            # Best-effort scan: report the unreadable file and move on
            print(f"Skipping {fname}, error: {e}")
            continue

        if col not in df.columns or date_col not in df.columns:
            continue

        # read_csv leaves the column as plain strings when a value cannot be parsed;
        # coerce those to NaT so the range comparison below cannot raise
        dates = pd.to_datetime(df[date_col], errors="coerce")
        mask = dates.between(start, end) & df[col].notna()
        if mask.any():
            matching_files.append(os.path.splitext(fname)[0])  # remove .csv

    return pd.DataFrame(matching_files, columns=["name"])

# Import datasets

In [None]:
# Import subglacial lake outlines (GeoJSON files read into GeoDataFrames):
#   stationary_outlines_gdf            - stationary outline inventory
#   reexamined_stationary_outlines_gdf - revised (re-examined) stationary outline inventory
#   evolving_outlines_union_gdf        - evolving-outline union per lake, stored alongside the stationary outlines
stationary_outlines_gdf = gpd.read_file('output/lake_outlines/stationary_outlines/stationary_outlines_gdf.geojson')
reexamined_stationary_outlines_gdf = gpd.read_file('output/lake_outlines/stationary_outlines/reexamined_stationary_outlines_gdf.geojson')
evolving_outlines_union_gdf = gpd.read_file('output/lake_outlines/stationary_outlines/evolving_outlines_union_gdf.geojson')

In [None]:
# Display the stationary-outline inventory row(s) whose name is exactly 'L1'
stationary_outlines_gdf[stationary_outlines_gdf['name'] == 'L1']

In [None]:
# Display lakes whose 'cite' entry mentions 'Arthur'. na=False makes rows with a missing
# citation evaluate to False; otherwise the mask contains NaN and boolean indexing raises.
stationary_outlines_gdf[stationary_outlines_gdf['cite'].str.contains('Arthur', na=False)]

In [None]:
# Import CryoSat-2 SARIn mode masks
# See 0_preprocess_data.ipynb for data source and pre-processing steps
# gdf_SARIn_3_1 is read from the '3_1' mask file; gdf_SARIn_3_1_3_6_diff from the '3_1_3_6_diff' file
# (presumably the area that differs between mask versions 3.1 and 3.6 - confirm in the pre-processing notebook)
gdf_SARIn_3_1 = gpd.read_file('output/CryoSat2_SARIn_mode_masks/gdf_SARIn_3_1.geojson')
gdf_SARIn_3_1_3_6_diff= gpd.read_file('output/CryoSat2_SARIn_mode_masks/gdf_SARIn_3_1_3_6_diff.geojson')

In [None]:
# Import cyc_dates (cycle start/end timestamps, parsed as datetimes on read)
cyc_dates = pd.read_csv('output/cycle_dates.csv', parse_dates=['cyc_start_datetimes', 'cyc_end_datetimes'])

# Store each cyc_dates column as a Python list of np.datetime64 scalars
# (these are lists, not numpy arrays; indexing such as [0] and [-1] works the same)
cyc_start_datetimes = [np.datetime64(ts) for ts in cyc_dates['cyc_start_datetimes']]
cyc_end_datetimes = [np.datetime64(ts) for ts in cyc_dates['cyc_end_datetimes']]

In [None]:
# Import MODIS Mosaic of Antarctica (MOA) surface imagery
# https://nsidc.org/data/nsidc-0730/versions/1
# Requires DATA_DIR, which the setup cell only defines for the '/home/jovyan' home environment
moa_highres = DATA_DIR + '/moa125_2014_hp1_v01.tif' 
moa_highres_da = rioxarray.open_rasterio(moa_highres)

In [None]:
# MODIS MOA 2014 coastline and grounding line
# https://nsidc.org/data/nsidc-0730/versions/1
# `shp` is reused as a scratch path variable: first the coastline shapefile, then the grounding line
shp = DATA_DIR + '/moa2014_coastline_v01.shp' 
moa_2014_coastline = gpd.read_file(shp)
shp = DATA_DIR + '/moa2014_grounding_line_v01.shp' 
moa_2014_groundingline = gpd.read_file(shp)

In [None]:
# Scripps Grounding Line (Depoorter and others, 2013)
# https://doi.pangaea.de/10.1594/PANGAEA.819147
Scripps_gl = gpd.read_file(DATA_DIR + '/boundaries/Depoorter2013/Antarctica_masks/scripps_antarctica_polygons_v1.shp')

# Isolate the polygons labeled 'Grounded ice or land' (ice-shelf polygons are not selected here)
Scripps_landice = Scripps_gl[Scripps_gl['Id_text'] == 'Grounded ice or land']

# Results and discussion

In [None]:
# How many previously identified lakes were analyzed?
# Counts: the full prior inventory, the inventory without Crane_Glacier, and the revised inventory
print(len(stationary_outlines_gdf), 'lakes reanalyzed')
print(len(stationary_outlines_gdf[stationary_outlines_gdf['name'] != 'Crane_Glacier']),
    'lakes actually reanalyzed due to lack of data at Crane Glacier')
print(len(reexamined_stationary_outlines_gdf), 
    'lakes analyzed in revised inventory due to Site_B and Site_C being combined into Site_BC')

In [None]:
# How many lakes are missing CryoSat-2 SARIn coverage?
# A missing start time is stored as the literal string '<NA>'; prior inventory first, revised second
for _inventory_gdf in (stationary_outlines_gdf, reexamined_stationary_outlines_gdf):
    _lacks_coverage = _inventory_gdf['CS2_SARIn_start'] == '<NA>'
    print(len(_inventory_gdf[_lacks_coverage]))

In [None]:
# How many lakes have CryoSat-2 SARIn coverage?
# Any start value other than the literal string '<NA>' counts; prior inventory first, revised second
for _inventory_gdf in (stationary_outlines_gdf, reexamined_stationary_outlines_gdf):
    _has_coverage = _inventory_gdf['CS2_SARIn_start'] != '<NA>'
    print(len(_inventory_gdf[_has_coverage]))

In [None]:
# How many lakes have CryoSat-2 SARIn coverage from the start of the mission?
# These carry the start value '2010.5' (stored as a string); prior inventory first, revised second
for _inventory_gdf in (stationary_outlines_gdf, reexamined_stationary_outlines_gdf):
    _from_mission_start = _inventory_gdf['CS2_SARIn_start'] == '2010.5'
    print(len(_inventory_gdf[_from_mission_start]))

In [None]:
# How many lakes have CryoSat-2 SARIn coverage starting when LRM/SARIn boundary moved inland?
# These carry the start value '2013.75' (stored as a string); prior inventory first, revised second
for _inventory_gdf in (stationary_outlines_gdf, reexamined_stationary_outlines_gdf):
    _from_boundary_move = _inventory_gdf['CS2_SARIn_start'] == '2013.75'
    print(len(_inventory_gdf[_from_boundary_move]))

In [None]:
# How many lakes exhibit evolving outlines?
# Counted as the number of .geojson entries in the evolving-outlines folder
_evolving_dir_entries = os.listdir('output/lake_outlines/evolving_outlines')
print(sum(1 for _entry in _evolving_dir_entries if _entry.endswith('.geojson')))

In [None]:
# How many lakes have evolving outlines in the forward_fill subfolder?
# Counted as the number of .geojson entries in that folder
_forward_fill_entries = os.listdir('output/lake_outlines/evolving_outlines/forward_fill')
print(sum(1 for _entry in _forward_fill_entries if _entry.endswith('.geojson')))

In [None]:
# How many lakes exhibit no evolving outlines?
# Counted as the number of .txt entries in the evolving-outlines folder
_evolving_dir_entries = os.listdir('output/lake_outlines/evolving_outlines')
print(sum(1 for _entry in _evolving_dir_entries if _entry.endswith('.txt')))

In [None]:
# Confirm count of non-evolving lakes
# Inventory size minus the number of evolving-outline .geojson files, for the prior and revised inventories
_n_evolving_geojson = sum(
    1 for _entry in os.listdir('output/lake_outlines/evolving_outlines') if _entry.endswith('.geojson'))
for _inventory_gdf in (stationary_outlines_gdf, reexamined_stationary_outlines_gdf):
    print(len(_inventory_gdf) - _n_evolving_geojson)

In [None]:
# Investigate discrepancy in numbers

# Collect lake names from the per-lake output files, dropping the extension so they match the "name" column:
# .txt entries go to the non-evolving list, .geojson entries to the evolving list
non_evolving_lakes = []
evolving_lakes = []
for _entry in os.listdir("output/lake_outlines/evolving_outlines"):
    if _entry.endswith(".txt"):
        non_evolving_lakes.append(os.path.splitext(_entry)[0])
    elif _entry.endswith(".geojson"):
        evolving_lakes.append(os.path.splitext(_entry)[0])

# Name sets from the output files and from the three GeoDataFrames
evolving_lakes_names = set(evolving_lakes)
non_evolving_lakes_names = set(non_evolving_lakes)
prior_stationary_names = set(stationary_outlines_gdf["name"])
reexamined_names = set(reexamined_stationary_outlines_gdf["name"])
updated_stationary_names = set(evolving_outlines_union_gdf["name"])

# "Evolving" per inventory = inventory names that have no .txt file
prior_stationary_evolving_lakes = prior_stationary_names - non_evolving_lakes_names
reexamined_evolving_lakes = reexamined_names - non_evolving_lakes_names
updated_stationary_evolving_lakes = updated_stationary_names - non_evolving_lakes_names

# "Non-evolving" per inventory = inventory names that have no .geojson file
prior_stationary_non_evolving_lakes = prior_stationary_names - evolving_lakes_names
reexamined_non_evolving_lakes = reexamined_names - evolving_lakes_names
updated_stationary_non_evolving_lakes = updated_stationary_names - evolving_lakes_names

# Compare: print every pairwise set difference between the three inventories, per category
for _heading, _prior, _reexamined, _updated in (
    ("Evolving lakes",
     prior_stationary_evolving_lakes, reexamined_evolving_lakes, updated_stationary_evolving_lakes),
    ("\nNon-evolving lakes",
     prior_stationary_non_evolving_lakes, reexamined_non_evolving_lakes, updated_stationary_non_evolving_lakes),
):
    print(_heading)
    print("In prior stationary gdf but not in reexamined gdf:", _prior - _reexamined)
    print("In reexamined gdf but not in prior stationary gdf:", _reexamined - _prior)
    print("In prior stationary gdf but not in updated stationary gdf:", _prior - _updated)
    print("In updated stationary gdf but not in prior stationary gdf:", _updated - _prior)
    print("In reexamined gdf but not in updated stationary gdf:", _reexamined - _updated)
    print("In updated stationary gdf but not in reexamined gdf:", _updated - _reexamined)

In [None]:
# Lakes with evolving outlines found

# Name sets from the evolving-outlines folder (extensions stripped) and from the three GeoDataFrames
_evolving_dir_entries = os.listdir("output/lake_outlines/evolving_outlines")
txt_names = {os.path.splitext(_entry)[0] for _entry in _evolving_dir_entries if _entry.endswith(".txt")}
evolving_outlines_names = {os.path.splitext(_entry)[0] for _entry in _evolving_dir_entries if _entry.endswith(".geojson")}
stationary_names = set(stationary_outlines_gdf["name"])
reexamined_names = set(reexamined_stationary_outlines_gdf["name"])
evolving_outlines_union_names = set(evolving_outlines_union_gdf["name"])

# Four independent views of which lakes have evolving outlines, keyed by a label for reporting
sources = dict(
    stationary_at_evolving_subset=stationary_names - txt_names,
    reexamined_at_evolving_subset=reexamined_names - txt_names,
    evolving_union=evolving_outlines_union_names,
    evolving_outlines=evolving_outlines_names,
)

# Names present in every view
common_names = set.intersection(*sources.values())
print(f"Common to all sets ({len(common_names)}):", sorted(common_names))

# Names each view holds beyond the common core
for label, names in sources.items():
    unique_names = names.difference(common_names)
    print(f"\nUnique to {label} ({len(unique_names)}):", sorted(unique_names))


In [None]:
# Lakes without evolving outlines found
# Relies on the name sets (stationary_names, reexamined_names, evolving_outlines_names, txt_names)
# built in the preceding cell

# Three independent views of which lakes lack evolving outlines, keyed by a label for reporting
sources = dict(
    stationary_at_evolving_subset=stationary_names - evolving_outlines_names,
    reexamined_at_evolving_subset=reexamined_names - evolving_outlines_names,
    txt=txt_names,
)

# Names present in every view
common_names = set.intersection(*sources.values())
print(f"Common to all sets ({len(common_names)}):", sorted(common_names))

# Names each view holds beyond the common core
for label, names in sources.items():
    unique_names = names.difference(common_names)
    print(f"\nUnique to {label} ({len(unique_names)}):", sorted(unique_names))


## Evolving outlines

In [None]:
# Analyze lake extensions: compare the re-examined stationary outlines against the evolving-outline unions
# quantify_lake_extensions is defined elsewhere in this notebook; it returns two objects, unpacked here as
# lakes_with_extensions and extension_results (the latter is used as a DataFrame in the cells below)
# NOTE(review): area_threshold=0.25 presumably is a relative-extension cutoff - confirm against the function definition
lakes_with_extensions, extension_results = quantify_lake_extensions(
    reexamined_stationary_outlines_gdf, 
    evolving_outlines_union_gdf, 
    area_threshold=0.25)

In [None]:
# Count the lakes whose relative_extension is non-zero
# NOTE(review): this cell was originally labeled "How many lakes have no extension beyond stationary outline?",
# but the `!= 0` filter counts lakes that DO have a non-zero extension; use `== 0` if the
# no-extension count is what is wanted - confirm which number the article text cites
len(extension_results[extension_results['relative_extension'] != 0])

In [None]:
# Build and print a summary of the extension results
# generate_extension_summary is defined elsewhere in this notebook
# NOTE(review): km2=True presumably reports areas in km^2 - confirm against the function definition
summary = generate_extension_summary(extension_results, km2=True)
print(summary)

In [None]:
# View lakes that exceed the area threshold, largest relative extension first
# The 0.25 cutoff is hard-coded here; keep it in sync with area_threshold=0.25 passed to quantify_lake_extensions
extension_results[extension_results['relative_extension'] > 0.25].sort_values('relative_extension', ascending=False)

## Active area and carbon export

### Active area

In [None]:
# Read in continental summation geometric calculation csv files
# Every directory below holds the same three summation files, so the file-name mapping is defined once
filenames = {
    'superset_IS2_lakes_sum': 'superset_IS2_lakes_sum.csv',
    'subset_CS2_IS2_lakes_SARInPreExpansion_sum': 'subset_CS2_IS2_lakes_SARInPreExpansion_sum.csv',
    'subset_CS2_IS2_lakes_SARInPostExpansion_sum': 'subset_CS2_IS2_lakes_SARInPostExpansion_sum.csv',
}

# Stationary outlines (all analyzed lakes)
base_path = 'output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes'
dfs = {name: pd.read_csv(f'{base_path}/{filename}')
       for name, filename in filenames.items()}
(superset_IS2_stationary_sum_df,
 subset_CS2_IS2_SARInPreExpansion_stationary_sum_df,
 subset_CS2_IS2_SARInPostExpansion_stationary_sum_df) = (dfs[name] for name in filenames)

# Stationary outlines (evolving lakes subset)
base_path = 'output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_evolving_lakes'
dfs = {name: pd.read_csv(f'{base_path}/{filename}')
       for name, filename in filenames.items()}
(superset_IS2_stationary_subset_sum_df,
 subset_CS2_IS2_SARInPreExpansion_stationary_subset_sum_df,
 subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df) = (dfs[name] for name in filenames)

# Evolving outlines (only lakes with evolving outlines)
base_path = 'output/geometric_calcs/evolving_outlines_geom_calc/forward_fill'
dfs = {name: pd.read_csv(f'{base_path}/{filename}')
       for name, filename in filenames.items()}
(superset_IS2_evolving_sum_df,
 subset_CS2_IS2_SARInPreExpansion_evolving_sum_df,
 subset_CS2_IS2_SARInPostExpansion_evolving_sum_df) = (dfs[name] for name in filenames)

# Evolving union (only lakes with evolving outlines)
# base_path and dfs keep these last-assigned values after the cell finishes
base_path = 'output/geometric_calcs/stationary_outline_geom_calc/evolving_union_at_evolving_lakes'
dfs = {name: pd.read_csv(f'{base_path}/{filename}')
       for name, filename in filenames.items()}
(superset_IS2_evolving_union_sum_df,
 subset_CS2_IS2_SARInPreExpansion_evolving_union_sum_df,
 subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df) = (dfs[name] for name in filenames)

In [None]:
# Evolving outlines vs. prior stationary outlines

In [None]:
# CS2/IS2 SARIn pre-expansion lake subset: evolving-outline active area vs. prior stationary-outline area
_evolving_area = subset_CS2_IS2_SARInPreExpansion_evolving_sum_df['evolving_outlines_area (m^2)']
# Prior stationary area is taken from the first row of the summed time series
_stationary_area = subset_CS2_IS2_SARInPreExpansion_stationary_subset_sum_df['stationary_outline_area (m^2)'].iloc[0]

# Minimum, then maximum, evolving-outline active area (m^2 converted to km^2)
for _extreme_area in (_evolving_area.min(), _evolving_area.max()):
    print(np.round(_extreme_area / 1e6, 0))

# Prior stationary-outline active area (m^2 converted to km^2)
print(np.round(_stationary_area / 1e6, 0))

# Minimum, then maximum, evolving-outline area expressed as a percentage of the prior stationary area
for _extreme_area in (_evolving_area.min(), _evolving_area.max()):
    print(np.round(_extreme_area / _stationary_area * 100, 0), '%')

In [None]:
# CS2/IS2 SARIn post-expansion lake subset: evolving-outline active area vs. prior stationary-outline area
_evolving_area = subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['evolving_outlines_area (m^2)']
# Prior stationary area is taken from the LAST row of the summed time series for this subset
_stationary_area = subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['stationary_outline_area (m^2)'].iloc[-1]

# Minimum, then maximum, evolving-outline active area (m^2 converted to km^2)
for _extreme_area in (_evolving_area.min(), _evolving_area.max()):
    print(np.round(_extreme_area / 1e6, 0))

# Prior stationary-outline active area (m^2 converted to km^2)
print(np.round(_stationary_area / 1e6, 0))

# Minimum, then maximum, evolving-outline area expressed as a percentage of the prior stationary area
for _extreme_area in (_evolving_area.min(), _evolving_area.max()):
    print(np.round(_extreme_area / _stationary_area * 100, 0), '%')

In [None]:
# IS2 superset of lakes: evolving-outline active area vs. prior stationary-outline area
_evolving_area = superset_IS2_evolving_sum_df['evolving_outlines_area (m^2)']
# Prior stationary area is taken from the first row of the summed time series
_stationary_area = superset_IS2_stationary_subset_sum_df['stationary_outline_area (m^2)'].iloc[0]

# Minimum, then maximum, evolving-outline active area (m^2 converted to km^2)
for _extreme_area in (_evolving_area.min(), _evolving_area.max()):
    print(np.round(_extreme_area / 1e6, 0))

# Prior stationary-outline active area (m^2 converted to km^2)
print(np.round(_stationary_area / 1e6, 0))

# Minimum, then maximum, evolving-outline area expressed as a percentage of the prior stationary area
for _extreme_area in (_evolving_area.min(), _evolving_area.max()):
    print(np.round(_extreme_area / _stationary_area * 100, 0), '%')

In [None]:
# Updated stationary outlines vs. prior stationary outlines

In [None]:
# CS2/IS2 SARIn pre-expansion lake subset: updated stationary (evolving-union) area vs. prior stationary area
# Both areas are taken from the first row of their summed time series
_updated_area = subset_CS2_IS2_SARInPreExpansion_evolving_union_sum_df['stationary_outline_area (m^2)'].iloc[0]
_prior_area = subset_CS2_IS2_SARInPreExpansion_stationary_subset_sum_df['stationary_outline_area (m^2)'].iloc[0]

# Updated stationary-outline active area (m^2 converted to km^2)
print(np.round(_updated_area / 1e6, 0))

# Prior stationary-outline active area (m^2 converted to km^2)
print(np.round(_prior_area / 1e6, 0))

# Updated stationary area expressed as a percentage of the prior stationary area
print(np.round(_updated_area / _prior_area * 100, 0), '%')

In [None]:
# CS2/IS2 SARIn post-expansion lake subset: updated stationary (evolving-union) area vs. prior stationary area
# NOTE(review): the updated (evolving-union) area is read from the FIRST row (.iloc[0]) while the prior
# stationary area is read from the LAST row (.iloc[-1]). The sibling cells use the same row for both
# quantities; if the summed area changes over this time series the two values are not comparable -
# confirm whether .iloc[-1] was intended for the union as well

# Find lake bed active area derived from updated stationary outlines (m^2 converted to km^2)
print(np.round(subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df['stationary_outline_area (m^2)'].iloc[0] / 1e6, 0))

# Find lake bed active area derived from prior stationary outlines (m^2 converted to km^2)
print(np.round(subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['stationary_outline_area (m^2)'].iloc[-1] / 1e6, 0))

# Express the updated stationary-outline area as a percentage of the prior stationary-outline area
print(np.round(subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df['stationary_outline_area (m^2)'].iloc[0] / 
               subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['stationary_outline_area (m^2)'].iloc[-1] * 100, 0), '%')

In [None]:
# IS2 superset of lakes: updated stationary (evolving-union) area vs. prior stationary area
# Both areas are taken from the first row of their summed time series
_updated_area = superset_IS2_evolving_union_sum_df['stationary_outline_area (m^2)'].iloc[0]
_prior_area = superset_IS2_stationary_subset_sum_df['stationary_outline_area (m^2)'].iloc[0]

# Updated stationary-outline active area (m^2 converted to km^2)
print(np.round(_updated_area / 1e6, 0))

# Prior stationary-outline active area (m^2 converted to km^2)
print(np.round(_prior_area / 1e6, 0))

# Updated stationary area expressed as a percentage of the prior stationary area
print(np.round(_updated_area / _prior_area * 100, 0), '%')

### Dissolved inorganic carbon (DIC) export estimates

In [None]:
# We use the microbial respiration rate of 1.4 x 10^4 g C d-1 measured directly in Mercer Subglacial Lake
# (SLM; Venturelli and others, 2023) as an estimate of dissolved inorganic carbon (DIC) flux from
# saturated sediment to the subglacial water column via respiration
# doi.org/10.1029/2022AV000846
resp_rate = 1.4 * 10**4  # g C d-1 in SLM
print('respiration rate:', resp_rate, 'g C d-1')

# Estimate DIC export from SLM per CryoSat-2/ICESat-2 satellite repeat cycle (91 days) time step
# The 91-day step length is hard-coded here and again in the per-area cells below
SLM_DIC_export_per_step = resp_rate * 91
print('DIC export per time step at SLM:', SLM_DIC_export_per_step, 'g C / 91 d')  # g C / 91 d in SLM

In [None]:
# Find the stationary areas of Mercer Subglacial Lake (SLM)
# First matching row of each inventory: the re-examined stationary outline and the evolving-outline union
_slm_in_reexamined = reexamined_stationary_outlines_gdf['name'] == 'MercerSubglacialLake'
_slm_in_union = evolving_outlines_union_gdf['name'] == 'MercerSubglacialLake'
SLM_stationary_area = reexamined_stationary_outlines_gdf[_slm_in_reexamined]['area (m^2)'].values[0]
SLM_evolving_union_area = evolving_outlines_union_gdf[_slm_in_union]['area (m^2)'].values[0]

# Report both areas in km^2 (m^2 / 1e6), rounded to one decimal place
print('SLM stationary outline area:', np.round(SLM_stationary_area/1e6,1), 'km^2')
print('SLM updated stationary area:', np.round(SLM_evolving_union_area/1e6,1), 'km^2')

In [None]:
# Find the per area DIC export values using stationary outline
# Daily rate per unit area first, then scaled to one 91-day time step
SLM_stationary_per_area_DIC_export = resp_rate / SLM_stationary_area
SLM_stationary_per_area_per_step_DIC_export = SLM_stationary_per_area_DIC_export * 91

print('SLM stationary outline per area DIC export:',
      np.round(SLM_stationary_per_area_DIC_export, 6), 'g C d-1 m-2')
print('SLM stationary outline per area per time step DIC export:',
      np.round(SLM_stationary_per_area_per_step_DIC_export, 6), 'g C step-1 m-2')

In [None]:
# Find the per area DIC export values using evolving union (updated stationary) outline
# Daily rate per unit area first, then scaled to one 91-day time step
SLM_evolving_union_per_area_DIC_export = resp_rate / SLM_evolving_union_area
SLM_evolving_union_per_area_per_step_DIC_export = SLM_evolving_union_per_area_DIC_export * 91

print('SLM updated stationary outline per area DIC export:',
      np.round(SLM_evolving_union_per_area_DIC_export, 6), 'g C d-1 m-2')
print('SLM updated stationary outline per area per time step DIC export:',
      np.round(SLM_evolving_union_per_area_per_step_DIC_export, 6), 'g C step-1 m-2')

In [None]:
# Next we estimate the per area respiration using evolving outlines

In [None]:
# Plot dV time series to view filling/draining history at Mercer Subglacial Lake (SLM).
# Per-step and cumulative dV are drawn for the evolving outlines, the stationary
# outline, and the updated stationary (evolving union) outline; the background is
# shaded by the sign of the evolving-outlines per-step dV.

# Load SLM geometries dataframe
SLM_evolving_outlines_geom = pd.read_csv('/home/jovyan/1_evolving_lakes/Sauthoff-2025-GRL/output/geometric_calcs/evolving_outlines_geom_calc/forward_fill/MercerSubglacialLake.csv')
SLM_stationary_outline_geom = pd.read_csv('/home/jovyan/1_evolving_lakes/Sauthoff-2025-GRL/output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes/MercerSubglacialLake.csv')
SLM_updated_stationary_outline_geom = pd.read_csv('/home/jovyan/1_evolving_lakes/Sauthoff-2025-GRL/output/geometric_calcs/stationary_outline_geom_calc/evolving_union_at_evolving_lakes/MercerSubglacialLake.csv')

# Convert to datetimes
SLM_evolving_outlines_geom['mid_pt_datetime'] = pd.to_datetime(SLM_evolving_outlines_geom['mid_pt_datetime'])

# Define x and y for plotting (all three tables are plotted against the
# evolving-outlines time axis, so they are assumed to share the same time steps)
x = SLM_evolving_outlines_geom['mid_pt_datetime']
y = (SLM_evolving_outlines_geom['evolving_outlines_dV_corr (m^3)'] / 1e9)  # km^3
y2 = np.cumsum(SLM_evolving_outlines_geom['evolving_outlines_dV_corr (m^3)'] / 1e9)  # km^3
y3 = (SLM_stationary_outline_geom['stationary_outline_dV_corr (m^3)'] / 1e9)  # km^3
y4 = np.cumsum(SLM_stationary_outline_geom['stationary_outline_dV_corr (m^3)'] / 1e9)  # km^3
y5 = (SLM_updated_stationary_outline_geom['stationary_outline_dV_corr (m^3)'] / 1e9)  # km^3
y6 = np.cumsum(SLM_updated_stationary_outline_geom['stationary_outline_dV_corr (m^3)'] / 1e9)  # km^3

# Create the plot
plt.figure(figsize=(15, 6))  # Set figure size

# Get current axis
ax = plt.gca()

# Define masks for shading
positive_mask = y > 0
negative_mask = y < 0

# +1 = positive, -1 = negative, 0 = zero
sign_series = positive_mask.astype(int) - negative_mask.astype(int)

# Find change points (indices where the sign differs from the previous step),
# then bracket them with the first index and one-past-the-last index
changes = np.diff(sign_series, prepend=sign_series.iloc[0])
change_points = np.where(changes != 0)[0]
change_points = np.concatenate(([0], change_points, [len(x)]))

# Shaded regions: one span per run of constant sign
regime_colors = {1: "blue", -1: "red", 0: "gray"}
for start_idx, end_idx in zip(change_points[:-1], change_points[1:]):
    # Determine color based on start of interval
    color = regime_colors[int(sign_series.iloc[start_idx])]

    # Start each span at the last sample of the previous regime so adjacent
    # spans are contiguous. Clamp at 0: for the first span start_idx is 0 and
    # start_idx - 1 would wrap around to the last date in the series.
    span_start = x.iloc[max(start_idx - 1, 0)]
    plt.axvspan(span_start, x.iloc[end_idx - 1],
        alpha=0.2, color=color, zorder=0)

# Plot both lines (with fixed colors)
plt.plot(x, y, 'k-', linewidth=2, label='evolving outlines $dV$ ($km^3$)')
plt.plot(x, y2, 'k--', linewidth=2, label='evolving outlines cumulative $dV$ ($km^3$)')
plt.plot(x, y3, 'b-', linewidth=2, label='stationary outline $dV$ ($km^3$)')
plt.plot(x, y4, 'b--', linewidth=2, label='stationary outline cumulative $dV$ ($km^3$)')
plt.plot(x, y5, 'g-', linewidth=2, label='updated stationary outline $dV$ ($km^3$)')
plt.plot(x, y6, 'g--', linewidth=2, label='updated stationary outline cumulative $dV$ ($km^3$)')

# Format the x-axis to show years as major ticks and quarters as minor ticks
# Set major ticks to years
ax.xaxis.set_major_locator(mdates.YearLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))  # Format as year only

# Set minor ticks to quarters
ax.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=[1, 4, 7, 10]))  # Jan, Apr, Jul, Oct

# Add gridlines for better readability
ax.grid(True, which='major', linestyle='-', alpha=0.7)
ax.grid(True, which='minor', linestyle='--', alpha=0.3)

# Add reference line at y=0
plt.axhline(y=0, color='k', linestyle='-', lw=0.5)

# Add labels, title, and legend
plt.xlabel('Year', fontsize=12)
plt.ylabel('$dV$ ($km^3$)', fontsize=12)
ax.set_title('Mercer Subglacial Lake $dV$ time series')

ax.set_xlim(x.iloc[0], x.iloc[-1])

# Add proxy patches for shaded regions
pos_patch = mpatches.Patch(color='blue', alpha=0.2, label='filling')
neg_patch = mpatches.Patch(color='red', alpha=0.2, label='draining')
zero_patch = mpatches.Patch(color='gray', alpha=0.2, label='zero cumulative $dV$')

# Add legend with both line plots and shaded patches
plt.legend(handles=[pos_patch, neg_patch, zero_patch] + ax.get_legend_handles_labels()[0],
           loc='best', bbox_to_anchor=(1, 0.5, 0.25, 0.25))  # (x, y, width, height)

# Show the plot
plt.tight_layout()
plt.show()

We observed a 3.75-year filling period (2014.5–2018.25) prior to the 2018–2019 subglacial lake access campaign, compared with the 4.5 years reported by Venturelli and others (2023).

In [None]:
# Keep only the SLM fill period prior to the Venturelli et al., 2023 sampling
# (2014.5 to 2018.25); both end points are inclusive
start_date = pd.Timestamp('2014-07-01')  # ~2014.5
end_date = pd.Timestamp('2018-04-01')    # ~2018.25
SLM_evolving_outlines_geom_filling_period = SLM_evolving_outlines_geom[
    SLM_evolving_outlines_geom['mid_pt_datetime'].between(start_date, end_date)
]

In [None]:
# Method 1: Conservation-based integration
# Total carbon respired over the filling period divided by the summed
# area-time exposure of the evolving outlines during that period.
# resp_rate [g C d-1] is taken from an earlier cell.

# Duration of each step [days/time step]
time_step_days = 91

# Total filling period length [days = (years in filling period) * (days/year)]
T_days = 3.75 * 365.25

# Total carbon respired over 3.75-year filling period [g C]
C_total = resp_rate * T_days

# Area at each time step [m^2]
areas = SLM_evolving_outlines_geom_filling_period['evolving_outlines_area (m^2)'].values

# Total area-time exposure [m^2 d]
A_time_total = np.sum(areas * time_step_days)

# Conservation-based per-area per-day rate [g C d-1 m-2]
rate_method1 = C_total / A_time_total

print('Method 1:', np.round(rate_method1, 6), 'g C d-1 m-2')

In [None]:
# Method 2 - Mean of per-step rates:
# divide the respiration rate by each time step's evolving outline area, then average
rate_method2 = np.average(
    resp_rate / SLM_evolving_outlines_geom_filling_period['evolving_outlines_area (m^2)']
)
print('Method 2:', np.round(rate_method2, 6), 'g C d-1 m-2')

In [None]:
# Method 3 - Mean area approach:
# divide the respiration rate by the average evolving outline area over the filling period
rate_method3 = resp_rate / np.average(
    SLM_evolving_outlines_geom_filling_period['evolving_outlines_area (m^2)']
)
print('Method 3:', np.round(rate_method3, 6), 'g C d-1 m-2')

In [None]:
# Scale the Method 1 per-area daily rate to a per-area, per-time-step rate
# [g C m-2 time step-1 = (g C d-1 m-2) * (days/time step)]
SLM_evolving_per_area_per_step_DIC_export_filling_period = time_step_days * rate_method1

## Volume change ($dV$)

### Bias

In [None]:
# Maximum absolute cumulative dV (km^3) for each outline type.
# For every outline type the CryoSat-2-era cumulative series is built first; the
# ICESat-2-era cumulative series is then offset by the CryoSat-2 cumulative value
# at the last CryoSat-2 SARIn cycle, and the two are concatenated.

# End date of the last CryoSat-2 SARIn cycle (as a string for comparison below)
CS2_last_cyc_date = str(cyc_dates[cyc_dates['dataset'] == 'CryoSat2_SARIn']['cyc_end_datetimes'].iloc[-1])

# Evolving outlines
cum_CS2_evolving = np.cumsum(
    subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['evolving_outlines_dV_corr (m^3)'].values
) / 1e9

cum_sum_last_CS2_evolving = cum_CS2_evolving[
    subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date
][-1]

cum_IS2_evolving = np.cumsum(
    superset_IS2_evolving_sum_df['evolving_outlines_dV_corr (m^3)'].values
) / 1e9 + cum_sum_last_CS2_evolving

cum_total_evolving = np.concatenate([cum_CS2_evolving, cum_IS2_evolving])

max_abs_cum_dV_evolving = np.max(np.abs(cum_total_evolving))

# Stationary outline at all lakes
cum_CS2_stationary = np.cumsum(
    subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['stationary_outline_dV_corr (m^3)'].values
) / 1e9

cum_sum_last_CS2_stationary = cum_CS2_stationary[
    subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date
][-1]

cum_IS2_stationary = np.cumsum(
    superset_IS2_stationary_sum_df['stationary_outline_dV_corr (m^3)'].values
) / 1e9 + cum_sum_last_CS2_stationary

cum_total_stationary = np.concatenate([cum_CS2_stationary, cum_IS2_stationary])

max_abs_cum_dV_prior_stationary = np.max(np.abs(cum_total_stationary))


# Stationary outline at evolving lakes subset
cum_CS2_stationary_subset = np.cumsum(
    subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['stationary_outline_dV_corr (m^3)'].values
) / 1e9

# The offset must come from the subset series itself, not the all-lakes series
cum_sum_last_CS2_stationary_subset = cum_CS2_stationary_subset[
    subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date
][-1]

cum_IS2_stationary_subset = np.cumsum(
    superset_IS2_stationary_subset_sum_df['stationary_outline_dV_corr (m^3)'].values
) / 1e9 + cum_sum_last_CS2_stationary_subset

cum_total_stationary_subset = np.concatenate([cum_CS2_stationary_subset, cum_IS2_stationary_subset])

max_abs_cum_dV_prior_stationary_evolving_subset = np.max(np.abs(cum_total_stationary_subset))


# Updated stationary (union of evolving outlines)
cum_CS2_updated_stationary = np.cumsum(
    subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df['stationary_outline_dV_corr (m^3)'].values
) / 1e9

cum_sum_last_CS2_updated_stationary = cum_CS2_updated_stationary[
    subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date
][-1]

cum_IS2_updated_stationary = np.cumsum(
    superset_IS2_evolving_union_sum_df['stationary_outline_dV_corr (m^3)'].values
) / 1e9 + cum_sum_last_CS2_updated_stationary

cum_total_updated_stationary = np.concatenate([cum_CS2_updated_stationary, cum_IS2_updated_stationary])

max_abs_cum_dV_updated_stationary = np.max(np.abs(cum_total_updated_stationary))


# Print results
print(f"Max absolute cumulative dV (evolving outlines): {max_abs_cum_dV_evolving:.3f} km³")
print(f"Max absolute cumulative dV (stationary outline at all analyzed lakes): {max_abs_cum_dV_prior_stationary:.3f} km³")
print(f"Max absolute cumulative dV (stationary outline at evolving lakes subset): {max_abs_cum_dV_prior_stationary_evolving_subset:.3f} km³")
print(f"Max absolute cumulative dV (updated stationary outline): {max_abs_cum_dV_updated_stationary:.3f} km³")

In [None]:
# Maximum absolute cumulative dV bias (km^3) of each stationary-outline variant
# relative to the evolving outlines. Each section follows the same three steps:
#   1. cumulative (evolving - stationary) difference over the CryoSat-2-era table
#   2. value of that cumulative difference at the last CryoSat-2 cycle date
#   3. cumulative difference over the ICESat-2-era table, offset by the value from step 2
# NOTE: bias_prior_CS2, cum_sum_last_CS2_midcyc_date_prior, bias_prior_IS2 and
# bias_prior_total are reused by the first two sections, so the statement order matters.

# Bias (evolving - prior stationary at all analyzed lakes)
bias_prior_CS2 = np.cumsum(
    subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['evolving_outlines_dV_corr (m^3)'].values -
    subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['stationary_outline_dV_corr (m^3)'].values
) / 1e9

# Cumulative bias at the last row on or before CS2_last_cyc_date (offset for the IS2 era)
cum_sum_last_CS2_midcyc_date_prior = np.cumsum((
    subset_CS2_IS2_SARInPostExpansion_evolving_sum_df[
        subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date
    ]['evolving_outlines_dV_corr (m^3)'] -
    subset_CS2_IS2_SARInPostExpansion_stationary_sum_df[
        subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date
    ]['stationary_outline_dV_corr (m^3)']
) / 1e9).iloc[-1]

bias_prior_IS2 = np.cumsum((
    superset_IS2_evolving_sum_df['evolving_outlines_dV_corr (m^3)'].values -
    superset_IS2_stationary_sum_df['stationary_outline_dV_corr (m^3)'].values
) / 1e9) + cum_sum_last_CS2_midcyc_date_prior

# Combine CS2 + IS2
bias_prior_total = np.concatenate([bias_prior_CS2, bias_prior_IS2])

# Max absolute bias (prior stationary)
max_abs_cum_dV_bias_prior_stationary = np.max(np.abs(bias_prior_total))


# Bias (evolving - prior stationary at evolving lakes subset)
# (overwrites the bias_prior_* intermediates from the section above)
bias_prior_CS2 = np.cumsum(
    subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['evolving_outlines_dV_corr (m^3)'].values -
    subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['stationary_outline_dV_corr (m^3)'].values
) / 1e9

# Cumulative bias at the last row on or before CS2_last_cyc_date (offset for the IS2 era)
cum_sum_last_CS2_midcyc_date_prior = np.cumsum((
    subset_CS2_IS2_SARInPostExpansion_evolving_sum_df[
        subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date
    ]['evolving_outlines_dV_corr (m^3)'] -
    subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df[
        subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date
    ]['stationary_outline_dV_corr (m^3)']
) / 1e9).iloc[-1]

bias_prior_IS2 = np.cumsum((
    superset_IS2_evolving_sum_df['evolving_outlines_dV_corr (m^3)'].values -
    superset_IS2_stationary_subset_sum_df['stationary_outline_dV_corr (m^3)'].values
) / 1e9) + cum_sum_last_CS2_midcyc_date_prior

# Combine CS2 + IS2
bias_prior_total = np.concatenate([bias_prior_CS2, bias_prior_IS2])

# Max absolute bias (prior stationary at evolving lakes subset)
max_abs_cum_dV_bias_prior_stationary_at_evolving_lakes = np.max(np.abs(bias_prior_total))


# Bias (evolving - updated stationary)
bias_updated_CS2 = np.cumsum(
    subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['evolving_outlines_dV_corr (m^3)'].values -
    subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df['stationary_outline_dV_corr (m^3)'].values
) / 1e9

# Cumulative bias at the last row on or before CS2_last_cyc_date (offset for the IS2 era)
cum_sum_last_CS2_midcyc_date_updated = np.cumsum((
    subset_CS2_IS2_SARInPostExpansion_evolving_sum_df[
        subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date
    ]['evolving_outlines_dV_corr (m^3)'] -
    subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df[
        subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date
    ]['stationary_outline_dV_corr (m^3)']
) / 1e9).iloc[-1]

bias_updated_IS2 = np.cumsum((
    superset_IS2_evolving_sum_df['evolving_outlines_dV_corr (m^3)'].values -
    superset_IS2_evolving_union_sum_df['stationary_outline_dV_corr (m^3)'].values
) / 1e9) + cum_sum_last_CS2_midcyc_date_updated

# Combine CS2 + IS2
bias_updated_total = np.concatenate([bias_updated_CS2, bias_updated_IS2])

# Max absolute bias (updated stationary)
max_abs_cum_dV_bias_updated_stationary = np.max(np.abs(bias_updated_total))


# --- Print results ---
print(f"Maximum absolute bias (evolving - prior stationary at all analyzed lakes): {max_abs_cum_dV_bias_prior_stationary:.3f} km³")
print(f"Maximum absolute bias (evolving - prior stationary at evolving lakes subset): {max_abs_cum_dV_bias_prior_stationary_at_evolving_lakes:.3f} km³")
print(f"Maximum absolute bias (evolving - updated stationary): {max_abs_cum_dV_bias_updated_stationary:.3f} km³")

In [None]:
# Percent underestimation: each maximum absolute cumulative bias expressed as a
# percentage of the maximum absolute cumulative dV of the evolving outlines
percent_prior = 100 * max_abs_cum_dV_bias_prior_stationary / max_abs_cum_dV_evolving
# Uses the name actually assigned by the bias cell
# (max_abs_cum_dV_bias_prior_stationary_at_evolving_lakes)
percent_prior_evolving_subset = 100 * max_abs_cum_dV_bias_prior_stationary_at_evolving_lakes / max_abs_cum_dV_evolving
percent_updated = 100 * max_abs_cum_dV_bias_updated_stationary / max_abs_cum_dV_evolving

print(f"Maximum cumulative bias (prior stationary at all analyzed lakes) is {percent_prior:.1f}% of the cumulative evolving flux.")
print(f"Maximum cumulative bias (prior stationary at evolving lakes subset) is {percent_prior_evolving_subset:.1f}% of the cumulative evolving flux.")
print(f"Maximum cumulative bias (updated stationary) is {percent_updated:.1f}% of the cumulative evolving flux.")

In [None]:
# Range of the cumulative bias across the three stationary-outline variants,
# in km^3 and as a percentage of the maximum absolute cumulative evolving dV
max_bias = max(
    max_abs_cum_dV_bias_prior_stationary,  # name actually assigned by the bias cell
    max_abs_cum_dV_bias_prior_stationary_at_evolving_lakes,
    max_abs_cum_dV_bias_updated_stationary,
)
max_bias_percent = 100 * max_bias / max_abs_cum_dV_evolving
print(f'maximum cumulative volume flux bias: {max_bias:.1f} km$^3$')

min_bias = min(
    max_abs_cum_dV_bias_prior_stationary,
    max_abs_cum_dV_bias_prior_stationary_at_evolving_lakes,
    max_abs_cum_dV_bias_updated_stationary,
)
min_bias_percent = 100 * min_bias / max_abs_cum_dV_evolving
print(f'minimum cumulative volume flux bias: {min_bias:.1f} km$^3$')

# Key point # 3
# Find maximum percent of cumulative volume flux underestimated using stationary outlines
# Raw f-string: the LaTeX backslashes (\sim, \%) are emitted literally without
# relying on invalid escape sequences
print(rf'Using stationary outlines results in a cumulative underestimation of Antarctic subglacial water volume of $\sim$ {min_bias_percent:.0f}-{max_bias_percent:.0f}$\%$ ($\sim$ {min_bias:.1f}-{max_bias:.1f} $km^3$) compared to evolving outlines.')

In [None]:
# Compare per-time-step dV from evolving vs. stationary outlines for every lake
# that has a CSV in both folders. For each lake, record whether any time step has
# an evolving dV magnitude more than 125% or less than 75% of the stationary dV
# magnitude, and collect the merged rows for the time-step analysis that follows.

# Set the directory paths for the new folder structure
evolving_folder = 'output/geometric_calcs/evolving_outlines_geom_calc/forward_fill/'
stationary_folder = 'output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes/'

# Files to ignore in our analysis
files_to_ignore = [
    'superset_IS2_lakes_sum.csv',
    'subset_CS2_IS2_lakes_SARInPostExpansion_sum.csv',
    'subset_CS2_IS2_lakes_SARInPreExpansion_sum.csv',
    'subset_noCS2_IS2_lakes_sum.csv'
]


def _find_date_and_volume_columns(df):
    """Return (date_col, vol_col) for df, or None for whichever is not found.

    The date column is the last column whose lowercased name contains 'date',
    'time' or 'day'; the volume column is the last one containing 'dv_corr'.
    """
    date_col = None
    vol_col = None
    for col in df.columns:
        lowered = col.lower()
        if any(date_indicator in lowered for date_indicator in ('date', 'time', 'day')):
            date_col = col
        if 'dv_corr' in lowered:
            vol_col = col
    return date_col, vol_col


# Get lists of CSV files in each directory, excluding the files to ignore
evolving_csv_files = [f for f in os.listdir(evolving_folder)
                     if f.endswith('.csv') and f not in files_to_ignore]
stationary_csv_files = [f for f in os.listdir(stationary_folder)
                       if f.endswith('.csv') and f not in files_to_ignore]

# Find common lake names (files that exist in both folders)
evolving_lake_names = {os.path.splitext(f)[0] for f in evolving_csv_files}
stationary_lake_names = {os.path.splitext(f)[0] for f in stationary_csv_files}
common_lake_names = evolving_lake_names.intersection(stationary_lake_names)

# Initialize DataFrame to store lake-level results
lake_results_df = pd.DataFrame({
    'lake_name': list(common_lake_names),
    'greater_than_125_percent': False,
    'less_than_75_percent': False,
    'both_conditions': False,
    'either_condition': False,
    'total_time_steps': 0,
    'valid_data_found': False
})

# Create a list to store the combined data for all lakes (for time step analysis)
all_combined_data = []

# Loop through each lake and process data
for idx, lake_name in enumerate(lake_results_df['lake_name']):
    try:
        # Read the evolving and stationary lake data
        evolving_df = pd.read_csv(os.path.join(evolving_folder, f"{lake_name}.csv"))
        stationary_df = pd.read_csv(os.path.join(stationary_folder, f"{lake_name}.csv"))

        # Identify the date column and volume column in each dataframe
        evolving_date_col, evolving_vol_col = _find_date_and_volume_columns(evolving_df)
        stationary_date_col, stationary_vol_col = _find_date_and_volume_columns(stationary_df)

        # Skip if we couldn't identify the necessary columns
        if not all([evolving_date_col, evolving_vol_col, stationary_date_col, stationary_vol_col]):
            print(f"Skipping {lake_name} - could not identify all required columns")
            continue

        # Create standardized dataframes for merging
        evolving_std_df = pd.DataFrame({
            'lake_name': lake_name,
            'date': evolving_df[evolving_date_col],
            'evolving_outlines_dV_corr (m^3)': evolving_df[evolving_vol_col]
        })

        stationary_std_df = pd.DataFrame({
            'lake_name': lake_name,
            'date': stationary_df[stationary_date_col],
            'stationary_outline_dV_corr (m^3)': stationary_df[stationary_vol_col]
        })

        # Merge the dataframes on lake_name and date
        merged_df = pd.merge(evolving_std_df, stationary_std_df, on=['lake_name', 'date'])

        # Filter out rows where evolving volume is 0
        filtered_df = merged_df[merged_df['evolving_outlines_dV_corr (m^3)'] != 0]

        # Skip lakes with no valid data
        if filtered_df.empty:
            print(f"Skipping {lake_name} - no valid data after filtering")
            continue

        # Update lake-level results
        lake_results_df.loc[idx, 'valid_data_found'] = True
        lake_results_df.loc[idx, 'total_time_steps'] = len(filtered_df)

        # Check conditions for lake-level analysis on dV magnitudes, matching the
        # time-step analysis. With signed values the thresholds invert whenever
        # dV is negative (draining), so nearly every draining step is flagged.
        evolving_abs = filtered_df['evolving_outlines_dV_corr (m^3)'].abs()
        stationary_abs = filtered_df['stationary_outline_dV_corr (m^3)'].abs()
        condition_greater_than = evolving_abs > 1.25 * stationary_abs
        condition_less_than = evolving_abs < 0.75 * stationary_abs

        # Update DataFrame with lake-level results
        lake_results_df.loc[idx, 'greater_than_125_percent'] = condition_greater_than.any()
        lake_results_df.loc[idx, 'less_than_75_percent'] = condition_less_than.any()
        lake_results_df.loc[idx, 'both_conditions'] = condition_greater_than.any() and condition_less_than.any()

        # Calculate either condition
        condition_either = condition_greater_than | condition_less_than
        lake_results_df.loc[idx, 'either_condition'] = condition_either.any()

        # Add to combined data for time step analysis
        all_combined_data.append(filtered_df)

    except Exception as e:
        # Best-effort: report the failing lake and carry on with the rest
        print(f"Error processing {lake_name}: {e}")

# ==================== LAKE-LEVEL ANALYSIS ====================
# Summarize, over lakes with valid data, how many lakes met each condition
banner = "=" * 60
print("\n" + banner)
print("LAKE-LEVEL ANALYSIS RESULTS")
print(banner)

# Keep only lakes for which valid data were found
valid_lake_results = lake_results_df[lake_results_df['valid_data_found']]

total_valid_lakes = len(valid_lake_results)
print(f"Valid lakes analyzed: {total_valid_lakes}")

if total_valid_lakes == 0:
    print("No valid lakes found for analysis")
else:
    condition_columns = [
        'greater_than_125_percent',
        'less_than_75_percent',
        'both_conditions',
        'either_condition',
    ]

    # Number of lakes flagged per condition and the matching fraction of valid lakes
    lake_counts = {column: valid_lake_results[column].sum() for column in condition_columns}
    lake_proportions = {column: lake_counts[column] / total_valid_lakes for column in condition_columns}

    # Print lake-level results
    report_rows = [
        ("Lakes meeting condition 1 (greater than 125%)", 'greater_than_125_percent'),
        ("Lakes meeting condition 2 (less than 75%)", 'less_than_75_percent'),
        ("Lakes meeting either condition", 'either_condition'),
        ("Lakes meeting both conditions", 'both_conditions'),
    ]
    for label, column in report_rows:
        print(f"{label}: {lake_counts[column]} ({lake_proportions[column]:.2f})")

# ==================== TIME STEP ANALYSIS ====================
# Pool the filtered rows of every lake and report the fraction of time steps whose
# evolving dV magnitude is above 125% / below 75% of the stationary dV magnitude
print("\n" + "=" * 60)
print("TIME STEP ANALYSIS RESULTS")
print("=" * 60)

if not all_combined_data:
    print("Failed to create any valid combined data for time step analysis.")
else:
    # Concatenate all the combined data into a single dataframe
    time_step_df = pd.concat(all_combined_data, ignore_index=True)

    # Magnitudes of the two dV estimates
    evolving_magnitude = time_step_df['evolving_outlines_dV_corr (m^3)'].abs()
    stationary_magnitude = time_step_df['stationary_outline_dV_corr (m^3)'].abs()

    # Define the separate conditions, using absolute values
    condition1 = evolving_magnitude > 1.25 * stationary_magnitude
    condition2 = evolving_magnitude < 0.75 * stationary_magnitude

    # Either, neither, and both
    either_condition = condition1 | condition2
    neither_condition = ~either_condition
    both_conditions = condition1 & condition2

    # Count the number of rows meeting each condition
    num_rows_condition1 = condition1.sum()
    num_rows_condition2 = condition2.sum()
    num_rows_either_condition = either_condition.sum()
    num_rows_neither_condition = neither_condition.sum()
    num_rows_both_conditions = both_conditions.sum()

    # Calculate proportions based on the length of time_step_df (0 when it is empty)
    total_time_steps = len(time_step_df)
    if total_time_steps > 0:
        proportion_condition1 = num_rows_condition1 / total_time_steps
        proportion_condition2 = num_rows_condition2 / total_time_steps
        proportion_either_condition = num_rows_either_condition / total_time_steps
        proportion_neither_condition = num_rows_neither_condition / total_time_steps
        proportion_both_conditions = num_rows_both_conditions / total_time_steps
    else:
        proportion_condition1 = 0
        proportion_condition2 = 0
        proportion_either_condition = 0
        proportion_neither_condition = 0
        proportion_both_conditions = 0

    # Calculate sum of proportions as a sanity check
    sum_of_proportions = np.sum([
        proportion_condition1,
        proportion_condition2,
        proportion_both_conditions,
        proportion_neither_condition,
    ])

    # Print out the time step results
    print(f"Valid time steps analyzed: {total_time_steps}")
    print(f"Proportion of time steps meeting condition 1 (greater than 125%): {np.round(proportion_condition1, 2)}")
    print(f"Proportion of time steps meeting condition 2 (less than 75%): {np.round(proportion_condition2, 2)}")
    print(f"Proportion of time steps meeting either conditions: {np.round(proportion_either_condition, 2)}")
    print(f"Proportion of time steps meeting neither condition: {np.round(proportion_neither_condition, 2)}")
    print(f"Proportion of time steps meeting both conditions: {np.round(proportion_both_conditions, 2)}")
    print(f"Sum of proportions: {np.round(sum_of_proportions, 2)}")

# ==================== SUMMARY ====================
# Headline numbers from the lake-level and time-step analyses above
print("\n" + "=" * 60)
print("ANALYSIS SUMMARY")
print("=" * 60)

if not (total_valid_lakes > 0 and all_combined_data):
    print("Analysis could not be completed due to data issues.")
else:
    print("Analysis completed successfully:")
    print(f"  • {total_valid_lakes} lakes analyzed")
    print(f"  • {len(time_step_df)} total time steps analyzed")
    print("\nKey findings:")
    print(f"  • {lake_proportions['either_condition']:.1%} of lakes have at least one time step with dV differences ±25%")
    print(f"  • {proportion_either_condition:.1%} of time steps show dV differences ±25%")

### Explaining continental sum trends

In [None]:
directory = 'output/geometric_calcs/evolving_outlines_geom_calc/forward_fill'

def create_interactive_plot(directory, stationary_outlines_gdf):
    """Return an hvplot line plot of cumulative evolving-outline dV for each lake.

    Every '.csv' file in `directory` whose name contains neither "subset" nor
    "superset" (the continental summation files) is read as one lake; its
    'evolving_outlines_dV_corr (m^3)' column is converted to km^3 and cumulatively
    summed. `stationary_outlines_gdf` is accepted but not used.
    """
    per_lake_frames = []

    for filename in os.listdir(directory):
        # Exclude continental summation files
        if "subset" in filename or "superset" in filename:
            continue
        if not filename.endswith('.csv'):
            continue

        lake_df = pd.read_csv(os.path.join(directory, filename))

        # Tag the rows with the lake name (file stem) and derive plotting columns
        lake_df['lake_name'] = os.path.splitext(filename)[0]
        lake_df['datetime'] = pd.to_datetime(lake_df['mid_pt_datetime'])
        lake_df['cumsum_vol'] = np.cumsum(lake_df['evolving_outlines_dV_corr (m^3)']/1e9)
        per_lake_frames.append(lake_df)

    # Stack all lakes into one long table; one line is drawn per lake_name
    combined_df = pd.concat(per_lake_frames, ignore_index=True)

    return combined_df.hvplot.line(
        x='datetime',
        y='cumsum_vol',
        by='lake_name',
        width=800,
        height=400,
        title='Lake Volume Changes Over Time',
        xlabel='Date',
        ylabel='Cumulative Volume Change (km³)',
        grid=True,
        legend=False
    )

# Usage:
plot = create_interactive_plot(directory, stationary_outlines_gdf)
plot  # Display in notebook

In [None]:
directory = 'output/geometric_calcs/evolving_outlines_geom_calc/forward_fill'

def create_interactive_plot(directory, stationary_outlines_gdf):
    """Return a HoloViews Curve plot of cumulative evolving-outline dV, grouped by lake.

    Every '.csv' file in `directory` is read (no files are excluded here); its
    'evolving_outlines_dV_corr (m^3)' column is converted to km^3 and cumulatively
    summed. `stationary_outlines_gdf` is accepted but not used.
    """
    per_file_frames = []

    for filename in os.listdir(directory):
        if not filename.endswith('.csv'):
            continue

        frame = pd.read_csv(os.path.join(directory, filename))

        # Tag the rows with the file stem and derive plotting columns
        frame['lake_name'] = os.path.splitext(filename)[0]
        frame['datetime'] = pd.to_datetime(frame['mid_pt_datetime'])
        frame['cumsum_vol'] = np.cumsum(frame['evolving_outlines_dV_corr (m^3)'] / 1e9)
        per_file_frames.append(frame)

    # Stack all files into one long table
    combined_df = pd.concat(per_file_frames, ignore_index=True)

    # One Curve per lake_name, built from a Dataset
    curves = hv.Dataset(combined_df).to(
        hv.Curve,
        kdims=['datetime'],
        vdims=['cumsum_vol', 'lake_name'],
        groupby='lake_name',
    )

    # Apply options to the plot
    return curves.opts(
        width=800,
        height=400,
        tools=['hover'],
        title='Lake volume changes over time',
        xlabel='Date',
        ylabel='Cumulative volume change (km³)',
        show_grid=True,
        toolbar='above'
    )

# Usage:
plot = create_interactive_plot(directory, stationary_outlines_gdf)
plot  # Display in notebook

In [None]:
# Investigate the lakes driving the deviation of evolving outlines and prior stationary outlines

evolving_directory = 'output/geometric_calcs/evolving_outlines_geom_calc/forward_fill'
stationary_directory = 'output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_evolving_lakes'

def process_lake_data(directory, is_evolving=True):
    """Load per-lake CSVs from `directory` into a dict keyed by lake name.

    Files whose name contains "subset" or "superset" (continental summation
    files) and files not ending in '.csv' are skipped. Each returned dataframe
    gains 'lake_name' (file stem), 'datetime' (parsed 'mid_pt_datetime') and
    'cumsum_vol', the cumulative sum of 'evolving_outlines_dV_corr (m^3)' when
    `is_evolving` is truthy, otherwise of 'stationary_outline_dV_corr (m^3)'.
    """
    if is_evolving:
        dv_column = 'evolving_outlines_dV_corr (m^3)'
    else:
        dv_column = 'stationary_outline_dV_corr (m^3)'

    lake_tables = {}
    for filename in os.listdir(directory):
        is_summation_file = "subset" in filename or "superset" in filename
        if is_summation_file or not filename.endswith('.csv'):
            continue

        table = pd.read_csv(os.path.join(directory, filename))
        lake = os.path.splitext(filename)[0]

        table['lake_name'] = lake
        table['datetime'] = pd.to_datetime(table['mid_pt_datetime'])
        # Cumulative volume change, in the units of the source column (m^3)
        table['cumsum_vol'] = np.cumsum(table[dv_column])

        lake_tables[lake] = table

    return lake_tables

def create_interactive_plot(evolving_directory, stationary_directory):
    """Return an hvplot line plot of cumulative dV differences, one line per lake.

    For lakes present in both directories the plotted series is the cumulative
    evolving dV minus the cumulative stationary dV, labelled '<lake>_difference'.
    Lakes present only in the stationary directory are plotted as their
    stationary cumulative dV, labelled '<lake>_stationary'. All series are
    converted from m^3 (as produced by process_lake_data) to km^3 so that the
    values match the y-axis label.
    """
    m3_per_km3 = 1e9

    # Load data from both directories
    evolving_dfs = process_lake_data(evolving_directory, is_evolving=True)
    stationary_dfs = process_lake_data(stationary_directory, is_evolving=False)

    # Initialize lists to store processed dataframes
    plot_dfs = []

    # Process common lakes
    common_lakes = set(evolving_dfs) & set(stationary_dfs)
    for lake_name in common_lakes:
        # Align the two cumulative series on their shared time steps
        merged_df = pd.merge(
            evolving_dfs[lake_name][['datetime', 'cumsum_vol']],
            stationary_dfs[lake_name][['datetime', 'cumsum_vol']],
            on='datetime',
            suffixes=('_evolving', '_stationary')
        )
        # Difference (evolving - stationary), converted m^3 -> km^3
        merged_df['cumsum_vol'] = (
            merged_df['cumsum_vol_evolving'] - merged_df['cumsum_vol_stationary']
        ) / m3_per_km3
        merged_df['lake_name'] = lake_name + '_difference'
        plot_dfs.append(merged_df[['datetime', 'cumsum_vol', 'lake_name']])

    # Process lakes only in stationary directory
    stationary_only = set(stationary_dfs) - set(evolving_dfs)
    for lake_name in stationary_only:
        df = stationary_dfs[lake_name].copy()
        df['cumsum_vol'] = df['cumsum_vol'] / m3_per_km3  # m^3 -> km^3
        df['lake_name'] = lake_name + '_stationary'
        plot_dfs.append(df[['datetime', 'cumsum_vol', 'lake_name']])

    # Combine all dataframes
    combined_df = pd.concat(plot_dfs, ignore_index=True)

    # Create plot using hvplot
    plot = combined_df.hvplot.line(
        x='datetime',
        y='cumsum_vol',
        by='lake_name',
        width=800,
        height=400,
        title='Lake volume changes over time',
        xlabel='Date',
        ylabel='Cumulative volume change (km³)',
        grid=True,
        legend=False
    )

    return plot

# Usage:
plot = create_interactive_plot(evolving_directory, stationary_directory)
plot  # Display in notebook

In [None]:
# Investigate the lakes driving the deviation of evolving outlines and updated stationary outlines

evolving_directory = 'output/geometric_calcs/evolving_outlines_geom_calc/forward_fill'
stationary_directory = 'output/geometric_calcs/stationary_outline_geom_calc/evolving_union_at_evolving_lakes'

def process_lake_data(directory, is_evolving=True):
    """
    Load every per-lake CSV in a folder and attach a cumulative volume column.

    Files whose name contains "subset" or "superset" (continental summation
    files) and files that do not end in '.csv' are ignored.

    Args:
        directory (str): folder containing one CSV per lake
        is_evolving (bool): if True, accumulate 'evolving_outlines_dV_corr (m^3)';
            otherwise accumulate 'stationary_outline_dV_corr (m^3)'

    Returns:
        dict: lake name (file name without extension) -> DataFrame with the
        added columns 'lake_name', 'datetime' (parsed from 'mid_pt_datetime')
        and 'cumsum_vol' (running sum of the selected dV column, in m^3)
    """
    # Pick the dV column once rather than branching for every file
    if is_evolving:
        dv_column = 'evolving_outlines_dV_corr (m^3)'
    else:
        dv_column = 'stationary_outline_dV_corr (m^3)'

    lake_tables = {}
    for filename in os.listdir(directory):
        # Skip continental summation files and anything that is not a CSV
        is_summation_file = "subset" in filename or "superset" in filename
        if is_summation_file or not filename.endswith('.csv'):
            continue

        stem = os.path.splitext(filename)[0]
        table = pd.read_csv(os.path.join(directory, filename))

        # Tag the rows with the lake name and add parsed dates and running volume
        table['lake_name'] = stem
        table['datetime'] = pd.to_datetime(table['mid_pt_datetime'])
        table['cumsum_vol'] = np.cumsum(table[dv_column])

        lake_tables[stem] = table

    return lake_tables

def create_interactive_plot(evolving_directory, stationary_directory):
    """
    Build an interactive hvplot line plot of per-lake cumulative volume change.

    For lakes present in both folders the plotted series is the difference
    (evolving - stationary) of the cumulative volumes, labelled
    '<lake>_difference'. Lakes present only in the stationary folder are
    plotted as their own cumulative volume, labelled '<lake>_stationary'.

    The cumulative volumes returned by process_lake_data are running sums of
    the '... (m^3)' columns; they are converted to km^3 here so the values
    agree with the y-axis label.

    Args:
        evolving_directory (str): folder of per-lake evolving-outline CSVs
        stationary_directory (str): folder of per-lake stationary-outline CSVs

    Returns:
        The hvplot line plot (one line per lake).

    Raises:
        ValueError: if neither folder yields any lake to plot.
    """
    # Load data from both directories
    evolving_dfs = process_lake_data(evolving_directory, is_evolving=True)
    stationary_dfs = process_lake_data(stationary_directory, is_evolving=False)

    m3_per_km3 = 1e9
    plot_columns = ['datetime', 'cumsum_vol', 'lake_name']
    plot_dfs = []

    # Lakes in both folders: difference (evolving - stationary) on shared dates.
    # Sorted so the line order does not depend on set iteration order.
    common_lakes = sorted(set(evolving_dfs) & set(stationary_dfs))
    for lake_name in common_lakes:
        merged_df = pd.merge(
            evolving_dfs[lake_name][['datetime', 'cumsum_vol']],
            stationary_dfs[lake_name][['datetime', 'cumsum_vol']],
            on='datetime',
            suffixes=('_evolving', '_stationary')
        )
        merged_df['cumsum_vol'] = (
            merged_df['cumsum_vol_evolving'] - merged_df['cumsum_vol_stationary']
        ) / m3_per_km3
        merged_df['lake_name'] = lake_name + '_difference'
        plot_dfs.append(merged_df[plot_columns])

    # Lakes only in the stationary folder: plot their own cumulative volume
    stationary_only = sorted(set(stationary_dfs) - set(evolving_dfs))
    for lake_name in stationary_only:
        # Copy so the dataframe held in stationary_dfs is not modified
        df = stationary_dfs[lake_name][['datetime', 'cumsum_vol']].copy()
        df['cumsum_vol'] = df['cumsum_vol'] / m3_per_km3
        df['lake_name'] = lake_name + '_stationary'
        plot_dfs.append(df[plot_columns])

    if not plot_dfs:
        # pd.concat would raise a less informative ValueError on an empty list
        raise ValueError(
            f'No lake CSVs to plot in {evolving_directory!r} / {stationary_directory!r}'
        )

    # Combine all dataframes
    combined_df = pd.concat(plot_dfs, ignore_index=True)

    # Create plot using hvplot
    plot = combined_df.hvplot.line(
        x='datetime',
        y='cumsum_vol',
        by='lake_name',
        width=800,
        height=400,
        title='Lake volume changes over time',
        xlabel='Date',
        ylabel='Cumulative volume change (km³)',
        grid=True,
        legend=False
    )

    return plot

# Usage: build the per-lake interactive plot from the two directories defined at the top of this cell
plot = create_interactive_plot(evolving_directory, stationary_directory)
plot  # Display in notebook

### Comparison to SMB trends

#### EAIS vs WAIS [TODO: move to a different notebook outside this repo]

In [None]:
# Split lakes into East vs. West Antarctic Ice Sheet
# This cell only displays evolving_outlines_union_names (assigned elsewhere in the notebook);
# the WAIS subset itself is written out by hand in the next cell
evolving_outlines_union_names

In [None]:
# Hand-maintained set of lake names treated as West Antarctic Ice Sheet (WAIS) lakes.
# Lakes whose 'name' is not in this set are treated as East Antarctic (EAIS) by the
# `.isin(WAIS_lake_names)` / `~...isin(WAIS_lake_names)` filters in the next cell.
WAIS_lake_names = {
 'ConwaySubglacialLake',
 'EngelhardtSubglacialLake',
 'Institute_E1',
 'Institute_E2',
 'Institute_W2',
 'KT1',
 'KT2',
 'KT3',
 'Kamb_10',
 'Kamb_12',
 'Kamb_5',
 'Kamb_6',
 'Kamb_8',
 'Lake12',
 'Lake78',
 'LowerConwaySubglacialLake',
 'LowerMercerSubglacialLake',
 'Mac1',
 'Mac2',
 'Mac3',
 'Mac4',
 'MercerSubglacialLake',
 'Rutford_1',
 'TL96',
 'Thw_124',
 'Thw_142',
 'Thw_170',
 'Thw_70',
 'UpperEngelhardtSubglacialLake',
 'UpperSubglacialLakeConway',
 'WT',
 'WhillansSubglacialLake',
 'Whillans_6',
 'Whillans_7',
 # NOTE(review): 'Wilkes_2' looks like a Wilkes Land (East Antarctic) lake name — confirm it belongs in the WAIS set
 'Wilkes_2'}

In [None]:
# Flag which rows of each outline inventory are named in WAIS_lake_names
reexamined_in_WAIS = reexamined_stationary_outlines_gdf['name'].isin(WAIS_lake_names)
updated_in_WAIS = evolving_outlines_union_gdf['name'].isin(WAIS_lake_names)

# Re-examined stationary outlines: everything not flagged as WAIS is treated as EAIS
reexamined_stationary_EAIS = reexamined_stationary_outlines_gdf[~reexamined_in_WAIS]
reexamined_stationary_WAIS = reexamined_stationary_outlines_gdf[reexamined_in_WAIS]

# Updated stationary outlines (union of evolving outlines), split the same way
updated_stationary_EAIS = evolving_outlines_union_gdf[~updated_in_WAIS]
updated_stationary_WAIS = evolving_outlines_union_gdf[updated_in_WAIS]

In [None]:
# Where does MercerSubglacialLake fall in the WAIS area distribution (re-examined stationary outlines)?
wais_areas = reexamined_stationary_WAIS['area (m^2)']
is_mercer = reexamined_stationary_WAIS['name'] == 'MercerSubglacialLake'

# First matching row; raises IndexError if the lake is not in the subset
target_area = wais_areas[is_mercer].iloc[0]
percentile = percentileofscore(wais_areas, target_area, kind='rank')

print(f"Using Sauthoff et al., 2025 re-examined stationary outlines (n={len(reexamined_stationary_WAIS)}; sourced from Siegfried et al., 2018, etc.; Text S2):")
print(
    f"WAIS active subglacial lakes stationary outlines have minimum "
    f"({wais_areas.min()/1e6:.3f} km^2) and maximum "
    f"({wais_areas.max()/1e6:.3f} km^2) areas"
)
print(f"MercerSubglacialLake area ({target_area/1e6:.3f} km^2) is at the {percentile:.1f} percentile")

In [None]:
# Where does MercerSubglacialLake fall in the WAIS area distribution (updated stationary outlines)?
wais_areas = updated_stationary_WAIS['area (m^2)']
is_mercer = updated_stationary_WAIS['name'] == 'MercerSubglacialLake'

# First matching row; raises IndexError if the lake is not in the subset
target_area = wais_areas[is_mercer].iloc[0]
percentile = percentileofscore(wais_areas, target_area, kind='rank')

print(f"Using Sauthoff et al., 2025 updated stationary outlines (n={len(updated_stationary_WAIS)}; Text S1):")
print(
    f"WAIS active subglacial lakes stationary outlines have minimum "
    f"({wais_areas.min()/1e6:.3f} km^2) and maximum "
    f"({wais_areas.max()/1e6:.3f} km^2) areas"
)
print(f"MercerSubglacialLake area ({target_area/1e6:.3f} km^2) is at the {percentile:.1f} percentile")

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# One (GeoDataFrame, legend label, colour) entry per WAIS outline inventory to compare
datasets = [
    (reexamined_stationary_WAIS, 'Re-examined stationary (Text S2)', '#40E0D0'),  # turquoise
    (updated_stationary_WAIS, 'Updated stationary (Text S1)', '#008080'),  # teal
]

plt.figure(figsize=(8,5))

for gdf, label, color in datasets:
    # Lake areas converted from m^2 to km^2
    areas = gdf['area (m^2)'] / 1e6
    n_lakes = len(areas)

    # Empirical CDF: the i-th smallest area sits at i/n
    sorted_areas = np.sort(areas)
    cdf = np.arange(1, n_lakes + 1) / n_lakes
    plt.plot(sorted_areas, cdf, marker='.', linestyle='none', color=color, label=label)

    # Mark MercerSubglacialLake with a vertical line labelled by the fraction of lakes no larger than it
    is_mercer = gdf['name'] == 'MercerSubglacialLake'
    mercer_area = areas[is_mercer].iloc[0]
    mercer_percentile = np.sum(areas <= mercer_area) / len(areas)
    plt.axvline(mercer_area, color=color, linestyle='--', label=f'Mercer ({mercer_percentile*100:.1f}%)')

plt.xlabel('Lake area (km$^2$)')
plt.ylabel('CDF')
plt.title('CDF of WAIS active subglacial lake areas')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
def process_continental_sums_subset(geom_calc_folder, subset_gdf, subset_name):
    """
    Sum geometric variables for only a subset of lakes in the specified folder,
    based on the 'name' column of a provided GeoDataFrame.
    
    Args:
        geom_calc_folder (str): folder of evolving outlines CSVs being summed
        subset_gdf (GeoDataFrame): lakes to include, using 'name' column
        subset_name (str): name of the subset for output folder
    """
    directory = os.path.join('output/geometric_calcs', geom_calc_folder)
    output_directory = os.path.join('output/geometric_calcs/sub_continent_basins', subset_name, geom_calc_folder)
    os.makedirs(output_directory, exist_ok=True)
    print(f'\nProcessing folder: {geom_calc_folder} using subset: {subset_name}')
    
    # Initialize lists for different lake categories
    dfs_superset_IS2_lakes = []
    dfs_subset_noCS2_IS2_lakes = []
    dfs_subset_CS2_IS2_lakes_SARInPreExpansion = []
    dfs_subset_CS2_IS2_lakes_SARInPostExpansion = []
    
    subset_lake_names = subset_gdf['name'].tolist()
    
    for lake_name in subset_lake_names:
        file_path = os.path.join(directory, f'{lake_name}.csv')
        if os.path.exists(file_path):
            df = pd.read_csv(file_path)
            dfs_superset_IS2_lakes.append(df)
            
            SARIn_date = subset_gdf.loc[subset_gdf['name'] == lake_name, 'CS2_SARIn_start'].values[0]
            if SARIn_date == '<NA>':
                dfs_subset_noCS2_IS2_lakes.append(df)
            if SARIn_date in ['2010.5']:
                dfs_subset_CS2_IS2_lakes_SARInPreExpansion.append(df)
            if SARIn_date in ['2010.5', '2013.75']:
                dfs_subset_CS2_IS2_lakes_SARInPostExpansion.append(df)
        else:
            print(f"Warning: file for lake '{lake_name}' not found in {directory}")
    
    def apply_precision(df):
        for col in df.columns:
            if col == 'mid_pt_datetime':
                continue
            elif 'area' in col:
                df[col] = df[col].round(0).astype(int)
            elif 'dh' in col:
                df[col] = df[col].round(2)
            elif 'dV' in col:
                df[col] = df[col].round(0).astype(int)
        return df
    
    # Save each subset
    for subset_label, dfs_list in [
        ('subset_noCS2_IS2_lakes', dfs_subset_noCS2_IS2_lakes),
        ('subset_CS2_IS2_lakes_SARInPreExpansion', dfs_subset_CS2_IS2_lakes_SARInPreExpansion),
        ('subset_CS2_IS2_lakes_SARInPostExpansion', dfs_subset_CS2_IS2_lakes_SARInPostExpansion)
    ]:
        if dfs_list:
            df_concat = pd.concat(dfs_list, ignore_index=True)
            df_sum = df_concat.groupby('mid_pt_datetime').sum().reset_index()
            df_sum = apply_precision(df_sum)
            output_path = os.path.join(output_directory, f'{subset_label}_sum.csv')
            df_sum.to_csv(output_path, index=False)
    
    # Superset processing
    if dfs_superset_IS2_lakes:
        superset_IS2_lakes = pd.concat(dfs_superset_IS2_lakes, ignore_index=True)
        superset_IS2_lakes_sum = superset_IS2_lakes.groupby('mid_pt_datetime').sum().reset_index()
        superset_IS2_lakes_sum['mid_pt_datetime'] = pd.to_datetime(superset_IS2_lakes_sum['mid_pt_datetime'])
        threshold = pd.Timestamp('2019-01-01 06:00:00')
        superset_IS2_lakes_sum = superset_IS2_lakes_sum[
            superset_IS2_lakes_sum['mid_pt_datetime'] >= threshold
        ].reset_index(drop=True)
        superset_IS2_lakes_sum = apply_precision(superset_IS2_lakes_sum)
        output_path = os.path.join(output_directory, 'superset_IS2_lakes_sum.csv')
        superset_IS2_lakes_sum.to_csv(output_path, index=False)
    
    print(f'Successfully processed subset of lakes: {subset_name} in {geom_calc_folder}.')


In [None]:
# Folders (relative to output/geometric_calcs) of per-lake calculations to be summed
geom_calc_folders = [
    'stationary_outline_geom_calc/stationary_outlines_at_all_lakes',
    'stationary_outline_geom_calc/stationary_outlines_at_evolving_lakes',
    'evolving_outlines_geom_calc/forward_fill',
    'stationary_outline_geom_calc/evolving_union_at_evolving_lakes',
]

# Ice-sheet subsets to sum over: (lakes to include, output folder name)
ice_sheet_subsets = [
    (updated_stationary_EAIS, 'updated_stationary_EAIS'),
    (updated_stationary_WAIS, 'updated_stationary_WAIS'),
]

# For each folder, write the EAIS sums and then the WAIS sums
for geom_calc_folder in geom_calc_folders:
    for ice_sheet_gdf, ice_sheet_label in ice_sheet_subsets:
        process_continental_sums_subset(geom_calc_folder, ice_sheet_gdf, ice_sheet_label)

In [None]:
# Import stationary subglacial lake outlines
stationary_lakes_gdf = gpd.read_file(
    os.path.join(os.getcwd(), 'output/lake_outlines/stationary_outlines/stationary_outlines_gdf.geojson')
)

# Folder whose contents decide whether a lake has evolving outlines
folder_path = 'output/lake_outlines/evolving_outlines'

# Lakes with evolving outlines (.geojson), plus the special case of Site_B and Site_C,
# which are now a combined lake (Site_B_Site_C) and so are added back by name
evolving_outlines_lakes = filter_gdf_by_folder_contents(
    stationary_lakes_gdf, folder_path, file_extension='geojson', exclude=False
)
include_list = ['Site_B', 'Site_C']
included_rows = stationary_lakes_gdf[stationary_lakes_gdf['name'].isin(include_list)]
evolving_outlines_lakes = pd.concat([evolving_outlines_lakes, included_rows]).drop_duplicates()
print('lakes with evolving outlines:',len(evolving_outlines_lakes))

# Lakes without evolving outlines (.txt)
no_evolving_outlines_lakes = filter_gdf_by_folder_contents(
    stationary_lakes_gdf, folder_path, file_extension='txt', exclude=False
)
print('lakes without evolving outlines:',len(no_evolving_outlines_lakes))

In [None]:
# Making revised version to incorporate reviewer feedback of showing subset of stationary outlines at evolving lakes
# Changes to make
# stationary outlines subset for 1) active area, 2) carbon export, 3) cum dV (using dashed cyan line)
# new panel for dV bias
# add dV bias (stationary subset - evolving)
# add open or closed dots for time series to differentiate

# Read in data
# All four groups below are read from the 'updated_stationary_WAIS' sub-continent folder.
# base_path, filenames and dfs are reassigned for each group; only the *_sum_df names keep each group's tables.
# Read in continental summation geometric calculation csv files - evolving outlines (only lakes with evolving outlines)
base_path = 'output/geometric_calcs/sub_continent_basins/updated_stationary_WAIS/evolving_outlines_geom_calc/forward_fill'
filenames = {
    'superset_IS2_lakes_sum': 'superset_IS2_lakes_sum.csv',
    'subset_CS2_IS2_lakes_SARInPreExpansion_sum': 'subset_CS2_IS2_lakes_SARInPreExpansion_sum.csv',
    'subset_CS2_IS2_lakes_SARInPostExpansion_sum': 'subset_CS2_IS2_lakes_SARInPostExpansion_sum.csv',
}

# read_csv is called without parse_dates, so date columns are not converted to datetimes on load
dfs = {name: pd.read_csv(f'{base_path}/{filename}') 
       for name, filename in filenames.items()}

# Store dataframes from the dfs dict under descriptive names for code readability
superset_IS2_evolving_sum_df = dfs['superset_IS2_lakes_sum']
subset_CS2_IS2_SARInPreExpansion_evolving_sum_df = dfs['subset_CS2_IS2_lakes_SARInPreExpansion_sum']
subset_CS2_IS2_SARInPostExpansion_evolving_sum_df = dfs['subset_CS2_IS2_lakes_SARInPostExpansion_sum']

# Read in continental summation geometric calculation csv files - stationary outlines (all lakes)
base_path = 'output/geometric_calcs/sub_continent_basins/updated_stationary_WAIS/stationary_outline_geom_calc/stationary_outlines_at_all_lakes'
filenames = {'superset_IS2_lakes_sum': 'superset_IS2_lakes_sum.csv',
    'subset_CS2_IS2_lakes_SARInPreExpansion_sum': 'subset_CS2_IS2_lakes_SARInPreExpansion_sum.csv',
    'subset_CS2_IS2_lakes_SARInPostExpansion_sum': 'subset_CS2_IS2_lakes_SARInPostExpansion_sum.csv'}

dfs = {name: pd.read_csv(f'{base_path}/{filename}') 
       for name, filename in filenames.items()}

superset_IS2_stationary_sum_df = dfs['superset_IS2_lakes_sum']
subset_CS2_IS2_SARInPreExpansion_stationary_sum_df = dfs['subset_CS2_IS2_lakes_SARInPreExpansion_sum']
subset_CS2_IS2_SARInPostExpansion_stationary_sum_df = dfs['subset_CS2_IS2_lakes_SARInPostExpansion_sum']

# Read in continental summation geometric calculation csv files - stationary outlines (only at evolving lakes)
base_path = 'output/geometric_calcs/sub_continent_basins/updated_stationary_WAIS/stationary_outline_geom_calc/stationary_outlines_at_evolving_lakes'
filenames = {
    'superset_IS2_lakes_sum': 'superset_IS2_lakes_sum.csv',
    'subset_CS2_IS2_lakes_SARInPreExpansion_sum': 'subset_CS2_IS2_lakes_SARInPreExpansion_sum.csv',
    'subset_CS2_IS2_lakes_SARInPostExpansion_sum': 'subset_CS2_IS2_lakes_SARInPostExpansion_sum.csv',
}

dfs = {name: pd.read_csv(f'{base_path}/{filename}')
       for name, filename in filenames.items()}

superset_IS2_stationary_subset_sum_df = dfs['superset_IS2_lakes_sum']
subset_CS2_IS2_SARInPreExpansion_stationary_subset_sum_df = dfs['subset_CS2_IS2_lakes_SARInPreExpansion_sum']
subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df = dfs['subset_CS2_IS2_lakes_SARInPostExpansion_sum']

# Read in continental summation geometric calculation csv files - evolving union (only lakes with evolving outlines)
base_path = 'output/geometric_calcs/sub_continent_basins/updated_stationary_WAIS/stationary_outline_geom_calc/evolving_union_at_evolving_lakes'
filenames = {'superset_IS2_lakes_sum': 'superset_IS2_lakes_sum.csv',
    'subset_CS2_IS2_lakes_SARInPreExpansion_sum': 'subset_CS2_IS2_lakes_SARInPreExpansion_sum.csv',
    'subset_CS2_IS2_lakes_SARInPostExpansion_sum': 'subset_CS2_IS2_lakes_SARInPostExpansion_sum.csv'}

dfs = {name: pd.read_csv(f'{base_path}/{filename}')
       for name, filename in filenames.items()}

superset_IS2_evolving_union_sum_df = dfs['superset_IS2_lakes_sum']
subset_CS2_IS2_SARInPreExpansion_evolving_union_sum_df = dfs['subset_CS2_IS2_lakes_SARInPreExpansion_sum']
subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df = dfs['subset_CS2_IS2_lakes_SARInPostExpansion_sum']


# Setup figure: four stacked panels sharing one column
nrows, ncols = 4, 1
fig, ax = plt.subplots(nrows, ncols, figsize=(10, 10), constrained_layout=True)

# Store dates and time period for satellite coverage eras (as matplotlib date numbers)
# cyc_dates is assigned elsewhere in the notebook; this cell reads its 'cyc_start_datetimes',
# 'cyc_end_datetimes' and 'dataset' columns
time_span = mdates.date2num(cyc_dates['cyc_end_datetimes'].iloc[-1]) - mdates.date2num(cyc_dates['cyc_start_datetimes'].iloc[0])
start_date = mdates.date2num(cyc_dates['cyc_start_datetimes'].iloc[0])
# First cycle whose start equals 2013-10-01T18:00 (raises IndexError if no cycle starts exactly then)
SARIn_expand_date = mdates.date2num(cyc_dates[cyc_dates['cyc_start_datetimes'] == '2013-10-01T18:00:00.000000000']['cyc_start_datetimes'].iloc[0])
# Start of the first cycle labelled 'ICESat2_ATL15'
CS2_IS2_tie_pt = mdates.date2num(cyc_dates[cyc_dates['dataset'] == 'ICESat2_ATL15'].iloc[0]['cyc_start_datetimes'])

# Reference lines are drawn on the top panel only
for row in [0]:
    # Plot horizontal line at zero for reference
    ax[row].axhline(0, color='k', linewidth=0.5)
    # Plot vertical lines to indicate CS2 SARIn mode mask moving inland and ICESat-2 era start
    ax[row].axvline(SARIn_expand_date, color='dimgray', linestyle='solid', linewidth=0.75, ymin=-1, ymax=1, zorder=0)
    ax[row].axvline(CS2_IS2_tie_pt, color='dimgray', linestyle='solid', linewidth=0.75, ymin=-1, ymax=1, zorder=0)

# Pick colormap and make continuous cmap discrete for evolving outlines
# (one colour per cycle after the first; cyc_start_datetimes is assigned elsewhere in the notebook)
cmap = plt.get_cmap('plasma', len(cyc_start_datetimes[1:]))
norm = plt.Normalize(mdates.date2num(cyc_start_datetimes[1]),
                    mdates.date2num(cyc_start_datetimes[-1]))

# Store one single-point proxy line per time step (from the second cycle on) for use in a legend.
# Each line is coloured by its own cycle start date `dt`; the previous code looked the date up
# with the enumerate index of the sliced series, which picked the preceding cycle's colour.
lines = []  # list of lines to be used for the legend
for dt in cyc_dates['cyc_start_datetimes'][1:]:
    line, = ax[0].plot(1, 1, color=cmap(norm(mdates.date2num(dt))))
    lines.append(line)

# Define colors that will be reused for the stationary-outline series and their legend handles
stationary_outline_color  = 'turquoise'
stationary_outline_subset_color  = 'darkcyan'

# Panel - cumulative dV/dt --------------------------------------------------
# Each series is drawn in two pieces on ax[2]: the SARIn post-expansion subset (thin line, small markers),
# then the IS2 superset (thick line, larger markers) shifted by the subset's last cumulative value at or
# before the end of the final CryoSat-2 cycle, so the second piece continues from the first.

# Plot dV time series of evolving outlines using LineCollection from points/segments to plot multi-colored line
x = mdates.date2num(subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['mid_pt_datetime'])
y = np.cumsum(subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['evolving_outlines_dV_corr (m^3)'] / 1e9)  # m^3 -> km^3
# Build consecutive (x, y) point pairs so each segment can take its own colour
points = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
lc.set_array(x)  # colour segments by date
lc.set_linewidth(1)
line = ax[2].add_collection(lc)
scatter = ax[2].scatter(x, y, c=x, cmap=cmap, norm=norm, s=8)

# End of the last CryoSat-2 SARIn cycle, as a string
# NOTE(review): 'mid_pt_datetime' is presumably still a string column here (the CSVs are read without
# parse_dates), making the <= tests below string comparisons — confirm the date formats match
CS2_last_cyc_date = str(cyc_dates[cyc_dates['dataset'] == 'CryoSat2_SARIn']['cyc_end_datetimes'].iloc[-1])
# Cumulative evolving-outline dV (km^3) at the last time step at or before CS2_last_cyc_date
cum_sum_last_CS2_midcyc_date = np.cumsum(np.divide(
    subset_CS2_IS2_SARInPostExpansion_evolving_sum_df[
        subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
        ['evolving_outlines_dV_corr (m^3)'], 1e9)).iloc[-1]

# IS2 superset of evolving outlines, offset to continue from the CS2-era cumulative value
x = mdates.date2num(superset_IS2_evolving_sum_df['mid_pt_datetime'])
y = np.cumsum(superset_IS2_evolving_sum_df['evolving_outlines_dV_corr (m^3)'] / 1e9) + cum_sum_last_CS2_midcyc_date
points = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
lc.set_array(x)
lc.set_linewidth(2)
line = ax[2].add_collection(lc)
scatter = ax[2].scatter(x, y, c=x, cmap=cmap, norm=norm, s=14)

# Plot dV time series of stationary outline of all lakes
ax[2].plot(mdates.date2num(subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['mid_pt_datetime']), 
    np.cumsum(subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['stationary_outline_dV_corr (m^3)'] / 1e9), 
    color=stationary_outline_color, marker='o', markerfacecolor='white', markersize=3, linewidth=1)
# Offset for the IS2 superset piece (reuses and overwrites the variable set for the evolving series)
cum_sum_last_CS2_midcyc_date = np.cumsum(
    subset_CS2_IS2_SARInPostExpansion_stationary_sum_df[subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
    ['stationary_outline_dV_corr (m^3)'] / 1e9).iloc[-1]
ax[2].plot(mdates.date2num(superset_IS2_stationary_sum_df['mid_pt_datetime']), 
np.cumsum(superset_IS2_stationary_sum_df['stationary_outline_dV_corr (m^3)'] / 1e9) + cum_sum_last_CS2_midcyc_date, 
           color=stationary_outline_color, marker='o', markerfacecolor='white', markersize=4, linewidth=2)

# Plot dV time series of stationary outline of lakes with evolving outlines (subset)
# CS2-era piece: SARIn post-expansion subset, cumulative dV converted from m^3 to km^3
ax[2].plot(mdates.date2num(subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['mid_pt_datetime']),
    np.cumsum(subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['stationary_outline_dV_corr (m^3)'] / 1e9),
    color=stationary_outline_subset_color, marker='o', markersize=3, markerfacecolor='white', linewidth=1)
# Cumulative dV (km^3) at the last time step at or before CS2_last_cyc_date, used to offset the IS2 piece.
# The date mask is built from the same dataframe it filters; it was previously built from the
# all-lakes stationary dataframe, which only works if both tables have identical indexes.
cum_sum_last_CS2_midcyc_date = np.cumsum(
    subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df[
        subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
    ['stationary_outline_dV_corr (m^3)'] / 1e9).iloc[-1]
# IS2-era piece: superset, shifted so it continues from the CS2-era cumulative value
ax[2].plot(mdates.date2num(superset_IS2_stationary_subset_sum_df['mid_pt_datetime']),
    np.cumsum(superset_IS2_stationary_subset_sum_df['stationary_outline_dV_corr (m^3)'] / 1e9) + cum_sum_last_CS2_midcyc_date,
    color=stationary_outline_subset_color, marker='o', markersize=4, markerfacecolor='white', linewidth=2)

# Plot dV time series of updated stationary outline
# CS2-era piece: SARIn post-expansion subset of the evolving-union calculations, cumulative dV in km^3
ax[2].plot(mdates.date2num(subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df['mid_pt_datetime']), 
    np.cumsum(subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df['stationary_outline_dV_corr (m^3)']) / 1e9, 
    color='k', marker='o', markersize=3, linewidth=1)
# Cumulative dV (km^3) at the last time step at or before CS2_last_cyc_date, used to offset the IS2 piece
cum_sum_last_CS2_midcyc_date = np.cumsum((
    subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df[
    subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
    ['stationary_outline_dV_corr (m^3)']) / 1e9).iloc[-1]
# IS2-era piece: superset, shifted so it continues from the CS2-era cumulative value
ax[2].plot(mdates.date2num(superset_IS2_evolving_union_sum_df['mid_pt_datetime']), 
    np.cumsum(superset_IS2_evolving_union_sum_df['stationary_outline_dV_corr (m^3)'] / 1e9) + cum_sum_last_CS2_midcyc_date, 
           color='k', marker='o', markersize=4, linewidth=2)


# Panel - cumulative dV/dt bias --------------------------------------------------
# Each bias series on ax[3] is the cumulative sum of (evolving dV - a stationary dV), in km^3, drawn in
# two pieces: the SARIn post-expansion subset, then the IS2 superset offset by the subset's cumulative
# bias at the last time step at or before CS2_last_cyc_date.
# NOTE(review): the subtractions pair rows of two different dataframes by index label, and the x values
# come from only one of them — this assumes both tables hold the same time steps in the same order; confirm

# Plot bias (evolving - prior stationary (all lakes))
ax[3].plot(mdates.date2num(subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['mid_pt_datetime']), 
    np.cumsum(subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['evolving_outlines_dV_corr (m^3)'] - 
        subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['stationary_outline_dV_corr (m^3)']) / 1e9, 
        color='red', marker='o', markerfacecolor='white', markersize=3, linewidth=1)
# Cumulative bias at the end of the CryoSat-2 era (each table is filtered by its own date column)
cum_sum_last_CS2_midcyc_date = np.cumsum((
    subset_CS2_IS2_SARInPostExpansion_evolving_sum_df[
        subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
        ['evolving_outlines_dV_corr (m^3)'] - 
        subset_CS2_IS2_SARInPostExpansion_stationary_sum_df[
        subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
        ['stationary_outline_dV_corr (m^3)']) / 1e9).iloc[-1]
ax[3].plot(mdates.date2num(superset_IS2_stationary_sum_df['mid_pt_datetime']), 
    np.cumsum((superset_IS2_evolving_sum_df['evolving_outlines_dV_corr (m^3)'] -
        superset_IS2_stationary_sum_df['stationary_outline_dV_corr (m^3)']) / 1e9) + cum_sum_last_CS2_midcyc_date, 
        color='red', marker='o', markerfacecolor='white', markersize=4, linewidth=2)


# Plot bias (evolving - prior stationary (subset at evolving lakes))
ax[3].plot(mdates.date2num(subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['mid_pt_datetime']), 
    np.cumsum(subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['evolving_outlines_dV_corr (m^3)'] - 
        subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['stationary_outline_dV_corr (m^3)']) / 1e9, 
        color='lightcoral', marker='o', markersize=3, markerfacecolor='k', linewidth=1)
cum_sum_last_CS2_midcyc_date = np.cumsum((
    subset_CS2_IS2_SARInPostExpansion_evolving_sum_df[
        subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
        ['evolving_outlines_dV_corr (m^3)'] - 
        subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df[
        subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
        ['stationary_outline_dV_corr (m^3)']) / 1e9).iloc[-1]
ax[3].plot(mdates.date2num(superset_IS2_stationary_subset_sum_df['mid_pt_datetime']), 
    np.cumsum((superset_IS2_evolving_sum_df['evolving_outlines_dV_corr (m^3)'] -
        superset_IS2_stationary_subset_sum_df['stationary_outline_dV_corr (m^3)']) / 1e9) + cum_sum_last_CS2_midcyc_date, 
        color='lightcoral', marker='o', markersize=4, markerfacecolor='k', linewidth=2)

# Plot bias (evolving - updated stationary)
# The x values here are taken from the all-lakes stationary tables, not from the evolving-union tables
ax[3].plot(mdates.date2num(subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['mid_pt_datetime']), 
    np.cumsum(subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['evolving_outlines_dV_corr (m^3)'] - 
        subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df['stationary_outline_dV_corr (m^3)']) / 1e9, 
        color='darkred', marker='o', markersize=3, linestyle='solid', linewidth=1)
cum_sum_last_CS2_midcyc_date = np.cumsum((
    subset_CS2_IS2_SARInPostExpansion_evolving_sum_df[
        subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
        ['evolving_outlines_dV_corr (m^3)'] - 
        subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df[
        subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
        ['stationary_outline_dV_corr (m^3)']) / 1e9).iloc[-1]
ax[3].plot(mdates.date2num(superset_IS2_stationary_sum_df['mid_pt_datetime']), 
    np.cumsum((superset_IS2_evolving_sum_df['evolving_outlines_dV_corr (m^3)'] -
        superset_IS2_evolving_union_sum_df['stationary_outline_dV_corr (m^3)']) / 1e9) + cum_sum_last_CS2_midcyc_date, 
        color='darkred', marker='o', markersize=4, linewidth=2)


# Set y axes limits
# The automatic limits of the top two panels are read and then immediately deleted;
# no y limits are actually set here
ax0_auto_ymin, ax0_auto_ymax = ax[0].get_ylim()
ax1_auto_ymin, ax1_auto_ymax = ax[1].get_ylim()
del ax0_auto_ymin, ax0_auto_ymax, ax1_auto_ymin, ax1_auto_ymax

# Add colorbar, legends, and titles
ax[0].set_xlabel('year')

# Add annotations for satellite/mode dates
# Each label is placed 15 days after the corresponding era start so the text sits to the right of that date
start_date_text = pd.to_datetime(cyc_dates['cyc_start_datetimes'].iloc[0]) + pd.Timedelta(days=15)
sarin_expand_date_text = pd.to_datetime(cyc_dates[cyc_dates['cyc_start_datetimes'] == '2013-10-01T18:00:00.000000000']['cyc_start_datetimes'].iloc[0]) + pd.Timedelta(days=15)
is2_start_date_text = pd.to_datetime(cyc_dates[cyc_dates['dataset'] == 'ICESat2_ATL15'].iloc[0]['cyc_start_datetimes']) + pd.Timedelta(days=15)

# Labels are anchored at y = -800 in the data coordinates of the top panel
ax[0].text(start_date_text, -800, 'CryoSat-2 era begins', horizontalalignment='left', verticalalignment='top', color='k')
ax[0].text(sarin_expand_date_text, -800, 'CryoSat-2 SARIn mode expands', horizontalalignment='left', verticalalignment='top', color='k')
ax[0].text(is2_start_date_text, -800, 'ICESat-2 era begins', horizontalalignment='left', verticalalignment='top', color='k')

# Add legends
# Create legend handles (proxy artists with no data, used only to draw legend entries)
stationary_line = plt.Line2D([], [], color=stationary_outline_color, linestyle='solid', linewidth=2)
stationary_subset_line = plt.Line2D([], [], color=stationary_outline_subset_color, linestyle='dashed', linewidth=2)
# Full colormap for evolving outlines with one representative data point:
# one short line per cycle start date, coloured by date, with a marker only on the middle one
mid_idx = len(cyc_dates['cyc_start_datetimes']) // 2
evolving_scatter_line = []
for i, dt in enumerate(cyc_dates['cyc_start_datetimes']):
    if i == mid_idx:  # only the middle line gets a marker
        evolving_scatter_line.append(
            Line2D([0], [0],
                   color=cmap(norm(mdates.date2num(dt))),
                   linestyle='solid',
                   marker='o', markersize=5))
    else:
        evolving_scatter_line.append(
            Line2D([0], [0],
                   color=cmap(norm(mdates.date2num(dt))),
                   linestyle='solid'))
evolving_union_line = plt.Line2D([], [], color='k', linestyle='dotted', linewidth=2)
# Line-plus-marker handles matching the marker styles used for the cumulative dV series on ax[2]
stationary_scatter_line = plt.Line2D([], [], color=stationary_outline_color, marker='o', markersize=5, markerfacecolor='white', linewidth=2)
stationary_subset_scatter_line = plt.Line2D([], [], color=stationary_outline_subset_color, marker='o', markersize=5, markerfacecolor='white', linewidth=2)
evolving_union_scatter_line = plt.Line2D([], [], color='k', marker='o', markersize=5, markerfacecolor='k', linewidth=2)
# Handles matching the three bias series on ax[3]
bias_line = plt.Line2D([], [], color='red', marker='o', markersize=5, markerfacecolor='white', linewidth=2)  # evolving - prior stationary (all lakes)
bias_line2 = plt.Line2D([], [], color='lightcoral', marker='o', markersize=5, markerfacecolor='k', linewidth=2)  # evolving - prior stationary (only at evolving lakes)
bias_line3 = plt.Line2D([], [], color='darkred', marker='o', markersize=5, linewidth=2)  # evolving - updated stationary (only evolving lakes)

# Left edge of the legends, in axes-fraction coordinates
x0 = 0.03
# The tuple of evolving handles is drawn side by side in one legend entry via HandlerTuple.
# Lake counts in the labels come from evolving_outlines_lakes / no_evolving_outlines_lakes.
# NOTE(review): those counts look continent-wide while this cell reads the WAIS-subset sums — confirm the n values
# NOTE(review): this cell draws no data series on ax[0] itself, so legend0 describes series not plotted here — confirm
legend0 = ax[0].legend([stationary_line,
                       stationary_subset_line,
                       tuple(evolving_scatter_line),
                       evolving_union_line],
    [f'prior stationary outlines at all analyzed lakes (n={len(no_evolving_outlines_lakes) + len(evolving_outlines_lakes)})',
     f'prior stationary outlines at evolving lakes subset (n={len(evolving_outlines_lakes)})',
     f'evolving outlines (n={len(evolving_outlines_lakes)})',
     f'updated stationary outlines (n={len(evolving_outlines_lakes)})'],
    handlelength=3, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
    loc='upper left', bbox_to_anchor=(x0, 1))

# Legend for the cumulative dV panel
legend2 = ax[2].legend([stationary_scatter_line,
                       stationary_subset_scatter_line,
                       tuple(evolving_scatter_line),
                       evolving_union_scatter_line],
    [f'prior stationary outlines at all analyzed lakes (n={len(no_evolving_outlines_lakes) + len(evolving_outlines_lakes)})',
     f'prior stationary outlines at evolving lakes subset (n={len(evolving_outlines_lakes)})',
     f'evolving outlines (n={len(evolving_outlines_lakes)})',
     f'updated stationary outlines (n={len(evolving_outlines_lakes)})'],
    handlelength=3, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
    loc='upper left', bbox_to_anchor=(x0, 1))

# Legend for the bias panel, anchored lower in the axes (y = 0.27) than the other legends
legend3 = ax[3].legend([bias_line, bias_line2, bias_line3], 
                      [f'bias, evolving (n={len(evolving_outlines_lakes)}) − prior stationary at all analyzed lakes (n={len(no_evolving_outlines_lakes) + len(evolving_outlines_lakes)})',
                       f'bias, evolving (n={len(evolving_outlines_lakes)}) − prior stationary at evolving lakes subset (n={len(evolving_outlines_lakes)})',
                       f'bias, evolving (n={len(evolving_outlines_lakes)}) − updated stationary (n={len(evolving_outlines_lakes)})'
                      ],
    handlelength=3, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
    loc='upper left', bbox_to_anchor=(x0, 0.27))

# Date formatting and x limits are applied to panels 0 and 2 only
# NOTE(review): panels 1 and 3 get no date locator/formatter or x limits here — confirm this is intended
for row in [0,2]:
    # Remove x tick labels
    # NOTE(review): the major formatter set a few lines below replaces the empty labels,
    # so year labels presumably reappear on these panels — confirm intent
    ax[row].set_xticklabels([])

    # Format the x-axis to display years only
    ax[row].xaxis.set_major_locator(mdates.YearLocator(base=1))  # Major ticks every year (base=1)
    ax[row].xaxis.set_minor_locator(mdates.MonthLocator(bymonth=[1,4,7,10]))  # Minor ticks every quarter
    ax[row].xaxis.set_major_formatter(mdates.DateFormatter('%Y'))  # Display major ticks as years

    # Set x-axis limits
    ax[row].set(xlim=(cyc_dates['cyc_start_datetimes'].iloc[0],
        # Set righthand x-axis limit slightly earlier to prevent tick mark displaying when there is no data point
        (cyc_dates['cyc_end_datetimes'].iloc[-1] - datetime.timedelta(days=2))))

# Set axes titles
ax[0].set_ylabel('active area [km$^2$]')
ax[1].set_ylabel('DIC export [Gg C]')
ax[2].set_ylabel('cumulative $dV$ [km$^3$]')
ax[3].set_ylabel('cumulative $dV$ bias [km$^3$]')

# Adding annotations at the top left of the subplot
ax_array = np.array(ax)  # Wrap the axes returned by plt.subplots in a numpy array so .flatten() can be used
char_index = 97  # ASCII value for 'a'
for i, ax_i in enumerate(ax_array.flatten()):
    # `transform=ax_i.transAxes` makes coordinates relative to the axes (0,0 is bottom left and 1,1 is top right)
    ax_i.text(0.01, 0.97, chr(char_index), transform=ax_i.transAxes, fontsize=16, va='top', ha='left')
    char_index += 1 # Increment the ASCII index to get the next character
    
# # Save and close plot
# plt.savefig(OUTPUT_DIR + '/figures/Fig3_lake_reexamination_results_continental_integration.jpg',
#     dpi=300, bbox_inches='tight')

# Preview plot
plt.show()

In [None]:
# Revised version of the figure, incorporating reviewer feedback to show the
# subset of stationary outlines at evolving lakes
# Changes to make
# - stationary outlines subset for 1) active area, 2) carbon export, 3) cum dV (using dashed cyan line)
# - new panel for dV bias
# - add dV bias (stationary subset - evolving)
# - add open or closed dots for time series to differentiate

# Read in data
# The same three continental-summation csv files are read from each directory below
filenames = {
    key: f'{key}.csv'
    for key in ('superset_IS2_lakes_sum',
                'subset_CS2_IS2_lakes_SARInPreExpansion_sum',
                'subset_CS2_IS2_lakes_SARInPostExpansion_sum')
}


def _read_sum_tables(directory):
    """Read every csv listed in `filenames` from `directory`; return them keyed by name."""
    tables = {}
    for name, filename in filenames.items():
        tables[name] = pd.read_csv(f'{directory}/{filename}')
    return tables


# Evolving outlines (only lakes with evolving outlines)
base_path = 'output/geometric_calcs/sub_continent_basins/updated_stationary_EAIS/evolving_outlines_geom_calc/forward_fill'
dfs = _read_sum_tables(base_path)
superset_IS2_evolving_sum_df = dfs['superset_IS2_lakes_sum']
subset_CS2_IS2_SARInPreExpansion_evolving_sum_df = dfs['subset_CS2_IS2_lakes_SARInPreExpansion_sum']
subset_CS2_IS2_SARInPostExpansion_evolving_sum_df = dfs['subset_CS2_IS2_lakes_SARInPostExpansion_sum']

# Stationary outlines (all lakes)
base_path = 'output/geometric_calcs/sub_continent_basins/updated_stationary_EAIS/stationary_outline_geom_calc/stationary_outlines_at_all_lakes'
dfs = _read_sum_tables(base_path)
superset_IS2_stationary_sum_df = dfs['superset_IS2_lakes_sum']
subset_CS2_IS2_SARInPreExpansion_stationary_sum_df = dfs['subset_CS2_IS2_lakes_SARInPreExpansion_sum']
subset_CS2_IS2_SARInPostExpansion_stationary_sum_df = dfs['subset_CS2_IS2_lakes_SARInPostExpansion_sum']

# Stationary outlines (only at evolving lakes)
base_path = 'output/geometric_calcs/sub_continent_basins/updated_stationary_EAIS/stationary_outline_geom_calc/stationary_outlines_at_evolving_lakes'
dfs = _read_sum_tables(base_path)
superset_IS2_stationary_subset_sum_df = dfs['superset_IS2_lakes_sum']
subset_CS2_IS2_SARInPreExpansion_stationary_subset_sum_df = dfs['subset_CS2_IS2_lakes_SARInPreExpansion_sum']
subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df = dfs['subset_CS2_IS2_lakes_SARInPostExpansion_sum']

# Evolving union (only lakes with evolving outlines)
base_path = 'output/geometric_calcs/sub_continent_basins/updated_stationary_EAIS/stationary_outline_geom_calc/evolving_union_at_evolving_lakes'
dfs = _read_sum_tables(base_path)
superset_IS2_evolving_union_sum_df = dfs['superset_IS2_lakes_sum']
subset_CS2_IS2_SARInPreExpansion_evolving_union_sum_df = dfs['subset_CS2_IS2_lakes_SARInPreExpansion_sum']
subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df = dfs['subset_CS2_IS2_lakes_SARInPostExpansion_sum']


# Setup figure: four vertically stacked panels
nrows, ncols = 4, 1
fig, ax = plt.subplots(nrows, ncols, figsize=(10, 10), constrained_layout=True)

# Store dates and time period for satellite coverage eras (as matplotlib date numbers)
time_span = mdates.date2num(cyc_dates['cyc_end_datetimes'].iloc[-1]) - mdates.date2num(cyc_dates['cyc_start_datetimes'].iloc[0])
start_date = mdates.date2num(cyc_dates['cyc_start_datetimes'].iloc[0])
SARIn_expand_date = mdates.date2num(cyc_dates[cyc_dates['cyc_start_datetimes'] == '2013-10-01T18:00:00.000000000']['cyc_start_datetimes'].iloc[0])
CS2_IS2_tie_pt = mdates.date2num(cyc_dates[cyc_dates['dataset'] == 'ICESat2_ATL15'].iloc[0]['cyc_start_datetimes'])

for row in [0]:
    # Plot horizontal line at zero for reference
    ax[row].axhline(0, color='k', linewidth=0.5)
    # Plot vertical lines to indicate CS2 SARIn mode mask moving inland and ICESat-2 era start
    ax[row].axvline(SARIn_expand_date, color='dimgray', linestyle='solid', linewidth=0.75, ymin=-1, ymax=1, zorder=0)
    ax[row].axvline(CS2_IS2_tie_pt, color='dimgray', linestyle='solid', linewidth=0.75, ymin=-1, ymax=1, zorder=0)

# Pick colormap and make continuous cmap discrete for evolving outlines
# (one color per cycle start date after the first; the norm spans those same dates)
cmap = plt.get_cmap('plasma', len(cyc_start_datetimes[1:]))
norm = plt.Normalize(mdates.date2num(cyc_start_datetimes[1]), 
                    mdates.date2num(cyc_start_datetimes[-1]))

# Store one line per time step (from the second cycle start onward) to use in legend
lines = []  # list of lines to be used for the legend
for dt in cyc_dates['cyc_start_datetimes'][1:]:
    # Color each handle by its own date. The previous version indexed the full
    # series with a counter taken over the [1:] slice, so every handle got the
    # color of the cycle before it.
    line, = ax[0].plot(1, 1, color=cmap(norm(mdates.date2num(dt))))
    lines.append(line)

# Define colors and linestyles that will be reused and create lines for legend
stationary_outline_color  = 'turquoise'
stationary_outline_subset_color  = 'darkcyan'

# Panel - cumulative dV/dt --------------------------------------------------
# Every series on this panel is drawn in two pieces: a thin piece from the
# SARInPostExpansion csv and a thick piece from the superset_IS2 csv. The thick
# piece is shifted by the thin piece's cumulative sum over the rows dated up to
# the last CryoSat-2 SARIn cycle, so the two pieces join.
# dV columns are in m^3; dividing by 1e9 converts to km^3.

# End of the last CryoSat-2 SARIn cycle, as a string that is compared against
# the 'mid_pt_datetime' columns below
CS2_last_cyc_date = str(cyc_dates[cyc_dates['dataset'] == 'CryoSat2_SARIn']['cyc_end_datetimes'].iloc[-1])

# Plot dV time series of evolving outlines using LineCollection from points/segments to plot multi-colored line
x = mdates.date2num(subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['mid_pt_datetime'])
y = np.cumsum(subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['evolving_outlines_dV_corr (m^3)'] / 1e9)
points = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
lc.set_array(x)
lc.set_linewidth(1)
line = ax[2].add_collection(lc)
scatter = ax[2].scatter(x, y, c=x, cmap=cmap, norm=norm, s=8)

cum_sum_last_CS2_midcyc_date = np.cumsum(
    subset_CS2_IS2_SARInPostExpansion_evolving_sum_df[
        subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
        ['evolving_outlines_dV_corr (m^3)'] / 1e9).iloc[-1]

x = mdates.date2num(superset_IS2_evolving_sum_df['mid_pt_datetime'])
y = np.cumsum(superset_IS2_evolving_sum_df['evolving_outlines_dV_corr (m^3)'] / 1e9) + cum_sum_last_CS2_midcyc_date
points = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
lc.set_array(x)
lc.set_linewidth(2)
line = ax[2].add_collection(lc)
scatter = ax[2].scatter(x, y, c=x, cmap=cmap, norm=norm, s=14)

# Plot dV time series of stationary outline of all lakes
ax[2].plot(mdates.date2num(subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['mid_pt_datetime']),
    np.cumsum(subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['stationary_outline_dV_corr (m^3)'] / 1e9),
    color=stationary_outline_color, marker='o', markerfacecolor='white', markersize=3, linewidth=1)
cum_sum_last_CS2_midcyc_date = np.cumsum(
    subset_CS2_IS2_SARInPostExpansion_stationary_sum_df[
        subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
        ['stationary_outline_dV_corr (m^3)'] / 1e9).iloc[-1]
ax[2].plot(mdates.date2num(superset_IS2_stationary_sum_df['mid_pt_datetime']),
    np.cumsum(superset_IS2_stationary_sum_df['stationary_outline_dV_corr (m^3)'] / 1e9) + cum_sum_last_CS2_midcyc_date,
    color=stationary_outline_color, marker='o', markerfacecolor='white', markersize=4, linewidth=2)

# Plot dV time series of stationary outline of lakes with evolving outlines (subset)
ax[2].plot(mdates.date2num(subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['mid_pt_datetime']),
    np.cumsum(subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['stationary_outline_dV_corr (m^3)'] / 1e9),
    color=stationary_outline_subset_color, marker='o', markersize=3, markerfacecolor='white', linewidth=1)
# The row mask is built from the same dataframe it filters. It was previously
# built from the all-lakes stationary dataframe, which only gives the right rows
# if both tables happen to share identical indices and dates.
cum_sum_last_CS2_midcyc_date = np.cumsum(
    subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df[
        subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
        ['stationary_outline_dV_corr (m^3)'] / 1e9).iloc[-1]
ax[2].plot(mdates.date2num(superset_IS2_stationary_subset_sum_df['mid_pt_datetime']),
    np.cumsum(superset_IS2_stationary_subset_sum_df['stationary_outline_dV_corr (m^3)'] / 1e9) + cum_sum_last_CS2_midcyc_date,
    color=stationary_outline_subset_color, marker='o', markersize=4, markerfacecolor='white', linewidth=2)

# Plot dV time series of updated stationary outline
ax[2].plot(mdates.date2num(subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df['mid_pt_datetime']),
    np.cumsum(subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df['stationary_outline_dV_corr (m^3)']) / 1e9,
    color='k', marker='o', markersize=3, linewidth=1)
cum_sum_last_CS2_midcyc_date = np.cumsum((
    subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df[
        subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
        ['stationary_outline_dV_corr (m^3)']) / 1e9).iloc[-1]
ax[2].plot(mdates.date2num(superset_IS2_evolving_union_sum_df['mid_pt_datetime']),
    np.cumsum(superset_IS2_evolving_union_sum_df['stationary_outline_dV_corr (m^3)'] / 1e9) + cum_sum_last_CS2_midcyc_date,
    color='k', marker='o', markersize=4, linewidth=2)


# Panel - cumulative dV/dt bias --------------------------------------------------
# Each bias series is the evolving-outline dV minus a stationary-outline dV,
# accumulated over time and divided by 1e9 (m^3 -> km^3). As on the dV panel,
# the thick superset_IS2 piece is offset by the thin piece's cumulative sum
# over rows dated up to CS2_last_cyc_date.
date_col = 'mid_pt_datetime'
evolving_col = 'evolving_outlines_dV_corr (m^3)'
stationary_col = 'stationary_outline_dV_corr (m^3)'

# Evolving-outline terms shared by all three bias series
evolving_thin_df = subset_CS2_IS2_SARInPostExpansion_evolving_sum_df
evolving_thin_dV = evolving_thin_df[evolving_col]
evolving_thin_dV_to_CS2_end = evolving_thin_df[evolving_thin_df[date_col] <= CS2_last_cyc_date][evolving_col]
evolving_thick_dV = superset_IS2_evolving_sum_df[evolving_col]

# Plot bias (evolving - prior stationary (all lakes))
all_thin_df = subset_CS2_IS2_SARInPostExpansion_stationary_sum_df
all_thick_df = superset_IS2_stationary_sum_df
ax[3].plot(
    mdates.date2num(all_thin_df[date_col]),
    np.cumsum(evolving_thin_dV - all_thin_df[stationary_col]) / 1e9,
    color='red', marker='o', markerfacecolor='white', markersize=3, linewidth=1)
all_thin_dV_to_CS2_end = all_thin_df[all_thin_df[date_col] <= CS2_last_cyc_date][stationary_col]
cum_sum_last_CS2_midcyc_date = np.cumsum(
    (evolving_thin_dV_to_CS2_end - all_thin_dV_to_CS2_end) / 1e9).iloc[-1]
ax[3].plot(
    mdates.date2num(all_thick_df[date_col]),
    np.cumsum((evolving_thick_dV - all_thick_df[stationary_col]) / 1e9) + cum_sum_last_CS2_midcyc_date,
    color='red', marker='o', markerfacecolor='white', markersize=4, linewidth=2)

# Plot bias (evolving - prior stationary (subset at evolving lakes))
subset_thin_df = subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df
subset_thick_df = superset_IS2_stationary_subset_sum_df
ax[3].plot(
    mdates.date2num(subset_thin_df[date_col]),
    np.cumsum(evolving_thin_dV - subset_thin_df[stationary_col]) / 1e9,
    color='lightcoral', marker='o', markersize=3, markerfacecolor='k', linewidth=1)
subset_thin_dV_to_CS2_end = subset_thin_df[subset_thin_df[date_col] <= CS2_last_cyc_date][stationary_col]
cum_sum_last_CS2_midcyc_date = np.cumsum(
    (evolving_thin_dV_to_CS2_end - subset_thin_dV_to_CS2_end) / 1e9).iloc[-1]
ax[3].plot(
    mdates.date2num(subset_thick_df[date_col]),
    np.cumsum((evolving_thick_dV - subset_thick_df[stationary_col]) / 1e9) + cum_sum_last_CS2_midcyc_date,
    color='lightcoral', marker='o', markersize=4, markerfacecolor='k', linewidth=2)

# Plot bias (evolving - updated stationary)
# The x values for this series are taken from the all-lakes stationary tables
union_thin_df = subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df
union_thick_df = superset_IS2_evolving_union_sum_df
ax[3].plot(
    mdates.date2num(all_thin_df[date_col]),
    np.cumsum(evolving_thin_dV - union_thin_df[stationary_col]) / 1e9,
    color='darkred', marker='o', markersize=3, linestyle='solid', linewidth=1)
union_thin_dV_to_CS2_end = union_thin_df[union_thin_df[date_col] <= CS2_last_cyc_date][stationary_col]
cum_sum_last_CS2_midcyc_date = np.cumsum(
    (evolving_thin_dV_to_CS2_end - union_thin_dV_to_CS2_end) / 1e9).iloc[-1]
ax[3].plot(
    mdates.date2num(all_thick_df[date_col]),
    np.cumsum((evolving_thick_dV - union_thick_df[stationary_col]) / 1e9) + cum_sum_last_CS2_midcyc_date,
    color='darkred', marker='o', markersize=4, linewidth=2)


# Label the x axis of the first panel
# (the automatic y-limits were previously fetched here and deleted unused; that dead code is removed)
ax[0].set_xlabel('year')

# Add annotations for satellite/mode dates
# Each label is anchored 15 days after its era boundary and at y = -800 in the
# first panel's data coordinates, with its top-left corner at the anchor
start_date_text = pd.to_datetime(cyc_dates['cyc_start_datetimes'].iloc[0]) + pd.Timedelta(days=15)
sarin_expand_date_text = pd.to_datetime(cyc_dates[cyc_dates['cyc_start_datetimes'] == '2013-10-01T18:00:00.000000000']['cyc_start_datetimes'].iloc[0]) + pd.Timedelta(days=15)
is2_start_date_text = pd.to_datetime(cyc_dates[cyc_dates['dataset'] == 'ICESat2_ATL15'].iloc[0]['cyc_start_datetimes']) + pd.Timedelta(days=15)

ax[0].text(start_date_text, -800, 'CryoSat-2 era begins', horizontalalignment='left', verticalalignment='top', color='k')
ax[0].text(sarin_expand_date_text, -800, 'CryoSat-2 SARIn mode expands', horizontalalignment='left', verticalalignment='top', color='k')
ax[0].text(is2_start_date_text, -800, 'ICESat-2 era begins', horizontalalignment='left', verticalalignment='top', color='k')

# Add legends
# Handles below are stand-alone Line2D objects built only for the legends

# Line-only handles (used by the first panel's legend)
stationary_line = Line2D([], [], color=stationary_outline_color, linestyle='solid', linewidth=2)
stationary_subset_line = Line2D([], [], color=stationary_outline_subset_color, linestyle='dashed', linewidth=2)
evolving_union_line = Line2D([], [], color='k', linestyle='dotted', linewidth=2)

# Full colormap for evolving outlines: one handle per cycle start date, of
# which only the middle one carries a marker as a representative data point
mid_idx = len(cyc_dates['cyc_start_datetimes']) // 2
evolving_scatter_line = [
    Line2D([0], [0],
           color=cmap(norm(mdates.date2num(dt))),
           linestyle='solid',
           **({'marker': 'o', 'markersize': 5} if i == mid_idx else {}))
    for i, dt in enumerate(cyc_dates['cyc_start_datetimes'])
]

# Line-plus-marker handles (used by the dV and dV-bias legends)
stationary_scatter_line = Line2D([], [], color=stationary_outline_color, marker='o', markersize=5, markerfacecolor='white', linewidth=2)
stationary_subset_scatter_line = Line2D([], [], color=stationary_outline_subset_color, marker='o', markersize=5, markerfacecolor='white', linewidth=2)
evolving_union_scatter_line = Line2D([], [], color='k', marker='o', markersize=5, markerfacecolor='k', linewidth=2)
bias_line = Line2D([], [], color='red', marker='o', markersize=5, markerfacecolor='white', linewidth=2)  # evolving - prior stationary (all lakes)
bias_line2 = Line2D([], [], color='lightcoral', marker='o', markersize=5, markerfacecolor='k', linewidth=2)  # evolving - prior stationary (only at evolving lakes)
bias_line3 = Line2D([], [], color='darkred', marker='o', markersize=5, linewidth=2)  # evolving - updated stationary (only evolving lakes)

# Horizontal anchor shared by all three legends
x0 = 0.03

# The first and third panels use the same four labels
outline_labels = [
    f'prior stationary outlines at all analyzed lakes (n={len(no_evolving_outlines_lakes) + len(evolving_outlines_lakes)})',
    f'prior stationary outlines at evolving lakes subset (n={len(evolving_outlines_lakes)})',
    f'evolving outlines (n={len(evolving_outlines_lakes)})',
    f'updated stationary outlines (n={len(evolving_outlines_lakes)})',
]
bias_labels = [
    f'bias, evolving (n={len(evolving_outlines_lakes)}) − prior stationary at all analyzed lakes (n={len(no_evolving_outlines_lakes) + len(evolving_outlines_lakes)})',
    f'bias, evolving (n={len(evolving_outlines_lakes)}) − prior stationary at evolving lakes subset (n={len(evolving_outlines_lakes)})',
    f'bias, evolving (n={len(evolving_outlines_lakes)}) − updated stationary (n={len(evolving_outlines_lakes)})',
]

# A tuple of handles is passed as one handle so HandlerTuple draws it within a single entry
legend0 = ax[0].legend(
    [stationary_line, stationary_subset_line, tuple(evolving_scatter_line), evolving_union_line],
    outline_labels,
    handlelength=3, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
    loc='upper left', bbox_to_anchor=(x0, 1))

legend2 = ax[2].legend(
    [stationary_scatter_line, stationary_subset_scatter_line,
     tuple(evolving_scatter_line), evolving_union_scatter_line],
    outline_labels,
    handlelength=3, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
    loc='upper left', bbox_to_anchor=(x0, 1))

legend3 = ax[3].legend(
    [bias_line, bias_line2, bias_line3],
    bias_labels,
    handlelength=3, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
    loc='upper left', bbox_to_anchor=(x0, 0.27))

# x-axis formatting, applied to the first and third panels only
# Right-hand limit is set slightly earlier than the last cycle end to prevent a
# tick mark displaying when there is no data point
x_limits = (cyc_dates['cyc_start_datetimes'].iloc[0],
            cyc_dates['cyc_end_datetimes'].iloc[-1] - datetime.timedelta(days=2))
for row in [0,2]:
    panel = ax[row]

    # Remove x tick labels
    # NOTE(review): the DateFormatter assigned below appears to replace the
    # empty labels set here -- confirm whether labels should stay hidden
    panel.set_xticklabels([])

    # Years on the major ticks (one per year), quarters on the minor ticks
    panel.xaxis.set_major_locator(mdates.YearLocator(base=1))
    panel.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=[1,4,7,10]))
    panel.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))

    # Set x-axis limits
    panel.set_xlim(*x_limits)

# Set y-axis labels, one per panel from top to bottom
y_labels = ['active area [km$^2$]',
            'DIC export [Gg C]',
            'cumulative $dV$ [km$^3$]',
            'cumulative $dV$ bias [km$^3$]']
for panel, y_label in zip(ax, y_labels):
    panel.set_ylabel(y_label)

# Letter each subplot (a, b, c, ...) at its top left
ax_array = np.array(ax)  # numpy array so .flatten() works whatever the layout
for char_index, ax_i in enumerate(ax_array.flatten(), start=ord('a')):
    # `transform=ax_i.transAxes` makes coordinates relative to the axes (0,0 is bottom left and 1,1 is top right)
    ax_i.text(0.01, 0.97, chr(char_index), transform=ax_i.transAxes, fontsize=16, va='top', ha='left')

# # Save and close plot
# plt.savefig(OUTPUT_DIR + '/figures/Fig3_lake_reexamination_results_continental_integration.jpg',
#     dpi=300, bbox_inches='tight')

# Preview plot
plt.show()

# Figures

In [None]:
# Load the stationary subglacial lake outlines
stationary_outlines_path = os.path.join(os.getcwd(), 'output/lake_outlines/stationary_outlines/stationary_outlines_gdf.geojson')
stationary_lakes_gdf = gpd.read_file(stationary_outlines_path)

# Lakes are split by what the evolving-outlines folder holds for them
folder_path = 'output/lake_outlines/evolving_outlines'

# Lakes with evolving outlines (.geojson)
evolving_outlines_lakes = filter_gdf_by_folder_contents(
    stationary_lakes_gdf, folder_path, file_extension='geojson', exclude=False)

# Special case: Site_B and Site_C are now a combined lake (Site_B_Site_C), so
# both of their rows are added to the lakes with evolving outlines
include_list = ['Site_B', 'Site_C']
is_included = stationary_lakes_gdf['name'].isin(include_list)
included_rows = stationary_lakes_gdf[is_included]
evolving_outlines_lakes = pd.concat([evolving_outlines_lakes, included_rows]).drop_duplicates()
print(f'lakes with evolving outlines: {len(evolving_outlines_lakes)}')

# Lakes with non-dynamic outlines (.txt)
no_evolving_outlines_lakes = filter_gdf_by_folder_contents(
    stationary_lakes_gdf, folder_path, file_extension='txt', exclude=False)
print(f'lakes without evolving outlines: {len(no_evolving_outlines_lakes)}')

# All analyzed lakes within SARIn mode
# NOTE(review): no code follows this heading in the cell

## Fig. 2

In [None]:
# Font family and font sizes written into matplotlib's rcParams
fig2_font_settings = {
    'font.family': 'DejaVu Sans',
    'font.size': 9,
    'axes.labelsize': 22,
    'axes.titlesize': 9,
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
    'legend.fontsize': 12,
}
plt.rcParams.update(fig2_font_settings)

In [None]:
# Boolean flag: whether to use the forward-filled version of the evolving outlines
forward_fill = True
# forward_fill = False

# Select lakes to be included in plot; desired_order also fixes the column order
desired_order = ['ConwaySubglacialLake', 'David_s1', 'Slessor_23']
selected_lakes = stationary_outlines_gdf[stationary_outlines_gdf['name'].isin(desired_order)]
stationary_outlines_gdf_filtered = gpd.GeoDataFrame(pd.concat([selected_lakes[selected_lakes['name'] == name] for name in desired_order]))

# Number of rows and columns
nrows, ncols = 4, 3

# Create a 4x3 grid of plots (4 metrics, 3 lakes per metric)
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 20), constrained_layout=True)

# Define the display names for lakes (same order as desired_order)
lake_names = ['Conway Subglacial Lake', r'David$_{\text{s1}}$', 'Slessor$_{23}$']

# Add titles to the top row of subplots
for col, title in enumerate(lake_names):
    axs[0, col].set_title(title, fontsize=18, pad=12)

# Define color that will be reused
stationary_outline_color  = 'darkturquoise'

for row in range(1, nrows):
    # Share y-axis within each row but not between rows.
    # Start at column 1: the first column is the axes the others share with, so
    # it is no longer told to share with itself.
    for col in range(1, ncols):
        axs[row, col].sharey(axs[row, 0])

# Pick colormap and normalize to cyc_start_datetimes
cmap = plt.get_cmap('plasma', len(cyc_start_datetimes))
norm = plt.Normalize(mdates.date2num(cyc_start_datetimes[1]), 
                    mdates.date2num(cyc_end_datetimes[-1]))

for idx, (lake_idx, lake) in enumerate(stationary_outlines_gdf_filtered.iterrows()):
    # Select the row by index and convert it to a GeoDataFrame
    lake_gdf = stationary_outlines_gdf_filtered.loc[[lake_idx]]
    lake_name = lake_gdf['name'].iloc[0]
    stationary_outline = lake_gdf['geometry']
    print(f"\nProcessing lake: {lake_name}")
    
    # Attempt to open the evolving outlines GeoJSON file
    try:
        evolving_outlines_orig_gdf = gpd.read_file('output/lake_outlines/evolving_outlines/{}.geojson'.format(lake_name))
    except fiona.errors.DriverError:
        print(f"File for {lake_name} not found. Skipping...")
        continue  # Skip the rest of the function if the file doesn't exist
    
    # Attempt to open the geometric calculations CSV files
    try:
        if forward_fill==True: 
            evolving_geom_calcs_orig_df = pd.read_csv('output/geometric_calcs/evolving_outlines_geom_calc/{}.csv'.format(lake_name))
            evolving_geom_calcs_df = pd.read_csv('output/geometric_calcs/evolving_outlines_geom_calc/forward_fill/{}.csv'.format(lake_name))
        elif forward_fill==False:
            print('using not forward fill')
            evolving_geom_calcs_df = pd.read_csv('output/geometric_calcs/evolving_outlines_geom_calc/{}.csv'.format(lake_name))
        evolving_union_geom_calcs_df = pd.read_csv('output/geometric_calcs/stationary_outline_geom_calc/evolving_union_at_evolving_lakes/{}.csv'.format(lake_name))
        stationary_geom_calcs_df = pd.read_csv('output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes/{}.csv'.format(lake_name))
    except FileNotFoundError:
        print(f"At least of of the geometric calculations CSV files for {lake_name} not found. Skipping...")
        continue  # Skip the rest of the function if the file doesn't exist
    
    # Convert of strings to datetime
    evolving_geom_calcs_df['mid_pt_datetime'] = pd.to_datetime(evolving_geom_calcs_df['mid_pt_datetime'])

    
    # Panel - evolving outlines ------------------------------------------------------
    
    # Find evolving and stationary outlines union for plotting extent
    evolving_stationary_union_gdf = gpd.GeoDataFrame(
        geometry=[lake_gdf.geometry.iloc[0].union(evolving_outlines_orig_gdf.geometry.union_all())],
        crs=lake_gdf.crs)
    x_min, y_min, x_max, y_max = evolving_stationary_union_gdf.bounds.iloc[0]

    # Make plots a uniform size
    # Make x_min, y_min, x_max, and y_max define a square area centered at the original midpoints
    # Calculate the midpoints of the current bounds
    x_mid = (x_min + x_max) / 2
    y_mid = (y_min + y_max) / 2
    
    # Calculate the current spans of the x and y dimensions
    x_span = x_max - x_min
    y_span = y_max - y_min
    
    # Determine the maximum span to ensure square dimensions
    max_span = max(x_span, y_span)
    
    # Update the min and max values to match the new span, keeping the midpoint the same
    x_min = x_mid - max_span / 2
    x_max = x_mid + max_span / 2
    y_min = y_mid - max_span / 2
    y_max = y_mid + max_span / 2
    
    buffer_frac = 0.35
    x_buffer = abs(x_max-x_min)*buffer_frac
    y_buffer = abs(y_max-y_min)*buffer_frac

    # Plot MOA surface imagery
    mask_x = (moa_highres_da.x >= x_min-x_buffer) & (moa_highres_da.x <= x_max+x_buffer)
    mask_y = (moa_highres_da.y >= y_min-y_buffer) & (moa_highres_da.y <= y_max+y_buffer)
    moa_highres_da_subset = moa_highres_da.where(mask_x & mask_y, drop=True)
    axs[0,idx].imshow(moa_highres_da_subset[0,:,:], cmap='gray', clim=[14000, 17000], extent=[x_min-x_buffer, x_max+x_buffer, y_min-y_buffer, y_max+y_buffer])
    
    # Use for loop to store each time slice as line segment to use in legend
    # And plot each evolving outline in the geodataframe color by date
    lines = []  # list of lines to be used for the legend
    for dt_idx, dt in enumerate(cyc_start_datetimes[1:]):
        x = 1; y = 1
        line, = axs[0,idx].plot(x, y, color=cmap(norm(mdates.date2num(cyc_start_datetimes[dt_idx]))))
        lines.append(line)
        
        # Filter rows that match the current time step
        evolving_outlines_gdf_dt_sub = evolving_outlines_orig_gdf[evolving_outlines_orig_gdf['mid_pt_datetime'] == dt]
    
        # Plotting the subset if not empty
        if not evolving_outlines_gdf_dt_sub.empty:
            evolving_outlines_gdf_dt_sub.boundary.plot(ax=axs[0,idx], 
                color=cmap(norm(mdates.date2num(cyc_start_datetimes[dt_idx]))), linewidth=1)
    
    # Plot stationary outline
    stationary_outlines_gdf['geometry'].boundary.plot(ax=axs[0,idx], color=stationary_outline_color, linewidth=2)

    # Import evolving_outlines_union_gdf and plot
    evolving_union_gdf = evolving_outlines_union_gdf[evolving_outlines_union_gdf['name'] == lake_name]
    evolving_union_gdf.boundary.plot(ax=axs[0,idx], color='k', linestyle='dotted', linewidth=2)

    # Plot inset map
    axIns = axs[0,idx].inset_axes([0.01, -0.01, 0.3, 0.3]) # [left, bottom, width, height] (fractional axes coordinates)
    axIns.set_aspect('equal')
    moa_2014_coastline.plot(ax=axIns, color='gray', edgecolor='k', linewidth=0.1, zorder=3)
    moa_2014_groundingline.plot(ax=axIns, color='ghostwhite', edgecolor='k', linewidth=0.1, zorder=3)
    axIns.axis('off')
    # Plot star to indicate location
    axIns.scatter(((x_max+x_min)/2), ((y_max+y_min)/2), marker='*', 
        linewidth=2, color='k', s=30, zorder=3)

    # Change polar stereographic m to km
    km_scale = 1e3
    ticks_x = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x/km_scale))
    axs[0,idx].xaxis.set_major_formatter(ticks_x)
    ticks_y = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x/km_scale))
    axs[0,idx].yaxis.set_major_formatter(ticks_y)
    
    # Set axes limits
    axs[0,idx].set(xlim=(x_min-x_buffer, x_max+x_buffer), ylim=(y_min-y_buffer, y_max+y_buffer))


    # Panel - Active area ---------------------------------------------
    
    # Plot horizontal zero line for reference
    axs[1,idx].axhline(0, color='k', linewidth=1)
    
    # Plot stationary outline and evolving outlines unary union areas
    axs[1,idx].axhline(np.divide(lake_gdf['area (m^2)'], 1e6).values, 
        color=stationary_outline_color, linestyle='solid', linewidth=2)
    axs[1,idx].axhline(np.divide(evolving_union_gdf['area (m^2)'].iloc[0], 1e6), 
        color='k', linestyle='dotted', linewidth=2)

    # Plot evolving outlines
    x = mdates.date2num(evolving_geom_calcs_df['mid_pt_datetime'])
    y = np.divide(evolving_geom_calcs_df['evolving_outlines_area (m^2)'], 1e6)
    
    # Create points and segments for LineCollection
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)
    
    # Create a LineCollection, using the discrete colormap and norm
    lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
    lc.set_array(x)
    lc.set_linewidth(2)

    # Plot multi-colored line and scatter for data points
    line = axs[1,idx].add_collection(lc)
    # Only plot data points of original evolving outlines (no forward-filled data points)
    y = np.divide(evolving_geom_calcs_orig_df['evolving_outlines_area (m^2)'], 1e6)
    scatter = axs[1,idx].scatter(x, y, c=x, cmap=cmap, s=9, norm=norm, zorder=2)
    
    # Get the maximum y value across all data for this lake
    if idx == 0:
        max_y = max(
            np.divide(lake_gdf['area (m^2)'], 1e6).values[0],
            np.divide(evolving_union_gdf['area (m^2)'], 1e6).values[0],
            np.divide(evolving_geom_calcs_df['evolving_outlines_area (m^2)'], 1e6).max()
        )
    
    # Set y limit with padding above the maximum value to avoid data plotting behind legend
    axs[1,idx].set_ylim(bottom=None, top=max_y * 1.3)
    
    # Panel - Cumulative dh/dt -------------------------------------------------------
    
    # Plot horizontal zero line for reference
    axs[2,idx].axhline(0, color='k', linewidth=1)

    # Plot stationary outlines off-lake secular dh
    axs[2,idx].plot(mdates.date2num(stationary_geom_calcs_df['mid_pt_datetime']),
        np.cumsum(stationary_geom_calcs_df['stationary_outline_region_dh (m)']),
        color='lightgray', marker='o', markerfacecolor='w', markersize=3, linewidth=2)

    # Plot evolving outlines off-lake secular dh
    axs[2,idx].plot(mdates.date2num(evolving_union_geom_calcs_df['mid_pt_datetime']), 
        np.cumsum(evolving_union_geom_calcs_df['stationary_outline_region_dh (m)']), color='dimgray', 
            marker='o', markersize=3, linewidth=2)
    
    # Plot stationary outline time series
    axs[2,idx].plot(mdates.date2num(stationary_geom_calcs_df['mid_pt_datetime']), 
        np.cumsum(stationary_geom_calcs_df['stationary_outline_dh_corr (m)']), 
            color=stationary_outline_color, marker='o', markerfacecolor='w', markersize=3, linewidth=2)

    # Plot evolving outlines time series
    x = mdates.date2num(evolving_geom_calcs_df['mid_pt_datetime'])
    y = np.cumsum(evolving_geom_calcs_df['evolving_outlines_dh_corr (m)'])
    
    # Create points and segments for LineCollection
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)
    
    # Create a LineCollection, using the discrete colormap and norm
    lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
    lc.set_array(x)
    lc.set_linewidth(2)

    # Plot multi-colored line and scatter for data points
    line = axs[2,idx].add_collection(lc)
    scatter = axs[2,idx].scatter(x, y, c=x, cmap=cmap, s=9, norm=norm, zorder=2)

    # Plot evolving outlines union outline time series
    axs[2,idx].plot(mdates.date2num(evolving_union_geom_calcs_df['mid_pt_datetime']), 
        np.cumsum(evolving_union_geom_calcs_df['stationary_outline_dh_corr (m)']), 
            color='k', marker='o', markersize=3, linewidth=2)
    
    # Plot bias (evolving - prior stationary)
    axs[2,idx].plot(mdates.date2num(evolving_geom_calcs_df['mid_pt_datetime']),
        np.cumsum(evolving_geom_calcs_df['evolving_outlines_dh_corr (m)'] - 
        stationary_geom_calcs_df['stationary_outline_dh_corr (m)']), 
        color='red', marker='o', markerfacecolor='w', markersize=3, linewidth=2)

    # Plot bias (evolving - updated stationary)
    axs[2,idx].plot(mdates.date2num(evolving_geom_calcs_df['mid_pt_datetime']),
        np.cumsum(evolving_geom_calcs_df['evolving_outlines_dh_corr (m)'] - 
        evolving_union_geom_calcs_df['stationary_outline_dh_corr (m)']), 
        color='darkred', marker='o', markersize=3, linewidth=2)

    axs[2,idx].set_ylim(bottom=-20.5, top=20.5)

    # Panel - Cumulative dV/dt --------------------------------------------------
    
    # Plot horizontal line at zero for reference
    axs[3,idx].axhline(0, color='k', linewidth=0.5)
    
    # Plot stationary outline time series
    axs[3,idx].plot(mdates.date2num(stationary_geom_calcs_df['mid_pt_datetime']), 
        np.divide(np.cumsum(stationary_geom_calcs_df['stationary_outline_dV_corr (m^3)']), 1e9), 
        color=stationary_outline_color, marker='o', markerfacecolor='w', markersize=3, linewidth=2)

    # Plot evolving outlines time series
    x = mdates.date2num(evolving_geom_calcs_df['mid_pt_datetime'])
    y = np.cumsum(np.divide(evolving_geom_calcs_df['evolving_outlines_dV_corr (m^3)'], 1e9))
    
    # Create points and segments for LineCollection
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)
    
    # Create a LineCollection, using the discrete colormap and norm
    lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
    lc.set_array(x)
    lc.set_linewidth(2)

    # Plot multi-colored line and scatter for data points
    line = axs[3,idx].add_collection(lc)
    scatter = axs[3,idx].scatter(x, y, c=x, cmap=cmap, s=9, norm=norm, zorder=2)

    # Plot evolving outlines union outline time series
    axs[3,idx].plot(mdates.date2num(evolving_union_geom_calcs_df['mid_pt_datetime']), 
        np.cumsum(np.divide(evolving_union_geom_calcs_df['stationary_outline_dV_corr (m^3)'], 1e9)), 
            color='k', marker='o', markersize=3, linewidth=2)
    
    # Plot bias (evolving - prior stationary)
    axs[3,idx].plot(mdates.date2num(evolving_geom_calcs_df['mid_pt_datetime']),
        np.cumsum(np.divide((evolving_geom_calcs_df['evolving_outlines_dV_corr (m^3)'] -
            stationary_geom_calcs_df['stationary_outline_dV_corr (m^3)']), 1e9)), 
            color='red', marker='o', markerfacecolor='w', markersize=3, linewidth=2)

    # Plot bias (evolving - updated stationary)
    axs[3,idx].plot(mdates.date2num(evolving_union_geom_calcs_df['mid_pt_datetime']),
        np.cumsum(np.divide((evolving_geom_calcs_df['evolving_outlines_dV_corr (m^3)'] -
            evolving_union_geom_calcs_df['stationary_outline_dV_corr (m^3)']), 1e9)), 
            color='darkred', marker='o', markersize=3, linewidth=2)

    axs[3,idx].set_ylim(bottom=-3.5, top=3.5)

# Add colorbar, legends, and titles
# Colorbar and legends are drawn only on the panels of the first column
idx = 0

# Set up a discrete colormap over the cycle start dates (the first cycle is excluded)
n_dates = len(cyc_start_datetimes[:-1])
min_date = pd.to_datetime(cyc_start_datetimes[1])
max_date = pd.to_datetime(cyc_start_datetimes[-1])
date_range = pd.date_range(min_date, max_date, periods=len(cyc_start_datetimes[1:]))
# One timestamp per calendar year covered by the date range
years = pd.to_datetime(date_range.year.unique(), format='%Y')
cmap = plt.get_cmap('plasma', n_dates)
colorbar_date_limits = (mdates.date2num(min_date), mdates.date2num(max_date))
norm = plt.Normalize(*colorbar_date_limits)
m = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
m.set_array(np.linspace(*colorbar_date_limits, n_dates))

# Add a horizontal colorbar in an inset placed inside the first map panel
colorbar_host = axs[0, idx]
cax = inset_axes(
    colorbar_host,
    width='67%',
    height='3%',
    loc='lower left',
    bbox_to_anchor=[0.31, 0.15, 1, 1],  # [left, bottom, width, height]
    bbox_transform=colorbar_host.transAxes,
    borderpad=0,
)
cbar = fig.colorbar(m, cax=cax, orientation='horizontal')
cbar.set_label('year', fontsize=16, labelpad=4)

# Tick every year but label only the even ones; the first year (2010) is skipped
# because it starts before the time series
ticked_years = years[1:]
tick_locations = [mdates.date2num(year_start) for year_start in ticked_years]
tick_labels = ['' if year_start.year % 2 else f"'{year_start.strftime('%y')}"
               for year_start in ticked_years]
cbar.set_ticks(tick_locations)
cbar.set_ticklabels(tick_labels)
# Minor ticks mark the start of each quarter
cbar.ax.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=[1, 4, 7, 10]))

# Add legends
# Proxy lines (never drawn on an axes) that reproduce the colors and linestyles used in the panels
stationary_outline_color = 'turquoise'
stationary_line = Line2D([], [], color=stationary_outline_color, linestyle='solid', linewidth=2)

# Evolving outlines: one short line per cycle start date so the legend entry shows the full
# colormap; only the middle line carries a marker as a representative data point
mid_idx = len(cyc_dates['cyc_start_datetimes']) // 2
evolving_scatter_line = [
    Line2D([0], [0],
           color=cmap(norm(mdates.date2num(dt))),
           linestyle='solid',
           **({'marker': 'o', 'markersize': 5} if i == mid_idx else {}))
    for i, dt in enumerate(cyc_dates['cyc_start_datetimes'])
]

# Properties shared by every line-with-marker legend handle below
legend_marker_kw = {'marker': 'o', 'markersize': 5, 'linewidth': 2}

evolving_union_line = Line2D([], [], color='k', linestyle='dotted', linewidth=2)
stationary_scatter_line = Line2D([], [], color=stationary_outline_color, markerfacecolor='white',
                                 linestyle='solid', **legend_marker_kw)
evolving_union_scatter_line = Line2D([], [], color='k', **legend_marker_kw)
# evolving - prior stationary (all lakes)
bias = Line2D([], [], color='red', markerfacecolor='white', **legend_marker_kw)
# evolving - updated stationary (only evolving lakes)
bias2 = Line2D([], [], color='darkred', **legend_marker_kw)
stationary_secular_scatter_line = Line2D([], [], color='lightgray', markerfacecolor='w', **legend_marker_kw)
evolving_secular_scatter_line = Line2D([], [], color='dimgray', **legend_marker_kw)

def _fig2_add_legend(panel, entries, loc):
    """Add a legend to `panel` built from (handle, label) pairs.

    Tuple handles are rendered through HandlerTuple(ndivide=None, pad=0).
    Returns the Legend object created by `panel.legend`.
    """
    handles, labels = zip(*entries)
    return panel.legend(
        list(handles), list(labels),
        handlelength=3, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
        loc=loc)


# Map panel: outlines
legend = _fig2_add_legend(
    axs[0, idx],
    [(tuple(lines), 'evolving outlines'),
     (stationary_line, 'prior stationary outline'),
     (evolving_union_line, 'updated stationary outline')],
    'upper center')

# Active area panel
legend = _fig2_add_legend(
    axs[1, idx],
    [(tuple(evolving_scatter_line), 'evolving outlines'),
     (stationary_line, 'prior stationary outline'),
     (evolving_union_line, 'updated stationary outline')],
    'upper center')

# Cumulative dh panel
legend = _fig2_add_legend(
    axs[2, idx],
    [(stationary_secular_scatter_line, 'prior stationary off-lake secular'),
     (evolving_secular_scatter_line, 'updated stationary off-lake secular'),
     (tuple(evolving_scatter_line), 'evolving outlines'),
     (stationary_scatter_line, 'prior stationary outline'),
     (evolving_union_scatter_line, 'updated stationary outline'),
     (bias, 'bias (evolving − prior stationary)'),
     (bias2, 'bias (evolving − updated stationary)')],
    'lower center')

# Cumulative dV panel
legend = _fig2_add_legend(
    axs[3, idx],
    [(tuple(evolving_scatter_line), 'evolving outlines'),
     (stationary_scatter_line, 'prior stationary outline'),
     (evolving_union_scatter_line, 'updated stationary outline'),
     (bias, 'bias (evolving − prior stationary)'),
     (bias2, 'bias (evolving − updated stationary)')],
    'lower center')

# Set common font sizes and axis labels
# y-axis label of each row (shown on the leftmost column only)
y_labels = ['y [km]', 'lakebed active area [km$^2$]', 'cumulative $dh$ [m]', 'cumulative $dV$ [km$^3$]']
# x-axis limits of the time series rows; the right-hand limit is set slightly earlier than
# the last cycle end to prevent a tick mark displaying when there is no data point
time_series_xlim = (cyc_dates['cyc_start_datetimes'].iloc[0],
                    cyc_dates['cyc_end_datetimes'].iloc[-1] - datetime.timedelta(days=2))

for i in range(nrows):
    for j in range(ncols):
        ax_ij = axs[i, j]

        # Set tick sizes for all plots
        ax_ij.tick_params(axis='both')

        # Add subplot labels (a, b, c, etc.), counted row by row
        ax_ij.text(0.03, 0.97, chr(97 + i*ncols + j), transform=ax_ij.transAxes,
                   fontsize=22, va='top', ha='left')

        # x-axis label: map row on top, bottom time series row
        if i == 0:
            ax_ij.set_xlabel('x [km]')
        elif i == 3:
            ax_ij.set_xlabel('year')

        # Time series rows get yearly major ticks and quarterly minor ticks
        if 0 < i < 4:
            ax_ij.xaxis.set_major_formatter(year_interval_formatter())
            ax_ij.xaxis.set_major_locator(mdates.YearLocator())
            ax_ij.xaxis.set_minor_locator(mdates.MonthLocator([1, 4, 7, 10]))

        # Leftmost column labels
        if j == 0:
            ax_ij.set_ylabel(y_labels[i])

        # Do not display redundant tick labels on the upper time series rows
        if i in (1, 2):
            ax_ij.set_xticklabels([])

        if i > 0:
            # Set x-axis limits
            ax_ij.set(xlim=time_series_xlim)
            # y tick labels are shown on the leftmost column only
            ax_ij.tick_params(axis='y', which='both', labelleft=(j == 0))

# Clear output
clear_output()

# Save plot (the figure is left open here so it can be previewed below)
# Create the figures subdirectory first so savefig does not fail when it does not exist yet
figures_dir = os.path.join(OUTPUT_DIR, 'figures')
os.makedirs(figures_dir, exist_ok=True)
plt.savefig(os.path.join(figures_dir, 'Fig2_lake_reexamination_results.jpg'), dpi=300, bbox_inches='tight')

# Preview plot
plt.show()

In [None]:
# Close all open matplotlib figures
plt.close('all')

## Fig. 3
NOTE: Before running the cells below, you must first run the following to define the necessary plotting variables:
* the "Dissolved inorganic carbon (DIC) export estimates" section of code earlier in the notebook
* the cell at the beginning of the Figures section

In [None]:
# Import stationary subglacial lake outlines
stationary_outlines_path = os.path.join(
    os.getcwd(), 'output/lake_outlines/stationary_outlines/stationary_outlines_gdf.geojson')
stationary_lakes_gdf = gpd.read_file(stationary_outlines_path)

# Folder whose contents decide which lakes are treated as having evolving outlines
# NOTE(review): the exact matching rule lives in filter_gdf_by_folder_contents (defined elsewhere)
folder_path = 'output/lake_outlines/evolving_outlines'

# Lakes with evolving outlines (.geojson)
evolving_outlines_lakes = filter_gdf_by_folder_contents(
    stationary_lakes_gdf, folder_path, file_extension='geojson', exclude=False)

# Special case: Site_B and Site_C are now a combined lake, so their stationary
# outlines are added to the evolving outlines lakes explicitly
include_list = ['Site_B', 'Site_C']
included_rows = stationary_lakes_gdf.loc[stationary_lakes_gdf['name'].isin(include_list)]
evolving_outlines_lakes = pd.concat([evolving_outlines_lakes, included_rows]).drop_duplicates()
print('lakes with evolving outlines:', len(evolving_outlines_lakes))

# Lakes with non-dynamic outlines (.txt)
no_evolving_outlines_lakes = filter_gdf_by_folder_contents(
    stationary_lakes_gdf, folder_path, file_extension='txt', exclude=False)
print('lakes without evolving outlines:', len(no_evolving_outlines_lakes))

In [None]:
# Typography defaults applied to the figures created after this cell
plt.rcParams['font.family'] = 'DejaVu Sans'
plt.rcParams['font.size'] = 11
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 11
plt.rcParams['ytick.labelsize'] = 11
plt.rcParams['legend.fontsize'] = 10

In [None]:
# Read in data
# Every directory read below holds the same three continental summation csv files,
# so the file names are defined once and shared by all reads
filenames = {
    'superset_IS2_lakes_sum': 'superset_IS2_lakes_sum.csv',
    'subset_CS2_IS2_lakes_SARInPreExpansion_sum': 'subset_CS2_IS2_lakes_SARInPreExpansion_sum.csv',
    'subset_CS2_IS2_lakes_SARInPostExpansion_sum': 'subset_CS2_IS2_lakes_SARInPostExpansion_sum.csv',
}


def _read_continental_sums(base_path, filenames):
    """Read the continental summation csv files stored in `base_path`.

    Parameters
    ----------
    base_path : str
        Directory containing the csv files.
    filenames : dict
        Maps the key used in the returned dict to the csv file name.

    Returns
    -------
    dict
        DataFrame read from each csv file, keyed like `filenames`.
    """
    return {name: pd.read_csv(f'{base_path}/{filename}')
            for name, filename in filenames.items()}


# Evolving outlines (only lakes with evolving outlines)
base_path = 'output/geometric_calcs/evolving_outlines_geom_calc/forward_fill'
dfs = _read_continental_sums(base_path, filenames)
superset_IS2_evolving_sum_df = dfs['superset_IS2_lakes_sum']
subset_CS2_IS2_SARInPreExpansion_evolving_sum_df = dfs['subset_CS2_IS2_lakes_SARInPreExpansion_sum']
subset_CS2_IS2_SARInPostExpansion_evolving_sum_df = dfs['subset_CS2_IS2_lakes_SARInPostExpansion_sum']

# Stationary outlines (all lakes)
base_path = 'output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes'
dfs = _read_continental_sums(base_path, filenames)
superset_IS2_stationary_sum_df = dfs['superset_IS2_lakes_sum']
subset_CS2_IS2_SARInPreExpansion_stationary_sum_df = dfs['subset_CS2_IS2_lakes_SARInPreExpansion_sum']
subset_CS2_IS2_SARInPostExpansion_stationary_sum_df = dfs['subset_CS2_IS2_lakes_SARInPostExpansion_sum']

# Stationary outlines (only at evolving lakes)
base_path = 'output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_evolving_lakes'
dfs = _read_continental_sums(base_path, filenames)
superset_IS2_stationary_subset_sum_df = dfs['superset_IS2_lakes_sum']
subset_CS2_IS2_SARInPreExpansion_stationary_subset_sum_df = dfs['subset_CS2_IS2_lakes_SARInPreExpansion_sum']
subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df = dfs['subset_CS2_IS2_lakes_SARInPostExpansion_sum']

# Evolving union (only lakes with evolving outlines)
base_path = 'output/geometric_calcs/stationary_outline_geom_calc/evolving_union_at_evolving_lakes'
dfs = _read_continental_sums(base_path, filenames)
superset_IS2_evolving_union_sum_df = dfs['superset_IS2_lakes_sum']
subset_CS2_IS2_SARInPreExpansion_evolving_union_sum_df = dfs['subset_CS2_IS2_lakes_SARInPreExpansion_sum']
subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df = dfs['subset_CS2_IS2_lakes_SARInPostExpansion_sum']


# Setup figure: four stacked panels sharing one x-axis
nrows, ncols = 4, 1
fig, ax = plt.subplots(
    nrows, ncols,
    figsize=(10, 12),
    sharex=True,
    constrained_layout=True,
    gridspec_kw={'height_ratios': [2.5, 2.5, 3, 3]},
)

# Store dates (as matplotlib date numbers) and time period for satellite coverage eras
cycle_start_col = cyc_dates['cyc_start_datetimes']
start_date = mdates.date2num(cycle_start_col.iloc[0])
time_span = mdates.date2num(cyc_dates['cyc_end_datetimes'].iloc[-1]) - start_date
# Start of the cycle beginning 2013-10-01, when the CS2 SARIn mode mask moved inland
SARIn_expand_date = mdates.date2num(
    cycle_start_col[cycle_start_col == '2013-10-01T18:00:00.000000000'].iloc[0])
# Start of the first cycle belonging to the ICESat-2 ATL15 dataset
CS2_IS2_tie_pt = mdates.date2num(
    cyc_dates.loc[cyc_dates['dataset'] == 'ICESat2_ATL15', 'cyc_start_datetimes'].iloc[0])

# Reference lines shared by all panels
era_boundary_kw = dict(color='dimgray', linestyle='solid', linewidth=0.75, ymin=-1, ymax=1, zorder=0)
for row in range(nrows):
    # Plot horizontal line at zero for reference
    ax[row].axhline(0, color='k', linewidth=0.5)
    # Plot vertical lines to indicate CS2 SARIn mode mask moving inland and ICESat-2 era start
    ax[row].axvline(SARIn_expand_date, **era_boundary_kw)
    ax[row].axvline(CS2_IS2_tie_pt, **era_boundary_kw)

# Pick colormap and make continuous cmap discrete for evolving outlines
cmap = plt.get_cmap('plasma', len(cyc_start_datetimes[1:]))
norm = plt.Normalize(vmin=mdates.date2num(cyc_start_datetimes[1]),
                     vmax=mdates.date2num(cyc_start_datetimes[-1]))

# Use for loop to store each time step as line segment to use in legend
lines = []  # list of lines to be used for the legend
x, y = 1, 1  # placeholder coordinates; only the line color matters for the legend
for dt in cyc_dates['cyc_start_datetimes'][1:]:
    # Color each line by the date being iterated over (the first cycle is skipped, matching
    # the range of `norm`); indexing the full series with the loop counter instead would
    # shift every color by one time step
    line, = ax[0].plot(x, y, color=cmap(norm(mdates.date2num(dt))))
    lines.append(line)

# Define colors and linestyles that will be reused and create lines for legend
stationary_outline_color = 'turquoise'
stationary_outline_subset_color = 'darkcyan'

# Panel - Lake active area ---------------------------------------------

# Plot evolving outlines time series as a multi-colored line with scatter points:
# first the CryoSat-2 observation period (thin), then the ICESat-2 era (thick)
for area_df, area_linewidth, area_markersize in [
        (subset_CS2_IS2_SARInPostExpansion_evolving_sum_df, 1, 5),
        (superset_IS2_evolving_sum_df, 2, 9)]:
    x = mdates.date2num(area_df['mid_pt_datetime'])
    y = area_df['evolving_outlines_area (m^2)'] / 1e6
    # Consecutive (x, y) points form the segments of a LineCollection colored by date
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)
    lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
    lc.set_array(x)
    lc.set_linewidth(area_linewidth)
    line = ax[0].add_collection(lc)
    scatter = ax[0].scatter(x, y, c=x, cmap=cmap, norm=norm, s=area_markersize)

# Fractions of the full time span at which the SARIn expansion and the ICESat-2 era begin
# (axhline takes xmin/xmax as fractions of the axes width)
SARIn_expand_frac = (SARIn_expand_date - start_date) / time_span
CS2_IS2_tie_frac = (CS2_IS2_tie_pt - start_date) / time_span

# Each outline type is drawn as three horizontal segments: before the SARIn expansion,
# after the SARIn expansion, and (thicker) over the ICESat-2 era
for (pre_expansion_df, post_expansion_df, IS2_era_df), outline_style in [
        # Prior stationary outlines at all lakes
        ((subset_CS2_IS2_SARInPreExpansion_stationary_sum_df,
          subset_CS2_IS2_SARInPostExpansion_stationary_sum_df,
          superset_IS2_stationary_sum_df),
         dict(color=stationary_outline_color, linestyle='solid')),
        # Prior stationary outlines at lakes with evolving outlines (subset)
        ((subset_CS2_IS2_SARInPreExpansion_stationary_subset_sum_df,
          subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df,
          superset_IS2_stationary_subset_sum_df),
         dict(color=stationary_outline_subset_color, linestyle='dashed')),
        # Updated stationary outlines
        ((subset_CS2_IS2_SARInPreExpansion_evolving_union_sum_df,
          subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df,
          superset_IS2_evolving_union_sum_df),
         dict(color='k', linestyle='dotted'))]:
    ax[0].axhline(pre_expansion_df['stationary_outline_area (m^2)'].iloc[0] / 1e6,
                  linewidth=1, xmin=0, xmax=SARIn_expand_frac, zorder=1, **outline_style)
    ax[0].axhline(post_expansion_df['stationary_outline_area (m^2)'].iloc[-1] / 1e6,
                  linewidth=1, xmin=SARIn_expand_frac, xmax=1, zorder=1, **outline_style)
    ax[0].axhline(IS2_era_df['stationary_outline_area (m^2)'].iloc[0] / 1e6,
                  linewidth=2, xmin=CS2_IS2_tie_frac, xmax=1, zorder=1, **outline_style)


# Panel - Carbon (DIC) export ---------------------------------------------

# Define conversion factor for grams
# g_conv = 1e6  # Megagrams
g_conv = 1e9  # Gigagrams

# Plot evolving outlines time series as a multi-colored line with scatter points:
# first the CryoSat-2 observation period (thin), then the ICESat-2 observation period (thick).
# x and y are both taken from the same dataframe so the dates always match the plotted values.
for DIC_df, DIC_linewidth, DIC_markersize in [
        (subset_CS2_IS2_SARInPostExpansion_evolving_sum_df, 1, 5),
        (superset_IS2_evolving_sum_df, 2, 9)]:
    x = mdates.date2num(DIC_df['mid_pt_datetime'])
    y = DIC_df['evolving_outlines_area (m^2)'] * SLM_evolving_per_area_per_step_DIC_export_filling_period / g_conv
    # Consecutive (x, y) points form the segments of a LineCollection colored by date
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)
    lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
    lc.set_array(x)
    lc.set_linewidth(DIC_linewidth)
    line = ax[1].add_collection(lc)
    scatter = ax[1].scatter(x, y, c=x, cmap=cmap, norm=norm, s=DIC_markersize)

# Fractions of the full time span at which the SARIn expansion and the ICESat-2 era begin
# (axhline takes xmin/xmax as fractions of the axes width)
SARIn_expand_frac = (SARIn_expand_date - start_date) / time_span
CS2_IS2_tie_frac = (CS2_IS2_tie_pt - start_date) / time_span

# Each outline type is drawn as three horizontal segments: before the SARIn expansion,
# after the SARIn expansion, and (thicker) over the ICESat-2 era
for (pre_expansion_df, post_expansion_df, IS2_era_df), outline_style in [
        # Stationary outlines at all lakes
        ((subset_CS2_IS2_SARInPreExpansion_stationary_sum_df,
          subset_CS2_IS2_SARInPostExpansion_stationary_sum_df,
          superset_IS2_stationary_sum_df),
         dict(color=stationary_outline_color)),
        # Stationary outlines at lakes with evolving outlines (subset)
        ((subset_CS2_IS2_SARInPreExpansion_stationary_subset_sum_df,
          subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df,
          superset_IS2_stationary_subset_sum_df),
         dict(color=stationary_outline_subset_color, linestyle='dashed')),
        # Evolving outlines union
        ((subset_CS2_IS2_SARInPreExpansion_evolving_union_sum_df,
          subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df,
          superset_IS2_evolving_union_sum_df),
         dict(color='k', linestyle='dotted'))]:
    ax[1].axhline(pre_expansion_df['stationary_outline_area (m^2)'].iloc[0] * SLM_stationary_per_area_per_step_DIC_export / g_conv,
                  linewidth=1, xmin=0, xmax=SARIn_expand_frac, **outline_style)
    ax[1].axhline(post_expansion_df['stationary_outline_area (m^2)'].iloc[-1] * SLM_stationary_per_area_per_step_DIC_export / g_conv,
                  linewidth=1, xmin=SARIn_expand_frac, xmax=1, **outline_style)
    ax[1].axhline(IS2_era_df['stationary_outline_area (m^2)'].iloc[0] * SLM_stationary_per_area_per_step_DIC_export / g_conv,
                  linewidth=2, xmin=CS2_IS2_tie_frac, xmax=1, **outline_style)


# Panel - cumulative dV/dt --------------------------------------------------

# End of the last CryoSat-2 SARIn cycle, as a string for comparison with 'mid_pt_datetime'
CS2_last_cyc_date = str(cyc_dates[cyc_dates['dataset'] == 'CryoSat2_SARIn']['cyc_end_datetimes'].iloc[-1])

# Cumulative dV of the evolving outlines reached by the end of the CryoSat-2 era; added to
# the ICESat-2 era curve so that it continues from the CryoSat-2 curve
CS2_evolving_df = subset_CS2_IS2_SARInPostExpansion_evolving_sum_df
CS2_era_rows = CS2_evolving_df['mid_pt_datetime'] <= CS2_last_cyc_date
cum_sum_last_CS2_midcyc_date = np.cumsum(np.divide(
    CS2_evolving_df[CS2_era_rows]['evolving_outlines_dV_corr (m^3)'], 1e9)).iloc[-1]

# Plot dV time series of evolving outlines using LineCollection from points/segments to plot
# multi-colored line: first the CryoSat-2 observation period (thin, no offset), then the
# ICESat-2 era (thick, offset by the CryoSat-2 cumulative total)
for dV_df, dV_offset, dV_linewidth, dV_markersize in [
        (CS2_evolving_df, None, 1, 8),
        (superset_IS2_evolving_sum_df, cum_sum_last_CS2_midcyc_date, 2, 14)]:
    x = mdates.date2num(dV_df['mid_pt_datetime'])
    y = np.cumsum(dV_df['evolving_outlines_dV_corr (m^3)'] / 1e9)
    if dV_offset is not None:
        y = y + dV_offset
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)
    lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
    lc.set_array(x)
    lc.set_linewidth(dV_linewidth)
    line = ax[2].add_collection(lc)
    scatter = ax[2].scatter(x, y, c=x, cmap=cmap, norm=norm, s=dV_markersize)

# Plot dV time series of stationary outline of all lakes
stationary_all_style = dict(color=stationary_outline_color, marker='o', markerfacecolor='white')

# CryoSat-2 observation period (thin line)
CS2_stationary_dV_km3 = subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['stationary_outline_dV_corr (m^3)'] / 1e9
ax[2].plot(
    mdates.date2num(subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['mid_pt_datetime']),
    np.cumsum(CS2_stationary_dV_km3),
    markersize=3, linewidth=1, **stationary_all_style)

# Cumulative dV reached by the end of the CryoSat-2 era; offsets the ICESat-2 era curve
CS2_stationary_era_rows = subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date
cum_sum_last_CS2_midcyc_date = np.cumsum(CS2_stationary_dV_km3[CS2_stationary_era_rows]).iloc[-1]

# ICESat-2 era (thick line), continuing from the CryoSat-2 cumulative total
IS2_stationary_dV_km3 = superset_IS2_stationary_sum_df['stationary_outline_dV_corr (m^3)'] / 1e9
ax[2].plot(
    mdates.date2num(superset_IS2_stationary_sum_df['mid_pt_datetime']),
    np.cumsum(IS2_stationary_dV_km3) + cum_sum_last_CS2_midcyc_date,
    markersize=4, linewidth=2, **stationary_all_style)

# Plot dV time series of stationary outline of lakes with evolving outlines (subset)
# CryoSat-2 observation period (thin line)
ax[2].plot(mdates.date2num(subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['mid_pt_datetime']), 
    np.cumsum(subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['stationary_outline_dV_corr (m^3)'] / 1e9), 
    color=stationary_outline_subset_color, marker='o', markersize=3, markerfacecolor='white', linewidth=1)
# Cumulative dV reached by the end of the CryoSat-2 era; offsets the ICESat-2 era curve.
# The row mask is built from the same dataframe that it filters, so the selection does not
# depend on another dataframe happening to share its index and dates.
cum_sum_last_CS2_midcyc_date = np.cumsum(
    subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df[subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
    ['stationary_outline_dV_corr (m^3)'] / 1e9).iloc[-1]
# ICESat-2 era (thick line), continuing from the CryoSat-2 cumulative total
ax[2].plot(mdates.date2num(superset_IS2_stationary_subset_sum_df['mid_pt_datetime']), 
    np.cumsum(superset_IS2_stationary_subset_sum_df['stationary_outline_dV_corr (m^3)'] / 1e9) + cum_sum_last_CS2_midcyc_date, 
    color=stationary_outline_subset_color, marker='o', markersize=4, markerfacecolor='white', linewidth=2)

# Plot dV time series of updated stationary outline
CS2_union_df = subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df
IS2_union_df = superset_IS2_evolving_union_sum_df
union_style = dict(color='k', marker='o')

# CryoSat-2 observation period (thin line)
ax[2].plot(
    mdates.date2num(CS2_union_df['mid_pt_datetime']),
    np.cumsum(CS2_union_df['stationary_outline_dV_corr (m^3)']) / 1e9,
    markersize=3, linewidth=1, **union_style)

# Cumulative dV reached by the end of the CryoSat-2 era; offsets the ICESat-2 era curve
CS2_union_era_rows = CS2_union_df['mid_pt_datetime'] <= CS2_last_cyc_date
cum_sum_last_CS2_midcyc_date = np.cumsum(
    CS2_union_df[CS2_union_era_rows]['stationary_outline_dV_corr (m^3)'] / 1e9).iloc[-1]

# ICESat-2 era (thick line), continuing from the CryoSat-2 cumulative total
ax[2].plot(
    mdates.date2num(IS2_union_df['mid_pt_datetime']),
    np.cumsum(IS2_union_df['stationary_outline_dV_corr (m^3)'] / 1e9) + cum_sum_last_CS2_midcyc_date,
    markersize=4, linewidth=2, **union_style)


# Panel - cumulative dV/dt bias --------------------------------------------------

# Plot bias (evolving - prior stationary (all lakes))
bias_CS2_evolving_df = subset_CS2_IS2_SARInPostExpansion_evolving_sum_df
bias_CS2_stationary_df = subset_CS2_IS2_SARInPostExpansion_stationary_sum_df
bias_all_style = dict(color='red', marker='o', markerfacecolor='white')

# CryoSat-2 observation period (thin line)
ax[3].plot(
    mdates.date2num(bias_CS2_stationary_df['mid_pt_datetime']),
    np.cumsum(bias_CS2_evolving_df['evolving_outlines_dV_corr (m^3)'] -
              bias_CS2_stationary_df['stationary_outline_dV_corr (m^3)']) / 1e9,
    markersize=3, linewidth=1, **bias_all_style)

# Cumulative bias reached by the end of the CryoSat-2 era; offsets the ICESat-2 era curve
bias_evolving_CS2_era = bias_CS2_evolving_df[
    bias_CS2_evolving_df['mid_pt_datetime'] <= CS2_last_cyc_date]['evolving_outlines_dV_corr (m^3)']
bias_stationary_CS2_era = bias_CS2_stationary_df[
    bias_CS2_stationary_df['mid_pt_datetime'] <= CS2_last_cyc_date]['stationary_outline_dV_corr (m^3)']
cum_sum_last_CS2_midcyc_date = np.cumsum(
    (bias_evolving_CS2_era - bias_stationary_CS2_era) / 1e9).iloc[-1]

# ICESat-2 era (thick line), continuing from the CryoSat-2 cumulative total
ax[3].plot(
    mdates.date2num(superset_IS2_stationary_sum_df['mid_pt_datetime']),
    np.cumsum((superset_IS2_evolving_sum_df['evolving_outlines_dV_corr (m^3)'] -
               superset_IS2_stationary_sum_df['stationary_outline_dV_corr (m^3)']) / 1e9)
    + cum_sum_last_CS2_midcyc_date,
    markersize=4, linewidth=2, **bias_all_style)


# Plot bias (evolving - prior stationary (subset at evolving lakes))
ax[3].plot(mdates.date2num(subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['mid_pt_datetime']), 
    np.cumsum(subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['evolving_outlines_dV_corr (m^3)'] - 
        subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['stationary_outline_dV_corr (m^3)']) / 1e9, 
        color='lightcoral', marker='o', markersize=3, markerfacecolor='k', linewidth=1)
cum_sum_last_CS2_midcyc_date = np.cumsum((
    subset_CS2_IS2_SARInPostExpansion_evolving_sum_df[
        subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
        ['evolving_outlines_dV_corr (m^3)'] - 
        subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df[
        subset_CS2_IS2_SARInPostExpansion_stationary_subset_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
        ['stationary_outline_dV_corr (m^3)']) / 1e9).iloc[-1]
ax[3].plot(mdates.date2num(superset_IS2_stationary_subset_sum_df['mid_pt_datetime']), 
    np.cumsum((superset_IS2_evolving_sum_df['evolving_outlines_dV_corr (m^3)'] -
        superset_IS2_stationary_subset_sum_df['stationary_outline_dV_corr (m^3)']) / 1e9) + cum_sum_last_CS2_midcyc_date, 
        color='lightcoral', marker='o', markersize=4, markerfacecolor='k', linewidth=2)

# Plot bias (evolving - updated stationary)
ax[3].plot(mdates.date2num(subset_CS2_IS2_SARInPostExpansion_stationary_sum_df['mid_pt_datetime']), 
    np.cumsum(subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['evolving_outlines_dV_corr (m^3)'] - 
        subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df['stationary_outline_dV_corr (m^3)']) / 1e9, 
        color='darkred', marker='o', markersize=3, linestyle='solid', linewidth=1)
cum_sum_last_CS2_midcyc_date = np.cumsum((
    subset_CS2_IS2_SARInPostExpansion_evolving_sum_df[
        subset_CS2_IS2_SARInPostExpansion_evolving_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
        ['evolving_outlines_dV_corr (m^3)'] - 
        subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df[
        subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]
        ['stationary_outline_dV_corr (m^3)']) / 1e9).iloc[-1]
ax[3].plot(mdates.date2num(superset_IS2_stationary_sum_df['mid_pt_datetime']), 
    np.cumsum((superset_IS2_evolving_sum_df['evolving_outlines_dV_corr (m^3)'] -
        superset_IS2_evolving_union_sum_df['stationary_outline_dV_corr (m^3)']) / 1e9) + cum_sum_last_CS2_midcyc_date, 
        color='darkred', marker='o', markersize=4, linewidth=2)


# Y-axis limits
# Capture the autoscaled limits of the top two panels before overriding them
ax0_auto_ymin, ax0_auto_ymax = ax[0].get_ylim()
ax1_auto_ymin, ax1_auto_ymax = ax[1].get_ylim()
# Lower limit is minus 10% of the autoscaled span, leaving room for the
# satellite-era text annotations; the upper limit is left unchanged
ax[0].set_ylim(bottom=-(ax0_auto_ymax - ax0_auto_ymin) * 0.1, top=None)
ax[1].set_ylim(bottom=-(ax1_auto_ymax - ax1_auto_ymin) * 0.1, top=None)
# Fixed limits for the cumulative dV and bias panels
ax[2].set_ylim(-8.5, 8.5)
ax[3].set_ylim(-8.5, 1)
del ax0_auto_ymin, ax0_auto_ymax, ax1_auto_ymin, ax1_auto_ymax

# X-axis label on the bottom panel
ax[3].set_xlabel('year')

# Dates at which the era labels are anchored (15 days after each era's first cycle start)
start_date_text = pd.to_datetime(cyc_dates['cyc_start_datetimes'].iloc[0]) + pd.Timedelta(days=15)
sarin_expand_date_text = pd.to_datetime(
    cyc_dates.loc[cyc_dates['cyc_start_datetimes'] == '2013-10-01T18:00:00.000000000',
                  'cyc_start_datetimes'].iloc[0]
) + pd.Timedelta(days=15)
is2_start_date_text = pd.to_datetime(
    cyc_dates.loc[cyc_dates['dataset'] == 'ICESat2_ATL15', 'cyc_start_datetimes'].iloc[0]
) + pd.Timedelta(days=15)

# Era labels on the top panel, all sharing the same text styling
era_text_style = dict(horizontalalignment='left', verticalalignment='top', color='k')
ax[0].text(start_date_text, -800, 'CryoSat-2 era begins', **era_text_style)
ax[0].text(sarin_expand_date_text, -800, 'CryoSat-2 SARIn mode expands', **era_text_style)
ax[0].text(is2_start_date_text, -800, 'ICESat-2 era begins', **era_text_style)

# Legends
# Proxy artists (no data) used as legend handles
stationary_line = Line2D([], [], color=stationary_outline_color, linestyle='solid', linewidth=2)
stationary_subset_line = Line2D([], [], color=stationary_outline_subset_color, linestyle='dashed', linewidth=2)

# Evolving outlines: one colored line per cycle start date so the handle shows the full
# colormap; only the middle line carries a marker as a representative data point
mid_idx = len(cyc_dates['cyc_start_datetimes']) // 2
evolving_scatter_line = []
for i, dt in enumerate(cyc_dates['cyc_start_datetimes']):
    line_kwargs = dict(color=cmap(norm(mdates.date2num(dt))), linestyle='solid')
    if i == mid_idx:
        line_kwargs.update(marker='o', markersize=5)
    evolving_scatter_line.append(Line2D([0], [0], **line_kwargs))

evolving_union_line = Line2D([], [], color='k', linestyle='dotted', linewidth=2)
stationary_scatter_line = Line2D(
    [], [], color=stationary_outline_color, marker='o', markersize=5, markerfacecolor='white', linewidth=2)
stationary_subset_scatter_line = Line2D(
    [], [], color=stationary_outline_subset_color, marker='o', markersize=5, markerfacecolor='white', linewidth=2)
evolving_union_scatter_line = Line2D(
    [], [], color='k', marker='o', markersize=5, markerfacecolor='k', linewidth=2)
# Bias handles:
#   bias_line  - evolving minus prior stationary (all lakes)
#   bias_line2 - evolving minus prior stationary (only at evolving lakes)
#   bias_line3 - evolving minus updated stationary (only evolving lakes)
bias_line = Line2D([], [], color='red', marker='o', markersize=5, markerfacecolor='white', linewidth=2)
bias_line2 = Line2D([], [], color='lightcoral', marker='o', markersize=5, markerfacecolor='k', linewidth=2)
bias_line3 = Line2D([], [], color='darkred', marker='o', markersize=5, linewidth=2)

# Horizontal anchor shared by all legends (axes fraction)
x0 = 0.03

# Panels 0 and 2 share the same four legend entries
outline_legend_labels = [
    f'evolving outlines (n={len(evolving_outlines_lakes)})',
    f'prior stationary outlines at all analyzed lakes (n={len(no_evolving_outlines_lakes) + len(evolving_outlines_lakes)})',
    f'prior stationary outlines at evolving lakes subset (n={len(evolving_outlines_lakes)})',
    f'updated stationary outlines (n={len(evolving_outlines_lakes)})',
]

legend0 = ax[0].legend(
    [tuple(evolving_scatter_line), stationary_line, stationary_subset_line, evolving_union_line],
    list(outline_legend_labels),
    handlelength=3,
    handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
    loc='upper left',
    bbox_to_anchor=(x0, 1),
)

legend2 = ax[2].legend(
    [tuple(evolving_scatter_line), stationary_scatter_line,
     stationary_subset_scatter_line, evolving_union_scatter_line],
    list(outline_legend_labels),
    handlelength=3,
    handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
    loc='upper left',
    bbox_to_anchor=(x0, 1),
)

legend3 = ax[3].legend(
    [bias_line, bias_line2, bias_line3],
    [
        f'bias, evolving (n={len(evolving_outlines_lakes)}) − prior stationary at all analyzed lakes (n={len(no_evolving_outlines_lakes) + len(evolving_outlines_lakes)})',
        f'bias, evolving (n={len(evolving_outlines_lakes)}) − prior stationary at evolving lakes subset (n={len(evolving_outlines_lakes)})',
        f'bias, evolving (n={len(evolving_outlines_lakes)}) − updated stationary (n={len(evolving_outlines_lakes)})',
    ],
    handlelength=3,
    handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
    loc='upper left',
    bbox_to_anchor=(x0, 0.27),
)

# Configure the date x-axis (ticks, formatter, limits) on panels 0 and 2
for row in [0,2]:
    # Remove x tick labels
    # NOTE(review): the major formatter assigned below may re-enable tick labels on these
    # panels unless label visibility is controlled elsewhere — confirm the intended result.
    ax[row].set_xticklabels([])

    # Format the x-axis to display years only
    ax[row].xaxis.set_major_locator(mdates.YearLocator(base=1))  # Major ticks every year (base=1)
    ax[row].xaxis.set_minor_locator(mdates.MonthLocator(bymonth=[1,4,7,10]))  # Minor ticks every quarter
    ax[row].xaxis.set_major_formatter(mdates.DateFormatter('%Y'))  # Display major ticks as years

    # Set x-axis limits: first cycle start to (last cycle end minus two days)
    ax[row].set(xlim=(cyc_dates['cyc_start_datetimes'].iloc[0],
        # Set righthand x-axis limit slightly earlier to prevent tick mark displaying when there is no data point
        (cyc_dates['cyc_end_datetimes'].iloc[-1] - datetime.timedelta(days=2))))

# Set y-axis labels for each panel
ax[0].set_ylabel('lakebed active area [km$^2$]')
ax[1].set_ylabel('DIC export [Gg C]')
ax[2].set_ylabel('cumulative $dV$ [km$^3$]')
ax[3].set_ylabel('cumulative $dV$ bias [km$^3$]')

# Add panel letters (a, b, c, ...) at the top left of each subplot
ax_array = np.array(ax)  # Convert gridspec list of lists into numpy array to use .flatten() method
char_index = 97  # ASCII value for 'a'
for i, ax_i in enumerate(ax_array.flatten()):
    # `transform=ax_i.transAxes` makes coordinates relative to the axes (0,0 is bottom left and 1,1 is top right)
    ax_i.text(0.01, 0.97, chr(char_index + i), transform=ax_i.transAxes, fontsize=16, va='top', ha='left')

# Save plot; make sure the figures subdirectory exists first because savefig
# does not create missing directories
os.makedirs(os.path.join(OUTPUT_DIR, 'figures'), exist_ok=True)
plt.savefig(OUTPUT_DIR + '/figures/Fig3_lake_reexamination_results_continental_integration.jpg',
    dpi=300, bbox_inches='tight')

# Preview plot
plt.show()

In [None]:
# Close all open figures
plt.close('all')

## Table S2

In [None]:
# Load the stationary subglacial lake outlines (path resolved against the working directory)
stationary_lakes_gdf = gpd.read_file(
    os.path.join(os.getcwd(), 'output/lake_outlines/stationary_outlines/stationary_outlines_gdf.geojson')
)

# Folder used to split the lakes by whether they have evolving outlines
folder_path = 'output/lake_outlines/evolving_outlines'

# Lakes with evolving outlines (.geojson)
evolving_outlines_lakes = filter_gdf_by_folder_contents(
    stationary_lakes_gdf, folder_path, file_extension='geojson', exclude=False)

# Lakes with no evolving outlines (.txt)
no_evolving_outlines_lakes = filter_gdf_by_folder_contents(
    stationary_lakes_gdf, folder_path, file_extension='txt', exclude=False)

# Special case: Site_B and Site_C are now a combined lake (Site_B_Site_C), so both
# stationary outlines are appended to the evolving-outlines set and duplicates dropped
include_list = ['Site_B', 'Site_C']
included_rows = stationary_lakes_gdf.loc[stationary_lakes_gdf['name'].isin(include_list)]
evolving_outlines_lakes = pd.concat([evolving_outlines_lakes, included_rows]).drop_duplicates()

# Report the size of each group
print('lakes with evolving outlines:', len(evolving_outlines_lakes))
print('lakes with no evolving outlines:', len(no_evolving_outlines_lakes))

In [None]:
# Number of stationary lakes, excluding the row named 'Crane_Glacier'
len(stationary_lakes_gdf[stationary_lakes_gdf['name'] != 'Crane_Glacier'])

In [None]:
# Tally of CS2_SARIn_start values (missing values included) over all stationary
# lakes except 'Crane_Glacier'
summary = stationary_lakes_gdf[
    stationary_lakes_gdf['name'] != 'Crane_Glacier'
]['CS2_SARIn_start'].value_counts(dropna=False)
print(summary)

In [None]:
# Tally of CS2_SARIn_start values (missing values included) for lakes with evolving outlines
summary = evolving_outlines_lakes['CS2_SARIn_start'].value_counts(dropna=False)
print(summary)
# Total across all categories
print(np.sum(summary))

In [None]:
# Tally of CS2_SARIn_start values (missing values included) for lakes with no evolving outlines
summary = no_evolving_outlines_lakes['CS2_SARIn_start'].value_counts(dropna=False)
print(summary)
# Total across all categories
print(np.sum(summary))

## Fig. S2

In [None]:
# Matplotlib font family and font sizes applied to figures created after this cell
plt.rcParams.update({
    'font.family': 'DejaVu Sans',
    'font.size': 9,
    'axes.labelsize': 12,
    'xtick.labelsize': 13,
    'ytick.labelsize': 13,
    'axes.titlesize': 14,
    'legend.fontsize': 12,
})

In [None]:
# Find the lakes that have evolving outlines during the date range of contemporaneous CryoSat-2-SARIn-ICESat-2 data availability
# by counting the number of geometric calculations CSV files that have evolving_outlines_area (m^2) data within
# that date range (the range itself is presumably defined inside find_csvs_with_values_in_range; confirm)
directory = "output/geometric_calcs/evolving_outlines_geom_calc"
df_matches = find_csvs_with_values_in_range(directory)
# Preview the first rows of the result
df_matches.head()

In [None]:
# Count how many of the matched lakes also have CryoSat-2 SARIn coverage
# (the inner join on "name" drops continental summation CSV files, and rows with a
# missing CS2_SARIn_start are excluded from the count)

# Work on a copy and turn literal "<NA>" strings into real missing values
reexamined_stationary_outlines_gdf = reexamined_stationary_outlines_gdf.copy()
reexamined_stationary_outlines_gdf["CS2_SARIn_start"] = (
    reexamined_stationary_outlines_gdf["CS2_SARIn_start"].replace("<NA>", pd.NA)
)

# Attach CS2_SARIn_start to every matched lake
merged = pd.merge(
    df_matches,
    reexamined_stationary_outlines_gdf[["name", "CS2_SARIn_start"]],
    how="inner",
    on="name",
)

# Lakes whose CS2_SARIn_start is present
count_evolving_lakes_IS2_during_CS2_comparison = int(merged["CS2_SARIn_start"].notna().sum())
print(count_evolving_lakes_IS2_during_CS2_comparison)

In [None]:
# Find the number of lakes that were found to have evolving outlines using only CryoSat-2 SARIn data
# NOTE(review): `dir` shadows the Python builtin of the same name; kept because later cells
# may rely on this variable name.
dir = 'output/geometric_calcs/evolving_outlines_geom_calc/CS2_comparison'
print(count_files_by_suffix(dir, ".csv"))
# Double check by looking at the number that were successfully merged with the geometric calculation data from 
# multi-mission time series
dir = 'output/geometric_calcs/evolving_outlines_geom_calc/CS2_comparison/merged'
count_evolving_lakes_CS2_IS2 = count_files_by_suffix(dir, ".csv")
print(count_evolving_lakes_CS2_IS2)

In [None]:
# Which lakes have evolving outlines in the CryoSat-2 comparison but not in the
# multi-mission time series?
IS2_dir = 'output/geometric_calcs/evolving_outlines_geom_calc/'
CS2_dir = 'output/geometric_calcs/evolving_outlines_geom_calc/CS2_comparison/'

# CSV base names (extension stripped) found in each directory;
# file names containing "sum" are the continental summation files and are left out
IS2_dir_files = set(
    os.path.splitext(f)[0]
    for f in os.listdir(IS2_dir)
    if f.endswith(".csv") and "sum" not in f
)
CS2_dir_files = set(
    os.path.splitext(f)[0]
    for f in os.listdir(CS2_dir)
    if f.endswith(".csv")
)

# Names present only in the CryoSat-2 comparison directory
evolving_lakes_CS2 = sorted(CS2_dir_files.difference(IS2_dir_files))
count_evolving_lakes_CS2 = len(evolving_lakes_CS2)
print(count_evolving_lakes_CS2)
print("CSV files in dir2 but not in dir1:")
for f in evolving_lakes_CS2:
    print(f)

In [None]:
# Find files in dir1 (IS2_dir, multi-mission) but not in dir2 (CS2_dir, CryoSat-2 comparison)
evolving_lakes_IS2 = sorted(IS2_dir_files - CS2_dir_files)
count_evolving_lakes_IS2 = len(evolving_lakes_IS2)
print(count_evolving_lakes_IS2)
print("CSV files in dir1 but not in dir2:")
for f in evolving_lakes_IS2:
    print(f)

In [None]:
# Straight-line model passed to scipy.odr
def linear_model(B, x):
    """Evaluate the line ``B[0] * x + B[1]``.

    B holds the parameters (B[0] is the slope, B[1] is the intercept) and x is
    the point or array of points at which the line is evaluated.
    """
    slope = B[0]
    intercept = B[1]
    return slope * x + intercept
    
# Base dir1 list: one geometric-calculation directory per dataset to compare
# NOTE(review): these are absolute paths, whereas the file counts later in this cell use
# relative 'output/...' paths — presumably both point at the same tree; confirm.
dir1_list = [
    "/home/jovyan/1_evolving_lakes/Sauthoff-2025-GRL/output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes",
    "/home/jovyan/1_evolving_lakes/Sauthoff-2025-GRL/output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_evolving_lakes",
    "/home/jovyan/1_evolving_lakes/Sauthoff-2025-GRL/output/geometric_calcs/stationary_outline_geom_calc/evolving_union_at_evolving_lakes",
    "/home/jovyan/1_evolving_lakes/Sauthoff-2025-GRL/output/geometric_calcs/evolving_outlines_geom_calc"
]

# Column name -> subplot title (looked up with rename_map.get(var, var) when titling each panel)
rename_map = {
    "area (m^2)": "lakebed active area [km$^2$]",
    "dh (m)": "uncorrected on-lake $dh$ [m]",
    "region_dh (m)": "secular off-lake $dh$ [m]",
    "dh_corr (m)": "corrected on-lake $dh$ [m]",
    "dV_corr (m^3)": "non-cumulative $dV$ [km$^3$]"
}

# Collect results per dataset: concatenate every merged CSV under
# <dir1>/CS2_comparison/merged into one dataframe keyed by the last folder name of dir1
datasets = {}
for dir1 in dir1_list:
    merged_dir = os.path.join(dir1, "CS2_comparison", "merged")
    label = os.path.basename(dir1)  # last folder of dir1

    all_dfs = []
    if not os.path.exists(merged_dir):
        print(f"Skipping {merged_dir} (not found)")
        continue

    # os.listdir returns entries in arbitrary order; sort so the concatenated
    # row order (and therefore the plotted point order) is reproducible
    for fname in sorted(os.listdir(merged_dir)):
        if fname.endswith(".csv"):
            df = pd.read_csv(os.path.join(merged_dir, fname), parse_dates=["date"])
            all_dfs.append(df)

    # Nothing to concatenate for this dataset
    if not all_dfs:
        continue

    df_all = pd.concat(all_dfs, ignore_index=True)

    # --- Normalize column names ---
    # Strip the dataset-specific prefix so all datasets share the same variable names.
    # Only the leading prefix is removed (str.replace would also remove later occurrences).
    rename_cols = {}
    for col in df_all.columns:
        for prefix in ("evolving_outlines_", "stationary_outline_"):
            if col.startswith(prefix):
                rename_cols[col] = col[len(prefix):]
                break
    df_all = df_all.rename(columns=rename_cols)

    # Apply variable scaling after renaming (m^2 -> km^2, m^3 -> km^3);
    # the matching "_CS2" column is scaled only when the base column exists
    for var, scale in {"area (m^2)": 1e6, "dV_corr (m^3)": 1e9}.items():
        if var in df_all.columns:
            df_all[var] = df_all[var] / scale
            if f"{var}_CS2" in df_all.columns:
                df_all[f"{var}_CS2"] = df_all[f"{var}_CS2"] / scale

    datasets[label] = df_all

# Variables to plot: every column of the first dataset that is neither an
# identifier/date column nor a "_CS2" counterpart
exclude_cols = {"mid_pt_datetime", "date", "name"}
sample_df = next(iter(datasets.values()))
vars_base = [
    c for c in sample_df.columns
    if not (c in exclude_cols or c.endswith("_CS2"))
]

# Subplot grid: two columns, as many rows as needed to hold all variables
nvars = len(vars_base)
ncols = 2
nrows = -(-nvars // ncols)  # ceiling division
fig, axes = plt.subplots(nrows, ncols, figsize=(10, 5*nrows), constrained_layout=True)
axes = axes.flatten()

# Adjust padding and spacing *within* constrained layout
fig.set_constrained_layout_pads(h_pad=0.05)

# One color/marker pair per dataset, assigned in dataset insertion order
colors = ["turquoise", "darkcyan", "k", "purple"]
markers = ["o", "s", "D", "^"]
style_map = dict(zip(
    datasets.keys(),
    ({"color": color, "marker": marker} for color, marker in zip(colors, markers)),
))

# Colormap matching the evolving outlines (the first cycle start is skipped)
min_date = pd.to_datetime(cyc_start_datetimes[1])
max_date = pd.to_datetime(cyc_start_datetimes[-1])
n_dates = len(cyc_start_datetimes[1:])
date_range = pd.date_range(min_date, max_date, periods=n_dates)
years = pd.to_datetime(date_range.year.unique(), format='%Y')
cmap = plt.get_cmap('plasma', n_dates)
norm = plt.Normalize(mdates.date2num(min_date), mdates.date2num(max_date))
mappable = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
mappable.set_array(
    np.linspace(mdates.date2num(min_date), mdates.date2num(max_date), n_dates)
)

# --- Consistent evolving outlines color (midpoint of full dataset) ---
df_evolving = datasets["evolving_outlines_geom_calc"]
df_evolving["mid_pt_datetime"] = pd.to_datetime(df_evolving["mid_pt_datetime"], errors="coerce")
times_all = mdates.date2num(df_evolving["mid_pt_datetime"].dropna())
mid_time_global = 0.5 * (times_all.min() + times_all.max())
evolving_outline_color = cmap(norm(mid_time_global))

# Line style of the regression line drawn for each dataset
regression_styles = {
    "stationary_outlines_at_all_lakes": "solid",
    "stationary_outlines_at_evolving_lakes": "dashed",
    "evolving_union_at_evolving_lakes": ":",
    "evolving_outlines_geom_calc": "solid",
}

# Prepare containers for legend handles/labels
legend_handles = []
legend_labels = []

# Scatter plotting loop: one panel per variable, ICESat-2 values on x against
# CryoSat-2 values (the "_CS2" columns) on y
for i, var in enumerate(vars_base):
    ax = axes[i]

    # Pool every dataset's values to derive shared, square axis limits.
    # Check the lists before concatenating: pd.concat raises on an empty list,
    # which would otherwise crash for a variable without any "_CS2" counterpart.
    x_parts = [df[var] for df in datasets.values() if var in df]
    y_parts = [df[f"{var}_CS2"] for df in datasets.values() if f"{var}_CS2" in df]
    if not x_parts or not y_parts:
        continue
    all_x = pd.concat(x_parts)
    all_y = pd.concat(y_parts)
    if all_x.empty or all_y.empty:
        continue

    data_min = min(all_x.min(skipna=True), all_y.min(skipna=True))
    data_max = max(all_x.max(skipna=True), all_y.max(skipna=True))
    # Pad by 2.5% of the data range (or by 1 when the range is zero)
    range_pad = 0.025 * (data_max - data_min) if data_max != data_min else 1
    lim_min = data_min - range_pad
    lim_max = data_max + range_pad
    ax.set_xlim(lim_min, lim_max)
    ax.set_ylim(lim_min, lim_max)

    for label, df in datasets.items():
        if var not in df.columns or f"{var}_CS2" not in df.columns:
            continue

        x = df[var]
        y = df[f"{var}_CS2"]
        mask_both = pd.notna(x) & pd.notna(y)

        # Scatter: evolving outlines are colored by date through the colormap,
        # every other dataset uses its fixed color from style_map
        if label == "evolving_outlines_geom_calc":
            times_num = mdates.date2num(
                pd.to_datetime(df["mid_pt_datetime"][mask_both], errors="coerce"))
            scatter_kwargs = dict(c=times_num, cmap=cmap, norm=norm)
        else:
            scatter_kwargs = dict(color=style_map[label]["color"])

        ax.scatter(
            x[mask_both], y[mask_both],
            s=20,
            marker=style_map[label]["marker"],
            alpha=0.8,
            label=None,  # Legend handle separately below
            **scatter_kwargs
        )

        # Regression: orthogonal distance regression of a straight line,
        # only when at least two paired points exist
        if mask_both.sum() > 1:
            model = odr.Model(linear_model)
            data_odr = odr.RealData(x[mask_both], y[mask_both])
            odr_obj = odr.ODR(data_odr, model, beta0=[1., 0.])
            out = odr_obj.run()
            slope, intercept = out.beta
            fit_x = np.linspace(lim_min, lim_max, 100)
            fit_y = slope * fit_x + intercept
            reg_color = evolving_outline_color if label == "evolving_outlines_geom_calc" else style_map[label]["color"]
            line = ax.plot(fit_x, fit_y,
                           color=reg_color,
                           linestyle=regression_styles[label],
                           linewidth=1.5)[0]

        # Y present only: drawn along the left edge of the panel
        mask_y = pd.notna(y) & pd.isna(x)
        if mask_y.any():
            ax.scatter(np.full(mask_y.sum(), lim_min), y[mask_y],
                       marker="_", color="blue", label=f"evolving outline\nfound only using CryoSat-2 SARIn (n={mask_y.sum()})", alpha=0.8)
            
        # X present only: drawn along the bottom edge of the panel
        mask_x = pd.notna(x) & pd.isna(y) 
        if mask_x.any():
            ax.scatter(x[mask_x], np.full(mask_x.sum(), lim_min),
                       marker="|", color="green", label=f"evolving outline\nfound only using ICESat-2 (n={mask_x.sum()})", alpha=0.8)

    # Only add colorbar in the first subplot
    if i == 0:
        # Inset axes for horizontal colorbar
        cax = inset_axes(
            ax,                          # parent axes
            width='67%',                 # width of colorbar
            height='3%',                 # height of colorbar
            loc='lower left',
            bbox_to_anchor=[0.31, 0.15, 1, 1],  # adjust as needed
            bbox_transform=ax.transAxes,
            borderpad=0
        )
        # Use ScalarMappable 'mappable' for the colorbar
        cbar = fig.colorbar(mappable, cax=cax, orientation='horizontal')
        cbar.set_label('evolving outline year', fontsize=12, labelpad=4)

        # Set ticks for all years but labels only for even years, skipping first year, 2010.0, as it starts before time series
        tick_locations = [mdates.date2num(date) for date in years[1:]]
        tick_labels = [f"'{date.strftime('%y')}" if date.year % 2 == 0 else '' for date in years[1:]]
        cbar.set_ticks(tick_locations)
        cbar.set_ticklabels(tick_labels)
        cbar.ax.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=[1,4,7,10]))  # Add minor ticks for quarters

        # Add a box highlighting the data range of the evolving outlines.
        # Use df_evolving explicitly instead of whichever dataframe the loop above
        # happened to visit last.
        data_min_num = mdates.date2num(df_evolving["mid_pt_datetime"].min())
        data_max_num = mdates.date2num(df_evolving["mid_pt_datetime"].max())
        rect = Rectangle(
            (data_min_num, 0),             # lower-left corner (x, y)
            data_max_num - data_min_num,   # width
            0.95,                          # height of colorbar (0–1 in colorbar coordinates)
            edgecolor='black',             # outline color
            facecolor='none',              # transparent fill
            linewidth=1.5
        )
        cax.add_patch(rect)
            
    # 1:1 reference line, axis labels and panel title
    ax.plot([lim_min, lim_max], [lim_min, lim_max], "k", lw=0.75, alpha=0.75, zorder=0)
    ax.set_xlabel("ICESat-2 ATL15")
    ax.set_ylabel("CryoSat-2 SARIn")
    ax.set_title(rename_map.get(var, var))

# Letter every panel except the last one (a, b, c, ...); 97 is the ASCII code of 'a'
for i, ax in enumerate(axes[:-1]):
    label = chr(97 + i)
    ax.text(0.02, 0.98, label, transform=ax.transAxes, fontsize=14, va='top', ha='left')

# Containers for the combined legend entries (marker + regression line together)
combo_handles = []
combo_labels = []

# For each dataset, flag the rows where any variable has both values, only the
# ICESat-2 value (x), or only the CryoSat-2 value (y)
for label, df in datasets.items():
    x_cols = [c for c in df.columns if not c.endswith("_CS2")]
    y_cols = [f"{c}_CS2" for c in x_cols]

    mask_x_y_total = pd.Series(False, index=df.index)
    mask_x_total = pd.Series(False, index=df.index)
    mask_y_total = pd.Series(False, index=df.index)

    for x_col, y_col in zip(x_cols, y_cols):
        # Skip columns that have no "_CS2" counterpart
        if not (x_col in df.columns and y_col in df.columns):
            continue
        x = df[x_col]
        y = df[y_col]
        has_x = pd.notna(x)
        has_y = pd.notna(y)
        mask_x_y_total |= has_x & has_y
        mask_x_total |= has_x & pd.isna(y)
        mask_y_total |= has_y & pd.isna(x)

    # Keep and report the counts for the evolving outlines dataset only
    if label != "evolving_outlines_geom_calc":
        continue

    evolving_outlines_CS2_IS2 = mask_x_y_total.sum()
    evolving_outlines_CS2 = mask_y_total.sum()
    evolving_outlines_IS2 = mask_x_total.sum()

    print(f'ICESat-2 evolving lakes found 2019.0-2021.5, n={count_evolving_lakes_IS2_during_CS2_comparison}')
    print(f'CryoSat-2 and ICESat-2 evolving lakes found, n={count_evolving_lakes_CS2_IS2}')
    print(f'CryoSat-2-only evolving lakes found, n={count_evolving_lakes_CS2}\n')

    print(f'CryoSat-2 and ICESat-2 evolving outlines found, n={mask_x_y_total.sum()}')
    print(f'CryoSat-2-only evolving outlines found, n={mask_y_total.sum()}')
    print(f'ICESat-2-only evolving outlines found, n={mask_x_total.sum()}')

# Number of merged CSV files (one per lake) for each dataset, shown in the legend
dir = 'output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes/CS2_comparison/merged'
stationary_outlines_at_all_lakes_count = count_files_by_suffix(dir, ".csv")
dir = 'output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_evolving_lakes/CS2_comparison/merged'
stationary_outlines_at_evolving_lakes_count = count_files_by_suffix(dir, ".csv")
dir = 'output/geometric_calcs/stationary_outline_geom_calc/evolving_union_at_evolving_lakes/CS2_comparison/merged'
evolving_union_at_evolving_lakes_count = count_files_by_suffix(dir, ".csv")
dir = 'output/geometric_calcs/evolving_outlines_geom_calc/CS2_comparison/merged'
evolving_outlines_geom_calc_count = count_files_by_suffix(dir, ".csv")

# Legend text per dataset.
# The time-step count uses evolving_outlines_CS2_IS2 (stored for the evolving outlines
# dataset) rather than mask_x_y_total, which holds whichever dataset the counting loop
# visited last.
label_map = {
    "stationary_outlines_at_all_lakes": 
        f"prior stationary outlines\nat analyzed lakes (n={stationary_outlines_at_all_lakes_count} lakes)",
    "stationary_outlines_at_evolving_lakes": 
        f"prior stationary outlines\nat evolving lakes subset (n={stationary_outlines_at_evolving_lakes_count} lakes)",
    "evolving_union_at_evolving_lakes": 
        f"updated stationary outlines\nat evolving lakes subset (n={evolving_union_at_evolving_lakes_count} lakes)",
    "evolving_outlines_geom_calc": 
        f"evolving outlines found in both\nCryoSat-2 and ICESat-2 data\n(n={evolving_outlines_geom_calc_count} lakes; n={evolving_outlines_CS2_IS2} time steps)"
}

# One combined legend entry (marker + regression line) per dataset
for label in datasets:
    if label == "evolving_outlines_geom_calc":
        color = evolving_outline_color
    else:
        color = style_map[label]["color"]

    # Marker handle
    marker_handle = Line2D([0], [0], marker=style_map[label]["marker"],
                           color=color, markersize=7, alpha=0.8, linestyle="None")
    # Regression line handle
    line_handle = Line2D([0], [0], color=color,
                         linestyle=regression_styles[label], linewidth=1.5)

    # Combine marker + line
    combo_handles.append((marker_handle, line_handle))
    combo_labels.append(label_map[label])

# Add CryoSat-only and ICESat-only markers too
cryosat_only_handle = Line2D([0], [0], marker="_", color="blue", markersize=7, alpha=0.8, linestyle="None")
icesat_only_handle = Line2D([0], [0], marker="|", color="green", markersize=7, alpha=0.8, linestyle="None")

combo_handles.extend([cryosat_only_handle, icesat_only_handle])
# NOTE(review): the ICESat-2-only entry reports count_evolving_lakes_IS2_during_CS2_comparison,
# which is printed elsewhere in this cell as all ICESat-2 evolving lakes found 2019.0-2021.5,
# not only those missing from CryoSat-2 — confirm this is the intended lake count.
combo_labels.extend([
    f"evolving outline found only in\nCryoSat-2 data\n(n={count_evolving_lakes_CS2} lakes; n={evolving_outlines_CS2} time steps)",
    f"evolving outline found only in\nICESat-2 data\n(n={count_evolving_lakes_IS2_during_CS2_comparison} lakes; n={evolving_outlines_IS2} time steps)"
])

# Place legend in last axis, which holds no data and is switched off
legend_ax = axes[-1]
legend_ax.axis("off")

legend_ax.legend(combo_handles, combo_labels, loc="center", frameon=False, fontsize=13,
                 handler_map={tuple: HandlerTuple(ndivide=None)})

# Save and show plot; make sure the figures subdirectory exists first because
# savefig does not create missing directories
os.makedirs(os.path.join(OUTPUT_DIR, 'figures'), exist_ok=True)
plt.savefig(OUTPUT_DIR + '/figures/FigS2_CryoSat2_ICESat2_compare.jpg',
    dpi=300, bbox_inches='tight')

plt.show()

In [None]:
# Close all open figures
plt.close('all')

## Fig. S3

In [None]:
# Import subglacial lake outlines
# Both GeoJSON files are read from paths relative to the current working directory
reexamined_stationary_outlines_gdf = gpd.read_file('output/lake_outlines/stationary_outlines/reexamined_stationary_outlines_gdf.geojson')
evolving_outlines_union_gdf = gpd.read_file('output/lake_outlines/stationary_outlines/evolving_outlines_union_gdf.geojson')

In [None]:
# Matplotlib font family and font sizes applied to figures created after this cell
plt.rcParams.update({
    'font.family': 'DejaVu Sans',
    'font.size': 8,
    'axes.labelsize': 10,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 8,
})

In [None]:
# Fig. S3 set-up: lake selection, axes grid, legend proxies and time colormap

# Lakes shown in the figure, listed in the top-to-bottom row order of the plot
desired_order = ['Institute_E1', 'Mac2', 'Site_BC']
selected_lakes = reexamined_stationary_outlines_gdf[
    reexamined_stationary_outlines_gdf['name'].isin(desired_order)
]
# Re-stack the selected rows so the GeoDataFrame follows desired_order
stationary_outlines_gdf_filtered = gpd.GeoDataFrame(
    pd.concat([selected_lakes[selected_lakes['name'] == name] for name in desired_order])
)

# Mapping for prettier lake names (mathtext subscripts)
lake_label_map = {
    'Institute_E1': r'Institute$_{\text{E1}}$',
    'Mac2': r'Mac$_{\text{2}}$',
    'Site_BC': r'Site$_{\text{BC}}$'
}

# Create a grid of plots: one row per lake, two panels per row
nrows, ncols = 3, 2
fig = plt.figure(figsize=(8.5, 11))
gs = fig.add_gridspec(
    nrows=nrows, ncols=ncols,
    width_ratios=[1, 1],   # equal column widths
    height_ratios=[1, 1, 1],
    wspace=-0.2,           # tighter horizontal gap
    hspace=0.1
)
axs = gs.subplots()

# Define colors and linestyles for legend (empty proxy lines, never drawn on the map)
stationary_outline_color = 'darkturquoise'
stationary_line = plt.Line2D([], [], color=stationary_outline_color, linestyle='solid', linewidth=2)
evolving_union_line = plt.Line2D([], [], color='k', linestyle='dotted', linewidth=2)

# Set up colormap for temporal evolution.
# Use one discrete color per plotted time step: outlines are drawn for
# cyc_start_datetimes[1:], and the year colorbar created later in this cell is
# built with that same number of bins, so plot colors and colorbar agree.
cmap = plt.get_cmap('plasma', len(cyc_start_datetimes[1:]))
norm = plt.Normalize(mdates.date2num(cyc_start_datetimes[0]),
                     mdates.date2num(cyc_end_datetimes[-1]))

# Value ranges of the hydropotential and modeled-flux rasters, for the full
# grids and for the map windows around each plotted lake

# Buffer added on every side of the square window, as a fraction of its side.
# Institute_E1 and Mac2 use a smaller buffer to zoom in because their outlines
# won't be blocked by the inset map; for Mac2 this also crops out more of the
# high hydropotential ridge so the hydropotential gradient is easier to see.
_LAKE_BUFFER_FRAC = {'Institute_E1': 0.2, 'Mac2': 0.15}
_DEFAULT_BUFFER_FRAC = 0.35


def _buffered_square_extent(lake_name):
    """Return (x_lo, x_hi, y_lo, y_hi) of the buffered square window for a lake.

    The window is the bounding box of the stationary outline unioned with all
    evolving outlines, expanded to a square and padded by the lake's buffer
    fraction. Returns None when the lake's evolving-outlines file is missing.
    """
    lake_gdf = stationary_outlines_gdf_filtered[
        stationary_outlines_gdf_filtered['name'] == lake_name
    ]
    evolving_outlines_path = os.path.join(
        os.getcwd(),
        f'output/lake_outlines/evolving_outlines/{lake_name}.geojson'
    )
    # Check for the file explicitly: geopandas may be using an I/O engine other
    # than fiona, in which case a missing file does not raise fiona's DriverError
    if not os.path.isfile(evolving_outlines_path):
        return None
    try:
        evolving_outlines_gdf = gpd.read_file(evolving_outlines_path)
    except fiona.errors.DriverError:
        return None

    # Union of lake geometries to define spatial subset
    union_geom = lake_gdf.geometry.iloc[0].union(evolving_outlines_gdf.geometry.union_all())
    x_min, y_min, x_max, y_max = union_geom.bounds

    # Make square bounds + buffer
    x_mid = (x_min + x_max) / 2
    y_mid = (y_min + y_max) / 2
    max_span = max(x_max - x_min, y_max - y_min)
    x_min = x_mid - max_span / 2
    x_max = x_mid + max_span / 2
    y_min = y_mid - max_span / 2
    y_max = y_mid + max_span / 2
    buffer_frac = _LAKE_BUFFER_FRAC.get(lake_name, _DEFAULT_BUFFER_FRAC)
    x_buffer = abs(x_max - x_min) * buffer_frac
    y_buffer = abs(y_max - y_min) * buffer_frac
    return (x_min - x_buffer, x_max + x_buffer, y_min - y_buffer, y_max + y_buffer)


def _window_values(raster_da, extent):
    """Return the raster values inside extent as a squeezed numpy array."""
    x_lo, x_hi, y_lo, y_hi = extent
    # y is sliced from high to low; presumably the rasters store y in
    # descending order — TODO confirm against the datasets
    return raster_da.sel(x=slice(x_lo, x_hi), y=slice(y_hi, y_lo)).squeeze().values


# Resolve each lake's window once so both rasters are scanned over the same area
# and each GeoJSON file is read a single time
lake_extents = {lake_name: _buffered_square_extent(lake_name) for lake_name in desired_order}

# --- Load subglacial hydropotential dataset ---
hydropot_path = DATA_DIR + "/subglacial_hydropotential_Antarctica.nc"
hydropot_da = xr.open_dataset(hydropot_path)["subglacial_hydropotential"]
hydropot_arr = hydropot_da.squeeze().values

# Bounds over the full hydropotential grid
global_hydro_vmin = np.nanmin(hydropot_arr)
global_hydro_vmax = np.nanmax(hydropot_arr)
print(f"Global hydropotential range: {global_hydro_vmin:.3f} – {global_hydro_vmax:.3f}")

# 5th/95th percentile of hydropotential inside each lake window
hydro_subset_mins, hydro_subset_maxs = [], []
for lake_name in desired_order:
    extent = lake_extents[lake_name]
    if extent is None:
        continue
    arr = _window_values(hydropot_da, extent)

    # Skip entirely masked or empty arrays
    if not np.isfinite(arr).any():
        continue

    hydro_subset_mins.append(np.nanpercentile(arr, 5))
    hydro_subset_maxs.append(np.nanpercentile(arr, 95))

# Compute bounds across all lake windows
subset_hydro_vmin = np.nanmin(hydro_subset_mins)
subset_hydro_vmax = np.nanmax(hydro_subset_maxs)
print(f"Geographic subset hydropotential range: {subset_hydro_vmin:.2f} – {subset_hydro_vmax:.2f}")

# Load modeled subglacial water flux raster
flux_path = DATA_DIR + "/SubglacialWaterFlux_Modelled_1km.tif"

# Use rioxarray to open modeled subglacial water flux raster
flux_da = rioxarray.open_rasterio(flux_path, masked=True)
flux_arr = flux_da.squeeze().values

# Bounds over the full flux grid
global_flux_vmin = np.nanmin(flux_arr)
global_flux_vmax = np.nanmax(flux_arr)
print(f"Global flux range: {global_flux_vmin:.3f} – {global_flux_vmax:.3f}")

# Min/max of flux inside each lake window
subset_mins, subset_maxs = [], []
for lake_name in desired_order:
    extent = lake_extents[lake_name]
    if extent is None:
        continue
    arr = _window_values(flux_da, extent)

    # Same guard as for hydropotential: an empty window would make nanmin raise
    if not np.isfinite(arr).any():
        continue

    subset_mins.append(np.nanmin(arr))
    subset_maxs.append(np.nanmax(arr))

# Compute bounds across all lake windows
subset_flux_vmin = np.nanmin(subset_mins)
subset_flux_vmax = np.nanmax(subset_maxs)
print(f"Geographic subset flux range: {subset_flux_vmin:.3f} – {subset_flux_vmax:.3f}")

# Color limits for the flux overlay: lower limit is the 75th percentile of the
# full grid; vmax is set manually to show lower flux pathways at Site_BC
flux_vmin_selected = np.nanpercentile(flux_arr, 75)
flux_vmax_selected = 10

# Share y-axis within each row but not between rows: in every row (including
# the first) the right-hand panel follows the left-hand panel
for row in range(nrows):
    for col in range(1, ncols):
        axs[row, col].sharey(axs[row, 0])

# Per-row hydropotential color limits, collected while plotting and reused for
# the inset colorbars added after this loop
subset_hydro_vmins, subset_hydro_vmaxs = [], []

# Colormaps for the raster overlays; a subtle colormap is used for
# hydropotential so the flux overlay stays visible
cmap_hydro = plt.cm.cividis
cmap_flux = plt.cm.Blues

# Axis tick labels are shown in kilometers
km_scale = 1e3

# Process each lake sequentially (one figure row per lake)
for row, lake_name in enumerate(desired_order):
    print('working on {}'.format(lake_name))

    # Load evolving outlines. The file is checked explicitly because geopandas
    # may be using an I/O engine other than fiona, in which case a missing file
    # does not raise fiona's DriverError.
    evolving_outlines_path = os.path.join(
        os.getcwd(), 'output/lake_outlines/evolving_outlines/{}.geojson'.format(lake_name))
    try:
        evolving_outlines_gdf = (
            gpd.read_file(evolving_outlines_path)
            if os.path.isfile(evolving_outlines_path) else None
        )
    except fiona.errors.DriverError:
        evolving_outlines_gdf = None
    if evolving_outlines_gdf is None:
        print(f"File for {lake_name} not found. Skipping...")
        continue

    # Find evolving and stationary outlines union for plotting extent
    lake_union_gdf = evolving_outlines_union_gdf[evolving_outlines_union_gdf['name'] == lake_name]
    x_min, y_min, x_max, y_max = lake_union_gdf.bounds.iloc[0]

    # Make plots uniform size and square
    x_mid = (x_min + x_max) / 2
    y_mid = (y_min + y_max) / 2
    max_span = max(x_max - x_min, y_max - y_min)
    x_min = x_mid - max_span / 2
    x_max = x_mid + max_span / 2
    y_min = y_mid - max_span / 2
    y_max = y_mid + max_span / 2

    # Add buffer around the plot
    if lake_name == 'Institute_E1':
        buffer_frac = 0.2
    elif lake_name == 'Mac2':
        buffer_frac = 0.15
    else:
        buffer_frac = 0.35
    x_buffer = abs(x_max - x_min) * buffer_frac
    y_buffer = abs(y_max - y_min) * buffer_frac
    x_lo, x_hi = x_min - x_buffer, x_max + x_buffer
    y_lo, y_hi = y_min - y_buffer, y_max + y_buffer
    plot_extent = [x_lo, x_hi, y_lo, y_hi]

    # --- Prepare the raster layers once per row (identical for both columns) ---

    # MOA surface imagery clipped to the plot window
    mask_x = (moa_highres_da.x >= x_lo) & (moa_highres_da.x <= x_hi)
    mask_y = (moa_highres_da.y >= y_lo) & (moa_highres_da.y <= y_hi)
    moa_highres_da_subset = moa_highres_da.where(mask_x & mask_y, drop=True)

    # Subglacial hydropotential clipped to the same window; non-finite values -> NaN
    hydropot_subset = hydropot_da.sel(x=slice(x_lo, x_hi), y=slice(y_hi, y_lo))
    hydropot_arr = hydropot_subset.squeeze().values
    hydropot_arr = np.where(np.isfinite(hydropot_arr), hydropot_arr, np.nan)
    vmin_hydro = np.nanmin(hydropot_arr)
    vmax_hydro = np.nanmax(hydropot_arr)
    subset_hydro_vmins.append(vmin_hydro)
    subset_hydro_vmaxs.append(vmax_hydro)

    # Modeled subglacial water flux clipped to the same window
    flux_subset = flux_da.sel(x=slice(x_lo, x_hi), y=slice(y_hi, y_lo))
    flux_arr = flux_subset.squeeze().values

    # Normalize flux to [0,1] over the selected color limits
    vmin, vmax = flux_vmin_selected, flux_vmax_selected
    normed = np.clip((flux_arr - vmin) / (vmax - vmin), 0, 1)

    # Colormap lookup returns RGBA; replace the alpha channel with the
    # square root of the normalized flux (zero -> fully transparent, large -> opaque)
    rgba = cmap_flux(normed)
    rgba[..., -1] = normed**0.5

    # --- Draw the layers shared by the outline (col 0) and centroid (col 1) views ---
    for col in [0, 1]:
        panel_ax = axs[row, col]

        # MOA surface imagery (bottom layer)
        panel_ax.imshow(moa_highres_da_subset[0, :, :], cmap='gray', clim=[14000, 17000],
                        extent=plot_extent, zorder=0)

        # Plot grounding line
        Scripps_landice.boundary.plot(ax=panel_ax, color='k', linewidth=1)

        # Hydropotential (above MOA, below flux)
        im_hydro = panel_ax.imshow(
            hydropot_arr,
            cmap=cmap_hydro,
            vmin=vmin_hydro,
            vmax=vmax_hydro,
            extent=plot_extent,
            alpha=0.6,
            zorder=0.5
        )

        # Transparent overlay of modeled subglacial water flux raster
        im_flux = panel_ax.imshow(rgba, extent=plot_extent, zorder=1)

        # Prior stationary outline(s)
        if lake_name == 'Site_BC':
            # Site_BC has two prior stationary outlines, Site_B and Site_C;
            # plot each outline together with its centroid
            for site in ['Site_B', 'Site_C']:
                site_geometry = stationary_outlines_gdf[stationary_outlines_gdf['name'] == site]['geometry']
                site_geometry.boundary.plot(
                    ax=panel_ax, color=stationary_outline_color, linewidth=2)
                centroid = site_geometry.iloc[0].centroid
                panel_ax.scatter(centroid.x, centroid.y,
                                 c=stationary_outline_color, marker='.', s=50, linewidth=1, zorder=2)
        else:
            stationary_outlines_gdf[stationary_outlines_gdf['name'] == lake_name]['geometry'].boundary.plot(
                ax=panel_ax, color=stationary_outline_color, linewidth=2)

        # Updated stationary outline (dotted)
        evolving_outlines_union_gdf[evolving_outlines_union_gdf['name'] == lake_name]['geometry'].boundary.plot(
            ax=panel_ax, color='k', linestyle='dotted', linewidth=2, zorder=2)

    # Create empty lists to store centroid coordinates
    centroids_x = []
    centroids_y = []
    centroid_dates = []

    # Plot evolving outlines with colors based on date (left column only)
    lines = []
    for dt_idx, dt in enumerate(cyc_start_datetimes[1:]):
        # NOTE(review): the outline color is looked up from
        # cyc_start_datetimes[dt_idx], i.e. the entry before dt, whereas the
        # centroid scatter below colors by dt itself — confirm this is intended
        step_color = cmap(norm(mdates.date2num(cyc_start_datetimes[dt_idx])))

        # Single-point line (no marker) that serves as a legend handle
        line, = axs[row, 0].plot(1, 1, color=step_color)
        lines.append(line)

        # Plot evolving outlines for this time step
        evolving_outlines_gdf_dt_sub = evolving_outlines_gdf[evolving_outlines_gdf['mid_pt_datetime'] == dt]
        if not evolving_outlines_gdf_dt_sub.empty:
            # Plot outline in left column
            evolving_outlines_gdf_dt_sub.boundary.plot(
                ax=axs[row, 0], color=step_color, linewidth=1, zorder=1)

            # Calculate and store centroid of the first geometry at this time step
            centroid = evolving_outlines_gdf_dt_sub.geometry.iloc[0].centroid
            centroids_x.append(centroid.x)
            centroids_y.append(centroid.y)
            centroid_dates.append(dt)

    # Plot centroids in right column
    axs[row, 1].scatter(centroids_x, centroids_y,
                        c=[cmap(norm(mdates.date2num(dt))) for dt in centroid_dates],
                        marker='+', s=100, linewidth=1, zorder=2)

    # Set the same limits and formatting for both plots
    for col in [0, 1]:
        # Format axis ticks to show kilometers
        ticks_x = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x/km_scale))
        ticks_y = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x/km_scale))
        axs[row, col].xaxis.set_major_formatter(ticks_x)
        axs[row, col].yaxis.set_major_formatter(ticks_y)

        # Set axes limits
        axs[row, col].set(xlim=(x_lo, x_hi), ylim=(y_lo, y_hi))

        # Remove y-tick labels for right column only
        if col == 1:
            plt.setp(axs[row, col].get_yticklabels(), visible=False)

        # x label on the bottom row only
        axs[-1, col].set_xlabel('x [km]')

    # y label on the left panel; lake name as a right-hand label on the right panel
    axs[row, 0].set_ylabel('y [km]')
    axs[row, 1].set_ylabel(lake_label_map.get(lake_name, lake_name), fontsize=12)
    axs[row, 1].yaxis.set_label_position('right')

    # Create and style inset map (only for left column)
    axIns = axs[row, 0].inset_axes([0.01, -0.01, 0.3, 0.3])
    axIns.set_aspect('equal')
    moa_2014_coastline.plot(ax=axIns, color='gray', edgecolor='k', linewidth=0.1, zorder=3)
    moa_2014_groundingline.plot(ax=axIns, color='ghostwhite', edgecolor='k', linewidth=0.1, zorder=3)
    axIns.axis('off')

    # Add location marker to inset map at the center of the plot window
    axIns.scatter(((x_max+x_min)/2), ((y_max+y_min)/2), marker='*',
                  linewidth=1, color='k', s=15, zorder=3)

# Add subplot labels (a, b, c, etc.) in reading order: left to right, then top to bottom
for panel_idx, panel in enumerate(axs.flat):
    panel.text(0.02, 0.98, chr(97 + panel_idx), transform=panel.transAxes,
               fontsize=14, va='top', ha='left')

# Discrete time colormap and the mappable that feeds the year colorbar
n_dates = len(cyc_start_datetimes[1:])
min_date = pd.to_datetime(cyc_start_datetimes[0])
max_date = pd.to_datetime(cyc_end_datetimes[-1])
min_date_num = mdates.date2num(min_date)
max_date_num = mdates.date2num(max_date)

# Calendar years covered by the time series (one timestamp per year)
date_range = pd.date_range(min_date, max_date, periods=n_dates)
years = pd.to_datetime(date_range.year.unique(), format='%Y')

cmap = plt.get_cmap('plasma', n_dates)
norm = plt.Normalize(min_date_num, max_date_num)
m = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
m.set_array(np.linspace(min_date_num, max_date_num, n_dates))

# Legend in panel [0,0]: all evolving-outline lines share one entry
outline_handles = [tuple(lines), stationary_line, evolving_union_line]
outline_labels = ['evolving outlines', 'prior stationary outline', 'updated stationary outline']
legend = axs[0,0].legend(
    outline_handles,
    outline_labels,
    handlelength=2.5,
    handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
    loc='upper right')

# Legend in panel [0,1]: three '+' markers combined into a single entry,
# colored at 25%, 50% and 75% of the plasma colormap
ax_legend = axs[0, 1]
colors = [cmap(frac) for frac in (0.25, 0.5, 0.75)]
markers = []
for c in colors:
    markers.append(
        plt.Line2D([], [], color=c, marker='+', linestyle='None', markersize=7, linewidth=1)
    )
combined_handle = tuple(markers)

ax_legend.legend(
    [combined_handle],
    ['evolving outline centroids'],
    loc='lower center',
    handlelength=2.5,
    handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
    frameon=True,
    fontsize=8,
    title_fontsize=9,
    borderpad=0.4,
)

# Legend in panel [2,0]: proxy handles for the lake point, the prior outline
# estimate and the grounding line
lake_point = plt.Line2D([], [], color=stationary_outline_color, ls='none', marker=".")
grounding_line = plt.Line2D([], [], color='k', lw=0.75)
prior_stationary_outline = plt.Line2D(
    [], [],
    color=stationary_outline_color, marker='o', ls='none',
    mfc='none', mec=stationary_outline_color, mew=1.5)

legend = axs[2,0].legend(
    [lake_point, prior_stationary_outline, grounding_line],
    ['lake point', 'prior stationary outline estimate', 'grounding line'],
    handlelength=2.5,
    handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
    loc='upper right'
)


# Add an inset hydropotential colorbar to the left panel of each plotted row.
# Iterate over the limits actually collected during plotting so that a skipped
# lake cannot cause an out-of-range index.
# NOTE(review): limits are stored in plotting order; this assumes no lake was
# skipped, so that list position equals the row index — confirm if lakes can be skipped
for row, (row_vmin, row_vmax) in enumerate(zip(subset_hydro_vmins, subset_hydro_vmaxs)):
    # Per-row limits divided by 1e3 to match the "MPa" label below
    # (presumably the dataset is in kPa — TODO confirm)
    sm_hydro = ScalarMappable(
        norm=Normalize(vmin=row_vmin/1e3, vmax=row_vmax/1e3),
        cmap=cmap_hydro
    )
    sm_hydro.set_array([])

    # Create small horizontal inset colorbar inside the subplot
    cax_hydro = inset_axes(
        axs[row, 0],
        width='67%',
        height='3%',
        loc='lower left',
        bbox_to_anchor=[0.3, 0.15, 1, 1],
        bbox_transform=axs[row, 0].transAxes,
        borderpad=0
    )

    cbar_hydro = fig.colorbar(
        sm_hydro,
        cax=cax_hydro,
        orientation='horizontal',
        extend='both'
    )

    # Make tick labels white and show only whole numbers
    cbar_hydro.ax.xaxis.set_major_locator(ticker.MaxNLocator(integer=True))
    cbar_hydro.ax.tick_params(
        labelsize=7,
        direction='out',
        length=2,
        colors='white'  # ticks and tick labels
    )

    # Set white color for colorbar label
    cbar_hydro.set_label(
        "subglacial hydropotential (MPa)",
        labelpad=4,
        fontsize=8,
        color='white'
    )

    # Set white color for the colorbar outline (spine)
    for spine in cbar_hydro.ax.spines.values():
        spine.set_color('white')

# Flux colorbar: a standalone mappable with the limits and colormap of the flux overlay
sm = ScalarMappable(
    cmap=plt.cm.Blues,
    norm=Normalize(vmin=flux_vmin_selected, vmax=flux_vmax_selected),
)
sm.set_array([])

# Horizontal colorbar above the panels, with label and ticks on its top edge
cax_flux = fig.add_axes([0.185, 0.94, 0.655, 0.01])  # [left, bottom, width, height]
cbar_flux = plt.colorbar(sm, cax=cax_flux, orientation='horizontal', extend='both')
flux_cbar_xaxis = cbar_flux.ax.xaxis
flux_cbar_xaxis.set_label_position('top')
flux_cbar_xaxis.set_ticks_position('top')
flux_cbar_xaxis.tick_top()
cbar_flux.set_label("modeled subglacial water flux (m$^3$ s$^{-1}$)", labelpad=5)

# Time colorbar below the panels
cax = fig.add_axes([0.185, 0.1, 0.655, 0.01])  # [left, bottom, width, height]
cbar = fig.colorbar(m, cax=cax, orientation='horizontal')
cbar.set_label('evolving outline/centroid year', size=11, labelpad=5)

# Ticks for all years but labels only for even years; the first year is
# skipped as it starts before the time series
tick_locations = []
tick_labels = []
for year_start in years[1:]:
    tick_locations.append(mdates.date2num(year_start))
    tick_labels.append(f"'{year_start.strftime('%Y')}" if year_start.year % 2 == 0 else '')
cbar.set_ticks(tick_locations)
cbar.set_ticklabels(tick_labels)
# Minor ticks at the start of each quarter
cbar.ax.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=[1,4,7,10]))

# Adjust the layout to make room for the colorbars
plt.subplots_adjust(
    top=0.93,       # top edge of the panel grid
    bottom=0.16,    # bottom edge of the panel grid, leaving space for the time colorbar
)

# Save plot
plt.savefig(OUTPUT_DIR + '/figures/FigS3_lake_migration.jpg',
    dpi=300, bbox_inches='tight')

# Preview plot
plt.show()

In [None]:
# Close every open matplotlib figure before building the next figure
plt.close('all')

In [None]:
# Display the rows of the lakes plotted in Fig. S3 so their citations can be
# copied into the figure caption
selected_lakes

## Fig. S4

In [None]:
# Import reexamined stationary subglacial lake outlines
reexamined_stationary_outlines_gdf = gpd.read_file(
    'output/lake_outlines/stationary_outlines/reexamined_stationary_outlines_gdf.geojson'
)

# Split the lakes into two filtered geodataframes according to the file type
# found for each lake in the evolving-outlines folder
folder_path = 'output/lake_outlines/evolving_outlines'
select_lakes_by_file = functools.partial(
    filter_gdf_by_folder_contents,
    reexamined_stationary_outlines_gdf,
    folder_path,
    exclude=False,
)

# Lakes with evolving outlines (.geojson)
# NOTE: an earlier step that appended the Site_B and Site_C rows here (they are
# now a combined lake) is disabled
evolving_outlines_lakes = select_lakes_by_file(file_extension='geojson')
print('lakes with evolving outlines:',len(evolving_outlines_lakes))

# Lakes with non-dynamic outlines (.txt)
no_evolving_outlines_lakes = select_lakes_by_file(file_extension='txt')
print('lakes without evolving outlines:',len(no_evolving_outlines_lakes))

In [None]:
# Font settings for Fig. S4; rcParams are global, so these stay in effect
# until another cell updates them
fig_s4_font_settings = {
    'font.family': 'DejaVu Sans',
    'font.size': 10,
    'axes.labelsize': 12,
    'xtick.labelsize': 11,
    'ytick.labelsize': 11,
    'legend.fontsize': 10,
}
plt.rcParams.update(fig_s4_font_settings)

In [None]:
# Read in data
def process_lake_data(directory: str, is_evolving: bool = True) -> dict:
    """Load the per-lake CSV files in *directory* into a dict of DataFrames.

    Files that do not end in '.csv', and files whose name contains "subset" or
    "superset" (continental summation files), are ignored. Each remaining file
    is read and given three extra columns:

    - 'lake_name': the file name without its extension
    - 'datetime': 'mid_pt_datetime' parsed with pandas.to_datetime
    - 'cumsum_vol': cumulative sum of the dV column divided by 1e9
      (m^3 -> km^3)

    Parameters
    ----------
    directory
        Folder containing one CSV per lake.
    is_evolving
        If True, accumulate 'evolving_outlines_dV_corr (m^3)'; otherwise
        accumulate 'stationary_outline_dV_corr (m^3)'.

    Returns
    -------
    dict
        Maps lake name to its DataFrame, in alphabetical order of file name.
    """
    dv_column = (
        'evolving_outlines_dV_corr (m^3)' if is_evolving
        else 'stationary_outline_dV_corr (m^3)'
    )

    dfs = {}
    # os.listdir returns entries in arbitrary order; sort so that the dict order
    # (and hence the order in which callers iterate and plot the lakes) is
    # reproducible across machines
    for file in sorted(os.listdir(directory)):
        # Ignore files that are not csv's
        if not file.endswith('.csv'):
            continue
        # Exclude continental summation files
        if "subset" in file or "superset" in file:
            continue

        lake_name = os.path.splitext(file)[0]
        df = pd.read_csv(os.path.join(directory, file))

        df['lake_name'] = lake_name
        df['datetime'] = pd.to_datetime(df['mid_pt_datetime'])
        df['cumsum_vol'] = df[dv_column].cumsum() / 1e9

        dfs[lake_name] = df
    return dfs

# Load per-lake cumulative dV time series for each outline type
prior_stationary_dfs = process_lake_data(
    "output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes",
    is_evolving=False,
)
prior_stationary_subset_dfs = process_lake_data(
    "output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_evolving_lakes",
    is_evolving=False,
)
evolving_dfs = process_lake_data(
    "output/geometric_calcs/evolving_outlines_geom_calc/forward_fill",
    is_evolving=True,
)
updated_stationary_dfs = process_lake_data(
    "output/geometric_calcs/stationary_outline_geom_calc/evolving_union_at_evolving_lakes",
    is_evolving=False,
)

# Setup figure: five stacked panels sharing the time axis
fig, axs = plt.subplots(nrows=5, ncols=1, figsize=(11, 16), sharex=True, constrained_layout=True)

# Key dates of the satellite coverage eras, looked up once from cyc_dates
first_cycle_start = cyc_dates['cyc_start_datetimes'].iloc[0]
last_cycle_end = cyc_dates['cyc_end_datetimes'].iloc[-1]
sarin_cycle_start = cyc_dates[
    cyc_dates['cyc_start_datetimes'] == '2013-10-01T18:00:00.000000000'
]['cyc_start_datetimes'].iloc[0]
is2_cycle_start = cyc_dates[cyc_dates['dataset'] == 'ICESat2_ATL15'].iloc[0]['cyc_start_datetimes']

# Same dates as matplotlib date numbers
time_span = mdates.date2num(last_cycle_end) - mdates.date2num(first_cycle_start)
start_date = mdates.date2num(first_cycle_start)
SARIn_expand_date = mdates.date2num(sarin_cycle_start)
CS2_IS2_tie_pt = mdates.date2num(is2_cycle_start)

# Annotation anchors, shifted 15 days to the right of each date
annotation_shift = pd.Timedelta(days=15)
start_date_text = pd.to_datetime(first_cycle_start) + annotation_shift
sarin_expand_date_text = pd.to_datetime(sarin_cycle_start) + annotation_shift
is2_start_date_text = pd.to_datetime(is2_cycle_start) + annotation_shift

# Add annotations for satellite/mode dates to the top panel
era_annotations = [
    (start_date_text, 'CryoSat-2 era begins'),
    (sarin_expand_date_text, 'CryoSat-2 SARIn mode expands'),
    (is2_start_date_text, 'ICESat-2 era begins'),
]
for text_x, caption in era_annotations:
    axs[0].text(text_x, -4, caption, horizontalalignment='left', verticalalignment='top', color='k')

# Reference lines on every panel
for row in range(len(axs)):
    # Horizontal line at zero
    axs[row].axhline(0, color='k', linewidth=0.5, zorder=0)
    # Vertical lines at the CS2 SARIn mode mask expansion and the ICESat-2 era start
    for era_date in (SARIn_expand_date, CS2_IS2_tie_pt):
        axs[row].axvline(era_date, color='dimgray', linestyle='solid', linewidth=0.75, ymin=-1, ymax=1, zorder=0)

# Highlighted lakes and the color assigned to each
special_colors = {
    "Byrd_2": "tab:blue",
    "EngelhardtSubglacialLake": "tab:orange",
    "Site_BC": "tab:green",
    "Slessor_23": "tab:purple",
    "Thw_124": "tab:brown",
}
special_lakes = list(special_colors)

# Panel 1: evolving (subset); highlighted lakes are drawn above the gray ones
for lake, df in evolving_dfs.items():
    if lake in special_lakes:
        line_style = dict(color=special_colors[lake], lw=1.5, label=lake, zorder=2)
    else:
        line_style = dict(color="lightgray", lw=0.8, zorder=1)
    axs[0].plot(df["datetime"], df["cumsum_vol"], marker='o', markersize=2, **line_style)

# Legend labels
special_labels = {
    "Byrd_2": r"Byrd$_{2}$",
    "EngelhardtSubglacialLake": "Engelhardt Subglacial Lake",
    "Site_BC": r"Site$_{\text{BC}}$",
    "Slessor_23": r"Slessor$_{23}$",
    "Thw_124": r"Thw$_{124}$",
}

# One proxy handle per highlighted lake, then a single handle for all other lakes
handles = [
    Line2D([0], [0], color=clr, lw=1.25, marker='o', markersize=3, label=special_labels[key])
    for key, clr in special_colors.items()
]
handles.append(
    Line2D([0], [0], color="gray", lw=1.25, marker='o', markersize=3,
           label=f"other lakes (n={len(evolving_outlines_lakes) - len(special_lakes)})")
)

axs[0].legend(handles=handles, loc="upper center", ncol=2, bbox_to_anchor=(0.25, 1.0))

# Panel 2: prior stationary (all lakes), colored by whether the lake also has
# an evolving-outlines time series
for lake, df in prior_stationary_dfs.items():
    if lake in evolving_dfs:
        line_color = "turquoise"
    else:
        line_color = "red"
    axs[1].plot(df["datetime"], df["cumsum_vol"], marker='o', markersize=2, color=line_color, alpha=0.5, lw=1)

# Custom legend for panel 2
found_handle = Line2D(
    [0], [0], color="turquoise", lw=1.25, marker='o', markersize=3,
    label=f"evolving outlines found (n={len(evolving_outlines_lakes)})")
not_found_handle = Line2D(
    [0], [0], color="red", lw=1.25, marker='o', markersize=3,
    label=f"evolving outlines not found (n={len(no_evolving_outlines_lakes)})")
handles = [found_handle, not_found_handle]
axs[1].legend(handles=handles, loc="upper center", bbox_to_anchor=(0.79, 1.0))

# Panel 3: updated stationary (subset); highlighted lakes are drawn above the gray ones
for lake, df in updated_stationary_dfs.items():
    highlighted = lake in special_lakes
    axs[2].plot(
        df["datetime"], df["cumsum_vol"],
        color=special_colors[lake] if highlighted else "lightgray",
        marker='o', markersize=2,
        lw=1.5 if highlighted else 0.8,
        zorder=1 if highlighted else 0,
    )

def _plot_cumulative_dV_bias(ax, evolving, stationary, highlight_colors):
    """Plot, for each lake, cumulative dV from evolving outlines minus stationary.

    Parameters
    ----------
    ax
        Axes to draw on.
    evolving, stationary
        Dicts mapping lake name to a DataFrame with 'datetime' and 'cumsum_vol'
        columns. Every lake in *evolving* must also be a key of *stationary*
        (a missing lake raises KeyError).
    highlight_colors
        Dict mapping highlighted lake names to their line color; all other
        lakes are drawn thinner, in light gray, underneath.
    """
    for lake, evolving_df in evolving.items():
        df_evo = evolving_df[["datetime", "cumsum_vol"]].rename(columns={"cumsum_vol": "evolving"})
        df_stat = stationary[lake][["datetime", "cumsum_vol"]].rename(columns={"cumsum_vol": "stationary"})

        # Compare only the time steps present in both series
        merged = pd.merge(df_evo, df_stat, on="datetime", how="inner")
        merged["bias"] = merged["evolving"] - merged["stationary"]

        if lake in highlight_colors:
            color, lw, zorder = highlight_colors[lake], 1.5, 2
        else:
            color, lw, zorder = "lightgray", 0.8, 1
        ax.plot(merged["datetime"], merged["bias"], color=color, marker='o', markersize=2, zorder=zorder, lw=lw)


# Panel 4: bias (evolving – prior stationary)
_plot_cumulative_dV_bias(axs[3], evolving_dfs, prior_stationary_dfs, special_colors)

# Panel 5: bias (evolving – updated stationary)
_plot_cumulative_dV_bias(axs[4], evolving_dfs, updated_stationary_dfs, special_colors)

# Axis formatting
# Set righthand x-axis limit slightly earlier (2 days) to prevent a tick mark
# displaying when there is no data point
x_limits = (
    cyc_dates['cyc_start_datetimes'].iloc[0],
    cyc_dates['cyc_end_datetimes'].iloc[-1] - datetime.timedelta(days=2),
)
for ax in axs:
    ax.xaxis.set_major_locator(mdates.YearLocator(base=1))  # Major ticks every year
    ax.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=[1,4,7,10]))  # Minor ticks every quarter
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))

    # Set x-axis limits on the axes being formatted (not on a panel indexed by a
    # loop variable left over from earlier code)
    ax.set(xlim=x_limits)

# y-axis label for each panel, top to bottom
panel_y_labels = [
    'cumulative $dV$ [km$^3$]\nusing evolving outlines',
    'cumulative $dV$ [km$^3$]\nusing prior stationary outlines',
    'cumulative $dV$ [km$^3$]\nusing updated stationary outlines',
    'cumulative $dV$ bias [km$^3$],\nevolving – prior stationary',
    'cumulative $dV$ bias [km$^3$],\nevolving – updated stationary',
]
for ax, y_label in zip(axs, panel_y_labels):
    ax.set_ylabel(y_label)
    # Same y limits on every panel
    ax.set_ylim(-4.5, 4.5)

# Letter each panel (a, b, c, ...) at its top left corner
ax_array = np.array(axs)
char_index = 97  # ASCII value for 'a'
for offset, ax in enumerate(ax_array.flatten()):
    # `transform=ax.transAxes` makes coordinates relative to the axes (0,0 is bottom left and 1,1 is top right)
    ax.text(0.01, 0.98, chr(char_index + offset), transform=ax.transAxes, fontsize=14, va='top', ha='left')

axs[-1].set_xlabel("year")

# Save plot
plt.savefig(OUTPUT_DIR + '/figures/FigS4_individual_lakes_dV.jpg',
    dpi=300, bbox_inches='tight')

plt.show()

In [None]:
# Close every open matplotlib figure before building the next figure
plt.close('all')

## Fig. S5

### Exploratory data analysis

In [None]:
# Explore different lake groups to find one to highlight in publication

def _numbered(prefix, first, last):
    """Return [prefix + str(i) for i = first..last], inclusive."""
    return [f'{prefix}{i}' for i in range(first, last + 1)]


# Example lake groups as (group name, lake names) pairs
lake_groups = [
    ('Bindschadler', _numbered('Bindschadler_', 1, 6)),
    ('Byrd', ['Byrd_1', 'Byrd_2'] + _numbered('Byrd_s', 1, 15)),
    ('Cook', ['Cook_E1', 'Cook_E2']),
    ('David', ['David_1'] + _numbered('David_s', 1, 5)),
    ('EAP', _numbered('EAP_', 1, 9)),
    ('Foundation_N', _numbered('Foundation_N', 1, 3)),
    ('Foundation', _numbered('Foundation_', 1, 16)),
    ('Institute', ['Institute_E1', 'Institute_E2', 'Institute_W1', 'Institute_W2']),
    ('KambTrunk', ['KT3', 'KT2', 'KT1']),
    ('Kamb', _numbered('Kamb_', 1, 12)),
    ('MacAyeal', _numbered('Mac', 1, 6)),
    ('Nimrod', _numbered('Nimrod_', 1, 2)),
    ('Ninnis', _numbered('Ninnis_', 1, 2)),
    ('Recovery', _numbered('Rec', 1, 9)),
    ('Slessor', ['Slessor_1', 'Slessor_23'] + _numbered('Slessor_', 4, 7)),
    ('Thwaites', ['Thw_70', 'Thw_124', 'Thw_142', 'Thw_170']),
    ('Totten', _numbered('Totten_', 1, 2)),
    ('Wilkes', _numbered('Wilkes_', 1, 2)),
    ('Mercer_Whillans', [
        'EngelhardtSubglacialLake',
        'UpperEngelhardtSubglacialLake',
        'Lake12',
        'Lake10',
        'Lake78',
        'WhillansSubglacialLake',
        'LowerMercerSubglacialLake',
        'MercerSubglacialLake',
        'LowerConwaySubglacialLake',
        'ConwaySubglacialLake',
        'UpperSubglacialLakeConway',
        'Whillans_6',
        'Whillans_7',
        'Whillans_8',
    ]),
]

# Call the function
plot_lake_groups_dV(lake_groups)

### Fig. S5

In [None]:
# Font settings applied to all panels of Fig. S5
for rc_key, rc_value in (
    ('font.family', 'DejaVu Sans'),
    ('font.size', 14),
    ('axes.labelsize', 14),
    ('xtick.labelsize', 11),
    ('ytick.labelsize', 11),
    ('legend.fontsize', 10),
):
    plt.rcParams[rc_key] = rc_value

In [None]:
# Select lakes to highlight in figure
lake_groups = [('Thwaites', ['Thw_70', 'Thw_124', 'Thw_142', 'Thw_170'])]

# Initialize lists to store valid lake data
valid_lakes = []
evolving_outlines_gdfs = []
lake_gdfs = []
evolving_geom_calcs_dfs = []
stationary_geom_calcs_dfs = []
evolving_union_geom_calcs_dfs = []

# Process lakes and populate the lists
for lake_name in lake_groups[0][1]:  # Access the lake list from the first group
    print(f"Processing data for {lake_name}...")
    
    # Get lake data from stationary outlines
    lake_gdf = stationary_outlines_gdf[stationary_outlines_gdf['name'] == lake_name]
    if lake_gdf.empty:
        print(f"Skipping {lake_name}: not found in stationary outlines")
        continue
    
    # Try loading evolving outlines
    try:
        evolving_outlines_gdf = gpd.read_file(os.path.join(
            'output/lake_outlines/evolving_outlines',
            f'{lake_name}.geojson'))
    except Exception as e:
        print(f"Skipping {lake_name}: no evolving outlines file - {str(e)}")
        continue
    
    # Try loading geometric calculations
    try:
        evolving_geom_calcs_df = pd.read_csv(os.path.join(
            'output/geometric_calcs/evolving_outlines_geom_calc/forward_fill/',
            f'{lake_name}.csv'))
        evolving_geom_calcs_df['mid_pt_datetime'] = pd.to_datetime(evolving_geom_calcs_df['mid_pt_datetime'])

        evolving_union_geom_calcs_df = pd.read_csv(os.path.join(
            'output/geometric_calcs/stationary_outline_geom_calc/evolving_union_at_evolving_lakes/',
            f'{lake_name}.csv'))
        evolving_union_geom_calcs_df['mid_pt_datetime'] = pd.to_datetime(evolving_union_geom_calcs_df['mid_pt_datetime'])

        stationary_geom_calcs_df = pd.read_csv(os.path.join(
            'output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes/',
            f'{lake_name}.csv'))
        stationary_geom_calcs_df['mid_pt_datetime'] = pd.to_datetime(stationary_geom_calcs_df['mid_pt_datetime'])
    except Exception as e:
        print(f"Skipping {lake_name}: error loading geometric calculations - {str(e)}")
        continue
    
    print(f"Valid data found for {lake_name}")
    valid_lakes.append(lake_name)
    lake_gdfs.append(lake_gdf)
    evolving_outlines_gdfs.append(evolving_outlines_gdf)
    evolving_geom_calcs_dfs.append(evolving_geom_calcs_df)
    stationary_geom_calcs_dfs.append(stationary_geom_calcs_df)
    evolving_union_geom_calcs_dfs.append(evolving_union_geom_calcs_df)

if not valid_lakes:
    raise ValueError("No valid lakes found to process")
    
# Create figure
fig = plt.figure(figsize=(10, 15))

# Create a 3x2 gridspec
gs = fig.add_gridspec(3, 2)

# Main spatial overview panel in first cell
ax_main = fig.add_subplot(gs[0, 0])

# Get combined extent for all valid lakes
x_mins, x_maxs, y_mins, y_maxs = [], [], [], []

for lake_gdf, evolving_outlines_gdf in zip(lake_gdfs, evolving_outlines_gdfs):
    # Find evolving and stationary outlines union for plotting extent
    # (lake_name is assigned here but not used inside this loop)
    lake_name = lake_gdf['name'].iloc[0]
    evolving_stationary_union_gdf = gpd.GeoDataFrame(
        geometry=[lake_gdf.geometry.iloc[0].union(evolving_outlines_gdf.geometry.union_all())],
        crs=lake_gdf.crs)
    
    # Get extent of this lake, padded on every side by 5% of its longer dimension
    x_min, y_min, x_max, y_max = evolving_stationary_union_gdf['geometry'].bounds.iloc[0]
    buffer_dist = max(x_max - x_min, y_max - y_min) * 0.05
    x_mins.append(x_min - buffer_dist)
    x_maxs.append(x_max + buffer_dist)
    y_mins.append(y_min - buffer_dist)
    y_maxs.append(y_max + buffer_dist)

# Set plot extent to the bounding box of all padded per-lake extents
x_min, x_max = min(x_mins), max(x_maxs)
y_min, y_max = min(y_mins), max(y_maxs)

# Plot MOA background: keep only the pixels inside the plot extent
# NOTE(review): the image is stretched to exactly [x_min, x_max, y_min, y_max],
# which presumably differs slightly from the pixel bounds of moa_subset - confirm acceptable
mask_x = (moa_highres_da.x >= x_min) & (moa_highres_da.x <= x_max)
mask_y = (moa_highres_da.y >= y_min) & (moa_highres_da.y <= y_max)
moa_subset = moa_highres_da.where(mask_x & mask_y, drop=True)
ax_main.imshow(moa_subset[0,:,:], cmap='gray', clim=[14000, 17000],
              extent=[x_min, x_max, y_min, y_max])

# Plot stationary outlines
stationary_color = 'darkturquoise'
for lake_gdf in lake_gdfs:
    lake_gdf.boundary.plot(ax=ax_main, color=stationary_color, linewidth=2)

# Define custom offsets and display names for each lake
# Format: 'lake_name': {'offset': (x_offset, y_offset), 'display_bold': ..., 'display': ...}
# The offset is added to the lake centroid coordinates to position the label
label_configs = {
    'Thw_70': {
        'offset': (-9e3, 8e3),
        'display_bold': r'$\mathbf{Thw}_{\mathbf{70}}$',  # used in ax_main
        'display': r'Thw$_{70}$'  # used in subplot titles
    },
    'Thw_124': {
        'offset': (-23e3, 21e3),
        'display_bold': r'$\mathbf{Thw}_{\mathbf{124}}$',
        'display': r'Thw$_{124}$'
    },
    'Thw_142': {
        'offset': (-26e3, 23e3),
        'display_bold': r'$\mathbf{Thw}_{\mathbf{142}}$',
        'display': r'Thw$_{142}$'
    },
    'Thw_170': {
        'offset': (-18e3, 12e3),
        'display_bold': r'$\mathbf{Thw}_{\mathbf{170}}$',
        'display': r'Thw$_{170}$'
    }
}

# Add lake labels
for lake_gdf in lake_gdfs:
    # Get the centroid of the lake geometry
    centroid = lake_gdf.geometry.iloc[0].centroid
    # Get the lake name
    lake_name = lake_gdf['name'].iloc[0]
    # Get custom offset and display name for this lake (or use defaults)
    config = label_configs.get(lake_name, {'offset': (0, 0), 'display': lake_name})
    x_offset, y_offset = config['offset']
    # Prefer the bold label; fall back to the plain label, then to the raw lake name
    display_name = config.get('display_bold', config.get('display', lake_name))
    # Add label (white text with a black stroke so it stays readable over the imagery)
    ax_main.annotate(display_name, 
                    xy=(centroid.x + x_offset, centroid.y + y_offset),
                    color='white',
                    fontweight='bold',
                    ha='center', va='center',
                    path_effects=[PathEffects.withStroke(linewidth=3, foreground='k')])

# Plot evolving outlines with time-based coloring
# Colors span the second through the last entry of cyc_start_datetimes
cmap = plt.get_cmap('plasma')
norm = plt.Normalize(mdates.date2num(cyc_start_datetimes[1]), 
                   mdates.date2num(cyc_start_datetimes[-1]))

for evolving_outlines_gdf in evolving_outlines_gdfs:
    for idx, row in evolving_outlines_gdf.iterrows():
        # Color each outline by its mid-point datetime
        color = cmap(norm(mdates.date2num(pd.to_datetime(row['mid_pt_datetime']))))
        gpd.GeoSeries(row['geometry']).boundary.plot(
            ax=ax_main, color=color, linewidth=1)

# Plot inset map
# Created once, after the loop above: building it inside the per-lake loop
# stacked one identical inset axes (coastline, grounding line, star) per lake
axIns = ax_main.inset_axes([0.7, 0.02, 0.3, 0.3]) # [left, bottom, width, height] (fractional axes coordinates)
axIns.set_aspect('equal')
moa_2014_coastline.plot(ax=axIns, color='gray', edgecolor='k', linewidth=0.1)
moa_2014_groundingline.plot(ax=axIns, color='ghostwhite', edgecolor='k', linewidth=0.1)
axIns.axis('off')
# Plot star at the center of the overview extent to indicate location
axIns.scatter(((x_max+x_min)/2), ((y_max+y_min)/2), marker='*', 
    linewidth=1, color='k', s=75)

# Plot evolving outlines union (dotted black boundary) for each valid lake
for lake_gdf in lake_gdfs:
    lake_name = lake_gdf['name'].iloc[0]
    evolving_union_gdf = evolving_outlines_union_gdf[evolving_outlines_union_gdf['name'] == lake_name]
    evolving_union_gdf.boundary.plot(ax=ax_main, color='k', linestyle='dotted', linewidth=2)

# Format overview axes: tick values are divided by 1e3 so the labels read in km
km_scale = 1e3
ticks_x = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x/km_scale))
ticks_y = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x/km_scale))
ax_main.xaxis.set_major_formatter(ticks_x)
ax_main.yaxis.set_major_formatter(ticks_y)
ax_main.set_xlabel('x [km]')
ax_main.set_ylabel('y [km]')

# Set up colormap
# cmap and norm are rebound here: the colormap becomes a discrete 'plasma' with one
# color per entry of cyc_start_datetimes[1:], over the same date range as before
min_date = pd.to_datetime(cyc_start_datetimes[1])
max_date = pd.to_datetime(cyc_start_datetimes[-1])
date_range = pd.date_range(min_date, max_date, periods=len(cyc_start_datetimes[1:]))
# NOTE(review): `years` is computed but not used in the remainder of this cell
years = date_range.year.unique()
years = pd.to_datetime(years, format='%Y')
n_dates = len(cyc_start_datetimes[1:])
cmap = plt.get_cmap('plasma', n_dates)
norm = plt.Normalize(mdates.date2num(min_date), mdates.date2num(max_date))
m = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
m.set_array(np.linspace(mdates.date2num(min_date), mdates.date2num(max_date), n_dates))

# Add colorbar in a thin axes appended below the overview panel
divider = make_axes_locatable(ax_main)
cax = divider.append_axes('bottom', size='2.5%', pad=0.55)
cbar = fig.colorbar(m, cax=cax, orientation='horizontal')

# Set colorbar ticks
cbar.ax.xaxis.set_major_formatter(year_interval_formatter())
cbar.ax.xaxis.set_major_locator(mdates.YearLocator())  # Every year
cbar.ax.xaxis.set_minor_locator(mdates.MonthLocator([1, 4, 7, 10]))  # Quarter year ticks

cbar.set_label('year', size=12)

# Get y axis limits for volume plots
# NOTE: this overwrites y_min/y_max, which held the map extent up to this point
y_min, y_max = get_overall_y_limits(evolving_geom_calcs_dfs, 
                                  stationary_geom_calcs_dfs,
                                  evolving_union_geom_calcs_dfs)        
# Calculate limits with buffer
# NOTE(review): y_limits is never used below; the dV panels call set_ylim(y_min, y_max)
# without this 5% buffer - confirm whether get_overall_y_limits already pads the range
y_range = y_max - y_min
buffer = y_range * 0.05
y_limits = (y_min - buffer, y_max + buffer)

# Create axes for all plots (excluding the overview plot position)
axes = []
plot_positions = [(0,1), (1,0), (1,1), (2,0), (2,1)]  # Row, Col positions for dV plots

for pos in plot_positions:
    ax = fig.add_subplot(gs[pos])
    axes.append(ax)

# Plot individual lakes: one dV panel per valid lake, filling `axes` in order
for idx, (lake_name, evolving_df, stationary_df, union_df) in enumerate(zip(
        valid_lakes, evolving_geom_calcs_dfs, stationary_geom_calcs_dfs, evolving_union_geom_calcs_dfs)):
    ax = axes[idx]
    ax.axhline(0, color='k', linewidth=0.5)
    
    # x values for every series in this panel come from the evolving-outline dataframe
    # NOTE(review): assumes stationary_df and union_df have the same rows/timestamps - confirm
    dates = mdates.date2num(evolving_df['mid_pt_datetime'])
    
    # Plot stationary outline: cumulative sum of dV, converted from m^3 to km^3
    stationary_cumsum = np.cumsum(np.divide(stationary_df['stationary_outline_dV_corr (m^3)'], 1e9))
    ax.plot(dates, stationary_cumsum, color=stationary_color, marker='o', markerfacecolor='w', markersize=3, label='Stationary', linewidth=2)
    ax.scatter(dates, stationary_cumsum, color=stationary_color, s=5)

    # Store line segments for multi-colored line in legend
    # `lines` is rebuilt for every lake; the overview legend uses the list from the last lake
    # NOTE(review): cyc_start_datetimes is indexed by position in `dates`; presumably
    # it has at least len(dates) entries - confirm
    lines = []
    for i, dt in enumerate(dates):
        line = ax.plot(1, 1, color=cmap(norm(mdates.date2num(cyc_start_datetimes[i]))), linewidth=2)[0]
        lines.append(line)
        line.remove()  # Remove the dummy lines after creating them

    # Plot evolving outlines (multi-colored line)
    # Consecutive (date, cumulative dV) points are paired into segments, each colored by date
    x = dates
    y = np.cumsum(np.divide(evolving_df['evolving_outlines_dV_corr (m^3)'], 1e9))
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)
    lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
    lc.set_array(x)
    lc.set_linewidth(2)
    ax.add_collection(lc)
    ax.scatter(x, y, c=x, cmap=cmap, norm=norm, s=9)

    # Plot evolving outlines union
    union_cumsum = np.cumsum(np.divide(union_df['stationary_outline_dV_corr (m^3)'], 1e9))
    ax.plot(dates, union_cumsum, color='k', marker='o', markersize=3, linewidth=2)

    # Plot bias: cumulative (evolving - prior stationary) dV
    bias = np.cumsum(np.divide(evolving_df['evolving_outlines_dV_corr (m^3)'] - stationary_df['stationary_outline_dV_corr (m^3)'], 1e9))
    ax.plot(dates, bias, color='r', marker='o', markerfacecolor='white', markersize=3, linewidth=2)

    # Second bias: cumulative (evolving - evolving-union stationary) dV
    bias2 = np.cumsum(np.divide(evolving_df['evolving_outlines_dV_corr (m^3)'] - union_df['stationary_outline_dV_corr (m^3)'], 1e9))
    ax.plot(dates, bias2, color='darkred', marker='o', markersize=3, linewidth=2)

    # Format axes
    ax.xaxis.set_major_formatter(year_interval_formatter())
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_minor_locator(mdates.MonthLocator([1, 4, 7, 10]))  # Quarter year ticks (Jan, Apr, Jul, Oct)

    # Set x and y axes limit (same limits on every dV panel)
    ax.set_xlim(cyc_dates['cyc_start_datetimes'].iloc[0],
        (cyc_dates['cyc_end_datetimes'].iloc[-1] - datetime.timedelta(days=2)))
    ax.set_ylim(y_min, y_max)    

    # Handle y-axis labels
    row = plot_positions[idx][0]
    col = plot_positions[idx][1]
    if (col == 1 and row == 0) or (col == 0 and (row == 1 or row == 2)):  # First dV plot (0,1) and left column of rows 1 and 2
        ax.set_ylabel('cumulative $dV$ [km$^3$]')
    else:  # All other plots
        ax.set_yticklabels([])
        
    # Handle x-axis labels
    if row == 2:  # Bottom row
        ax.set_xlabel('year')
    else:  # Rows 0 and 1: hide tick labels and axis label
        ax.set_xticklabels([])
        ax.set_xlabel('')
    
    # Get the display name from label_configs, fallback to lake_name if not found
    display_name = label_configs.get(lake_name, {}).get('display', lake_name)
    
    # Use display_name instead of lake_name for the title
    ax.set_title(display_name, fontsize=16)

# Add legends
# Proxy artists: each Line2D below exists only to supply a legend handle
stationary_line = plt.Line2D([], [], color=stationary_color, linestyle='solid', linewidth=2)
stationary_dV_line = plt.Line2D([], [], color=stationary_color, marker='o', markersize=5, markerfacecolor='white', linestyle='solid', linewidth=2)
# Full colormap for evolving outlines with one representative data point
mid_idx = len(cyc_dates['cyc_start_datetimes']) // 2
evolving_scatter_line = []
for i, dt in enumerate(cyc_dates['cyc_start_datetimes']):
    if i == mid_idx:  # only the middle line gets a marker
        evolving_scatter_line.append(
            Line2D([0], [0],
                   color=cmap(norm(mdates.date2num(dt))),
                   linestyle='solid',
                   marker='o', markersize=5))
    else:
        evolving_scatter_line.append(
            Line2D([0], [0],
                   color=cmap(norm(mdates.date2num(dt))),
                   linestyle='solid'))
evolving_union_line = plt.Line2D([], [], color='k', linestyle='dotted', linewidth=2)
evolving_union_dV_line = plt.Line2D([], [], color='k', marker='o', markersize=5, linestyle='solid', linewidth=2)
bias_line = plt.Line2D([], [], color='red', marker='o', markersize=5, markerfacecolor='white', linestyle='solid', linewidth=2)  # evolving - prior stationary (all lakes)
bias_line2 = plt.Line2D([], [], color='darkred', marker='o', markersize=5, linestyle='solid', linewidth=2)  # evolving - updated stationary (only evolving lakes)

# Overview-map legend, placed above the panel; the tuple of colored lines is drawn
# as a single multi-colored handle by HandlerTuple
# `lines` holds the dummy colored lines built in the per-lake plotting loop
legend = ax_main.legend(
    [tuple(lines),
     stationary_line,
     evolving_union_line,],
    ['evolving outlines',
     'prior stationary outline',
     'updated stationary outline',],
    handlelength=3,
    handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
    fontsize=12,
    loc='upper center', bbox_to_anchor=(0.5, 1.3),
)

# dV legend, drawn inside the first dV panel; `legend` is rebound to this second legend
legend = axes[0].legend(
    [tuple(evolving_scatter_line), 
     stationary_dV_line,
     evolving_union_dV_line,
     bias_line,
     bias_line2],
    ['evolving outlines',
     'prior stationary outline',
     'updated stationary outline',
     'bias (evolving − prior stationary)',
     'bias (evolving − updated stationary)'],
    handlelength=3,
    handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
    fontsize=12,
    loc='lower center'
)
    
# Plot combined data in the last position
last_ax = axes[-1]

# Combine all dataframes by summing values for each timestamp
# (groupby(...).sum() sums every remaining column of the concatenated per-lake dataframes)
combined_evolving = pd.concat(evolving_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()
combined_stationary = pd.concat(stationary_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()
combined_union = pd.concat(evolving_union_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()

# x values for every series in this panel come from the combined evolving dataframe
dates = mdates.date2num(combined_evolving['mid_pt_datetime'])

# Plot stationary outline: cumulative sum of dV, converted from m^3 to km^3
stationary_cumsum = np.cumsum(np.divide(combined_stationary['stationary_outline_dV_corr (m^3)'], 1e9))
last_ax.plot(dates, stationary_cumsum, color=stationary_color,  marker='o', markersize=3, markerfacecolor='w', linewidth=2)

# Plot evolving outlines (multi-colored line)
evolving_cumsum = np.cumsum(np.divide(combined_evolving['evolving_outlines_dV_corr (m^3)'], 1e9))
points = np.array([dates, evolving_cumsum]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
lc.set_array(dates)
lc.set_linewidth(2)
last_ax.add_collection(lc)
last_ax.scatter(dates, evolving_cumsum, c=dates, cmap=cmap, norm=norm, s=9)

# Plot evolving outlines union
union_cumsum = np.cumsum(np.divide(combined_union['stationary_outline_dV_corr (m^3)'], 1e9))
last_ax.plot(dates, union_cumsum, color='k', linestyle='solid', marker='o', markersize=3, linewidth=2)

# Plot bias: cumulative (evolving - prior stationary) dV
# NOTE(review): the subtraction pairs rows by position after reset_index(); this assumes
# the combined dataframes contain the same set of timestamps - confirm
bias_cumsum = np.cumsum(np.divide(
    combined_evolving['evolving_outlines_dV_corr (m^3)'] - 
    combined_stationary['stationary_outline_dV_corr (m^3)'], 1e9))
last_ax.plot(dates, bias_cumsum, color='r', marker='o', markersize=3, markerfacecolor='w', linewidth=2)

# Second bias: cumulative (evolving - evolving-union stationary) dV
bias_cumsum2 = np.cumsum(np.divide(
    combined_evolving['evolving_outlines_dV_corr (m^3)'] - 
    combined_union['stationary_outline_dV_corr (m^3)'], 1e9))
last_ax.plot(dates, bias_cumsum2, color='darkred', marker='o', markersize=3, linewidth=2)

# Set axes limits and format (same limits and tick formatting as the per-lake panels)
last_ax.set_xlim(cyc_dates['cyc_start_datetimes'].iloc[0],
                 (cyc_dates['cyc_end_datetimes'].iloc[-1] - datetime.timedelta(days=2)))
last_ax.set_ylim(y_min, y_max)
last_ax.set_yticklabels([])
last_ax.axhline(0, color='k', linewidth=0.5)
last_ax.set_xlabel('year', size=14)  # explicit size so it matches the other 'year' labels
last_ax.xaxis.set_major_formatter(year_interval_formatter())
last_ax.xaxis.set_major_locator(mdates.YearLocator())
last_ax.xaxis.set_minor_locator(mdates.MonthLocator([1, 4, 7, 10]))

last_ax.set_title('total', fontsize=16)

# Letter the panels 'a'-'f': overview map first, then the dV panels in creation order
panel_axes = [ax_main] + axes
for panel_number, panel_ax in enumerate(panel_axes):
    panel_letter = chr(ord('a') + panel_number)
    panel_ax.text(0.03, 0.97, panel_letter, transform=panel_ax.transAxes,
                  fontsize=20, va='top', ha='left')

# Tighten the layout with no extra vertical padding between rows
plt.tight_layout(h_pad=0.0)

# Write the figure to disk
figS5_path = OUTPUT_DIR + '/figures/FigS5_Thw_lakes_dV.jpg'
plt.savefig(figS5_path, dpi=400, bbox_inches='tight')

# Display the figure in the notebook
plt.show()

In [None]:
# Close all open figures now that Fig. S5 has been saved and shown
plt.close('all')

## Fig. S6

In [None]:
def combine_first_rows(folder_path):
    '''
    Collects the first data row of every CSV file in a folder into a single DataFrame.

    Files are visited in sorted filename order so the row order of the result is
    reproducible, and only the first data row of each file is parsed. Files that
    cannot be read are reported with a printed message and skipped; CSV files
    without any data row are skipped silently.

    Parameters:
    folder_path (str): Path to the folder containing CSV files

    Returns:
    pandas.DataFrame: One row per readable, non-empty CSV file (its first data row),
        plus a 'name' column holding the filename without its extension.
        An empty DataFrame if no rows were collected.
    '''
    first_rows = []

    # os.listdir returns entries in arbitrary order; sort for a stable result
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith('.csv'):
            continue

        file_path = os.path.join(folder_path, filename)
        try:
            # nrows=1 avoids parsing the whole file when only one row is kept
            first_row = pd.read_csv(file_path, nrows=1)
        except Exception as e:
            # Best effort: report the unreadable file and keep going
            print(f'Error processing {filename}: {str(e)}')
            continue

        # A file with a header but no data rows contributes nothing
        if first_row.empty:
            continue

        # Record which file the row came from (filename without extension)
        first_row['name'] = os.path.splitext(filename)[0]
        first_rows.append(first_row)

    # Combine all first rows into a single DataFrame
    if not first_rows:
        return pd.DataFrame()
    return pd.concat(first_rows, ignore_index=True)

In [None]:
# Collect the first row of each CSV in the levels folder (one row per file)
# and display the combined table
# NOTE(review): absolute path is specific to the /home/jovyan environment
folder_path = '/home/jovyan/1_evolving_lakes/output/FigS1_lake_reexamination_methods/levels'
result_df = combine_first_rows(folder_path)
result_df

In [None]:
# Keep only lakes that have a saved evolving-outlines file; lakes whose evolving outlines
# were found through manual inspection to be similar to off-lake, background activity
# have no such file and are dropped here
evolving_outlines_dir = 'output/lake_outlines/evolving_outlines'
geojson_files = [f for f in os.listdir(evolving_outlines_dir) if f.endswith('.geojson')]
evolving_lake_names = [f.replace('.geojson', '') for f in geojson_files]
has_evolving_outlines = result_df['name'].isin(evolving_lake_names)
filtered_df = result_df[has_evolving_outlines]
print(len(filtered_df))  # =98 because Site_B and Site_C saved as output file Site_BC.geojson (Fig. S4e,f)
filtered_df

In [None]:
def plot_level_histogram(df, column='level', bins=10):
    '''
    Draws and displays a styled histogram of one DataFrame column

    Parameters:
    df (pandas.DataFrame): DataFrame holding the values to histogram
    column (str): Name of the column whose values are binned
    bins (int): Number of bins for the histogram
    '''
    # New 10 x 6 inch figure with a single axes
    hist_fig, hist_ax = plt.subplots(figsize=(10, 6))

    # Histogram bars
    hist_ax.hist(df[column], bins=bins, color='skyblue', edgecolor='black')

    # Title and axis labels
    hist_ax.set_title(f'Distribution of {column}', pad=15, fontsize=14)
    hist_ax.set_xlabel(column, fontsize=12)
    hist_ax.set_ylabel('Count', fontsize=12)

    # Light grid for readability
    hist_ax.grid(True, alpha=0.3)

    # Keep x tick labels horizontal
    plt.xticks(rotation=0)

    # Fit everything inside the figure, then display it
    hist_fig.tight_layout()
    plt.show()

# Example usage:
# plot_level_histogram(filtered_df, column='level', bins=35)
# plot_level_histogram(filtered_df, column='within_area_multiple', bins=35)

In [None]:
# Exploratory 35-bin histograms of the two per-lake columns
for hist_column in ('level', 'within_area_multiple'):
    plot_level_histogram(filtered_df, column=hist_column, bins=35)

In [None]:
# List the lakes whose level exceeds 0.5, largest level first
filtered_df[filtered_df['level'] > 0.5].sort_values('level', ascending=False)

In [None]:
# def plot_level_histogram(df, column='level', bins=10):
#     """
#     Histogram with:
#     - values >0.5 highlighted per lake (alphabetized)
#     - values <=0.5 grouped as 'other lakes' (gray)
#     - same bar widths
#     - x-axis capped at 1.75
#     - gray bars drawn first so highlights appear on top
#     - legend correctly filtered to avoid '_nolegend_' warnings
#     """
#     plt.figure(figsize=(10, 6))
#     df_other = df[df[column] <= 0.5]
#     df_highlight = df[df[column] > 0.5]
#     # Create bin edges aligned with 0.01 intervals
#     bin_edges = np.arange(0, 1.8, 0.01)
#     bin_width = bin_edges[1] - bin_edges[0]
#     cmap = plt.get_cmap("tab20")
#     unique_names = sorted(df_highlight['name'].unique()) if not df_highlight.empty else []
    
#     # --- Plot gray "other lakes" first ---
#     counts_other, _ = np.histogram(df_other[column], bins=bin_edges)
    
#     # Only plot bars where counts > 0 to avoid black lines along x-axis
#     non_zero_mask = counts_other > 0
#     if non_zero_mask.any():
#         bars_other = plt.bar(bin_edges[:-1][non_zero_mask], counts_other[non_zero_mask], 
#                            width=bin_width, color='gray', edgecolor='black', alpha=0.6,
#                            align='edge')
        
#         # Force the label after creation
#         if len(df_other) > 0 and counts_other.sum() > 0:
#             bars_other[0].set_label(f'other lakes (n={len(df_other)})')
#     else:
#         bars_other = None
    
#     # --- Plot highlighted lakes (stacked) ---
#     highlight_bars = []
#     cumulative_counts = np.zeros(len(bin_edges) - 1)  # Track cumulative height for stacking
    
#     for i, name in enumerate(unique_names):
#         subset = df_highlight[df_highlight['name'] == name]
#         counts, _ = np.histogram(subset[column], bins=bin_edges)
        
#         bars = plt.bar(bin_edges[:-1], counts, width=bin_width,
#                        color=cmap(i % cmap.N), edgecolor='black', alpha=0.8,
#                        align='edge', bottom=cumulative_counts)
        
#         # Update cumulative counts for next lake
#         cumulative_counts += counts
        
#         # Force the label after creation
#         if counts.sum() > 0:
#             bars[0].set_label(name)
#             highlight_bars.append(bars)
    
#     # --- Build legend handles and labels safely ---
#     legend_handles = []
#     legend_labels = []
    
#     # Add colored lakes alphabetically
#     if highlight_bars:
#         valid_bars = [(b[0], b[0].get_label()) for b in highlight_bars 
#                       if not b[0].get_label().startswith('_') and b[0].get_label()]
#         sorted_bars = sorted(valid_bars, key=lambda x: x[1].lower())
        
#         for h, l in sorted_bars:
#             legend_handles.append(h)
#             legend_labels.append(l)
    
#     # Add "other lakes" last if valid
#     if (bars_other is not None and len(df_other) > 0 and counts_other.sum() > 0 and 
#         not bars_other[0].get_label().startswith('_') and bars_other[0].get_label()):
#         legend_handles.append(bars_other[0])
#         legend_labels.append(bars_other[0].get_label())
    
#     plt.xlabel('$dh$ threshold [m]')
#     plt.ylabel('Count')
#     plt.xlim(0, 1.7)
    
#     # Set major x-axis ticks at 0.1 intervals
#     plt.xticks(np.arange(0, 1.7, 0.1))
    
#     # Set minor x-axis ticks at 0.01 intervals
#     ax = plt.gca()
#     ax.set_xticks(np.arange(0, 1.7, 0.01), minor=True)
    
#     # Create legend if we have handles
#     if legend_handles:
#         plt.legend(legend_handles, legend_labels)
    
#     plt.tight_layout()

#     # Save and show plot
#     plt.savefig(OUTPUT_DIR + '/figures/FigS6_dh_threshold_distribution.jpg',
#         dpi=300, bbox_inches='tight')
#     plt.show()

In [None]:
# Font settings for Fig. S6
for rc_key, rc_value in (
    ('font.family', 'DejaVu Sans'),
    ('font.size', 9),
    ('axes.labelsize', 14),
    ('xtick.labelsize', 12),
    ('ytick.labelsize', 12),
    ('legend.fontsize', 12),
):
    plt.rcParams[rc_key] = rc_value

In [None]:
def plot_level_histogram(df, column='level', bins=10):
    """
    Plot, save and show the histogram of per-lake values in `column`.

    Histogram with:
    - values >0.5 highlighted per lake (one colored, labeled bar series per lake)
    - values <=0.5 grouped as 'other lakes' (gray)
    - same bar widths (fixed 0.01-wide bins spanning 0 to 1.8)
    - x-axis capped at 1.7
    - gray bars drawn first; highlighted bars are stacked on top of them and of
      each other, so no count is hidden when several values share a bin
    - only non-empty bins drawn, so no zero-height bar edges run along the x-axis
    - legend ordered by custom_order (then alphabetically) and labeled using lake_label_map

    Parameters:
    df (pandas.DataFrame): Must contain `column` and a 'name' column of lake names
    column (str): Name of the column to create the histogram for
    bins (int): Unused; bin edges are fixed at 0.01 spacing. Kept so existing
        calls that pass `bins` keep working.

    Side effects:
    Saves the figure to OUTPUT_DIR + '/figures/FigS6_dh_threshold_distribution.jpg'
    and displays it.
    """

    # --- Lake label map with upright subscripts ---
    lake_label_map = {
        'Lambert_1': r'Lambert$_{1}$',
        'Site_A': r'Site$_{\text{A}}$',
        'Site_BC': r'Site$_{\text{BC}}$',
        'Slessor_1': r'Slessor$_{1}$',
        'Slessor_23': r'Slessor$_{23}$',
        'Thw_70': r'Thw$_{70}$',
        'Thw_124': r'Thw$_{124}$',
        'Thw_142': r'Thw$_{142}$',
        'Thw_170': r'Thw$_{170}$'
    }

    # --- Custom legend ordering ---
    custom_order = [
        'Lambert_1', 'Site_A', 'Site_BC', 
        'Slessor_1', 'Slessor_23', 
        'Thw_70', 'Thw_124', 'Thw_142', 'Thw_170'
    ]

    # Create figure
    plt.figure(figsize=(10, 6))
    df_other = df[df[column] <= 0.5]
    df_highlight = df[df[column] > 0.5]
    
    # Create bin edges aligned with 0.01 intervals
    bin_edges = np.arange(0, 1.8, 0.01)
    bin_width = bin_edges[1] - bin_edges[0]
    left_edges = bin_edges[:-1]
    cmap = plt.get_cmap("tab20")
    unique_names = sorted(df_highlight['name'].unique())
    
    # Plot gray "other lakes" first
    counts_other, _ = np.histogram(df_other[column], bins=bin_edges)
    
    # Only plot bars where counts > 0 to avoid black lines along x-axis
    other_handle = None
    non_zero_mask = counts_other > 0
    if non_zero_mask.any():
        bars_other = plt.bar(left_edges[non_zero_mask], counts_other[non_zero_mask], 
                           width=bin_width, color='gray', edgecolor='black', alpha=0.6,
                           align='edge')
        # Label the first bar; it serves as the legend handle for the group
        other_handle = bars_other[0]
        other_handle.set_label(f'other lakes (n={len(df_other)})')
    
    # Plot highlighted lakes (stacked vertical bars)
    # Stacking starts from the gray counts so a highlighted bar never covers a
    # gray bar that falls in the same bin (possible in the bin starting at 0.5)
    stack_base = counts_other.astype(float)
    highlight_handles = {}  # legend label -> bar used as legend handle
    
    for i, name in enumerate(unique_names):
        subset = df_highlight[df_highlight['name'] == name]
        counts, _ = np.histogram(subset[column], bins=bin_edges)
        
        # Same masking as for the gray bars: draw only non-empty bins
        occupied = counts > 0
        if not occupied.any():
            continue  # every value of this lake lies outside the bin range
        
        bars = plt.bar(left_edges[occupied], counts[occupied], width=bin_width,
                       color=cmap(i % cmap.N), edgecolor='black', alpha=0.8,
                       align='edge', bottom=stack_base[occupied])
        
        # Update stack heights for the next lake
        stack_base += counts
        
        # Label the first bar; skip empty labels and labels starting with '_',
        # which are kept out of the legend
        bars[0].set_label(name)
        label = bars[0].get_label()
        if label and not label.startswith('_'):
            highlight_handles[label] = bars[0]
    
    # Build legend handles and labels
    # Lakes listed in custom_order come first, in that order; any other lake
    # follows in case-insensitive alphabetical order
    def custom_sort_key(name):
        if name in custom_order:
            return (0, custom_order.index(name), '')
        return (1, 0, str(name).lower())
    
    ordered_labels = sorted(highlight_handles, key=custom_sort_key)
    legend_handles = [highlight_handles[label] for label in ordered_labels]
    legend_labels = [lake_label_map.get(label, label) for label in ordered_labels]
    
    # Add "other lakes" last if it was drawn
    if other_handle is not None:
        legend_handles.append(other_handle)
        legend_labels.append(other_handle.get_label())
    
    plt.xlabel('$dh$ threshold [m]')
    plt.ylabel('Count')
    plt.xlim(0, 1.7)
    
    # Set major x-axis ticks at 0.1 intervals
    plt.xticks(np.arange(0, 1.7, 0.1))
    
    # Set minor x-axis ticks at 0.01 intervals
    ax = plt.gca()
    ax.set_xticks(np.arange(0, 1.7, 0.01), minor=True)
    
    # Create legend if we have handles
    if legend_handles:
        plt.legend(legend_handles, legend_labels)
    
    plt.tight_layout()

    # Save and show plot
    plt.savefig(OUTPUT_DIR + '/figures/FigS6_dh_threshold_distribution.jpg',
        dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
# Create, save and show Fig. S6 from the 'level' column of the filtered lakes
plot_level_histogram(filtered_df, column='level')

In [None]:
# Close all open figures at the end of the notebook
plt.close('all')