This notebook generates Figs. 2, 3, S2, and S3.

Written 2023-11-11 by W. Sauthoff (wsauthoff.github.io)

# Set up computing environment

In [None]:
# Import libraries
import datetime
import fiona
import functools
import geopandas as gpd
import glob
import holoviews as hv
hv.extension('bokeh')
import hvplot.pandas
from IPython.display import clear_output
import math
from math import radians
import matplotlib
from matplotlib.collections import LineCollection
import matplotlib.colors as colors
from matplotlib.colors import ListedColormap
import matplotlib.dates as mdates
from matplotlib.gridspec import GridSpec
from matplotlib.legend_handler import HandlerPatch, HandlerTuple
import matplotlib.patches as mpatches
import matplotlib.patheffects as PathEffects
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np
import os
import pandas as pd
from pathlib import Path
from pyproj import CRS, Geod, Transformer
import rioxarray
from shapely.geometry import MultiPolygon, Point, Polygon
from shapely.ops import transform, unary_union

# Magic functions
%matplotlib widget

# Define data directories dependent on home environment
# Replace with your directory file paths
# NOTE: DATA_DIR and OUTPUT_DIR are only assigned when HOME equals the path
# below. In any other environment they remain undefined, so code that uses
# them will raise NameError until you add your own paths here.
if os.getenv('HOME') == '/home/jovyan':
    DATA_DIR = '/home/jovyan/data'
    OUTPUT_DIR = '/home/jovyan/1_evolving_lakes/output/Figs23_S23_lake_reexamination_results'
    # Create the figure output folder up front (no error if it already exists)
    os.makedirs(OUTPUT_DIR, exist_ok=True)

# Define constants and coordinate transforms for the geodesic area calculation
CRS_LL = "EPSG:4326" # wgs84 in lon,lat
GEOD = CRS(CRS_LL).get_geod() # geod object for calculating geodesic area on defined ellipsoid
CRS_XY = "EPSG:3031" # Antarctic Polar Stereographic in x, y
XY_TO_LL = Transformer.from_crs(CRS_XY, CRS_LL, always_xy = True) # make coord transformer
# NOTE(review): `geod` below is a second Geod object alongside GEOD above;
# both are kept because other cells may reference either name.
geod = Geod(ellps="WGS84") # Create a Geod object for calculating area on the WGS84 ellipsoid

# Define utility functions
@functools.lru_cache(maxsize=None)
def _ll_to_xy_transformer():
    """Build (once) and return the EPSG:4326 -> EPSG:3031 transformer."""
    return Transformer.from_crs(CRS("EPSG:4326"), CRS("EPSG:3031"), always_xy=True)


def ll2ps(lon, lat):
    """
    Transform coordinates from geodetic coordinates (lon, lat)
    to Antarctic Polar Stereographic coordinates (x, y).

    Parameters:
    -----------
    lon, lat : scalar or array-like
        Longitude and latitude in EPSG:4326, passed straight to
        pyproj's Transformer.transform (lon first because always_xy=True).

    Returns:
    --------
    tuple
        (x, y) in EPSG:3031.

    Usage: x, y = ll2ps(lon, lat)
    """
    # The transformer is cached in the helper above so that repeated calls do
    # not rebuild the CRS objects and the transformation pipeline every time.
    x, y = _ll_to_xy_transformer().transform(lon, lat)
    return x, y

# Functions

In [None]:
def quantify_lake_extensions(lakes_gdf, evolving_outlines_union_gdf, area_threshold=0.25):
    """
    Quantify lakes with extensions beyond their original stationary outlines using geodesic area calculations.

    For every unique lake name in `lakes_gdf`, the first matching stationary
    geometry and the first matching evolving-union geometry are compared. The
    extension is the part of the evolving geometry that lies outside the
    stationary geometry. Geometries are reprojected from EPSG:3031 to
    EPSG:4326 and their areas measured on the WGS84 ellipsoid.

    Parameters:
    -----------
    lakes_gdf : GeoDataFrame
        GeoDataFrame containing stationary lake outlines with 'name' column
    evolving_outlines_union_gdf : GeoDataFrame
        GeoDataFrame containing evolving union outlines with 'name' column
    area_threshold : float, default=0.25
        Minimum extension area, expressed as a fraction of the stationary
        outline area, for a lake to be flagged as having a reportable
        extension (e.g., 0.25 means the extension is at least 25% of the
        stationary area)

    Returns:
    --------
    tuple
        (lakes_with_extensions, extension_results)
        - lakes_with_extensions: count of lakes with reportable extensions
        - extension_results: DataFrame with one row per processed lake and the
          columns 'lake_name', 'stationary_area', 'evolving_area',
          'extension_area', 'relative_extension', 'has_reportable_extension'.
          The columns are present even when no lake could be processed.

    Notes:
    ------
    Lakes that fail (missing evolving outline, invalid geometry, unsupported
    geometry type, ...) are reported with a printed message and skipped.
    """
    result_columns = ['lake_name', 'stationary_area', 'evolving_area',
                      'extension_area', 'relative_extension', 'has_reportable_extension']

    # Define the geodesic object for Earth calculations
    geod = Geod(ellps="WGS84")

    # Coordinate function converting EPSG:3031 (x, y) to EPSG:4326 (lon, lat);
    # it is directly usable with shapely.ops.transform
    project = Transformer.from_crs("EPSG:3031", "EPSG:4326", always_xy=True).transform

    def transform_to_4326(geometry):
        '''
        Transform geometry from EPSG:3031 to EPSG:4326; returns None on failure
        '''
        if geometry is None or not geometry.is_valid:
            return None

        try:
            return transform(project, geometry)
        except Exception as e:
            print(f"Error transforming geometry: {e}")
            return None

    def ring_area_and_perimeter(ring):
        '''
        Unsigned geodesic area and perimeter of a single ring.
        '''
        lons, lats = ring.coords.xy
        area, perimeter = geod.polygon_area_perimeter(lons, lats)
        # The geodesic area is signed by ring orientation, so take magnitudes
        # before combining shells and holes.
        return abs(area), abs(perimeter)

    def polygon_area_and_perimeter(polygon):
        '''
        Geodesic area (exterior minus holes) and exterior perimeter of a Polygon.
        '''
        area, perimeter = ring_area_and_perimeter(polygon.exterior)
        for interior in polygon.interiors:
            hole_area, _ = ring_area_and_perimeter(interior)
            area -= hole_area
        return area, perimeter

    def calculate_geodesic_area_and_perimeter(geometry):
        '''
        Calculate geodesic area and perimeter of a polygon or multipolygon.
        First transforms geometry from EPSG:3031 to EPSG:4326, then performs calculations.
        Returns (None, None) for missing, invalid or unsupported geometries.
        '''
        # Ensure geometry exists and is valid
        if geometry is None or not geometry.is_valid:
            return None, None

        # An empty geometry (e.g. nothing of the evolving outline lies outside
        # the stationary outline) has zero area regardless of its type
        if geometry.is_empty:
            return 0.0, 0.0

        # Transform geometry to EPSG:4326
        geom_4326 = transform_to_4326(geometry)
        if geom_4326 is None:
            return None, None

        if isinstance(geom_4326, Polygon):
            return polygon_area_and_perimeter(geom_4326)

        if isinstance(geom_4326, MultiPolygon):
            # Combined area and perimeter over all parts
            total_area = 0
            total_perimeter = 0
            for part in geom_4326.geoms:
                part_area, part_perimeter = polygon_area_and_perimeter(part)
                total_area += part_area
                total_perimeter += part_perimeter
            return total_area, total_perimeter

        return None, None

    results = []

    # Process each lake
    for lake_name in lakes_gdf['name'].unique():
        try:
            # Get stationary outline
            stationary = lakes_gdf[lakes_gdf['name'] == lake_name]['geometry'].iloc[0]

            # Get evolving union outline (IndexError if the lake has none;
            # reported by the handler below)
            evolving = evolving_outlines_union_gdf[
                evolving_outlines_union_gdf['name'] == lake_name]['geometry'].iloc[0]

            # Skip if either geometry is missing
            if stationary is None or evolving is None:
                continue

            # Calculate geodesic areas
            stationary_area, _ = calculate_geodesic_area_and_perimeter(stationary)
            evolving_area, _ = calculate_geodesic_area_and_perimeter(evolving)

            # Extension = area in evolving that is not in stationary. The set
            # difference covers both the fully-containing and the partially
            # overlapping case.
            extension = evolving.difference(stationary)

            # Get geodesic area of the extension
            extension_area, _ = calculate_geodesic_area_and_perimeter(extension)

            # If any area calculation failed, skip this lake
            if stationary_area is None or evolving_area is None or extension_area is None:
                print(f"Warning: Could not calculate geodesic area for {lake_name}, skipping.")
                continue

            # Calculate relative extension (as fraction of original area)
            relative_extension = extension_area / stationary_area if stationary_area > 0 else 0

            # Determine if this is a reportable extension
            has_reportable_extension = relative_extension >= area_threshold

            results.append({
                'lake_name': lake_name,
                'stationary_area': stationary_area,
                'evolving_area': evolving_area,
                'extension_area': extension_area,
                'relative_extension': relative_extension,
                'has_reportable_extension': has_reportable_extension
            })

            # Clear output
            clear_output(wait=True)

        except Exception as e:
            # Best-effort processing: report the lake and carry on
            print(f"Error processing {lake_name}: {e}")
            continue

    # Create DataFrame with results; explicit columns keep the schema stable
    # even when no lake produced a result
    extension_df = pd.DataFrame(results, columns=result_columns)

    # Count lakes with extensions
    lakes_with_extensions = extension_df['has_reportable_extension'].sum()

    return lakes_with_extensions, extension_df

def generate_extension_summary(extension_df, km2=True):
    """
    Generate a summary report about lake extensions.

    Parameters:
    -----------
    extension_df : DataFrame
        DataFrame with lake extension results from quantify_lake_extensions.
        Reads the columns 'lake_name', 'extension_area', 'relative_extension'
        and 'has_reportable_extension'. An empty DataFrame is accepted and
        yields a summary with zero counts.
    km2 : bool, default=True
        If True, displays area in square kilometers (areas are divided by
        1,000,000), otherwise areas are printed unchanged and labelled m²

    Returns:
    --------
    str
        Summary text with key findings
    """
    total_lakes = len(extension_df)

    if total_lakes > 0:
        reportable = extension_df['has_reportable_extension'].astype(bool)
        extended_lakes = extension_df[reportable]
        lakes_with_extensions = int(reportable.sum())
        pct_with_extensions = lakes_with_extensions / total_lakes * 100
    else:
        # Nothing analyzed: avoid dividing by zero and touching columns that
        # an empty DataFrame may not have
        extended_lakes = extension_df
        lakes_with_extensions = 0
        pct_with_extensions = 0.0

    area_unit = "km²" if km2 else "m²"
    area_divisor = 1_000_000 if km2 else 1  # Convert to km² if requested

    if len(extended_lakes) > 0:
        # Average and total extension over lakes that have extensions
        avg_extension = extended_lakes['relative_extension'].mean()
        total_extension_area = extended_lakes['extension_area'].sum()

        # Find lake with maximum relative extension
        max_extension_lake = extended_lakes.loc[extended_lakes['relative_extension'].idxmax()]
        max_extension_pct = max_extension_lake['relative_extension'] * 100
        max_lake_name = max_extension_lake['lake_name']

        # Find lake with largest absolute extension area
        max_area_lake = extended_lakes.loc[extended_lakes['extension_area'].idxmax()]
        max_area_value = max_area_lake['extension_area'] / area_divisor
        max_area_lake_name = max_area_lake['lake_name']
    else:
        avg_extension = 0
        total_extension_area = 0
        max_extension_pct = 0
        max_lake_name = "None"
        max_area_value = 0
        max_area_lake_name = "None"

    # 'relative_extension' is already extension area / stationary area, i.e.
    # the fraction *beyond* the original outline, so it is reported as
    # fraction * 100 (same convention as the "largest relative extension" line).
    summary = f"""Lake Extension Analysis Summary:
    ---------------------------------
    We found {lakes_with_extensions} lakes ({pct_with_extensions:.1f}% of {total_lakes} analyzed) 
    with previously unidentified lake extensions beyond their original stationary outlines.
    
    For lakes with extensions:
    - Total extension area: {total_extension_area/area_divisor:.2f} {area_unit}
    - Average extension: {avg_extension*100:.1f}% beyond original outline
    - Largest relative extension: {max_extension_pct:.1f}% beyond original outline (Lake {max_lake_name})
    - Largest absolute extension: {max_area_value:.2f} {area_unit} (Lake {max_area_lake_name})
    """

    return summary

# Example usage:
# lakes_with_extensions, extension_results = quantify_lake_extensions(
#     lakes_gdf, 
#     evolving_outlines_union_gdf, 
# )
# summary = generate_extension_summary(extension_results, km2=True)
# print(summary)

In [None]:
def muliple_area_buffer(polygon, area_multiple, precision=100):
    """
    This function takes a polygon and returns a polygon with a buffer such that the area of the buffered polygon
    is approximately the specified multiple the area of the original polygon.

    The buffer distance starts at 0 and grows in steps of `precision` until the
    buffered area first reaches the target, so the result may overshoot the
    target by up to one step's worth of area.

    (The misspelled function name is kept so existing callers continue to work.)

    :param polygon: Shapely Polygon object
    :param area_multiple: the multiple of the original polygon area you wish the buffered polygon to be
    :param precision: Step size (in the polygon's coordinate units) for the iterative search
        of the buffer distance; must be positive whenever the polygon has to grow
    :return: Buffered Polygon
    :raises ValueError: if the polygon has to grow but the target area is not finite
        or `precision` is not positive (the search could never terminate)

    # Example usage
    # Define a simple square polygon
    square = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
    # Apply the function to find the buffered polygon area and bounds
    buffered_poly = muliple_area_buffer(square, 2)
    """
    target_area = area_multiple * polygon.area

    buffer_distance = 0
    buffered_polygon = polygon.buffer(buffer_distance)
    if buffered_polygon.area >= target_area:
        # Already large enough with a zero-width buffer
        return buffered_polygon

    # From here on the polygon must grow; guard against searches that cannot end
    if not math.isfinite(target_area):
        raise ValueError(f"target area must be finite, got {target_area!r}")
    if not precision > 0:
        raise ValueError(f"precision must be positive, got {precision!r}")

    while buffered_polygon.area < target_area:
        buffer_distance += precision
        buffered_polygon = polygon.buffer(buffer_distance)

    return buffered_polygon

In [None]:
# Make sure the sub-folder that plot_lake_groups_dV saves its figures into
# exists (no error if it is already there). Requires OUTPUT_DIR to be defined.
os.makedirs(OUTPUT_DIR + '/lake_group_dV_plots', exist_ok=True)

def plot_lake_groups_dV(lake_groups):
    """
    Create multi-panel plots for groups of lakes showing spatial overview and volume changes.
    Lakes are arranged in rows of three plots of equal size, with valid data checking.

    For each group one figure is produced with a full-width spatial overview on
    top (MOA background, stationary outlines, evolving-outline unions and the
    time-colored evolving outlines), followed by one cumulative-dV panel per
    valid lake and a final 'Summed' panel for the whole group. The figure is
    saved to '{OUTPUT_DIR}/lake_group_dV_plots/<group name>.jpg'.

    Parameters:
    -----------
    lake_groups : list of tuples
        Each tuple contains (group_name, lake_list) where:
        - group_name: str, name of the lake group for file naming and identification
        - lake_list: list of str, names of lakes to be analyzed together

    Returns:
    --------
    None

    Notes:
    ------
    Besides its argument, the function reads these names from the notebook
    namespace: stationary_outlines_gdf, evolving_outlines_union_gdf,
    moa_highres_da, cyc_start_datetimes, cyc_end_datetimes,
    year_interval_formatter, get_overall_y_limits and OUTPUT_DIR. Input files
    are read from 'output/...' paths relative to the current working directory.
    Lakes with missing inputs are skipped; groups without any valid lake
    produce no figure.
    """

    for group_name, lake_list in lake_groups:
        print(f"\nProcessing lake group: {group_name}")

        # Lists to store valid lake data (kept index-aligned with each other)
        valid_lakes = []
        evolving_outlines_gdfs = []
        evolving_geom_calcs_dfs = []
        stationary_geom_calcs_dfs = []
        evolving_union_geom_calcs_dfs = []
        lake_gdfs = []

        # First pass: collect all valid lake data
        for lake_name in lake_list:
            print(f"Checking data for {lake_name}...")

            # Get lake data from stationary outlines
            lake_gdf = stationary_outlines_gdf[stationary_outlines_gdf['name'] == lake_name]
            if lake_gdf.empty:
                print(f"Skipping {lake_name}: not found in stationary outlines")
                continue

            # Try loading evolving outlines gdf
            try:
                evolving_outlines_gdf = gpd.read_file(os.path.join(
                    'output/lake_outlines/evolving_outlines',
                    f'{lake_name}.geojson'))
            except Exception:
                print(f"  Skipping {lake_name}: no evolving outlines file")
                continue

            # Attempt to open the geometric calculations CSV files
            try:
                evolving_geom_calcs_df = pd.read_csv('output/geometric_calcs/evolving_outlines_geom_calc/forward_fill/{}.csv'.format(lake_name))
                evolving_union_geom_calcs_df = pd.read_csv('output/geometric_calcs/stationary_outline_geom_calc/evolving_union_at_evolving_lakes/{}.csv'.format(lake_name))
                stationary_geom_calcs_df = pd.read_csv('output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes/{}.csv'.format(lake_name))
            except FileNotFoundError:
                print(f"At least one of the geometric calculations CSV files for {lake_name} not found. Skipping...")
                continue

            # Convert strings to datetime
            evolving_geom_calcs_df['mid_pt_datetime'] = pd.to_datetime(evolving_geom_calcs_df['mid_pt_datetime'])
            evolving_union_geom_calcs_df['mid_pt_datetime'] = pd.to_datetime(evolving_union_geom_calcs_df['mid_pt_datetime'])
            stationary_geom_calcs_df['mid_pt_datetime'] = pd.to_datetime(stationary_geom_calcs_df['mid_pt_datetime'])

            # If we got here, all data is valid
            print(f"Valid data found for {lake_name}")
            valid_lakes.append(lake_name)
            lake_gdfs.append(lake_gdf)
            evolving_outlines_gdfs.append(evolving_outlines_gdf)
            evolving_geom_calcs_dfs.append(evolving_geom_calcs_df)
            stationary_geom_calcs_dfs.append(stationary_geom_calcs_df)
            evolving_union_geom_calcs_dfs.append(evolving_union_geom_calcs_df)

        # Skip this group if no valid lakes found
        if not valid_lakes:
            print(f"Skipping group {group_name}: no valid lakes found")
            continue

        print(f"\nCreating plots for valid lakes in group {group_name}: {valid_lakes}")

        # Calculate plot layout (including space for combined plot)
        n_lakes = len(valid_lakes)
        n_plots = n_lakes + 1  # Add 1 for the combined plot
        n_rows = (n_plots + 2) // 3  # Integer division rounded up

        # Create figure: one overview row plus n_rows rows of dV panels
        fig = plt.figure(figsize=(15, 5*n_rows + 3))
        gs = fig.add_gridspec(n_rows + 1, 3, height_ratios=[1] + [1]*n_rows)

        # Main spatial overview panel
        ax_main = fig.add_subplot(gs[0, :])

        # Get combined extent for all valid lakes
        x_mins, x_maxs, y_mins, y_maxs = [], [], [], []

        for lake_gdf, evolving_outlines_gdf in zip(lake_gdfs, evolving_outlines_gdfs):
            # Find evolving and stationary outlines union for plotting extent
            evolving_stationary_union_gdf = gpd.GeoDataFrame(
                geometry=[lake_gdf.geometry.iloc[0].union(evolving_outlines_gdf.geometry.union_all())],
                crs=lake_gdf.crs)

            # Get extent, padded by 5% of the larger dimension
            x_min, y_min, x_max, y_max = evolving_stationary_union_gdf['geometry'].bounds.iloc[0]
            buffer_dist = max(x_max - x_min, y_max - y_min) * 0.05
            x_mins.append(x_min - buffer_dist)
            x_maxs.append(x_max + buffer_dist)
            y_mins.append(y_min - buffer_dist)
            y_maxs.append(y_max + buffer_dist)

        # Set plot extent
        x_min, x_max = min(x_mins), max(x_maxs)
        y_min, y_max = min(y_mins), max(y_maxs)

        # Plot MOA background
        mask_x = (moa_highres_da.x >= x_min) & (moa_highres_da.x <= x_max)
        mask_y = (moa_highres_da.y >= y_min) & (moa_highres_da.y <= y_max)
        moa_subset = moa_highres_da.where(mask_x & mask_y, drop=True)
        ax_main.imshow(moa_subset[0,:,:], cmap='gray', clim=[14000, 17000],
                      extent=[x_min, x_max, y_min, y_max])

        # Plot stationary outlines
        stationary_color = 'darkturquoise'
        for lake_gdf in lake_gdfs:
            lake_gdf.boundary.plot(ax=ax_main, color=stationary_color, linewidth=2)

        # Plot evolving outlines union
        for lake_gdf in lake_gdfs:
            lake_name = lake_gdf['name'].iloc[0]
            evolving_union_gdf = evolving_outlines_union_gdf[evolving_outlines_union_gdf['name'] == lake_name]
            evolving_union_gdf.boundary.plot(ax=ax_main, color='k', linestyle='dotted', linewidth=2)

        # Plot evolving outlines with time-based coloring (continuous colormap)
        cmap = plt.get_cmap('plasma')
        norm = plt.Normalize(mdates.date2num(cyc_start_datetimes[1]),
                           mdates.date2num(cyc_start_datetimes[-1]))

        for evolving_outlines_gdf in evolving_outlines_gdfs:
            for _, row in evolving_outlines_gdf.iterrows():
                color = cmap(norm(mdates.date2num(pd.to_datetime(row['mid_pt_datetime']))))
                gpd.GeoSeries(row['geometry']).boundary.plot(
                    ax=ax_main, color=color, linewidth=1)

        # Format overview axes (tick labels in km instead of m)
        km_scale = 1e3
        ticks_x = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x/km_scale))
        ticks_y = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x/km_scale))
        ax_main.xaxis.set_major_formatter(ticks_x)
        ax_main.yaxis.set_major_formatter(ticks_y)
        ax_main.set_xlabel('x [km]')
        ax_main.set_ylabel('y [km]')

        # Set up colormap for the colorbar and the dV panels: from here on
        # `cmap` is plasma discretized into one color per cycle date
        min_date = pd.to_datetime(cyc_start_datetimes[1])
        max_date = pd.to_datetime(cyc_start_datetimes[-1])
        n_dates = len(cyc_start_datetimes[1:])
        cmap = plt.get_cmap('plasma', n_dates)
        norm = plt.Normalize(mdates.date2num(min_date), mdates.date2num(max_date))
        m = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
        m.set_array(np.linspace(mdates.date2num(min_date), mdates.date2num(max_date), n_dates))

        # Add colorbar
        divider = make_axes_locatable(ax_main)
        cax = divider.append_axes('bottom', size='2.5%', pad=0.5)
        cbar = fig.colorbar(m, cax=cax, orientation='horizontal')

        # Set colorbar ticks
        cbar.ax.xaxis.set_major_formatter(year_interval_formatter(interval=4))
        cbar.ax.xaxis.set_major_locator(mdates.YearLocator())  # Every year
        cbar.ax.xaxis.set_minor_locator(mdates.MonthLocator([1, 4, 7, 10]))  # Quarter year ticks
        cbar.set_label('Year')

        # Shared y axis limits for all volume plots of this group
        dv_y_min, dv_y_max = get_overall_y_limits(evolving_geom_calcs_dfs,
                                                  stationary_geom_calcs_dfs,
                                                  evolving_union_geom_calcs_dfs)

        # Create axes for all plots, filling the grid row by row below the overview
        axes = [fig.add_subplot(gs[(idx // 3) + 1, idx % 3]) for idx in range(n_plots)]

        # Plot individual lakes
        for idx, (lake_name, evolving_df, stationary_df, union_df) in enumerate(zip(
                valid_lakes, evolving_geom_calcs_dfs, stationary_geom_calcs_dfs, evolving_union_geom_calcs_dfs)):
            ax = axes[idx]
            ax.axhline(0, color='k', linestyle='--')

            dates = mdates.date2num(evolving_df['mid_pt_datetime'])

            # Plot stationary outline
            stationary_cumsum = np.cumsum(np.divide(stationary_df['stationary_outline_dV_corr (m^3)'], 1e9))
            ax.plot(dates, stationary_cumsum, color=stationary_color, label='Stationary', linewidth=2)
            ax.scatter(dates, stationary_cumsum, color=stationary_color, s=5)

            # Plot evolving outlines union
            union_cumsum = np.cumsum(np.divide(union_df['stationary_outline_dV_corr (m^3)'], 1e9))
            ax.plot(dates, union_cumsum, color='k', linestyle='dotted', label='Union', linewidth=2)
            ax.scatter(dates, union_cumsum, color='k', s=5)

            # Plot evolving outlines (multi-colored line)
            evolving_cumsum = np.cumsum(np.divide(evolving_df['evolving_outlines_dV_corr (m^3)'], 1e9))
            points = np.array([dates, evolving_cumsum]).T.reshape(-1, 1, 2)
            segments = np.concatenate([points[:-1], points[1:]], axis=1)
            lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
            lc.set_array(dates)
            lc.set_linewidth(2)
            ax.add_collection(lc)
            ax.scatter(dates, evolving_cumsum, c=dates, cmap=cmap, norm=norm, s=9)

            # Plot bias
            bias = np.cumsum(np.divide(evolving_df['evolving_outlines_dV_corr (m^3)'] -
                                     stationary_df['stationary_outline_dV_corr (m^3)'], 1e9))
            ax.plot(dates, bias, color='r', label='Bias', linewidth=2)
            ax.scatter(dates, bias, color='r', linewidth=2, s=5)

            # Add legend only to the first plot
            if idx == 0:
                # Multi-colored legend entry for the evolving outlines: a tuple
                # of proxy lines (never drawn on the axes), one color per date
                evolving_lines = tuple(
                    plt.Line2D([], [], color=cmap(norm(mdates.date2num(cyc_start))), linewidth=2)
                    for _, cyc_start in zip(dates, cyc_start_datetimes))
                stationary_line = plt.Line2D([], [], color=stationary_color, linestyle='solid', linewidth=2)
                evolving_union_line = plt.Line2D([], [], color='k', linestyle='dotted', linewidth=2)
                bias_line = plt.Line2D([], [], color='red', linestyle='solid', linewidth=2)
                ax.legend(
                    [evolving_lines,
                     evolving_union_line,
                     stationary_line,
                     bias_line],
                    ['evolving outlines',
                     'evolving outlines union',
                     'stationary outline',
                     'bias (evolving − stationary)'],
                    handlelength=3,
                    handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
                    fontsize=12,
                    loc='upper center')

            # Format axes
            ax.xaxis.set_major_formatter(year_interval_formatter())
            ax.xaxis.set_major_locator(mdates.YearLocator())
            ax.xaxis.set_minor_locator(mdates.MonthLocator([1, 4, 7, 10]))
            ax.set_title(lake_name)

            # Set x and y axes limit
            ax.set_xlim(cyc_start_datetimes[0], cyc_end_datetimes[-1])
            ax.set_ylim(dv_y_min, dv_y_max)

            # Handle y-axis labels and ticks
            if idx % 3 == 0:  # Leftmost column
                ax.set_ylabel('cumulative dV [km$^3$]')
            else:  # Middle and right columns
                ax.set_yticklabels([])

            # Handle x-axis labels: only the lowest panel of each column keeps
            # them. The next panel in the same column would be idx + 3.
            if idx + 3 >= n_plots:
                ax.set_xlabel('Year')
            else:
                ax.set_xticklabels([])
                ax.set_xlabel('')

        # Format last plot (combined data)
        last_ax = axes[-1]
        last_col = (n_plots - 1) % 3

        # Plot combined data
        # Combine all dataframes by summing values for each timestamp
        combined_evolving = pd.concat(evolving_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()
        combined_stationary = pd.concat(stationary_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()
        combined_union = pd.concat(evolving_union_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()

        dates = mdates.date2num(combined_evolving['mid_pt_datetime'])

        # Plot stationary outline
        stationary_cumsum = np.cumsum(np.divide(combined_stationary['stationary_outline_dV_corr (m^3)'], 1e9))
        last_ax.plot(dates, stationary_cumsum, color=stationary_color, label='Stationary', linewidth=2)
        last_ax.scatter(dates, stationary_cumsum, color=stationary_color, s=5)

        # Plot evolving outlines union
        union_cumsum = np.cumsum(np.divide(combined_union['stationary_outline_dV_corr (m^3)'], 1e9))
        last_ax.plot(dates, union_cumsum, color='k', linestyle='dotted', label='Union', linewidth=2)
        last_ax.scatter(dates, union_cumsum, color='k', s=5)

        # Plot evolving outlines (multi-colored line)
        evolving_cumsum = np.cumsum(np.divide(combined_evolving['evolving_outlines_dV_corr (m^3)'], 1e9))
        points = np.array([dates, evolving_cumsum]).T.reshape(-1, 1, 2)
        segments = np.concatenate([points[:-1], points[1:]], axis=1)
        lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
        lc.set_array(dates)
        lc.set_linewidth(2)
        last_ax.add_collection(lc)
        last_ax.scatter(dates, evolving_cumsum, c=dates, cmap=cmap, norm=norm, s=9)

        # Plot bias
        bias_cumsum = np.cumsum(np.divide(
            combined_evolving['evolving_outlines_dV_corr (m^3)'] -
            combined_stationary['stationary_outline_dV_corr (m^3)'], 1e9))
        last_ax.plot(dates, bias_cumsum, color='r', label='Bias', linewidth=2)
        last_ax.scatter(dates, bias_cumsum, color='r', s=5)

        # Set axes limits for combined plot
        last_ax.set_xlim(cyc_start_datetimes[0], cyc_end_datetimes[-1])
        last_ax.set_ylim(dv_y_min, dv_y_max)
        last_ax.axhline(0, color='k', linestyle='--')

        # Set y-axis formatting for combined plot
        if last_col == 0:  # Leftmost column
            last_ax.set_ylabel('Cumulative dV [km$^3$]')
        else:
            last_ax.set_yticklabels([])

        # Always show x-axis labels for the combined plot as it's the last one
        last_ax.set_xlabel('Year')
        last_ax.xaxis.set_major_formatter(year_interval_formatter())
        last_ax.xaxis.set_major_locator(mdates.YearLocator())
        last_ax.xaxis.set_minor_locator(mdates.MonthLocator([1, 4, 7, 10]))
        last_ax.set_title('Summed')

        # Save the figure using the group name (spaces and slashes replaced so
        # the name is a valid single file name), then release the figure
        sanitized_group_name = group_name.replace(' ', '_').replace('/', '_')
        fig.savefig(f'{OUTPUT_DIR}/lake_group_dV_plots/{sanitized_group_name}.jpg',
                    dpi=300, bbox_inches='tight')
        plt.close(fig)

        # Clear output
        clear_output(wait=True)

def get_overall_y_limits(evolving_geom_calcs_dfs, stationary_geom_calcs_dfs, evolving_union_geom_calcs_dfs):
    """
    Calculate overall y-axis limits shared by all lake cumulative-dV plots.

    For every lake, and for the sum of all lakes per 'mid_pt_datetime', four
    cumulative series (converted from m^3 to km^3) are considered: stationary
    outline, evolving outlines, evolving union outline, and bias (evolving
    minus stationary). The limits span every finite value of those series.

    Parameters:
    -----------
    evolving_geom_calcs_dfs : list of pandas.DataFrame
        Per-lake dataframes with columns 'mid_pt_datetime' and
        'evolving_outlines_dV_corr (m^3)'
    stationary_geom_calcs_dfs : list of pandas.DataFrame
        Per-lake dataframes with columns 'mid_pt_datetime' and
        'stationary_outline_dV_corr (m^3)'
    evolving_union_geom_calcs_dfs : list of pandas.DataFrame
        Per-lake dataframes with columns 'mid_pt_datetime' and
        'stationary_outline_dV_corr (m^3)' (evolving union calculations are
        read from the stationary-outline column)

    Returns:
    --------
    tuple : (y_min, y_max)
        The minimum and maximum y-axis values with a 5% buffer

    Raises:
    -------
    ValueError
        If the inputs provide no finite values to derive limits from
    """
    evolving_col = 'evolving_outlines_dV_corr (m^3)'
    stationary_col = 'stationary_outline_dV_corr (m^3)'

    def _cumulative_series(evolving_df, stationary_df, union_df):
        # Cumulative dV in km^3 for the four plotted time series
        return (
            np.cumsum(np.divide(stationary_df[stationary_col], 1e9)),
            np.cumsum(np.divide(evolving_df[evolving_col], 1e9)),
            np.cumsum(np.divide(union_df[stationary_col], 1e9)),
            np.cumsum(np.divide(evolving_df[evolving_col] - stationary_df[stationary_col], 1e9)),
        )

    all_y_values = []

    # Per-lake series
    for evolving_df, stationary_df, union_df in zip(evolving_geom_calcs_dfs,
                                                   stationary_geom_calcs_dfs,
                                                   evolving_union_geom_calcs_dfs):
        for series in _cumulative_series(evolving_df, stationary_df, union_df):
            all_y_values.extend(series)

    # Series for the combined ("Summed") plot, if there are any lakes
    if evolving_geom_calcs_dfs:
        # Combine all dataframes by summing values for each timestamp
        combined_evolving = pd.concat(evolving_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()
        combined_stationary = pd.concat(stationary_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()
        combined_union = pd.concat(evolving_union_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()

        for series in _cumulative_series(combined_evolving, combined_stationary, combined_union):
            all_y_values.extend(series)

    # Builtin min()/max() give NaN or order-dependent results when NaN is
    # present (every comparison with NaN is False), so keep finite values only
    finite_values = np.asarray(all_y_values, dtype=float)
    finite_values = finite_values[np.isfinite(finite_values)]
    if finite_values.size == 0:
        raise ValueError('No finite dV values available to compute y-axis limits')

    # Calculate limits with a small buffer (5% of range)
    y_min = finite_values.min()
    y_max = finite_values.max()
    buffer = (y_max - y_min) * 0.05

    return float(y_min - buffer), float(y_max + buffer)

def year_interval_formatter(interval=2, start_year=2012):
    '''
    Create custom tick formatter that labels years at specified intervals

    Parameters:
    -----------
    interval : int, default=2
        Interval between labeled years (e.g., 2 for every 2 years, 4 for every 4 years).
        Must be non-zero.
    start_year : int or None, default=2012
        If an int, labels years equal to start_year + n*interval (n >= 0);
        years before start_year are never labeled.
        If None, uses modulo arithmetic and labels years divisible by interval.

    Returns:
    --------
    function : formatter_func(x, pos) that converts the matplotlib date number
        x to a date and returns its year via strftime('%Y'), or '' for years
        that are not labeled

    Raises:
    -------
    ValueError
        If interval is 0 (the modulo test would otherwise fail with
        ZeroDivisionError while the figure is being drawn)

    Examples:
    ---------
    # Defaults: every 2 years starting from 2012: 2012, 2014, 2016, ...
    formatter = year_interval_formatter()

    # Every 4 years starting from 2012: 2012, 2016, 2020, 2024
    formatter = year_interval_formatter(interval=4, start_year=2012)

    # Every 4 years using modulo (years divisible by 4, no lower bound)
    formatter = year_interval_formatter(interval=4, start_year=None)
    '''
    if interval == 0:
        raise ValueError('interval must be non-zero')

    def formatter_func(x, pos):
        date = mdates.num2date(x)
        year = date.year

        if start_year is None:
            # Use modulo arithmetic
            labeled = year % interval == 0
        else:
            # Use specific starting year and interval
            labeled = year >= start_year and (year - start_year) % interval == 0

        return date.strftime('%Y') if labeled else ''

    return formatter_func

# Import datasets

In [None]:
# Import subglacial lake outlines as GeoDataFrames
# Paths are relative, so they resolve against the current working directory
# Stationary outlines of the previously identified lakes
stationary_outlines_gdf = gpd.read_file('output/lake_outlines/stationary_outlines/stationary_outlines_gdf.geojson')
# Reexamined (revised) stationary outline inventory
reexamined_stationary_outlines_gdf = gpd.read_file('output/lake_outlines/stationary_outlines/reexamined_stationary_outlines_gdf.geojson')
# Presumably one union-of-evolving-outlines geometry per lake - TODO confirm against the notebook that writes this file
evolving_outlines_union_gdf = gpd.read_file('output/lake_outlines/stationary_outlines/evolving_outlines_union_gdf.geojson')

In [None]:
# Import CryoSat-2 SARIn mode masks as GeoDataFrames
# See 0_preprocess_data.ipynb for data source and pre-processing steps
# NOTE(review): "3_1" and "3_6" look like mode-mask version numbers and "diff"
# like the area that differs between them - confirm in 0_preprocess_data.ipynb
gdf_SARIn_3_1 = gpd.read_file('output/CryoSat2_SARIn_mode_masks/gdf_SARIn_3_1.geojson')
gdf_SARIn_3_1_3_6_diff= gpd.read_file('output/CryoSat2_SARIn_mode_masks/gdf_SARIn_3_1_3_6_diff.geojson')

In [None]:
# Import cyc_dates (cycle start/end dates), parsing both columns as datetimes
cyc_dates = pd.read_csv('output/cycle_dates.csv', parse_dates=['cyc_start_datetimes', 'cyc_end_datetimes'])

# Store the cyc_dates columns as Python lists of np.datetime64 values
# (these are lists, not np arrays; elements are accessed by index elsewhere)
cyc_start_datetimes = [np.datetime64(ts) for ts in cyc_dates['cyc_start_datetimes']]
cyc_end_datetimes = [np.datetime64(ts) for ts in cyc_dates['cyc_end_datetimes']]

In [None]:
# Import MODIS Mosaic of Antarctica (MOA) surface imagery
# https://nsidc.org/data/nsidc-0730/versions/1
# DATA_DIR is only defined in the setup cell when HOME is '/home/jovyan';
# set it to your own data directory before running this cell elsewhere
moa_highres = DATA_DIR + '/moa125_2014_hp1_v01.tif'
# Open the GeoTIFF with rioxarray
moa_highres_da = rioxarray.open_rasterio(moa_highres)

In [None]:
# MODIS MOA 2014 coastline and grounding line
# https://nsidc.org/data/nsidc-0730/versions/1
# `shp` is a scratch path variable reused for both shapefiles
shp = DATA_DIR + '/moa2014_coastline_v01.shp'
moa_2014_coastline = gpd.read_file(shp)
shp = DATA_DIR + '/moa2014_grounding_line_v01.shp'
moa_2014_groundingline = gpd.read_file(shp)

# Results and discussion

In [None]:
# Report how many lakes are in the original and in the revised inventory
for inventory_gdf, description in (
        (stationary_outlines_gdf, 'lakes reanalyzed'),
        (reexamined_stationary_outlines_gdf,
         'lakes analyzed in revised inventory due to Site_B and Site_C being combined into Site_BC')):
    print(len(inventory_gdf), description)

In [None]:
# Count lakes without CryoSat-2 SARIn coverage (start recorded as the string '<NA>')
# in the original and then the revised inventory
for inventory_gdf in (stationary_outlines_gdf, reexamined_stationary_outlines_gdf):
    lacks_sarin = inventory_gdf['CS2_SARIn_start'] == '<NA>'
    print(len(inventory_gdf[lacks_sarin]))

In [None]:
# Count lakes with CryoSat-2 SARIn coverage (start is anything but the string '<NA>')
# in the original and then the revised inventory
for inventory_gdf in (stationary_outlines_gdf, reexamined_stationary_outlines_gdf):
    has_sarin = inventory_gdf['CS2_SARIn_start'] != '<NA>'
    print(len(inventory_gdf[has_sarin]))

In [None]:
# Count lakes whose CryoSat-2 SARIn coverage begins at the start of the mission ('2010.5')
# in the original and then the revised inventory
for inventory_gdf in (stationary_outlines_gdf, reexamined_stationary_outlines_gdf):
    from_mission_start = inventory_gdf['CS2_SARIn_start'] == '2010.5'
    print(len(inventory_gdf[from_mission_start]))

In [None]:
# Count lakes whose CryoSat-2 SARIn coverage begins when the LRM/SARIn boundary
# moved inland ('2013.75'), in the original and then the revised inventory
for inventory_gdf in (stationary_outlines_gdf, reexamined_stationary_outlines_gdf):
    from_boundary_move = inventory_gdf['CS2_SARIn_start'] == '2013.75'
    print(len(inventory_gdf[from_boundary_move]))

In [None]:
# Count lakes with evolving outlines: each .geojson file in the
# evolving outlines folder is counted as one lake
evolving_outline_files = os.listdir('output/lake_outlines/evolving_outlines')
print(sum(1 for fname in evolving_outline_files if fname.endswith('.geojson')))

In [None]:
# Count lakes without evolving outlines three ways:
# 1) number of .txt files in the evolving outlines folder
# 2) original inventory size minus the number of .geojson files
# 3) revised inventory size minus the number of .geojson files
outline_dir_listing = os.listdir('output/lake_outlines/evolving_outlines')
n_txt_files = sum(1 for fname in outline_dir_listing if fname.endswith('.txt'))
n_geojson_files = sum(1 for fname in outline_dir_listing if fname.endswith('.geojson'))
print(n_txt_files)
print(len(stationary_outlines_gdf) - n_geojson_files)
print(len(reexamined_stationary_outlines_gdf) - n_geojson_files)

## Evolving outlines

In [None]:
# Analyze lake extensions: compare the revised stationary outlines with the
# evolving outlines union using the notebook's quantify_lake_extensions helper
# NOTE(review): area_threshold=0.25 is presumably a relative (fractional)
# extension threshold - confirm against the helper's definition
lakes_with_extensions, extension_results = quantify_lake_extensions(
    reexamined_stationary_outlines_gdf, 
    evolving_outlines_union_gdf, 
    area_threshold=0.25)
# Build and print a text summary of the extension results (km2=True requested)
summary = generate_extension_summary(extension_results, km2=True)
print(summary)

In [None]:
# How many lakes have no extension beyond stationary outline?
# Counts rows whose 'relative_extension' is exactly 0 (displayed as the cell output)
len(extension_results[extension_results['relative_extension'] == 0])

In [None]:
# View lakes that exceed the area threshold, largest relative extension first
# The 0.25 here is hard-coded; keep it in sync with the area_threshold passed
# to quantify_lake_extensions
extension_results[extension_results['relative_extension'] > 0.25].sort_values('relative_extension', ascending=False)

## Active area and carbon export

### Active area

In [None]:
# Read in continental summation geometric calculation csv files
# The same three summary file names are read from each of three folders
filenames = {
    'superset_IS2_lakes_sum': 'superset_IS2_lakes_sum.csv',
    'subset_CS2_IS2_lakes_SARInPreExpansion_sum': 'subset_CS2_IS2_lakes_SARInPreExpansion_sum.csv',
    'subset_CS2_IS2_lakes_SARInPostExpansion_sum': 'subset_CS2_IS2_lakes_SARInPostExpansion_sum.csv',
}

# Evolving outlines (only lakes with evolving outlines)
base_path = 'output/geometric_calcs/evolving_outlines_geom_calc/forward_fill'
dfs = {}
for name, filename in filenames.items():
    dfs[name] = pd.read_csv(f'{base_path}/{filename}')

# Unpack the dict of dataframes into named variables for code readability
(superset_IS2_evolving_sum_df,
 subset_CS2_IS2_SARInPreExpansion_evolving_sum_df,
 subset_CS2_IS2_SARInPostExpansion_evolving_sum_df) = (
    dfs['superset_IS2_lakes_sum'],
    dfs['subset_CS2_IS2_lakes_SARInPreExpansion_sum'],
    dfs['subset_CS2_IS2_lakes_SARInPostExpansion_sum'])

# Stationary outlines (all lakes)
base_path = 'output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes'
dfs = {}
for name, filename in filenames.items():
    dfs[name] = pd.read_csv(f'{base_path}/{filename}')

(superset_IS2_stationary_sum_df,
 subset_CS2_IS2_SARInPreExpansion_stationary_sum_df,
 subset_CS2_IS2_SARInPostExpansion_stationary_sum_df) = (
    dfs['superset_IS2_lakes_sum'],
    dfs['subset_CS2_IS2_lakes_SARInPreExpansion_sum'],
    dfs['subset_CS2_IS2_lakes_SARInPostExpansion_sum'])

# Evolving union (only lakes with evolving outlines)
base_path = 'output/geometric_calcs/stationary_outline_geom_calc/evolving_union_at_evolving_lakes'
dfs = {}
for name, filename in filenames.items():
    dfs[name] = pd.read_csv(f'{base_path}/{filename}')

(superset_IS2_evolving_union_sum_df,
 subset_CS2_IS2_SARInPreExpansion_evolving_union_sum_df,
 subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df) = (
    dfs['superset_IS2_lakes_sum'],
    dfs['subset_CS2_IS2_lakes_SARInPreExpansion_sum'],
    dfs['subset_CS2_IS2_lakes_SARInPostExpansion_sum'])

In [None]:
# Maximum summed evolving-outline area across all time steps, converted from m^2 to km^2
print(np.round(superset_IS2_evolving_sum_df['evolving_outlines_area (m^2)'].max() / 1e6, 0))

# That maximum evolving-outline area as a percentage of the summed
# stationary-outline area (taken from the first row of the stationary dataframe)
print(np.round(superset_IS2_evolving_sum_df['evolving_outlines_area (m^2)'].max() / 
               superset_IS2_stationary_sum_df['stationary_outline_area (m^2)'].iloc[0] * 100, 0), '%')

# Summed stationary-outline area (first row), converted from m^2 to km^2
print(np.round(superset_IS2_stationary_sum_df['stationary_outline_area (m^2)'].iloc[0] / 1e6, 0))

### Dissolved inorganic carbon (DIC) export estimates

In [None]:
# We use the microbial respiration rate of 1.4 x 10^4 g C d-1 measured directly in Mercer Subglacial Lake (SLM; Venturelli and others, 2023)
# as an estimate of dissolved inorganic carbon (DIC) flux from saturated sediment to the subglacial water column via respiration
# doi.org/10.1029/2022AV000846
resp_rate = 1.4 * 10**4  # g C d-1 in SLM
print('respiration rate:', resp_rate, 'g C d-1')

# Estimate DIC export from SLM per CryoSat-2/ICESat-2 satellite repeat cycle (91 days) time step
# (whole-lake rate multiplied by the 91-day step length)
SLM_DIC_export_per_step = resp_rate * 91
print('DIC export per time step at SLM:', SLM_DIC_export_per_step, 'g C / 91 d')  # g C / 91 d in SLM

In [None]:
# Look up Mercer Subglacial Lake (SLM) in each outline inventory and take the
# first matching 'area (m^2)' value
is_slm_stationary = reexamined_stationary_outlines_gdf['name'] == 'MercerSubglacialLake'
SLM_stationary_area = reexamined_stationary_outlines_gdf.loc[is_slm_stationary, 'area (m^2)'].values[0]

is_slm_union = evolving_outlines_union_gdf['name'] == 'MercerSubglacialLake'
SLM_evolving_union_area = evolving_outlines_union_gdf.loc[is_slm_union, 'area (m^2)'].values[0]

# Report both areas in km^2
for label, area_m2 in (('SLM stationary outline area:', SLM_stationary_area),
                       ('SLM updated stationary area:', SLM_evolving_union_area)):
    print(label, np.round(area_m2/1e6,1), 'km^2')

In [None]:
# Per-area DIC export using the stationary outline area:
# per day [g C d-1 m-2], and per 91-day time step [g C step-1 m-2]
SLM_stationary_per_area_DIC_export = resp_rate / SLM_stationary_area
SLM_stationary_per_area_per_step_DIC_export = SLM_stationary_per_area_DIC_export * 91

for label, value, unit in (
        ('SLM stationary outline per area DIC export:',
         SLM_stationary_per_area_DIC_export, 'g C d-1 m-2'),
        ('SLM stationary outline per area per time step DIC export:',
         SLM_stationary_per_area_per_step_DIC_export, 'g C step-1 m-2')):
    print(label, np.round(value,6), unit)

In [None]:
# Per-area DIC export using the evolving union (updated stationary) outline area:
# per day [g C d-1 m-2], and per 91-day time step [g C step-1 m-2]
SLM_evolving_union_per_area_DIC_export = resp_rate / SLM_evolving_union_area
SLM_evolving_union_per_area_per_step_DIC_export = SLM_evolving_union_per_area_DIC_export * 91

for label, value, unit in (
        ('SLM updated stationary outline per area DIC export:',
         SLM_evolving_union_per_area_DIC_export, 'g C d-1 m-2'),
        ('SLM updated stationary outline per area per time step DIC export:',
         SLM_evolving_union_per_area_per_step_DIC_export, 'g C step-1 m-2')):
    print(label, np.round(value,6), unit)

In [None]:
# Next we estimate the per area respiration using evolving outlines

In [None]:
# Plot dV time series to view filling/draining history
# Background shading marks each run of consecutive time steps with the same
# sign of the evolving-outlines dV: blue = positive, red = negative, gray = zero

# Load SLM geometries dataframes (evolving outlines, stationary outline, updated stationary outline)
SLM_evolving_outlines_geom = pd.read_csv('/home/jovyan/1_evolving_lakes/Sauthoff-2025-GRL/output/geometric_calcs/evolving_outlines_geom_calc/forward_fill/MercerSubglacialLake.csv')
SLM_stationary_outline_geom = pd.read_csv('/home/jovyan/1_evolving_lakes/Sauthoff-2025-GRL/output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes/MercerSubglacialLake.csv')
SLM_updated_stationary_outline_geom = pd.read_csv('/home/jovyan/1_evolving_lakes/Sauthoff-2025-GRL/output/geometric_calcs/stationary_outline_geom_calc/evolving_union_at_evolving_lakes/MercerSubglacialLake.csv')

# Convert to datetimes (only the evolving dataframe supplies the x-axis)
SLM_evolving_outlines_geom['mid_pt_datetime'] = pd.to_datetime(SLM_evolving_outlines_geom['mid_pt_datetime'])

# Define x and y for plotting
# All series are plotted against the evolving-outlines dates
# NOTE(review): assumes the three CSVs have the same number of rows - confirm
x = SLM_evolving_outlines_geom['mid_pt_datetime']
y = (SLM_evolving_outlines_geom['evolving_outlines_dV_corr (m^3)'] / 1e9)  # km^3
y2 = np.cumsum(SLM_evolving_outlines_geom['evolving_outlines_dV_corr (m^3)'] / 1e9)  # km^3
y3 = (SLM_stationary_outline_geom['stationary_outline_dV_corr (m^3)'] / 1e9)  # km^3
y4 = np.cumsum(SLM_stationary_outline_geom['stationary_outline_dV_corr (m^3)'] / 1e9)  # km^3
y5 = (SLM_updated_stationary_outline_geom['stationary_outline_dV_corr (m^3)'] / 1e9)  # km^3
y6 = np.cumsum(SLM_updated_stationary_outline_geom['stationary_outline_dV_corr (m^3)'] / 1e9)  # km^3

# Create the plot
plt.figure(figsize=(15, 6))  # Set figure size

# Get current axis
ax = plt.gca()

# Define masks for shading (based on the per-step evolving-outlines dV, y)
positive_mask = y > 0
negative_mask = y < 0

# +1 = positive, -1 = negative, 0 = zero
sign_series = positive_mask.astype(int) - negative_mask.astype(int)

# Find change points: indices where the sign differs from the previous step,
# bracketed by 0 and len(x) so consecutive pairs delimit every regime
changes = np.diff(sign_series, prepend=sign_series.iloc[0])
change_points = np.where(changes != 0)[0]
change_points = np.concatenate(([0], change_points, [len(x)]))

# Half time step (kept for compatibility; not currently used by the shading)
if len(x) > 1:
    dt = (x.iloc[1] - x.iloc[0]) / 2
else:
    dt = pd.Timedelta(days=0)  # fallback

# Shaded regions
for i in range(len(change_points) - 1):
    start_idx = change_points[i]
    end_idx = change_points[i + 1]

    # Determine color based on start of interval
    val = sign_series.iloc[start_idx]
    if val > 0:
        color = "blue"
    elif val < 0:
        color = "red"
    else:
        color = "gray"

    # Start each span one step before the regime's first index so it abuts the
    # previous span. Clamp at 0: for the first regime start_idx is 0 and
    # x.iloc[-1] would wrap around to the LAST date, shading the wrong interval.
    span_start_idx = max(start_idx - 1, 0)
    plt.axvspan(x.iloc[span_start_idx], x.iloc[end_idx-1],
        alpha=0.2, color=color, zorder=0)
    
# Plot all lines (with fixed colors): solid = per-step dV, dashed = cumulative dV
plt.plot(x, y, 'k-', linewidth=2, label='evolving outlines $dV$ ($km^3$)')
plt.plot(x, y2, 'k--', linewidth=2, label='evolving outlines cumulative $dV$ ($km^3$)')
plt.plot(x, y3, 'b-', linewidth=2, label='stationary outline $dV$ ($km^3$)')
plt.plot(x, y4, 'b--', linewidth=2, label='stationary outline cumulative $dV$ ($km^3$)')
plt.plot(x, y5, 'g-', linewidth=2, label='updated stationary outline $dV$ ($km^3$)')
plt.plot(x, y6, 'g--', linewidth=2, label='updated stationary outline cumulative $dV$ ($km^3$)')

# Format the x-axis to show years as major ticks and quarters as minor ticks
# Set major ticks to years
ax.xaxis.set_major_locator(mdates.YearLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))  # Format as year only

# Set minor ticks to quarters
ax.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=[1, 4, 7, 10]))  # Jan, Apr, Jul, Oct

# Add gridlines for better readability
ax.grid(True, which='major', linestyle='-', alpha=0.7)
ax.grid(True, which='minor', linestyle='--', alpha=0.3)

# Add reference line at y=0
plt.axhline(y=0, color='black', linestyle='-', lw=0.5)

# Add labels, title, and legend
plt.xlabel('Year', fontsize=12)
plt.ylabel('$dV$ ($km^3$)', fontsize=12)
ax.set_title('Mercer Subglacial Lake $dV$ time series')

ax.set_xlim(x.iloc[0], x.iloc[-1])

# Add proxy patches for shaded regions
# NOTE(review): the gray regime is derived from the per-step dV (y == 0), so the
# 'zero cumulative $dV$' label may be intended to read 'zero $dV$' - confirm
pos_patch = mpatches.Patch(color='blue', alpha=0.2, label='filling')
neg_patch = mpatches.Patch(color='red', alpha=0.2, label='draining')
zero_patch = mpatches.Patch(color='gray', alpha=0.2, label='zero cumulative $dV$')

# Add legend with both line plots and shaded patches
plt.legend(handles=[pos_patch, neg_patch, zero_patch] + ax.get_legend_handles_labels()[0],
           loc='best', bbox_to_anchor=(1, 0.5, 0.25, 0.25))  # (x, y, width, height)

# Show the plot
plt.tight_layout()
plt.show()

We observed a 3.75-year filling period (2014.5–2018.25) prior to the 2018–2019 subglacial lake access campaign, compared with the 4.5 years reported by Venturelli and others (2023).

In [None]:
# Isolate the SLM filling period preceding the Venturelli et al. (2023) sampling:
# 2014.5 to 2018.25, both end dates included
start_date = pd.to_datetime('2014-07-01')  # ~2014.5
end_date = pd.to_datetime('2018-04-01')    # ~2018.25
in_filling_period = SLM_evolving_outlines_geom['mid_pt_datetime'].between(start_date, end_date)
SLM_evolving_outlines_geom_filling_period = SLM_evolving_outlines_geom[in_filling_period]

In [None]:
# Method 1: Conservation-based integration
# Total carbon respired over the filling period divided by the summed
# (area x step length) exposure over the filtered time steps

# Respiration rate [g C d-1]
# (self-assignment is a no-op; resp_rate must already be defined by an earlier cell)
resp_rate = resp_rate  

# Duration of each step [days/time step]
time_step_days = 91  

# Total filling period length [days = (years in filling period) * (days/year)]
T_days = 3.75 * 365.25  

# Total carbon respired over 3.75-year filling period [g C]
C_total = resp_rate * T_days 

# Area at each time step [m^2]
areas = SLM_evolving_outlines_geom_filling_period['evolving_outlines_area (m^2)'].values

# Total area–time exposure [m^2 d]
# NOTE(review): this spans len(areas) * 91 days, which is not necessarily equal
# to T_days used for C_total above - confirm the two time bases are meant to differ
A_time_total = np.sum(areas * time_step_days)

# Conservation-based per-area per-day rate [g C d-1 m-2]
rate_method1 = C_total / A_time_total

print('Method 1:', np.round(rate_method1, 6), 'g C d-1 m-2')

In [None]:
# Method 2 - Mean of per-step rates
# Divide the whole-lake respiration rate by the evolving-outline area at each
# filling-period time step, then average those per-step rates
rate_method2 = np.average(resp_rate / SLM_evolving_outlines_geom_filling_period['evolving_outlines_area (m^2)'])
print('Method 2:', np.round(rate_method2,6), 'g C d-1 m-2')

In [None]:
# Method 3 - Mean area approach
# Divide the whole-lake respiration rate by the average evolving-outline area
# over the filling-period time steps
rate_method3 = resp_rate / np.average(SLM_evolving_outlines_geom_filling_period['evolving_outlines_area (m^2)'])
print('Method 3:', np.round(rate_method3,6), 'g C d-1 m-2')

In [None]:
# Convert the Method 1 per-area respiration rate to a per-area, per-time-step rate
# so that it is expressed per 91-day time step
# [g C m-2 time step-1 = (g C d-1 m-2) * (days/time step)]
SLM_evolving_per_area_per_step_DIC_export_filling_period = rate_method1 * time_step_days

## Volume change

### Bias

In [None]:
# Folders holding the per-lake geometric calculation CSVs
evolving_folder = 'output/geometric_calcs/evolving_outlines_geom_calc/forward_fill/'
stationary_folder = 'output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes/'

# Continental summation files that are not individual lakes
files_to_ignore = [
    'superset_IS2_lakes_sum.csv',
    'subset_CS2_IS2_lakes_SARInPostExpansion_sum.csv', 
    'subset_CS2_IS2_lakes_SARInPreExpansion_sum.csv',
    'subset_noCS2_IS2_lakes_sum.csv'
]

# Collect the per-lake CSV file names in each folder, skipping the summation files
evolving_csv_files = []
for fname in os.listdir(evolving_folder):
    if fname in files_to_ignore or not fname.endswith('.csv'):
        continue
    evolving_csv_files.append(fname)

stationary_csv_files = []
for fname in os.listdir(stationary_folder):
    if fname in files_to_ignore or not fname.endswith('.csv'):
        continue
    stationary_csv_files.append(fname)

# Lake names are the file names without extension; keep lakes present in both folders
evolving_lake_names = set(os.path.splitext(fname)[0] for fname in evolving_csv_files)
stationary_lake_names = set(os.path.splitext(fname)[0] for fname in stationary_csv_files)
common_lake_names = evolving_lake_names & stationary_lake_names

# One row per common lake; flags and counts start at their "nothing found" values
# and are filled in by the per-lake processing loop
lake_results_df = pd.DataFrame({
    'lake_name': list(common_lake_names),
    'greater_than_125_percent': False,
    'less_than_75_percent': False,
    'both_conditions': False,
    'either_condition': False,
    'total_time_steps': 0,
    'valid_data_found': False
})

# Per-lake merged dataframes accumulate here for the time step analysis
all_combined_data = []

# Loop through each lake: merge its evolving- and stationary-outline dV series
# by date, then flag whether any time step has an evolving dV magnitude more
# than 25% above or below the stationary dV magnitude
for idx, lake_name in enumerate(lake_results_df['lake_name']):
    try:
        # Read the evolving lake data
        evolving_file_path = os.path.join(evolving_folder, f"{lake_name}.csv")
        evolving_df = pd.read_csv(evolving_file_path)
        
        # Read the stationary lake data
        stationary_file_path = os.path.join(stationary_folder, f"{lake_name}.csv")
        stationary_df = pd.read_csv(stationary_file_path)
        
        # Identify the date column and volume column in each dataframe
        # (if several columns match, the last matching column wins)
        evolving_date_col = None
        evolving_vol_col = None
        
        for col in evolving_df.columns:
            if any(date_indicator in col.lower() for date_indicator in ['date', 'time', 'day']):
                evolving_date_col = col
            if 'dv_corr' in col.lower():
                evolving_vol_col = col
        
        # For stationary dataframe
        stationary_date_col = None
        stationary_vol_col = None
        
        for col in stationary_df.columns:
            if any(date_indicator in col.lower() for date_indicator in ['date', 'time', 'day']):
                stationary_date_col = col
            if 'dv_corr' in col.lower():
                stationary_vol_col = col
        
        # Skip if we couldn't identify the necessary columns
        if not all([evolving_date_col, evolving_vol_col, stationary_date_col, stationary_vol_col]):
            print(f"Skipping {lake_name} - could not identify all required columns")
            continue
        
        # Create standardized dataframes for merging
        evolving_std_df = pd.DataFrame({
            'lake_name': lake_name,
            'date': evolving_df[evolving_date_col],
            'evolving_outlines_dV_corr (m^3)': evolving_df[evolving_vol_col]
        })
        
        stationary_std_df = pd.DataFrame({
            'lake_name': lake_name,
            'date': stationary_df[stationary_date_col],
            'stationary_outline_dV_corr (m^3)': stationary_df[stationary_vol_col]
        })
        
        # Merge the dataframes on lake_name and date (inner join: shared dates only)
        merged_df = pd.merge(evolving_std_df, stationary_std_df, on=['lake_name', 'date'])
        
        # Filter out rows where evolving volume is 0
        filtered_df = merged_df[merged_df['evolving_outlines_dV_corr (m^3)'] != 0]
        
        # Skip lakes with no valid data
        if filtered_df.empty:
            print(f"Skipping {lake_name} - no valid data after filtering")
            continue
        
        # Update lake-level results
        lake_results_df.loc[idx, 'valid_data_found'] = True
        lake_results_df.loc[idx, 'total_time_steps'] = len(filtered_df)
        
        # Check conditions for lake-level analysis on dV magnitudes, matching
        # the absolute-value comparison used in the time step analysis.
        # Comparing signed values misclassifies draining steps: with
        # evolving = stationary = -1, "-1 > 1.25 * -1" is True.
        evolving_abs = filtered_df['evolving_outlines_dV_corr (m^3)'].abs()
        stationary_abs = filtered_df['stationary_outline_dV_corr (m^3)'].abs()
        condition_greater_than = evolving_abs > 1.25 * stationary_abs
        condition_less_than = evolving_abs < 0.75 * stationary_abs
        
        # Update DataFrame with lake-level results
        lake_results_df.loc[idx, 'greater_than_125_percent'] = condition_greater_than.any()
        lake_results_df.loc[idx, 'less_than_75_percent'] = condition_less_than.any()
        lake_results_df.loc[idx, 'both_conditions'] = condition_greater_than.any() and condition_less_than.any()
        
        # Calculate either condition
        condition_either = condition_greater_than | condition_less_than
        lake_results_df.loc[idx, 'either_condition'] = condition_either.any()
        
        # Add to combined data for time step analysis
        all_combined_data.append(filtered_df)
        
    except Exception as e:
        # Best-effort per lake: report the failure and move on to the next lake
        print(f"Error processing {lake_name}: {e}")
        continue

# ==================== LAKE-LEVEL ANALYSIS ====================
print("\n" + "="*60)
print("LAKE-LEVEL ANALYSIS RESULTS")
print("="*60)

# Keep only lakes for which the processing loop found valid data
valid_lake_results = lake_results_df.loc[lake_results_df['valid_data_found']]

total_valid_lakes = len(valid_lake_results)
print(f"Valid lakes analyzed: {total_valid_lakes}")

if total_valid_lakes == 0:
    print("No valid lakes found for analysis")
else:
    # Number of lakes flagged for each condition, then the share of valid lakes
    lake_counts = {
        flag: valid_lake_results[flag].sum()
        for flag in ('greater_than_125_percent', 'less_than_75_percent',
                     'both_conditions', 'either_condition')
    }
    lake_proportions = {flag: count / total_valid_lakes
                        for flag, count in lake_counts.items()}
    
    # Print lake-level results
    print(f"Lakes meeting condition 1 (greater than 125%): {lake_counts['greater_than_125_percent']} ({lake_proportions['greater_than_125_percent']:.2f})")
    print(f"Lakes meeting condition 2 (less than 75%): {lake_counts['less_than_75_percent']} ({lake_proportions['less_than_75_percent']:.2f})")
    print(f"Lakes meeting either condition: {lake_counts['either_condition']} ({lake_proportions['either_condition']:.2f})")
    print(f"Lakes meeting both conditions: {lake_counts['both_conditions']} ({lake_proportions['both_conditions']:.2f})")

# ==================== TIME STEP ANALYSIS ====================
print("\n" + "="*60)
print("TIME STEP ANALYSIS RESULTS")
print("="*60)

if not all_combined_data:
    print("Failed to create any valid combined data for time step analysis.")
else:
    # Stack every lake's merged time steps into one dataframe
    time_step_df = pd.concat(all_combined_data, ignore_index=True)
    
    # Compare dV magnitudes (absolute values) of evolving vs stationary outlines
    abs_evolving_dV = time_step_df['evolving_outlines_dV_corr (m^3)'].abs()
    abs_stationary_dV = time_step_df['stationary_outline_dV_corr (m^3)'].abs()
    condition1 = abs_evolving_dV > 1.25 * abs_stationary_dV
    condition2 = abs_evolving_dV < 0.75 * abs_stationary_dV
    
    # Derived masks: either, neither, and both conditions
    either_condition = condition1 | condition2
    neither_condition = ~either_condition
    both_conditions = condition1 & condition2
    
    # Number of time steps meeting each condition
    num_rows_condition1 = condition1.sum()
    num_rows_condition2 = condition2.sum()
    num_rows_either_condition = either_condition.sum()
    num_rows_neither_condition = neither_condition.sum()
    num_rows_both_conditions = both_conditions.sum()
    
    # Proportions relative to the number of time steps (0 if there are none)
    total_time_steps = len(time_step_df)
    if total_time_steps > 0:
        proportion_condition1 = num_rows_condition1 / total_time_steps
        proportion_condition2 = num_rows_condition2 / total_time_steps
        proportion_either_condition = num_rows_either_condition / total_time_steps
        proportion_neither_condition = num_rows_neither_condition / total_time_steps
        proportion_both_conditions = num_rows_both_conditions / total_time_steps
    else:
        proportion_condition1 = 0
        proportion_condition2 = 0
        proportion_either_condition = 0
        proportion_neither_condition = 0
        proportion_both_conditions = 0
    
    # Sanity check: condition 1 + condition 2 + both + neither
    sum_of_proportions = np.sum([proportion_condition1, proportion_condition2, proportion_both_conditions, proportion_neither_condition])
    
    # Print out the time step results
    print(f"Valid time steps analyzed: {total_time_steps}")
    print(f"Proportion of time steps meeting condition 1 (greater than 125%): {np.round(proportion_condition1, 2)}")
    print(f"Proportion of time steps meeting condition 2 (less than 75%): {np.round(proportion_condition2, 2)}")
    print(f"Proportion of time steps meeting either conditions: {np.round(proportion_either_condition, 2)}")
    print(f"Proportion of time steps meeting neither condition: {np.round(proportion_neither_condition, 2)}")
    print(f"Proportion of time steps meeting both conditions: {np.round(proportion_both_conditions, 2)}")
    print(f"Sum of proportions: {np.round(sum_of_proportions, 2)}")

# ==================== SUMMARY ====================
print("\n" + "="*60)
print("ANALYSIS SUMMARY")
print("="*60)

# The summary needs both the lake-level and the time step results
if not (total_valid_lakes > 0 and all_combined_data):
    print("Analysis could not be completed due to data issues.")
else:
    print("Analysis completed successfully:")
    print(f"  • {total_valid_lakes} lakes analyzed")
    print(f"  • {len(time_step_df)} total time steps analyzed")
    print("\nKey findings:")
    print(f"  • {lake_proportions['either_condition']:.1%} of lakes have at least one time step with dV differences ±25%")
    print(f"  • {proportion_either_condition:.1%} of time steps show dV differences ±25%")

### Explaining continental sum trends

In [None]:
# Folder of evolving-outlines geometric calculation CSVs to plot
directory = 'output/geometric_calcs/evolving_outlines_geom_calc/forward_fill'

def create_interactive_plot(directory, stationary_outlines_gdf):
    # Load and process all lake data
    dfs = []
    lake_names = []
    
    for file in os.listdir(directory):
        # Exclude continental summation files
        if "subset" in file or "superset" in file:
            continue

        if file.endswith('.csv'):
            file_path = os.path.join(directory, file)
            df = pd.read_csv(file_path)
            lake_name = os.path.splitext(file)[0]
            
            # Process each lake's data
            df['lake_name'] = lake_name
            df['datetime'] = pd.to_datetime(df['mid_pt_datetime'])
            df['cumsum_vol'] = np.cumsum(df['evolving_outlines_dV_corr (m^3)']/1e9)
            dfs.append(df)
            lake_names.append(lake_name)
    
    # Combine all dataframes
    combined_df = pd.concat(dfs, ignore_index=True)
    
    # Create plot using hvplot
    plot = combined_df.hvplot.line(
        x='datetime',
        y='cumsum_vol',
        by='lake_name',
        width=800,
        height=400,
        title='Lake Volume Changes Over Time',
        xlabel='Date',
        ylabel='Cumulative Volume Change (km³)',
        grid=True,
        legend=False
    )
    
    return plot

# Usage: build the per-lake cumulative volume plot from the CSV files in `directory`
plot = create_interactive_plot(directory, stationary_outlines_gdf)
plot  # Display in notebook (last expression of the cell)

In [None]:
directory = 'output/geometric_calcs/evolving_outlines_geom_calc/forward_fill'

def create_interactive_plot(directory, stationary_outlines_gdf):
    """Build a HoloViews curve of cumulative volume change, grouped by CSV file.

    Reads every ``.csv`` file in ``directory``, cumulatively sums its
    'evolving_outlines_dV_corr (m^3)' column (divided by 1e9) and returns the
    curves grouped by 'lake_name' (the file stem).

    ``stationary_outlines_gdf`` is accepted but not used.

    NOTE(review): unlike the sibling cells, files with "subset"/"superset" in
    their name are not skipped here - confirm whether that is intended.
    """
    per_file_frames = []

    # Load each CSV and derive the plotting columns
    csv_names = (entry for entry in os.listdir(directory) if entry.endswith('.csv'))
    for csv_name in csv_names:
        frame = pd.read_csv(os.path.join(directory, csv_name))
        frame['lake_name'] = os.path.splitext(csv_name)[0]
        frame['datetime'] = pd.to_datetime(frame['mid_pt_datetime'])
        frame['cumsum_vol'] = np.cumsum(np.divide(frame['evolving_outlines_dV_corr (m^3)'], 1e9))
        per_file_frames.append(frame)

    # One long table holding every file's time series
    combined_df = pd.concat(per_file_frames, ignore_index=True)

    # Dataset -> Curve conversion, grouped by lake name
    curves = hv.Dataset(combined_df).to(
        hv.Curve,
        kdims=['datetime'],
        vdims=['cumsum_vol', 'lake_name'],
        groupby='lake_name',
    )

    # Styling options applied to the grouped curves
    return curves.opts(
        width=800,
        height=400,
        tools=['hover'],
        title='Lake volume changes over time',
        xlabel='Date',
        ylabel='Cumulative volume change (km³)',
        show_grid=True,
        toolbar='above',
    )

# Usage: build the grouped cumulative volume curves from the CSV files in `directory`
plot = create_interactive_plot(directory, stationary_outlines_gdf)
plot  # Display in notebook (last expression of the cell)

In [None]:
# Investigate the lakes driving the deviation between the evolving and
# stationary outline cumulative volume estimates

evolving_directory = 'output/geometric_calcs/evolving_outlines_geom_calc'
stationary_directory = 'output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes'

def process_lake_data(directory, is_evolving=True):
    """Load per-lake CSV files and attach a cumulative volume column.

    Parameters
    ----------
    directory : str
        Folder to scan; only ``.csv`` files whose name contains neither
        "subset" nor "superset" are read.
    is_evolving : bool
        True sums 'evolving_outlines_dV_corr (m^3)'; False sums
        'stationary_outline_dV_corr (m^3)'.

    Returns
    -------
    dict
        File stem (lake name) -> DataFrame with added 'lake_name', 'datetime'
        and 'cumsum_vol' columns. 'cumsum_vol' is the plain cumulative sum of
        the selected column (no unit conversion is applied).
    """
    # Column to accumulate depends on which kind of directory is being read
    if is_evolving:
        dv_column = 'evolving_outlines_dV_corr (m^3)'
    else:
        dv_column = 'stationary_outline_dV_corr (m^3)'

    lake_frames = {}
    for entry in os.listdir(directory):
        # Continental summation files and non-CSV files are ignored
        is_summation_file = "subset" in entry or "superset" in entry
        if is_summation_file or not entry.endswith('.csv'):
            continue

        lake_name, _ = os.path.splitext(entry)
        frame = pd.read_csv(os.path.join(directory, entry))
        frame['lake_name'] = lake_name
        frame['datetime'] = pd.to_datetime(frame['mid_pt_datetime'])
        frame['cumsum_vol'] = frame[dv_column].cumsum()
        lake_frames[lake_name] = frame

    return lake_frames

def create_interactive_plot(evolving_directory, stationary_directory):
    """Plot per-lake cumulative volume differences (evolving - stationary).

    Lakes present in both directories are drawn as the difference of their
    cumulative volumes on matching 'datetime' values (curve name
    ``<lake>_difference``). Lakes found only in the stationary directory are
    drawn as their stationary cumulative volume (``<lake>_stationary``).

    ``process_lake_data`` returns cumulative sums of the '(m^3)' columns, so
    the values are divided by 1e9 here to match the km³ axis label.

    Parameters
    ----------
    evolving_directory : str
        Folder with evolving-outline geometric calculation CSV files.
    stationary_directory : str
        Folder with stationary-outline geometric calculation CSV files.

    Returns
    -------
    The hvplot line plot with one curve per lake.

    Raises
    ------
    ValueError
        If neither directory yields any lake to plot.
    """
    m3_per_km3 = 1e9
    plot_columns = ['datetime', 'cumsum_vol', 'lake_name']

    # Load data from both directories
    evolving_dfs = process_lake_data(evolving_directory, is_evolving=True)
    stationary_dfs = process_lake_data(stationary_directory, is_evolving=False)

    plot_dfs = []

    # Lakes in both directories: difference (evolving - stationary).
    # Sorted so the curve order does not depend on set iteration order.
    for lake_name in sorted(evolving_dfs.keys() & stationary_dfs.keys()):
        merged_df = pd.merge(
            evolving_dfs[lake_name][['datetime', 'cumsum_vol']],
            stationary_dfs[lake_name][['datetime', 'cumsum_vol']],
            on='datetime',
            suffixes=('_evolving', '_stationary')
        )
        merged_df['cumsum_vol'] = (
            merged_df['cumsum_vol_evolving'] - merged_df['cumsum_vol_stationary']
        ) / m3_per_km3
        merged_df['lake_name'] = lake_name + '_difference'
        plot_dfs.append(merged_df[plot_columns])

    # Lakes only in the stationary directory: plot the stationary series itself
    for lake_name in sorted(stationary_dfs.keys() - evolving_dfs.keys()):
        df = stationary_dfs[lake_name][['datetime', 'cumsum_vol']].copy()
        df['cumsum_vol'] = df['cumsum_vol'] / m3_per_km3
        df['lake_name'] = lake_name + '_stationary'
        plot_dfs.append(df[plot_columns])

    # Fail with an explicit message instead of pandas' generic concat error
    if not plot_dfs:
        raise ValueError(
            f"No lake CSV files found in '{evolving_directory}' or '{stationary_directory}'")

    # Combine all dataframes
    combined_df = pd.concat(plot_dfs, ignore_index=True)

    # Create plot using hvplot
    plot = combined_df.hvplot.line(
        x='datetime',
        y='cumsum_vol',
        by='lake_name',
        width=800,
        height=400,
        title='Lake volume changes over time',
        xlabel='Date',
        ylabel='Cumulative volume change (km³)',
        grid=True,
        legend=False
    )

    return plot

# Usage: compare the evolving and stationary outline directories defined above
plot = create_interactive_plot(evolving_directory, stationary_directory)
plot  # Display in notebook (last expression of the cell)

# Figures

## Fig. 2

In [None]:
# Global matplotlib text defaults for the figures below, set per rc group
plt.rc('font', family='DejaVu Sans', size=9)
plt.rc('axes', labelsize=10, titlesize=9)
plt.rc('xtick', labelsize=9)
plt.rc('ytick', labelsize=9)
plt.rc('legend', fontsize=9)

In [None]:
# Boolean flag: use the forward-filled version of the evolving outlines calculations
forward_fill = True
# forward_fill = False

# Lakes to include in the plot, listed in left-to-right column order.
# The same list drives both the selection and the ordering.
desired_order = ['ConwaySubglacialLake', 'David_s1', 'Slessor_23']
selected_lakes = stationary_outlines_gdf[stationary_outlines_gdf['name'].isin(desired_order)]
stationary_outlines_gdf_filtered = gpd.GeoDataFrame(pd.concat([selected_lakes[selected_lakes['name'] == name] for name in desired_order]))

# Number of rows and columns
nrows, ncols = 4, 3

# Create a 4x3 grid of plots (4 metrics, 3 lakes per metric)
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(15, 20), constrained_layout=True)

# Define the display names for lakes (same order as desired_order)
lake_names = ['Conway Subglacial Lake', 'David$_{s1}$', 'Slessor$_{23}$']

# Add titles to the top row of subplots
for col, title in enumerate(lake_names):
    axs[0, col].set_title(title, fontsize=18, pad=12)

# Define color that will be reused
stationary_outline_color  = 'darkturquoise'

# Share the y-axis within each time-series row (rows 1+), but not between rows.
# Column 0 is the reference axes, so only the remaining columns are linked to it.
for row in range(1, nrows):
    for col in range(1, ncols):
        axs[row, col].sharey(axs[row, 0])

# Pick a discrete colormap (one colour per entry of cyc_start_datetimes[1:])
# and normalise it over the date range cyc_start_datetimes[1] .. [-1]
cmap = plt.get_cmap('plasma', len(cyc_start_datetimes[1:]))
norm = plt.Normalize(mdates.date2num(cyc_start_datetimes[1]), 
                    mdates.date2num(cyc_start_datetimes[-1]))

def _plot_series_with_points(ax, x, y, **style):
    """Draw a single-colour series on ``ax`` as a line plus small markers (s=5)."""
    ax.plot(x, y, **style)
    ax.scatter(x, y, s=5, **style)


def _plot_date_colored_series(ax, x, y, cmap, norm):
    """Draw a series on ``ax`` whose segments and markers are coloured by their x value."""
    # Consecutive points are paired up into segments for the LineCollection
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)

    # Create a LineCollection, using the discrete colormap and norm
    lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
    lc.set_array(x)
    lc.set_linewidth(2)

    # Plot multi-colored line and scatter for data points
    ax.add_collection(lc)
    ax.scatter(x, y, c=x, cmap=cmap, s=9, norm=norm, zorder=2)


# Largest area [km^2] plotted so far. The active-area row shares one y-axis,
# so its upper limit must cover every lake, not only the first column.
max_y = -np.inf

for idx, (lake_idx, lake) in enumerate(stationary_outlines_gdf_filtered.iterrows()):
    # Select the row by index and keep it as a one-row GeoDataFrame
    lake_gdf = stationary_outlines_gdf_filtered.loc[[lake_idx]]
    lake_name = lake_gdf['name'].iloc[0]
    stationary_outline = lake_gdf['geometry']
    print(f"\nProcessing lake: {lake_name}")

    # Open the evolving outlines GeoJSON file. Existence is checked explicitly
    # because the exception raised for a missing file depends on the GeoPandas
    # I/O engine (fiona vs pyogrio).
    evolving_outlines_path = 'output/lake_outlines/evolving_outlines/{}.geojson'.format(lake_name)
    if not os.path.isfile(evolving_outlines_path):
        print(f"File for {lake_name} not found. Skipping...")
        continue  # Leave this lake's column empty
    evolving_outlines_gdf = gpd.read_file(evolving_outlines_path)

    # Choose the evolving-outline calculations folder from the forward_fill flag
    evolving_calcs_dir = 'output/geometric_calcs/evolving_outlines_geom_calc'
    if forward_fill:
        evolving_calcs_dir += '/forward_fill'
    else:
        print('using not forward fill')

    # Attempt to open the geometric calculations CSV files
    try:
        evolving_geom_calcs_df = pd.read_csv('{}/{}.csv'.format(evolving_calcs_dir, lake_name))
        evolving_union_geom_calcs_df = pd.read_csv('output/geometric_calcs/stationary_outline_geom_calc/evolving_union_at_evolving_lakes/{}.csv'.format(lake_name))
        stationary_geom_calcs_df = pd.read_csv('output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes/{}.csv'.format(lake_name))
    except FileNotFoundError:
        print(f"At least one of the geometric calculations CSV files for {lake_name} not found. Skipping...")
        continue  # Leave this lake's column empty

    # Convert strings to datetime
    evolving_geom_calcs_df['mid_pt_datetime'] = pd.to_datetime(evolving_geom_calcs_df['mid_pt_datetime'])

    # Matplotlib date numbers of each table, reused by all time-series panels
    t_evolving = mdates.date2num(evolving_geom_calcs_df['mid_pt_datetime'])
    t_stationary = mdates.date2num(stationary_geom_calcs_df['mid_pt_datetime'])
    t_union = mdates.date2num(evolving_union_geom_calcs_df['mid_pt_datetime'])


    # Panel - evolving outlines ------------------------------------------------------

    # Find evolving and stationary outlines union for plotting extent
    evolving_stationary_union_gdf = gpd.GeoDataFrame(
        geometry=[lake_gdf.geometry.iloc[0].union(evolving_outlines_gdf.geometry.union_all())],
        crs=lake_gdf.crs)
    x_min, y_min, x_max, y_max = evolving_stationary_union_gdf.bounds.iloc[0]

    # Make plots a uniform size: turn the bounds into a square centred on the
    # original midpoint, with a side equal to the larger of the two spans
    x_mid = (x_min + x_max) / 2
    y_mid = (y_min + y_max) / 2
    max_span = max(x_max - x_min, y_max - y_min)
    x_min = x_mid - max_span / 2
    x_max = x_mid + max_span / 2
    y_min = y_mid - max_span / 2
    y_max = y_mid + max_span / 2

    # Pad the square on every side by a fraction of its size
    buffer_frac = 0.35
    x_buffer = abs(x_max-x_min)*buffer_frac
    y_buffer = abs(y_max-y_min)*buffer_frac
    map_extent = [x_min-x_buffer, x_max+x_buffer, y_min-y_buffer, y_max+y_buffer]

    # Plot MOA surface imagery clipped to the padded extent
    mask_x = (moa_highres_da.x >= map_extent[0]) & (moa_highres_da.x <= map_extent[1])
    mask_y = (moa_highres_da.y >= map_extent[2]) & (moa_highres_da.y <= map_extent[3])
    moa_highres_da_subset = moa_highres_da.where(mask_x & mask_y, drop=True)
    axs[0,idx].imshow(moa_highres_da_subset[0,:,:], cmap='gray', clim=[14000, 17000], extent=map_extent)

    # Store one line per time step for the legend and plot each evolving
    # outline coloured by its own date. The colour is looked up from `dt`
    # itself so it matches the date-coloured time series in the panels below.
    lines = []  # list of lines to be used for the legend
    for dt in cyc_start_datetimes[1:]:
        outline_color = cmap(norm(mdates.date2num(dt)))
        line, = axs[0,idx].plot(1, 1, color=outline_color)
        lines.append(line)

        # Filter rows that match the current time step
        evolving_outlines_gdf_dt_sub = evolving_outlines_gdf[evolving_outlines_gdf['mid_pt_datetime'] == dt]

        # Plotting the subset if not empty
        if not evolving_outlines_gdf_dt_sub.empty:
            evolving_outlines_gdf_dt_sub.boundary.plot(ax=axs[0,idx], 
                color=outline_color, linewidth=1)

    # Plot stationary outlines
    # NOTE(review): this draws every outline in stationary_outlines_gdf, not only
    # the current lake - presumably so neighbouring lakes in view are shown; confirm.
    stationary_outlines_gdf['geometry'].boundary.plot(ax=axs[0,idx], color=stationary_outline_color, linewidth=2)

    # Select this lake's row of evolving_outlines_union_gdf and plot it
    evolving_union_gdf = evolving_outlines_union_gdf[evolving_outlines_union_gdf['name'] == lake_name]
    evolving_union_gdf.boundary.plot(ax=axs[0,idx], color='k', linestyle='dotted', linewidth=2)

    # Plot inset map
    axIns = axs[0,idx].inset_axes([0.01, -0.01, 0.3, 0.3]) # [left, bottom, width, height] (fractional axes coordinates)
    axIns.set_aspect('equal')
    moa_2014_coastline.plot(ax=axIns, color='gray', edgecolor='k', linewidth=0.1, zorder=3)
    moa_2014_groundingline.plot(ax=axIns, color='ghostwhite', edgecolor='k', linewidth=0.1, zorder=3)
    axIns.axis('off')
    # Plot star to indicate location (centre of the map panel)
    axIns.scatter(x_mid, y_mid, marker='*', 
        linewidth=2, color='k', s=30, zorder=3)

    # Change polar stereographic m to km
    km_scale = 1e3
    ticks_x = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x/km_scale))
    axs[0,idx].xaxis.set_major_formatter(ticks_x)
    ticks_y = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x/km_scale))
    axs[0,idx].yaxis.set_major_formatter(ticks_y)

    # Set axes limits
    axs[0,idx].set(xlim=(map_extent[0], map_extent[1]), ylim=(map_extent[2], map_extent[3]))


    # Panel - Active area ---------------------------------------------

    ax_area = axs[1,idx]

    # Areas converted from m^2 to km^2
    stationary_area_km2 = lake_gdf['area (m^2)'].iloc[0] / 1e6
    evolving_union_area_km2 = evolving_union_gdf['area (m^2)'].iloc[0] / 1e6
    evolving_area_km2 = np.divide(evolving_geom_calcs_df['evolving_outlines_area (m^2)'], 1e6)

    # Plot horizontal zero line for reference
    ax_area.axhline(0, color='k', linewidth=1)

    # Plot stationary outline and evolving outlines unary union areas
    ax_area.axhline(stationary_area_km2, 
        color=stationary_outline_color, linestyle='solid', linewidth=2)
    ax_area.axhline(evolving_union_area_km2, 
        color='k', linestyle='dotted', linewidth=2)

    # Plot evolving outlines
    _plot_date_colored_series(ax_area, t_evolving, evolving_area_km2, cmap, norm)

    # Track the maximum plotted area over all lakes processed so far
    max_y = max(max_y, stationary_area_km2, evolving_union_area_km2, evolving_area_km2.max())

    # Set y limit with padding above the maximum value to avoid data plotting behind legend
    ax_area.set_ylim(bottom=None, top=max_y * 1.3)

    # Panel - Cumulative dh/dt -------------------------------------------------------

    ax_dh = axs[2,idx]

    # Plot horizontal zero line for reference
    ax_dh.axhline(0, color='k', linewidth=1)

    # Plot stationary outlines off-lake secular dh
    _plot_series_with_points(ax_dh, t_stationary,
        np.cumsum(stationary_geom_calcs_df['stationary_outline_region_dh (m)']),
        color='lightgray', linestyle='solid', linewidth=2)

    # Plot evolving outlines off-lake secular dh
    _plot_series_with_points(ax_dh, t_union,
        np.cumsum(evolving_union_geom_calcs_df['stationary_outline_region_dh (m)']),
        color='dimgray', linestyle='solid', linewidth=2)

    # Plot stationary outline time series
    _plot_series_with_points(ax_dh, t_stationary,
        np.cumsum(stationary_geom_calcs_df['stationary_outline_dh_corr (m)']),
        color=stationary_outline_color, linestyle='solid', linewidth=2)

    # Plot evolving outlines time series
    _plot_date_colored_series(ax_dh, t_evolving,
        np.cumsum(evolving_geom_calcs_df['evolving_outlines_dh_corr (m)']), cmap, norm)

    # Plot evolving outlines union outline time series
    _plot_series_with_points(ax_dh, t_union,
        np.cumsum(evolving_union_geom_calcs_df['stationary_outline_dh_corr (m)']),
        color='k', linestyle='dotted', linewidth=2)

    # Plot bias (evolving - prior stationary).
    # The subtraction is row-by-row on the dataframe index, so it assumes the
    # tables hold the same time steps in the same order - TODO confirm.
    _plot_series_with_points(ax_dh, t_evolving,
        np.cumsum(evolving_geom_calcs_df['evolving_outlines_dh_corr (m)'] - 
        stationary_geom_calcs_df['stationary_outline_dh_corr (m)']),
        color='red', linestyle='solid', linewidth=2)

    # Plot bias (evolving - updated stationary)
    _plot_series_with_points(ax_dh, t_evolving,
        np.cumsum(evolving_geom_calcs_df['evolving_outlines_dh_corr (m)'] - 
        evolving_union_geom_calcs_df['stationary_outline_dh_corr (m)']),
        color='darkred', linestyle='solid', linewidth=2)

    ax_dh.set_ylim(bottom=-17.5, top=17.5)

    # Panel - Cumulative dV/dt --------------------------------------------------

    ax_dv = axs[3,idx]

    # Plot horizontal line at zero for reference
    ax_dv.axhline(0, color='k', linewidth=1)

    # Plot stationary outline time series (m^3 converted to km^3)
    _plot_series_with_points(ax_dv, t_stationary,
        np.divide(np.cumsum(stationary_geom_calcs_df['stationary_outline_dV_corr (m^3)']), 1e9),
        color=stationary_outline_color, linestyle='solid', linewidth=2)

    # Plot evolving outlines time series
    _plot_date_colored_series(ax_dv, t_evolving,
        np.cumsum(np.divide(evolving_geom_calcs_df['evolving_outlines_dV_corr (m^3)'], 1e9)), cmap, norm)

    # Plot evolving outlines union outline time series
    _plot_series_with_points(ax_dv, t_union,
        np.cumsum(np.divide(evolving_union_geom_calcs_df['stationary_outline_dV_corr (m^3)'], 1e9)),
        color='k', linestyle='dotted', linewidth=2)

    # Plot bias (evolving - prior stationary)
    _plot_series_with_points(ax_dv, t_evolving,
        np.cumsum(np.divide((evolving_geom_calcs_df['evolving_outlines_dV_corr (m^3)'] -
            stationary_geom_calcs_df['stationary_outline_dV_corr (m^3)']), 1e9)),
        color='red', linestyle='solid', linewidth=2)

    # Plot bias (evolving - updated stationary)
    _plot_series_with_points(ax_dv, t_union,
        np.cumsum(np.divide((evolving_geom_calcs_df['evolving_outlines_dV_corr (m^3)'] -
            evolving_union_geom_calcs_df['stationary_outline_dV_corr (m^3)']), 1e9)),
        color='darkred', linestyle='solid', linewidth=2)

    ax_dv.set_ylim(bottom=-3.5, top=3.5)

# Add colorbar, legends, and titles
idx=0  # Column index: the colorbar and legends are only added to the first column of plots

# Set up colormap: discrete 'plasma' spanning cyc_start_datetimes[1] .. cyc_start_datetimes[-1]
min_date = pd.to_datetime(cyc_start_datetimes[1])
max_date = pd.to_datetime(cyc_start_datetimes[-1])
date_range = pd.date_range(min_date, max_date, periods=len(cyc_start_datetimes[1:]))
# Unique calendar years in the range, converted back to datetimes for use as tick positions
years = date_range.year.unique()
years = pd.to_datetime(years, format='%Y')
n_dates = len(cyc_start_datetimes[:-1])
cmap = plt.get_cmap('plasma', n_dates)
norm = plt.Normalize(mdates.date2num(min_date), mdates.date2num(max_date))
# Mappable object that the colorbar is drawn from
m = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
m.set_array(np.linspace(mdates.date2num(min_date), mdates.date2num(max_date), n_dates))

# Add colorbar in an inset axes placed inside the top panel of column `idx`
cax = inset_axes(axs[0,idx],
                 width='67%',
                 height='3%',
                 loc='lower left',
                 bbox_to_anchor=[0.31, 0.12, 1, 1],  # [left, bottom, width, height]
                 bbox_transform=axs[0,idx].transAxes,
                 borderpad=0)
cbar = fig.colorbar(m, cax=cax, orientation='horizontal')
cbar.set_label('year', size=10, labelpad=5)

# Set ticks for every year after the first, but labels only for even years
tick_locations = [mdates.date2num(date) for date in years[1:]]
tick_labels = [date.strftime('%Y') if date.year % 2 == 0 else '' for date in years[1:]]
cbar.set_ticks(tick_locations)
cbar.set_ticklabels(tick_labels)

# Add minor ticks for quarters
cbar.ax.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=[1,4,7,10]))  # Quarter intervals only

# Add legends
# Proxy artists: empty lines carrying only the style of each plotted series
stationary_line = plt.Line2D([], [], color=stationary_outline_color, linestyle='solid', linewidth=2)
evolving_union_line = plt.Line2D([], [], color='k', linestyle='dotted', linewidth=2)
evolving_region = plt.Line2D([], [], color='dimgray', linestyle='solid', linewidth=2)
stationary_region = plt.Line2D([], [], color='lightgray', linestyle='solid', linewidth=2)
bias = plt.Line2D([], [], color='red', linestyle='solid', linewidth=2)
bias2 = plt.Line2D([], [], color='darkred', linestyle='solid', linewidth=2)

# Handle/label groups reused by several legends. The tuple of per-date lines
# is rendered as one multi-coloured handle by HandlerTuple.
_outline_handles = [stationary_line, tuple(lines), evolving_union_line]
_outline_labels = ['stationary outline', 'evolving outlines', 'updated stationary outline']
_region_handles = [stationary_region, evolving_region]
_region_labels = ['stationary outline off-lake secular',
                  'updated stationary outline off-lake secular']
_bias_handles = [bias, bias2]
_bias_labels = ['bias (evolving − prior stationary)',
                'bias (evolving − updated stationary)']
_legend_kwargs = dict(handlelength=3,
                      handler_map={tuple: HandlerTuple(ndivide=None, pad=0)})

# Map panel and active-area panel: outlines only
legend = axs[0,idx].legend(_outline_handles, _outline_labels,
                           fontsize=12, loc='upper center', **_legend_kwargs)
legend = axs[1,idx].legend(_outline_handles, _outline_labels,
                           fontsize=12, loc='upper center', **_legend_kwargs)

# Cumulative dh panel: off-lake secular series, outlines and both biases
legend = axs[2,idx].legend(_region_handles + _outline_handles + _bias_handles,
                           _region_labels + _outline_labels + _bias_labels,
                           fontsize=11, loc='lower center', **_legend_kwargs)

# Cumulative dV panel: outlines and both biases
legend = axs[3,idx].legend(_outline_handles + _bias_handles,
                           _outline_labels + _bias_labels,
                           fontsize=12, loc='lower center', **_legend_kwargs)

# Font sizes used on every panel
TICK_SIZE = 14
LABEL_SIZE = 18

# y-axis label of each row (drawn on the leftmost column only)
y_labels = ['y [km]', 'active area [km$^2$]', 'cumulative $dh$ [m]', 'cumulative $dV$ [km$^3$]']

# x-limits of the time-series rows. The right-hand limit is set slightly
# earlier to prevent a tick mark displaying where there is no data point.
_time_xlim = (cyc_dates['cyc_start_datetimes'].iloc[0],
              cyc_dates['cyc_end_datetimes'].iloc[-1] - datetime.timedelta(days=2))

# Apply common font sizes, panel letters and axis labels
for i in range(nrows):
    for j in range(ncols):
        panel = axs[i,j]

        # Tick label size
        panel.tick_params(axis='both', labelsize=TICK_SIZE)

        # Panel letter (a, b, c, ...) counted row by row
        panel.text(0.02, 0.98, chr(97 + i*ncols + j), transform=panel.transAxes,
                   fontsize=20, va='top', ha='left')

        # x-axis label: map row and bottom row only
        if i == 0:
            panel.set_xlabel('x [km]', size=LABEL_SIZE)
        elif i == 3:
            panel.set_xlabel('year', size=LABEL_SIZE)

        # Date ticks on the time-series rows: yearly major, quarterly minor
        if 0 < i < 4:
            panel.xaxis.set_major_formatter(year_interval_formatter())
            panel.xaxis.set_major_locator(mdates.YearLocator())
            panel.xaxis.set_minor_locator(mdates.MonthLocator([1, 4, 7, 10]))

        # Leftmost column carries the row's y label
        if j == 0:
            panel.set_ylabel(y_labels[i], size=LABEL_SIZE)

        # Do not display redundant tick labels
        if 0 < i < 3:
            panel.set_xticklabels([])

        if i > 0:
            # Common time range; y tick labels only on the leftmost column
            panel.set(xlim=_time_xlim)
            panel.tick_params(axis='y', which='both', labelleft=(j == 0))

# Clear output
clear_output()

# Save plot. The 'figures' subfolder is created first because savefig does not
# create missing directories (only OUTPUT_DIR itself is created at setup).
figures_dir = os.path.join(OUTPUT_DIR, 'figures')
os.makedirs(figures_dir, exist_ok=True)
plt.savefig(os.path.join(figures_dir, 'Fig2_lake_reexamination_results.jpg'), dpi=300, bbox_inches='tight')

# Preview plot
plt.show()

In [None]:
# Close every open matplotlib figure before building the next one
plt.close('all')

## Fig. 3
NOTE: You must first run the "Dissolved inorganic carbon (DIC) export estimates" section earlier in this notebook, which defines variables required for plotting.

In [None]:
# Split the stationary lakes by the kind of file found for them in the
# evolving outlines folder
folder_path = 'output/lake_outlines/evolving_outlines'

# Lakes matched against .txt files (non-dynamic outlines)
no_evolving_outlines_lakes = filter_gdf_by_folder_contents(
    stationary_lakes_gdf, folder_path, file_extension='txt', exclude=False)
print(f'non-dynamic: {len(no_evolving_outlines_lakes)}')

# Lakes matched against .geojson files (evolving outlines)
evolving_outlines_lakes = filter_gdf_by_folder_contents(
    stationary_lakes_gdf, folder_path, file_extension='geojson', exclude=False)

# Special case: Site_B and Site_C are now a combined lake (Site_B_Site_C), so
# their individual rows are appended explicitly and duplicate rows dropped
include_list = ['Site_B', 'Site_C']
is_included = stationary_lakes_gdf['name'].isin(include_list)
included_rows = stationary_lakes_gdf[is_included]
evolving_outlines_lakes = pd.concat([evolving_outlines_lakes, included_rows])
evolving_outlines_lakes = evolving_outlines_lakes.drop_duplicates()
print(f'dynamic: {len(evolving_outlines_lakes)}')

In [None]:
# Create a 3x1 grid of plots
nrows, ncols = 3, 1

# Setup figure: three stacked panels sharing the x-axis, the bottom one twice as tall
fig, ax = plt.subplots(nrows, ncols, gridspec_kw={'height_ratios': [2.5, 2.5, 5]}, sharex=True, figsize=(10, 12), constrained_layout=True)

# Define colors and linestyles that will be reused and create lines for legend
stationary_outline_color  = 'darkturquoise'
stationary_line = plt.Line2D([], [], color=stationary_outline_color, linestyle='dashed', linewidth=2)
stationary_all_lakes_line = plt.Line2D([], [], color=stationary_outline_color, linestyle='solid', linewidth=2)
evolving_union_line = plt.Line2D([], [], color='k', linestyle='dotted', linewidth=2)
evolving_stationary_union_all_lakes_line = plt.Line2D([], [], color='teal', linestyle='solid', linewidth=2)
evolving_stationary_union_evolving_lakes_line = plt.Line2D([], [], color='teal', linestyle='dashed', linewidth=2)
bias = plt.Line2D([], [], color='red', linestyle='solid', linewidth=2)
bias2 = plt.Line2D([], [], color='darkred', linestyle='solid', linewidth=2)

# Pick colormap and make continuous cmap discrete for evolving outlines
cmap = plt.get_cmap('plasma', len(cyc_start_datetimes[1:]))
norm = plt.Normalize(mdates.date2num(cyc_start_datetimes[1]), 
                    mdates.date2num(cyc_start_datetimes[-1]))

# Store one line per time step to use in the legend. Each line's colour is
# looked up from the time step itself (`dt`); indexing the full series with the
# enumerate counter of the [1:] slice would shift every colour back one step.
lines = []  # list of lines to be used for the legend
for dt in cyc_dates['cyc_start_datetimes'][1:]:
    x = 1; y = 1
    line, = ax[0].plot(x, y, color=cmap(norm(mdates.date2num(dt))))
    lines.append(line)

# Continental summation CSV files. The same three file names exist in each of
# the three geometric-calculation folders read below.
filenames = {
    name: f'{name}.csv'
    for name in ('superset_IS2_lakes_sum',
                 'subset_CS2_IS2_lakes_SARInPreExpansion_sum',
                 'subset_CS2_IS2_lakes_SARInPostExpansion_sum')
}

# Evolving outlines (only lakes with evolving outlines)
base_path = 'output/geometric_calcs/evolving_outlines_geom_calc/forward_fill'
dfs = {name: pd.read_csv(f'{base_path}/{filename}')
       for name, filename in filenames.items()}

# Unpack into descriptive names for code readability
(superset_IS2_evolving_sum_df,
 subset_CS2_IS2_SARInPreExpansion_evolving_sum_df,
 subset_CS2_IS2_SARInPostExpansion_evolving_sum_df) = (dfs[name] for name in filenames)

# Stationary outlines (all lakes)
base_path = 'output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes'
dfs = {name: pd.read_csv(f'{base_path}/{filename}')
       for name, filename in filenames.items()}

(superset_IS2_stationary_sum_df,
 subset_CS2_IS2_SARInPreExpansion_stationary_sum_df,
 subset_CS2_IS2_SARInPostExpansion_stationary_sum_df) = (dfs[name] for name in filenames)

# Evolving union (only lakes with evolving outlines)
base_path = 'output/geometric_calcs/stationary_outline_geom_calc/evolving_union_at_evolving_lakes'
dfs = {name: pd.read_csv(f'{base_path}/{filename}')
       for name, filename in filenames.items()}

(superset_IS2_evolving_union_sum_df,
 subset_CS2_IS2_SARInPreExpansion_evolving_union_sum_df,
 subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df) = (dfs[name] for name in filenames)


# Panel - Lake active area ---------------------------------------------
# Zero reference line
ax[0].axhline(0, color='k', linewidth=1)

# Evolving outlines area time series [km^2] drawn as a multi-colored line (LineCollection
# of consecutive point pairs) plus markers: first the CS2-IS2 subset dataframe (thin line),
# then the IS2 superset dataframe (thick line)
for evolving_df, lw, marker_size in (
        (subset_CS2_IS2_SARInPostExpansion_evolving_sum_df, 1, 5),
        (superset_IS2_evolving_sum_df, 2, 9)):
    x = mdates.date2num(evolving_df['mid_pt_datetime'])
    y = evolving_df['evolving_outlines_area (m^2)'] / 1e6
    points = np.column_stack([x, y]).reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)
    lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
    lc.set_array(x)  # color segments by date
    lc.set_linewidth(lw)
    line = ax[0].add_collection(lc)
    scatter = ax[0].scatter(x, y, c=x, cmap=cmap, norm=norm, s=marker_size)

# Store dates and time period for satellite coverage eras (matplotlib date numbers)
start_date = mdates.date2num(cyc_dates['cyc_start_datetimes'].iloc[0])
time_span = mdates.date2num(cyc_dates['cyc_end_datetimes'].iloc[-1]) - start_date
is_SARIn_expansion_cyc = cyc_dates['cyc_start_datetimes'] == '2014-10-01T18:00:00.000000000'
SARIn_expand_date = mdates.date2num(cyc_dates.loc[is_SARIn_expansion_cyc, 'cyc_start_datetimes'].iloc[0])
is_IS2_cyc = cyc_dates['dataset'] == 'IceSat2_ATL15'
CS2_IS2_tie_pt = mdates.date2num(cyc_dates.loc[is_IS2_cyc, 'cyc_start_datetimes'].iloc[0])

# axhline takes xmin/xmax as fractions of the axes width, so express the era
# boundaries as fractions of the full time span
SARIn_expand_frac = (SARIn_expand_date-start_date)/time_span
IS2_start_frac = (CS2_IS2_tie_pt-start_date)/time_span

# Plot prior stationary outlines (solid) and then updated stationary outlines (dotted);
# each as three horizontal segments: pre-expansion, post-expansion, and ICESat-2 era
area_col = 'stationary_outline_area (m^2)'
for (pre_df, post_df, IS2_df), outline_style in (
        ((subset_CS2_IS2_SARInPreExpansion_stationary_sum_df,
          subset_CS2_IS2_SARInPostExpansion_stationary_sum_df,
          superset_IS2_stationary_sum_df),
         dict(color=stationary_outline_color, linestyle='solid')),
        ((subset_CS2_IS2_SARInPreExpansion_evolving_union_sum_df,
          subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df,
          superset_IS2_evolving_union_sum_df),
         dict(color='k', linestyle='dotted'))):
    for area_m2, lw, frac_start, frac_end in (
            (pre_df[area_col].iloc[0], 1, 0, SARIn_expand_frac),
            (post_df[area_col].iloc[-1], 1, SARIn_expand_frac, 1),
            (IS2_df[area_col].iloc[0], 2, IS2_start_frac, 1)):
        ax[0].axhline(area_m2 / 1e6, linewidth=lw,
                      xmin=frac_start, xmax=frac_end, **outline_style)


# Carbon export

# Define conversion factor for grams (1e9 g per gigagram; 1e6 would give megagrams)
g_conv = 1e9  # Gigagrams

# Plot horizontal line at zero for reference
ax[1].axhline(0, color='k', linewidth=1)

# Plot evolving outlines time series as multi-colored lines using LineCollection from
# points/segments: area scaled by the evolving-outline DIC export factor (defined outside
# this block). First the CS2-IS2 subset dataframe (thin), then the IS2 superset (thick).
# x is taken from the same dataframe as y so dates and values always stay paired
# (the subset series previously borrowed its dates from the stationary dataframe).
for evolving_df, lw, marker_size in (
        (subset_CS2_IS2_SARInPostExpansion_evolving_sum_df, 1, 5),
        (superset_IS2_evolving_sum_df, 2, 9)):
    x = mdates.date2num(evolving_df['mid_pt_datetime'])
    y = evolving_df['evolving_outlines_area (m^2)'] * SLM_evolving_per_area_per_step_DIC_export_filling_period / g_conv
    points = np.array([x, y]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)
    lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
    lc.set_array(x)
    lc.set_linewidth(lw)
    line = ax[1].add_collection(lc)
    scatter = ax[1].scatter(x, y, c=x, cmap=cmap, norm=norm, s=marker_size)

# axhline takes xmin/xmax as fractions of the axes width
SARIn_expand_frac = (SARIn_expand_date-start_date)/time_span
IS2_start_frac = (CS2_IS2_tie_pt-start_date)/time_span

# Plot stationary outlines (solid) and then the evolving outlines union (dotted), each as
# three horizontal segments: pre-expansion, post-expansion, and ICESat-2 era.
# Both use the stationary-outline DIC export factor.
area_col = 'stationary_outline_area (m^2)'
for (pre_df, post_df, IS2_df), outline_style in (
        ((subset_CS2_IS2_SARInPreExpansion_stationary_sum_df,
          subset_CS2_IS2_SARInPostExpansion_stationary_sum_df,
          superset_IS2_stationary_sum_df),
         dict(color=stationary_outline_color, linestyle='solid')),
        ((subset_CS2_IS2_SARInPreExpansion_evolving_union_sum_df,
          subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df,
          superset_IS2_evolving_union_sum_df),
         dict(color='k', linestyle='dotted'))):
    for area_m2, lw, frac_start, frac_end in (
            (pre_df[area_col].iloc[0], 1, 0, SARIn_expand_frac),
            (post_df[area_col].iloc[-1], 1, SARIn_expand_frac, 1),
            (IS2_df[area_col].iloc[0], 2, IS2_start_frac, 1)):
        ax[1].axhline(area_m2 * SLM_stationary_per_area_per_step_DIC_export / g_conv,
                      linewidth=lw, xmin=frac_start, xmax=frac_end, **outline_style)


# Panel - cumulative dV/dt --------------------------------------------------
# Zero reference line
ax[2].axhline(0, color='k', linewidth=1)

# Short aliases for the summed dataframes and their dV columns
cs2_evolving_df = subset_CS2_IS2_SARInPostExpansion_evolving_sum_df
cs2_stationary_df = subset_CS2_IS2_SARInPostExpansion_stationary_sum_df
cs2_union_df = subset_CS2_IS2_SARInPostExpansion_evolving_union_sum_df
IS2_evolving_df = superset_IS2_evolving_sum_df
IS2_stationary_df = superset_IS2_stationary_sum_df
IS2_union_df = superset_IS2_evolving_union_sum_df
evolving_dV = 'evolving_outlines_dV_corr (m^3)'
stationary_dV = 'stationary_outline_dV_corr (m^3)'

# Start date of the last CryoSat-2 SARIn cycle, cast to string for the comparisons
# with the 'mid_pt_datetime' column below
CS2_last_cyc_date = str(cyc_dates.loc[cyc_dates['dataset'] == 'CryoSat2_SARIn', 'cyc_start_datetimes'].iloc[-1])


def _through_last_CS2_cycle(sum_df):
    """Return the rows of sum_df whose 'mid_pt_datetime' is <= CS2_last_cyc_date."""
    return sum_df[sum_df['mid_pt_datetime'] <= CS2_last_cyc_date]


def _plot_cumulative_dV(mid_pt_datetimes, cum_dV, color, linewidth, marker_size, linestyle='solid'):
    """Draw one cumulative dV series on ax[2] as a line with scatter markers on top."""
    dates = mdates.date2num(mid_pt_datetimes)
    ax[2].plot(dates, cum_dV, color=color, linestyle=linestyle, linewidth=linewidth)
    ax[2].scatter(dates, cum_dV, color=color, linestyle='solid', linewidth=linewidth, s=marker_size)


# Evolving outlines [km^3] as multi-colored lines (LineCollection of consecutive point
# pairs). The ICESat-2 era series is offset by the CS2-IS2 subset series' cumulative
# sum through the last CryoSat-2 cycle
cum_sum_last_CS2_midcyc_date = np.cumsum(np.divide(
    _through_last_CS2_cycle(cs2_evolving_df)[evolving_dV], 1e9)).iloc[-1]
for mid_pt_datetimes, y, lw, marker_size in (
        (cs2_evolving_df['mid_pt_datetime'],
         np.cumsum(cs2_evolving_df[evolving_dV] / 1e9), 1, 5),
        (IS2_evolving_df['mid_pt_datetime'],
         np.cumsum(IS2_evolving_df[evolving_dV] / 1e9) + cum_sum_last_CS2_midcyc_date, 2, 9)):
    x = mdates.date2num(mid_pt_datetimes)
    points = np.column_stack([x, y]).reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)
    lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
    lc.set_array(x)
    lc.set_linewidth(lw)
    line = ax[2].add_collection(lc)
    scatter = ax[2].scatter(x, y, c=x, cmap=cmap, norm=norm, s=marker_size)

# Prior stationary outlines of all lakes
_plot_cumulative_dV(cs2_stationary_df['mid_pt_datetime'],
                    np.cumsum(cs2_stationary_df[stationary_dV] / 1e9),
                    color=stationary_outline_color, linewidth=1, marker_size=3)
cum_sum_last_CS2_midcyc_date = np.cumsum(
    _through_last_CS2_cycle(cs2_stationary_df)[stationary_dV] / 1e9).iloc[-1]
_plot_cumulative_dV(IS2_stationary_df['mid_pt_datetime'],
                    np.cumsum(IS2_stationary_df[stationary_dV] / 1e9) + cum_sum_last_CS2_midcyc_date,
                    color=stationary_outline_color, linewidth=2, marker_size=5)

# Bias (evolving - prior stationary)
_plot_cumulative_dV(cs2_stationary_df['mid_pt_datetime'],
                    np.cumsum(cs2_evolving_df[evolving_dV] - cs2_stationary_df[stationary_dV]) / 1e9,
                    color='red', linewidth=1, marker_size=3)
cum_sum_last_CS2_midcyc_date = np.cumsum((
    _through_last_CS2_cycle(cs2_evolving_df)[evolving_dV] -
    _through_last_CS2_cycle(cs2_stationary_df)[stationary_dV]) / 1e9).iloc[-1]
_plot_cumulative_dV(IS2_stationary_df['mid_pt_datetime'],
                    np.cumsum((IS2_evolving_df[evolving_dV] -
                               IS2_stationary_df[stationary_dV]) / 1e9) + cum_sum_last_CS2_midcyc_date,
                    color='red', linewidth=2, marker_size=5)

# Bias (evolving - updated stationary)
_plot_cumulative_dV(cs2_stationary_df['mid_pt_datetime'],
                    np.cumsum(cs2_evolving_df[evolving_dV] - cs2_union_df[stationary_dV]) / 1e9,
                    color='darkred', linewidth=1, marker_size=3)
cum_sum_last_CS2_midcyc_date = np.cumsum((
    _through_last_CS2_cycle(cs2_evolving_df)[evolving_dV] -
    _through_last_CS2_cycle(cs2_union_df)[stationary_dV]) / 1e9).iloc[-1]
_plot_cumulative_dV(IS2_stationary_df['mid_pt_datetime'],
                    np.cumsum((IS2_evolving_df[evolving_dV] -
                               IS2_union_df[stationary_dV]) / 1e9) + cum_sum_last_CS2_midcyc_date,
                    color='darkred', linewidth=2, marker_size=5)

# Updated stationary outline (dotted line, solid-edged markers)
_plot_cumulative_dV(cs2_union_df['mid_pt_datetime'],
                    np.cumsum(cs2_union_df[stationary_dV]) / 1e9,
                    color='k', linewidth=1, marker_size=5, linestyle='dotted')
cum_sum_last_CS2_midcyc_date = np.cumsum((
    _through_last_CS2_cycle(cs2_union_df)[stationary_dV]) / 1e9).iloc[-1]
_plot_cumulative_dV(IS2_union_df['mid_pt_datetime'],
                    np.cumsum(IS2_union_df[stationary_dV] / 1e9) + cum_sum_last_CS2_midcyc_date,
                    color='k', linewidth=2, marker_size=5, linestyle='dotted')

# Set y axes limits
# Top two panels: push the lower limit below zero by 10% of the autoscaled range to
# accommodate text annotations of satellite eras; the upper limit is left unchanged
auto_ylims = [ax[row].get_ylim() for row in (0, 1)]
for row, (auto_ymin, auto_ymax) in zip((0, 1), auto_ylims):
    ax[row].set_ylim(-(auto_ymax-auto_ymin)*0.1, None)
ax[2].set_ylim(-8.5, 8.5)
del auto_ylims, auto_ymin, auto_ymax

# Add axis label, legends, and titles
ax[2].set_xlabel('year', size=14)

# Plot vertical lines to indicate CS2 SARIn mode mask moving inland and ICESat-2 era start
for row in [0, 1, 2]:
    for era_boundary in (SARIn_expand_date, CS2_IS2_tie_pt):
        ax[row].axvline(era_boundary, color='dimgray', linestyle='solid', linewidth=1, ymin=-1, ymax=1)

# Add text labels just to the right (15 days) of each era start
text_offset = pd.Timedelta(days=15)
start_date_text = pd.to_datetime(cyc_dates['cyc_start_datetimes'].iloc[0]) + text_offset
sarin_expand_date_text = pd.to_datetime(cyc_dates[cyc_dates['cyc_start_datetimes'] == '2014-10-01T18:00:00.000000000']['cyc_start_datetimes'].iloc[0]) + text_offset
is2_start_date_text = pd.to_datetime(cyc_dates[cyc_dates['dataset'] == 'IceSat2_ATL15'].iloc[0]['cyc_start_datetimes']) + text_offset

for text_date, era_label in ((start_date_text, 'CryoSat-2 era begins'),
                             (sarin_expand_date_text, 'SARIn mode expands'),
                             (is2_start_date_text, 'ICESat-2 era begins')):
    # y position is in data units of the top panel (km^2)
    ax[0].text(text_date,
        -800, era_label, horizontalalignment='left', verticalalignment='top', color='k')

# Add legends
# NOTE(review): the first and third labels both report len(no_evolving_outlines_lakes),
# although the plotted series are read from the "all lakes" and "evolving lakes"
# directories respectively - confirm the intended counts.
x0 = 0.44
legend = ax[0].legend([stationary_all_lakes_line,
                       tuple(lines),
                       evolving_union_line],
    [f'stationary outlines (n={len(no_evolving_outlines_lakes)})',
     f'evolving outlines (n={len(evolving_outlines_lakes)})',
     f'updated stationary outline (n={len(no_evolving_outlines_lakes)})'],
    handlelength=3, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
    loc='upper center', bbox_to_anchor=(x0, 1))

legend = ax[2].legend([bias,
                       bias2],
                      ['bias (evolving − prior stationary)',
                       'bias (evolving − updated stationary)'],
    handlelength=3, handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
    loc='upper center', bbox_to_anchor=(x0, 1))

for row in [0, 1]:
    # Remove x tick labels
    ax[row].set_xticklabels([])

    # Format the x-axis to display years only
    ax[row].xaxis.set_major_locator(mdates.YearLocator(base=1))  # Major ticks every year
    ax[row].xaxis.set_minor_locator(mdates.MonthLocator(bymonth=[1,4,7,10]))  # Minor ticks every quarter
    ax[row].xaxis.set_major_formatter(mdates.DateFormatter('%Y'))  # Display major ticks as years

    # Set x-axis limits
    ax[row].set(xlim=(cyc_dates['cyc_start_datetimes'].iloc[0],
        # Set righthand x-axis limit slightly earlier to prevent tick mark displaying when there is no data point
        (cyc_dates['cyc_end_datetimes'].iloc[-1] - datetime.timedelta(days=2))))

# Set axes titles
ax[0].set_ylabel('active area [km$^2$]', size=14)
ax[1].set_ylabel('DIC export [Gg C]', size=14)
ax[2].set_ylabel('cumulative $dV$ [km$^3$]', size=14)

# Adding annotations 'a', 'b', 'c' at the top left of the subplots
ax_array = np.array(ax)  # Convert into numpy array to use .flatten() method
for panel_idx, ax_i in enumerate(ax_array.flatten()):
    # `transform=ax_i.transAxes` makes coordinates relative to the axes (0,0 is bottom left and 1,1 is top right)
    ax_i.text(0.01, 0.98, chr(ord('a') + panel_idx), transform=ax_i.transAxes, fontsize=14, va='top', ha='left')

# Save plot; savefig does not create missing directories, so make sure the
# figures folder exists first
os.makedirs(OUTPUT_DIR + '/figures', exist_ok=True)
plt.savefig(OUTPUT_DIR + '/figures/Fig3_lake_reexamination_results_continental_integration.jpg',
    dpi=300, bbox_inches='tight')

# Preview plot
plt.show()

In [None]:
# Close all open matplotlib figures
plt.close('all')

## Fig. S2

In [None]:
# Set the global matplotlib font family and font sizes (points)
plt.rcParams.update({
    'font.family': 'DejaVu Sans',
    'font.size': 9,
    'axes.labelsize': 10,
    'axes.titlesize': 9,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9,
    'legend.fontsize': 9,
})

In [None]:
# Select lakes to be included in plot; desired_order also fixes the row order (top to bottom)
desired_order = ['Institute_E1', 'Mac2', 'Site_BC']
selected_lakes = reexamined_stationary_outlines_gdf[reexamined_stationary_outlines_gdf['name'].isin(desired_order)]
stationary_outlines_gdf_filtered = gpd.GeoDataFrame(pd.concat([selected_lakes[selected_lakes['name'] == name] for name in desired_order]))

# Create a grid of plots: one row per lake, outlines in the left column and centroids in the right
nrows, ncols = 3, 2
fig, axs = plt.subplots(nrows=nrows, ncols=ncols, figsize=(6, 9), constrained_layout=False)

# Define colors and linestyles for legend
stationary_outline_color = 'darkturquoise'
stationary_line = plt.Line2D([], [], color=stationary_outline_color, linestyle='solid', linewidth=2)
evolving_union_line = plt.Line2D([], [], color='k', linestyle='dotted', linewidth=2)

# Set up colormap for temporal evolution: one discrete color per time step,
# normalized from the second to the last cycle start date
cmap = plt.get_cmap('plasma', len(cyc_start_datetimes[1:]))
norm = plt.Normalize(mdates.date2num(cyc_start_datetimes[1]),
                    mdates.date2num(cyc_start_datetimes[-1]))

# Share y-axis within each row but not between rows. Every row is included (the first
# row used to be skipped) and the left column is the reference, so it is not shared
# with itself.
for row in range(nrows):
    for col in range(1, ncols):
        axs[row, col].sharey(axs[row, 0])

# Process each lake sequentially: one figure row per lake, with the MOA imagery and
# stationary outline in both columns, evolving outlines in the left column and their
# centroids in the right column
for row, lake_name in enumerate(desired_order):
    print('working on {}'.format(lake_name))

    # Get the lake data for the current lake
    lake_gdf = stationary_outlines_gdf_filtered[stationary_outlines_gdf_filtered['name'] == lake_name]
    stationary_outline = lake_gdf['geometry']

    # Load evolving outlines. Check for the file explicitly: the exception raised for a
    # missing file depends on the GeoPandas I/O engine (pyogrio does not raise
    # fiona.errors.DriverError), so the except clause alone cannot be relied on.
    evolving_outlines_path = os.path.join(
        os.getcwd(), 'output/lake_outlines/evolving_outlines/{}.geojson'.format(lake_name))
    if not os.path.isfile(evolving_outlines_path):
        print(f"File for {lake_name} not found. Skipping...")
        continue
    try:
        evolving_outlines_gdf = gpd.read_file(evolving_outlines_path)
    except fiona.errors.DriverError:
        print(f"File for {lake_name} not found. Skipping...")
        continue

    # Find evolving and stationary outlines union for plotting extent
    evolving_stationary_union_gdf = gpd.GeoDataFrame(
        geometry=[lake_gdf.geometry.iloc[0].union(evolving_outlines_gdf.geometry.union_all())],
        crs=lake_gdf.crs)
    x_min, y_min, x_max, y_max = evolving_stationary_union_gdf.bounds.iloc[0]

    # Make plots uniform size and square
    x_mid = (x_min + x_max) / 2
    y_mid = (y_min + y_max) / 2
    x_span = x_max - x_min
    y_span = y_max - y_min
    max_span = max(x_span, y_span)

    # Update bounds to ensure square dimensions
    x_min = x_mid - max_span / 2
    x_max = x_mid + max_span / 2
    y_min = y_mid - max_span / 2
    y_max = y_mid + max_span / 2

    # Add buffer around the plot
    buffer_frac = 0.35
    x_buffer = abs(x_max-x_min) * buffer_frac
    y_buffer = abs(y_max-y_min) * buffer_frac

    # Create empty lists to store centroid coordinates
    centroids_x = []
    centroids_y = []
    centroid_dates = []

    # Subset the MOA surface imagery to the buffered extent once; it is identical for both columns
    mask_x = (moa_highres_da.x >= x_min-x_buffer) & (moa_highres_da.x <= x_max+x_buffer)
    mask_y = (moa_highres_da.y >= y_min-y_buffer) & (moa_highres_da.y <= y_max+y_buffer)
    moa_highres_da_subset = moa_highres_da.where(mask_x & mask_y, drop=True)

    # Plot both outline and centroid views
    for col in [0, 1]:
        # Plot MOA surface imagery for both columns
        axs[row, col].imshow(moa_highres_da_subset[0,:,:], cmap='gray', clim=[14000, 17000],
                           extent=[x_min-x_buffer, x_max+x_buffer, y_min-y_buffer, y_max+y_buffer])

        # Plot stationary outline in both columns
        if lake_name == 'Site_BC':
            # Plot both Site_B and Site_C outlines
            for site in ['Site_B', 'Site_C']:
                site_geometry = stationary_outlines_gdf[stationary_outlines_gdf['name'] == site]['geometry']
                site_geometry.boundary.plot(
                    ax=axs[row, col],
                    color=stationary_outline_color,
                    linewidth=2
                )

                # Calculate centroid
                centroid = site_geometry.iloc[0].centroid

                # Plot centroids
                axs[row, col].scatter(centroid.x, centroid.y,
                                   c=stationary_outline_color,
                                   marker='.', s=50, linewidth=1, zorder=2)

        else:
            # Single stationary outline for the other lakes
            stationary_outlines_gdf[stationary_outlines_gdf['name'] == lake_name]['geometry'].boundary.plot(
                ax=axs[row, col],
                color=stationary_outline_color,
                linewidth=2)

    # Plot evolving outlines with colors based on date (left column only)
    lines = []
    for dt in cyc_start_datetimes[1:]:
        # Color by this time step's own date, the same mapping used for the centroids in
        # the right column. Indexing the unsliced dates with the position in the [1:]
        # slice lagged one step behind.
        step_color = cmap(norm(mdates.date2num(dt)))

        # Create line for legend
        line, = axs[row, 0].plot(1, 1, color=step_color)
        lines.append(line)

        # Plot evolving outlines for this time step
        evolving_outlines_gdf_dt_sub = evolving_outlines_gdf[evolving_outlines_gdf['mid_pt_datetime'] == dt]
        if not evolving_outlines_gdf_dt_sub.empty:
            # Plot outline in left column
            evolving_outlines_gdf_dt_sub.boundary.plot(
                ax=axs[row, 0],
                color=step_color,
                linewidth=1
            )

            # Calculate and store centroid (of the first geometry at this time step)
            centroid = evolving_outlines_gdf_dt_sub.geometry.iloc[0].centroid
            centroids_x.append(centroid.x)
            centroids_y.append(centroid.y)
            centroid_dates.append(dt)

    # Plot centroids in right column
    axs[row, 1].scatter(centroids_x, centroids_y,
                       c=[cmap(norm(mdates.date2num(dt))) for dt in centroid_dates],
                       marker='+', s=100, linewidth=1, zorder=2)

    # Set the same limits and formatting for both plots
    for col in [0, 1]:
        # Format axis ticks to show kilometers
        km_scale = 1e3
        ticks_x = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x/km_scale))
        ticks_y = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x/km_scale))
        axs[row, col].xaxis.set_major_formatter(ticks_x)
        axs[row, col].yaxis.set_major_formatter(ticks_y)

        # Set axes limits
        axs[row, col].set(xlim=(x_min-x_buffer, x_max+x_buffer),
                         ylim=(y_min-y_buffer, y_max+y_buffer))

        # Remove y-tick labels for right column only
        if col == 1:
            plt.setp(axs[row, col].get_yticklabels(), visible=False)

        # x label on the bottom row only; y label on the left column only
        axs[2, col].set_xlabel('x [km]')
        axs[row, 0].set_ylabel('y [km]')

    # Create and style inset map (only for left column)
    axIns = axs[row, 0].inset_axes([0.01, -0.01, 0.3, 0.3])
    axIns.set_aspect('equal')
    moa_2014_coastline.plot(ax=axIns, color='gray', edgecolor='k', linewidth=0.1, zorder=3)
    moa_2014_groundingline.plot(ax=axIns, color='ghostwhite', edgecolor='k', linewidth=0.1, zorder=3)
    axIns.axis('off')

    # Add location marker to inset map at the center of the plotted extent
    axIns.scatter(((x_max+x_min)/2), ((y_max+y_min)/2), marker='*',
                 linewidth=2, color='k', s=15, zorder=3)

# Add subplot labels (a, b, c, etc.) in row-major order; the letter index uses the
# actual number of columns rather than a hard-coded 2
n_label_cols = axs.shape[1]
for (i, j), panel in np.ndenumerate(axs):
    panel.text(0.02, 0.98, chr(97 + i*n_label_cols + j), transform=panel.transAxes,
               fontsize=14, va='top', ha='left')

# Set up colormap
min_date = pd.to_datetime(cyc_start_datetimes[1])
max_date = pd.to_datetime(cyc_start_datetimes[-1])
n_dates = len(cyc_start_datetimes[1:])
date_range = pd.date_range(min_date, max_date, periods=n_dates)
years = date_range.year.unique()
years = pd.to_datetime(years, format='%Y')  # January 1st of each year spanned
cmap = plt.get_cmap('plasma', n_dates)
norm = plt.Normalize(mdates.date2num(min_date), mdates.date2num(max_date))
m = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
m.set_array(np.linspace(mdates.date2num(min_date), mdates.date2num(max_date), n_dates))

# Add colorbar
cax = fig.add_axes([0.15, 0.08, 0.73, 0.01])  # [left, bottom, width, height]
cbar = fig.colorbar(m, cax=cax, orientation='horizontal')
cbar.set_label('year', size=12, labelpad=5)

# Set ticks for every year after the first, but labels only for even years
tick_locations = [mdates.date2num(date) for date in years[1:]]
tick_labels = [date.strftime('%Y') if date.year % 2 == 0 else '' for date in years[1:]]
cbar.set_ticks(tick_locations)
cbar.set_ticklabels(tick_labels)

# Add minor ticks for quarters
cbar.ax.xaxis.set_minor_locator(mdates.MonthLocator(bymonth=[1,4,7,10]))  # Quarter intervals only

# Adjust the layout to make room for the colorbar
plt.subplots_adjust(
    top=0.95,      # Top edge of the subplot grid
    bottom=0.15,   # Bottom edge of the subplot grid; leaves room for the colorbar
    wspace=0.01,   # Horizontal spacing between subplots
    hspace=0.2     # Vertical spacing between subplots
)

# Save plot; savefig does not create missing directories, so make sure the
# figures folder exists first
os.makedirs(OUTPUT_DIR + '/figures', exist_ok=True)
plt.savefig(OUTPUT_DIR + '/figures/FigS2_lake_migration.jpg',
    dpi=300, bbox_inches='tight')

# Preview plot
plt.show()

In [None]:
# Close all open matplotlib figures
plt.close('all')

In [None]:
# View citation for each of plotted lakes for figure caption
selected_lakes

## Fig. S3

In [None]:
# Explore different lake groups to find one to highlight in publication

# Example lake groups: a list of (group label, list of lake names) tuples
# NOTE(review): lake names presumably need to match the 'name' values of the lake
# outlines used by plot_lake_groups_dV (defined outside this cell) - confirm
lake_groups = [
    ('Bindschadler', ['Bindschadler_1', 'Bindschadler_2', 'Bindschadler_3', 'Bindschadler_4', 'Bindschadler_5', 'Bindschadler_6']),
    ('Byrd', ['Byrd_1', 'Byrd_2', 'Byrd_s1', 'Byrd_s2', 'Byrd_s3', 'Byrd_s4', 'Byrd_s5', 'Byrd_s6', 'Byrd_s7', 'Byrd_s8',
     'Byrd_s9', 'Byrd_s10', 'Byrd_s11', 'Byrd_s12', 'Byrd_s13', 'Byrd_s14', 'Byrd_s15']),
    ('Cook', ['Cook_E1', 'Cook_E2']),
    ('David', ['David_1', 'David_s1', 'David_s2', 'David_s3', 'David_s4', 'David_s5']),
    ('EAP', ['EAP_1', 'EAP_2', 'EAP_3', 'EAP_4', 'EAP_5', 'EAP_6', 'EAP_7', 'EAP_8', 'EAP_9']),
    ('Foundation_N', ['Foundation_N1', 'Foundation_N2', 'Foundation_N3']),
    ('Foundation', ['Foundation_1', 'Foundation_2', 'Foundation_3', 'Foundation_4', 'Foundation_5', 'Foundation_6', 'Foundation_7', 'Foundation_8',
     'Foundation_9', 'Foundation_10', 'Foundation_11', 'Foundation_12', 'Foundation_13', 'Foundation_14', 'Foundation_15', 'Foundation_16']),
    ('Institute', ['Institute_E1', 'Institute_E2', 'Institute_W1', 'Institute_W2']),
    ('KambTrunk', ['KT3', 'KT2', 'KT1']),
    ('Kamb', ['Kamb_1', 'Kamb_2', 'Kamb_3', 'Kamb_4', 'Kamb_5', 'Kamb_6', 'Kamb_7', 'Kamb_8', 'Kamb_9', 'Kamb_10', 'Kamb_11', 'Kamb_12']),
    ('MacAyeal', ['Mac1', 'Mac2', 'Mac3', 'Mac4', 'Mac5', 'Mac6']),
    ('Nimrod', ['Nimrod_1', 'Nimrod_2']),
    ('Ninnis', ['Ninnis_1', 'Ninnis_2']),
    ('Recovery', ['Rec1', 'Rec2', 'Rec3', 'Rec4', 'Rec5', 'Rec6', 'Rec7', 'Rec8', 'Rec9']),
    ('Slessor', ['Slessor_1', 'Slessor_23', 'Slessor_4', 'Slessor_5', 'Slessor_6', 'Slessor_7']),
    ('Thwaites', ['Thw_70', 'Thw_124', 'Thw_142', 'Thw_170']),
    ('Totten', ['Totten_1', 'Totten_2']),
    ('Wilkes', ['Wilkes_1', 'Wilkes_2']),
    ('Mercer_Whillans', ['EngelhardtSubglacialLake', 'UpperEngelhardtSubglacialLake', 'Lake12', 'Lake10', 'Lake78', 'WhillansSubglacialLake', 
     'LowerMercerSubglacialLake', 'MercerSubglacialLake', 'LowerConwaySubglacialLake', 'ConwaySubglacialLake', 'UpperSubglacialLakeConway', 
    'Whillans_6', 'Whillans_7', 'Whillans_8'])
]

# Call the function (plot_lake_groups_dV is defined outside this cell) on all groups
plot_lake_groups_dV(lake_groups)

In [None]:
# Typography defaults for the figure below, set one rcParams group at a time
matplotlib.rc('font', family='DejaVu Sans', size=9)
matplotlib.rc('axes', labelsize=10, titlesize=9)
matplotlib.rc('xtick', labelsize=9)
matplotlib.rc('ytick', labelsize=9)
matplotlib.rc('legend', fontsize=9)

In [None]:
# Figure S3

# Lake group highlighted in this figure: (group label, member lake names)
lake_groups = [('Thwaites', ['Thw_70', 'Thw_124', 'Thw_142', 'Thw_170'])]


def _read_geom_calcs(directory, name):
    """Read `<directory>/<name>.csv` and parse its 'mid_pt_datetime' column to datetimes."""
    table = pd.read_csv(os.path.join(directory, f'{name}.csv'))
    table['mid_pt_datetime'] = pd.to_datetime(table['mid_pt_datetime'])
    return table


# Per-lake inputs, appended in lockstep so index i refers to the same lake in
# every list; only lakes with all inputs available are kept
valid_lakes, lake_gdfs, evolving_outlines_gdfs = [], [], []
evolving_geom_calcs_dfs, stationary_geom_calcs_dfs, evolving_union_geom_calcs_dfs = [], [], []

for lake_name in lake_groups[0][1]:  # member names of the first (only) group
    print(f"Processing data for {lake_name}...")

    # Stationary outline for this lake
    lake_gdf = stationary_outlines_gdf[stationary_outlines_gdf['name'] == lake_name]
    if lake_gdf.empty:
        print(f"Skipping {lake_name}: not found in stationary outlines")
        continue

    # Evolving outlines; a lake without a readable file is skipped
    evolving_outlines_path = os.path.join(
        'output/lake_outlines/evolving_outlines', f'{lake_name}.geojson')
    try:
        evolving_outlines_gdf = gpd.read_file(evolving_outlines_path)
    except Exception as e:
        print(f"Skipping {lake_name}: no evolving outlines file - {str(e)}")
        continue

    # Geometric calculations for the evolving outlines, their union, and the
    # stationary outline; any failure skips the lake
    try:
        evolving_geom_calcs_df = _read_geom_calcs(
            'output/geometric_calcs/evolving_outlines_geom_calc/', lake_name)
        evolving_union_geom_calcs_df = _read_geom_calcs(
            'output/geometric_calcs/stationary_outline_geom_calc/evolving_union_at_evolving_lakes/',
            lake_name)
        stationary_geom_calcs_df = _read_geom_calcs(
            'output/geometric_calcs/stationary_outline_geom_calc/stationary_outlines_at_all_lakes/',
            lake_name)
    except Exception as e:
        print(f"Skipping {lake_name}: error loading geometric calculations - {str(e)}")
        continue

    print(f"Valid data found for {lake_name}")
    valid_lakes.append(lake_name)
    lake_gdfs.append(lake_gdf)
    evolving_outlines_gdfs.append(evolving_outlines_gdf)
    evolving_geom_calcs_dfs.append(evolving_geom_calcs_df)
    stationary_geom_calcs_dfs.append(stationary_geom_calcs_df)
    evolving_union_geom_calcs_dfs.append(evolving_union_geom_calcs_df)

# Nothing to plot if every lake was skipped
if not valid_lakes:
    raise ValueError("No valid lakes found to process")
    
# Create figure on a 3x2 grid; the spatial overview map takes the top-left cell
fig = plt.figure(figsize=(10, 15))
gs = fig.add_gridspec(3, 2)
ax_main = fig.add_subplot(gs[0, 0])

# Get combined extent for all valid lakes
x_mins, x_maxs, y_mins, y_maxs = [], [], [], []

for lake_gdf, evolving_outlines_gdf in zip(lake_gdfs, evolving_outlines_gdfs):
    # Union of the stationary outline with all evolving outlines, so the
    # extent covers everything drawn for this lake
    evolving_stationary_union_gdf = gpd.GeoDataFrame(
        geometry=[lake_gdf.geometry.iloc[0].union(evolving_outlines_gdf.geometry.union_all())],
        crs=lake_gdf.crs)

    # Pad the bounding box by 5% of its longer side
    x_min, y_min, x_max, y_max = evolving_stationary_union_gdf['geometry'].bounds.iloc[0]
    buffer_dist = max(x_max - x_min, y_max - y_min) * 0.05
    x_mins.append(x_min - buffer_dist)
    x_maxs.append(x_max + buffer_dist)
    y_mins.append(y_min - buffer_dist)
    y_maxs.append(y_max + buffer_dist)

# Set plot extent to the envelope of all padded lake extents
x_min, x_max = min(x_mins), max(x_maxs)
y_min, y_max = min(y_mins), max(y_maxs)

# Plot MOA background cropped to the plot extent
mask_x = (moa_highres_da.x >= x_min) & (moa_highres_da.x <= x_max)
mask_y = (moa_highres_da.y >= y_min) & (moa_highres_da.y <= y_max)
moa_subset = moa_highres_da.where(mask_x & mask_y, drop=True)
ax_main.imshow(moa_subset[0,:,:], cmap='gray', clim=[14000, 17000],
              extent=[x_min, x_max, y_min, y_max])

# Plot stationary outlines
stationary_color = 'darkturquoise'
for lake_gdf in lake_gdfs:
    lake_gdf.boundary.plot(ax=ax_main, color=stationary_color, linewidth=2)

# Define custom offsets and display names for each lake
# Format: 'lake_name': {'offset': (x_offset, y_offset), 'display': 'custom_name'}
# Offsets are added to the outline centroid coordinates to position the label
label_configs = {
    'Thw_70': {
        'offset': (-8e3, 7e3),
        'display': 'Thw$_{70}$'
    },
    'Thw_124': {
        'offset': (-20e3, 20e3),
        'display': 'Thw$_{124}$'
    },
    'Thw_142': {
        'offset': (-22e3, 22e3),
        'display': 'Thw$_{142}$'
    },
    'Thw_170': {
        'offset': (-16e3, 11e3),
        'display': 'Thw$_{170}$'
    }
}

# Add lake labels
for lake_gdf in lake_gdfs:
    # Label is anchored at the stationary outline's centroid plus its offset
    centroid = lake_gdf.geometry.iloc[0].centroid
    lake_name = lake_gdf['name'].iloc[0]
    # Lakes without a custom entry are labelled with their raw name at the centroid
    config = label_configs.get(lake_name, {'offset': (0, 0), 'display': lake_name})
    x_offset, y_offset = config['offset']
    display_name = config['display']
    ax_main.annotate(display_name,
                    xy=(centroid.x + x_offset, centroid.y + y_offset),
                    color='white',
                    fontweight='bold',
                    ha='center', va='center',
                    path_effects=[PathEffects.withStroke(linewidth=3, foreground='black')])

# Color normalization shared by the outlines, the colorbar and the dV panels:
# spans the second through the last cycle start date
min_date = pd.to_datetime(cyc_start_datetimes[1])
max_date = pd.to_datetime(cyc_start_datetimes[-1])
norm = plt.Normalize(mdates.date2num(min_date), mdates.date2num(max_date))

# Plot evolving outlines with time-based coloring (continuous plasma colormap)
outline_cmap = plt.get_cmap('plasma')
for evolving_outlines_gdf in evolving_outlines_gdfs:
    for _, row in evolving_outlines_gdf.iterrows():
        color = outline_cmap(norm(mdates.date2num(pd.to_datetime(row['mid_pt_datetime']))))
        gpd.GeoSeries(row['geometry']).boundary.plot(
            ax=ax_main, color=color, linewidth=1)

# Plot inset map. Built once, outside the per-lake loop: it does not depend on
# any single lake, and building it per lake stacked identical inset axes on
# top of each other.
axIns = ax_main.inset_axes([0.7, 0.02, 0.3, 0.3]) # [left, bottom, width, height] (fractional axes coordinates)
axIns.set_aspect('equal')
moa_2014_coastline.plot(ax=axIns, color='gray', edgecolor='k', linewidth=0.1)
moa_2014_groundingline.plot(ax=axIns, color='ghostwhite', edgecolor='k', linewidth=0.1)
axIns.axis('off')
# Plot star at the centre of the overview extent to indicate location
axIns.scatter(((x_max+x_min)/2), ((y_max+y_min)/2), marker='*',
    linewidth=1, color='k', s=75)

# Plot evolving outlines union
for lake_gdf in lake_gdfs:
    lake_name = lake_gdf['name'].iloc[0]
    evolving_union_gdf = evolving_outlines_union_gdf[evolving_outlines_union_gdf['name'] == lake_name]
    evolving_union_gdf.boundary.plot(ax=ax_main, color='k', linestyle='dotted', linewidth=2)

# Format overview axes: tick values are divided by 1e3 and labelled as km
km_scale = 1e3
ticks_x = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x/km_scale))
ticks_y = ticker.FuncFormatter(lambda x, pos: '{0:g}'.format(x/km_scale))
ax_main.xaxis.set_major_formatter(ticks_x)
ax_main.yaxis.set_major_formatter(ticks_y)
ax_main.set_xlabel('x [km]')
ax_main.set_ylabel('y [km]')

# Set up the discrete colormap (one color per cycle after the first) used by
# the colorbar and by the dV panels
n_dates = len(cyc_start_datetimes[1:])
cmap = plt.get_cmap('plasma', n_dates)
m = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
m.set_array(np.linspace(mdates.date2num(min_date), mdates.date2num(max_date), n_dates))

# Add colorbar in a thin axes appended below the overview map
divider = make_axes_locatable(ax_main)
cax = divider.append_axes('bottom', size='2.5%', pad=0.5)
cbar = fig.colorbar(m, cax=cax, orientation='horizontal')

# Set colorbar ticks
cbar.ax.xaxis.set_major_formatter(year_interval_formatter())
cbar.ax.xaxis.set_major_locator(mdates.YearLocator())  # Every year
cbar.ax.xaxis.set_minor_locator(mdates.MonthLocator([1, 4, 7, 10]))  # Quarter year ticks
cbar.set_label('year', size=12)

# Get y axis limits shared by all volume plots (get_overall_y_limits is
# defined elsewhere in this notebook), then pad them by 5% of the range so
# extreme points are not drawn on the frame. Named dV_* so the map extent's
# y_min/y_max are not overwritten.
dV_min, dV_max = get_overall_y_limits(evolving_geom_calcs_dfs,
                                      stationary_geom_calcs_dfs,
                                      evolving_union_geom_calcs_dfs)
buffer = (dV_max - dV_min) * 0.05
y_limits = (dV_min - buffer, dV_max + buffer)

# Common x axis limits: first cycle start to two days before the last cycle end
x_limits = (cyc_dates['cyc_start_datetimes'].iloc[0],
            cyc_dates['cyc_end_datetimes'].iloc[-1] - datetime.timedelta(days=2))

# Create axes for all plots (excluding the overview plot position); the last
# position is reserved for the summed panel
plot_positions = [(0,1), (1,0), (1,1), (2,0), (2,1)]  # Row, Col positions for dV plots
axes = [fig.add_subplot(gs[pos]) for pos in plot_positions]

if len(valid_lakes) > len(axes) - 1:
    raise ValueError(
        f"{len(valid_lakes)} lakes cannot fit in {len(axes) - 1} per-lake panels")


def _plot_cumulative_dV(ax, dates, evolving_dV, stationary_dV, union_dV):
    """Draw the cumulative dV curves of one panel.

    dates are matplotlib date numbers (one per time step); the three dV inputs
    are per-time-step volume changes taken from the '(m^3)' columns and are
    converted to km^3 before being accumulated. Uses the notebook-level
    `cmap`, `norm` and `stationary_color`.
    """
    # Zero reference line
    ax.axhline(0, color='k', linestyle='--')

    # Stationary outline
    stationary_cumsum = np.cumsum(np.divide(stationary_dV, 1e9))
    ax.plot(dates, stationary_cumsum, color=stationary_color, label='Stationary', linewidth=2)
    ax.scatter(dates, stationary_cumsum, color=stationary_color, s=5)

    # Evolving outlines: one segment per pair of consecutive time steps,
    # colored by date (multi-colored line)
    evolving_cumsum = np.cumsum(np.divide(evolving_dV, 1e9))
    points = np.array([dates, evolving_cumsum]).T.reshape(-1, 1, 2)
    segments = np.concatenate([points[:-1], points[1:]], axis=1)
    lc = LineCollection(segments, cmap=cmap, norm=norm, linestyle='solid')
    lc.set_array(dates)
    lc.set_linewidth(2)
    ax.add_collection(lc)
    ax.scatter(dates, evolving_cumsum, c=dates, cmap=cmap, norm=norm, s=9)

    # Evolving outlines union
    union_cumsum = np.cumsum(np.divide(union_dV, 1e9))
    ax.plot(dates, union_cumsum, color='k', linestyle='dotted', label='Union', linewidth=2)
    ax.scatter(dates, union_cumsum, color='k', s=5)

    # Bias (evolving minus stationary)
    bias = np.cumsum(np.divide(evolving_dV - stationary_dV, 1e9))
    ax.plot(dates, bias, color='r', label='Bias', linewidth=2)
    ax.scatter(dates, bias, color='r', s=5)


def _format_dV_axes(ax, position, title):
    """Apply shared ticks, limits, labels and title to the dV panel at grid `position`.

    Uses the notebook-level `x_limits` and `y_limits`.
    """
    row, col = position

    ax.xaxis.set_major_formatter(year_interval_formatter())
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_minor_locator(mdates.MonthLocator([1, 4, 7, 10]))  # Quarter year ticks (Jan, Apr, Jul, Oct)

    ax.set_xlim(*x_limits)
    ax.set_ylim(y_limits)

    # y label on the first dV panel (0,1) and on the left column of rows 1 and 2;
    # the remaining panels share those limits and drop their tick labels
    if (col == 1 and row == 0) or (col == 0 and row in (1, 2)):
        ax.set_ylabel('cumulative $dV$ [km$^3$]', fontsize=12)
    else:
        ax.set_yticklabels([])

    # x label only on the bottom row
    if row == 2:
        ax.set_xlabel('Year', fontsize=12)
    else:
        ax.set_xticklabels([])
        ax.set_xlabel('')

    ax.set_title(title, fontsize=16)


# Plot individual lakes
for idx, (lake_name, evolving_df, stationary_df, union_df) in enumerate(zip(
        valid_lakes, evolving_geom_calcs_dfs, stationary_geom_calcs_dfs, evolving_union_geom_calcs_dfs)):
    ax = axes[idx]
    dates = mdates.date2num(evolving_df['mid_pt_datetime'])

    _plot_cumulative_dV(ax, dates,
                        evolving_df['evolving_outlines_dV_corr (m^3)'],
                        stationary_df['stationary_outline_dV_corr (m^3)'],
                        union_df['stationary_outline_dV_corr (m^3)'])

    # Add legend only to the first plot
    if idx == 0:
        # Proxy handles (never added to the axes). The tuple of per-date lines
        # is rendered side by side by HandlerTuple, giving a multi-colored
        # handle that uses the same colors as the plotted evolving line.
        evolving_lines = tuple(
            plt.Line2D([], [], color=cmap(norm(dt)), linewidth=2) for dt in dates)
        stationary_line = plt.Line2D([], [], color=stationary_color, linestyle='solid', linewidth=2)
        evolving_union_line = plt.Line2D([], [], color='k', linestyle='dotted', linewidth=2)
        bias_line = plt.Line2D([], [], color='red', linestyle='solid', linewidth=2)
        legend = ax.legend(
            [evolving_lines,
             evolving_union_line,
             stationary_line,
             bias_line],
            ['evolving outlines',
             'evolving outlines union',
             'stationary outline',
             'bias (evolving − stationary)'],
            handlelength=3,
            handler_map={tuple: HandlerTuple(ndivide=None, pad=0)},
            fontsize=12,
            loc='lower center'
        )

    # Title uses the display name from label_configs, falling back to lake_name
    display_name = label_configs.get(lake_name, {}).get('display', lake_name)
    _format_dV_axes(ax, plot_positions[idx], display_name)

# Plot combined data in the last position
last_ax = axes[-1]

# Combine all dataframes by summing values for each timestamp
combined_evolving = pd.concat(evolving_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()
combined_stationary = pd.concat(stationary_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()
combined_union = pd.concat(evolving_union_geom_calcs_dfs).groupby('mid_pt_datetime').sum().reset_index()

dates = mdates.date2num(combined_evolving['mid_pt_datetime'])

_plot_cumulative_dV(last_ax, dates,
                    combined_evolving['evolving_outlines_dV_corr (m^3)'],
                    combined_stationary['stationary_outline_dV_corr (m^3)'],
                    combined_union['stationary_outline_dV_corr (m^3)'])
_format_dV_axes(last_ax, plot_positions[-1], 'Summed')

# Label every panel with a lowercase letter ('a'-'f'): the spatial overview
# first, then the volume plots in creation order
char_index = ord('a')
for ax in (ax_main, *axes):
    # The overview label sits slightly further from the left edge
    x_pos = 0.02 if ax is ax_main else 0.01
    ax.text(x_pos, 0.98, chr(char_index), transform=ax.transAxes,
            fontsize=14, va='top', ha='left')
    char_index += 1

# Adjust layout to prevent overlapping
plt.tight_layout()

# Save plot (figures are closed in the following cell)
plt.savefig(
    OUTPUT_DIR + '/figures/FigS3_Thw_lakes_dV.jpg',
    dpi=300,
    bbox_inches='tight',
)

# Preview plot
plt.show()

In [None]:
# Close every open figure now that Fig. S3 has been saved and previewed
plt.close('all')