Notebook pre-processes altimetry data sets to construct a multi-mission CryoSat-2 to ICESat-2 (2010 to present) time series.

Written 2023-05-10 by W. Sauthoff (sauthoff@mines.edu)

# Set up computing environment

In [None]:
# Install dependency not pre-installed
%pip install openpyxl --quiet

In [None]:
# Install dependency not pre-installed
%pip install ipympl --quiet

In [None]:
# Import libraries
import datetime
import earthaccess
import geopandas as gpd
import glob
from IPython.display import clear_output
import matplotlib.lines as mlines
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np
import os
import pandas as pd
from pyproj import CRS, Transformer
from scipy.ndimage import zoom
import xarray as xr
from shapely.geometry import MultiPolygon, Point, Polygon

# Magic functions
%matplotlib widget

# Define data and output directories dependent on home environment
# Replace with your directory file paths
if os.getenv('HOME') == '/home/jovyan':
    DATA_DIR = '/home/jovyan/data'
    # Notebook creates many output plots for QC'ing data that were stored outside the GitHub repo
    OUTPUT_DIR = '/home/jovyan/1_evolving_lakes/output/0_preprocess_data'
else:
    # Outside the default JupyterHub home, fall back to the DATA_DIR / OUTPUT_DIR
    # environment variables (or relative directories) so that later cells do not
    # fail with a NameError when they reference these constants
    DATA_DIR = os.getenv('DATA_DIR', 'data')
    OUTPUT_DIR = os.getenv('OUTPUT_DIR', os.path.join('output', '0_preprocess_data'))
    print(f'HOME is not /home/jovyan; using DATA_DIR={DATA_DIR!r} and OUTPUT_DIR={OUTPUT_DIR!r}')

# Define utility functions
def combine_quadrants_by_coords(quadrant_dir, ds_name='dataset', x_dim='x', y_dim='y'):
    """
    Combine quadrant NetCDF files into a full dataset using xarray.combine_by_coords.
    Overlapping x=0 and/or y=0 rows/columns are dropped depending on quadrant index.

    Quadrants are identified by their position in the alphabetically sorted list
    of files matching '<ds_name>_A*.nc'; only the first four matches are used.

    Parameters:
    -----------
    quadrant_dir : str
        Directory containing the quadrant NetCDF files.
    ds_name : str
        Base name prefix of the quadrant files (e.g., 'CryoSat2_SARIn_delta_h').
    x_dim : str
        Name of the x dimension.
    y_dim : str
        Name of the y dimension.

    Returns:
    --------
    xarray.Dataset
        Combined dataset with overlaps removed.

    Raises:
    -------
    FileNotFoundError
        If no file in quadrant_dir matches '<ds_name>_A*.nc'.
    """

    # Find all quadrant files matching the pattern
    pattern = os.path.join(quadrant_dir, f'{ds_name}_A*.nc')
    files = sorted(glob.glob(pattern))
    if not files:
        # Fail early instead of silently returning an empty dataset
        raise FileNotFoundError(f'No quadrant files found matching {pattern}')

    # Coordinate labels to drop from each quadrant, in sorted file order
    overlap_labels = [
        {},                    # A1: keep all
        {x_dim: 0},            # A2: drop x=0
        {y_dim: 0},            # A3: drop y=0
        {x_dim: 0, y_dim: 0},  # A4: drop x=0 and y=0
    ]

    # zip stops after the fourth quadrant, so any extra matches are never opened
    datasets = []
    for file, labels in zip(files, overlap_labels):
        ds = xr.open_dataset(file)
        if labels:
            # errors='ignore' tolerates quadrants that do not contain the 0 label
            ds = ds.drop_sel(labels, errors='ignore')
        datasets.append(ds)

    # Merge all together based on coordinates
    return xr.combine_by_coords(datasets)


# Lazily-built EPSG:4326 -> EPSG:3031 transformer, reused across ll2ps calls
_LL_TO_PS_TRANSFORMER = None


def ll2ps(lon, lat):
    """
    Transform coordinates from geodetic coordinates (lon, lat)
    to Antarctic Polar Stereograph coordinates (x, y)
    x, y = ll2ps(lon, lat)

    Parameters:
    -----------
    lon : float
        Longitude in degrees, between -180 and 180.
    lat : float
        Latitude in degrees, between -90 and 90.

    Returns:
    --------
    tuple
        (x, y) in EPSG:3031.

    Raises:
    -------
    ValueError
        If the coordinates are out of range or the transformation fails.
    """
    global _LL_TO_PS_TRANSFORMER

    if not (-180 <= lon <= 180 and -90 <= lat <= 90):
        raise ValueError("Invalid coordinates: longitude must be between -180 and 180, latitude between -90 and 90")

    try:
        # This function is called once per vertex, so build the transformer only once
        if _LL_TO_PS_TRANSFORMER is None:
            _LL_TO_PS_TRANSFORMER = Transformer.from_crs(
                CRS("EPSG:4326"), CRS("EPSG:3031"), always_xy=True
            )
        x, y = _LL_TO_PS_TRANSFORMER.transform(lon, lat)
        return x, y
    except Exception as e:
        # Keep the original exception chained for debugging
        raise ValueError(f"Error transforming coordinates: {e}") from e

def _transform_polygon(polygon):
    """Reproject one lon/lat Polygon (exterior and interior rings) with ll2ps."""
    exterior = [ll2ps(lon, lat) for lon, lat in zip(*polygon.exterior.coords.xy)]
    # Carry holes over as well; rebuilding from the exterior alone would fill them in
    interiors = [
        [ll2ps(lon, lat) for lon, lat in zip(*ring.coords.xy)]
        for ring in polygon.interiors
    ]
    return Polygon(exterior, interiors)


def transform_geometry(geometry):
    """
    Transform the geometry of a GeoDataFrame row from lon/lat to polar stereographic.
    Handles Points, Polygons, and MultiPolygons.

    Only x/y are transformed; any z value on the input is not carried over.

    Raises:
    -------
    ValueError
        If the geometry type is not Point, Polygon or MultiPolygon.
    """
    geom_type = geometry.geom_type
    if geom_type == 'Polygon':
        return _transform_polygon(geometry)
    if geom_type == 'Point':
        # Use .x/.y rather than unpacking coords so a 3D point does not break ll2ps
        x, y = ll2ps(geometry.x, geometry.y)
        return Point(x, y)
    if geom_type == 'MultiPolygon':
        return MultiPolygon([_transform_polygon(polygon) for polygon in geometry.geoms])
    # Add support for other geometry types as needed
    raise ValueError(f"Unsupported geometry type: {geometry.geom_type}")

# Import datasets

## Import previously identified subglacial lake stationary outlines

In [None]:
# Import stationary subglacial lake outlines
stationary_lakes_gdf = gpd.read_file('output/lake_outlines/stationary_outlines/stationary_outlines_gdf.geojson')

## Import CryoSat-2 SARIn gridded dh data

In [None]:
# Import Smith and Sauthoff, 2025 (CryoSat-2 SARIn Height Change and Reference DEM for Antarctica)
# https://doi.org/10.5281/zenodo.14963550
CryoSat2_SARIn = combine_quadrants_by_coords(DATA_DIR, ds_name='CryoSat2_SARIn_delta_h')
CryoSat2_SARIn

In [None]:
# View time data variable
CryoSat2_SARIn['time']

In [None]:
# Remove time slices that occur during the ICESat-2 era that will not be used 
# to conserve memory when loaded for data analysis

# end_date includes one quarter of overlapping data with ICESat-2 time series
# to allow for cyc-to-cyc differencing to remove datum from delta_h to create cycle-to-cycle dh
end_date = '2019-01-01T00:00:00.000000000'

CryoSat2_SARIn = CryoSat2_SARIn.sel(time=slice(None, end_date))

# Preview temporally subset data set's time variable
CryoSat2_SARIn['time']

## Import the ATL15 gridded dh data

In [None]:
# Find ICESat-2 ATL15 v004 data granules
# NOTE(review): the bounding box below passes lower_left_lon=180 and upper_right_lon=-180,
# which looks inverted relative to the documented (west, south, east, north) order;
# (-180, -90, 180, -60) may have been intended. Confirm before changing, because the
# hard-coded granule index used later depends on the order of these search results.
results = earthaccess.search_data(
    doi='10.5067/ATLAS/ATL15.004',
    bounding_box=(180, -90, -180, -60),  # (lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)
    cloud_hosted=True,
)

In [None]:
# Open data granules as s3 files to stream
files = earthaccess.open(results)
files

In [None]:
# After viewing files, index the files you wish to open
print(files[11])

In [None]:
# Open the 'delta_h' group of the selected granule; the granules are quadrants in
# polar stereographic coordinates around the Geographic South Pole
# NOTE(review): index 11 is hard-coded from the file listing printed above and depends
# on the order of the search results - confirm it still selects the intended granule
ATL15_A1 = xr.open_dataset(files[11], group='delta_h')

In [None]:
# Add datasets attributes
ATL15_A1.attrs['identifier_product_DOI'] = 'doi:10.5067/ATLAS/ATL15.004'
ATL15_A1.attrs['shortName'] = 'ATL15'

# View data set
ATL15_A1

# Satellite mission cycle datetimes

In [None]:
CryoSat2_SARIn

In [None]:
ATL15_A1

In [None]:
# Store satellite cycle dates for later use

def _cycle_bounds(times, n_total, n_cycles):
    """
    Return (starts, ends) lists of cycle start and end datetimes.

    times : array of datetimes, one per time step
    n_total : total number of time steps in the data set
    n_cycles : number of leading time steps to emit a cycle for
    """
    starts, ends = [], []
    for idx in range(n_cycles):
        start = times[idx]
        if idx < n_total - 1:
            end = times[idx + 1]
        else:
            # For last cycle, use same length as previous cycle
            end = start + (times[idx] - times[idx - 1])
        starts.append(start)
        ends.append(end)
    return starts, ends


# Define data sets being used
dataset1 = CryoSat2_SARIn
dataset2 = ATL15_A1

# Create empty lists to store data
cyc_start_datetimes = []
cyc_end_datetimes = []
dataset = []

# CryoSat-2 data
if dataset1.attrs['identifier_product_DOI'] == 'doi:10.5281/zenodo.14963550':
    n_steps = len(dataset1.delta_h)
    # Less one time step that is used for differencing to get dh values for the last CS2 time step
    starts, ends = _cycle_bounds(dataset1.time.values, n_steps, n_steps - 1)
    cyc_start_datetimes += starts
    cyc_end_datetimes += ends
    dataset += ['CryoSat2_SARIn'] * len(starts)

# ICESat-2 data
if dataset2.attrs['identifier_product_DOI'] == 'doi:10.5067/ATLAS/ATL15.004':
    n_steps = len(dataset2.delta_h)
    starts, ends = _cycle_bounds(dataset2.time.values, n_steps, n_steps)
    cyc_start_datetimes += starts
    cyc_end_datetimes += ends
    dataset += ['ICESat2_ATL15'] * len(starts)

# Generate list of height change (dh) mid-point datetimes
# Each mid-point lies halfway between the start of cycle i and the end of cycle i+1
mid_pt_datetimes = [
    start + 0.5 * (next_end - start)
    for start, next_end in zip(cyc_start_datetimes, cyc_end_datetimes[1:])
]

# Add a NaT for the last entry to keep lengths aligned
mid_pt_datetimes.append(np.datetime64('NaT'))
mid_pt_datetimes = np.array(mid_pt_datetimes)

# Concatenate list into pandas dataframe
cyc_dates = pd.DataFrame({'cyc_start_datetimes': cyc_start_datetimes,
                          'cyc_end_datetimes': cyc_end_datetimes,
                          'mid_pt_datetimes': mid_pt_datetimes,
                          'dataset': dataset})
cyc_dates

In [None]:
# Store cycle dates list as csv for future use
cyc_dates.to_csv('output/cycle_dates.csv', index=False)

In [None]:
# Import cyc_dates to ensure writing worked properly 
cyc_dates = pd.read_csv('output/cycle_dates.csv', parse_dates=['cyc_start_datetimes', 'cyc_end_datetimes', 'mid_pt_datetimes'])

# View dates
cyc_dates.head()

# CryoSat-2 SARIn mode mask

In [None]:
# CryoSat-2 Geographical Mode Mask
# https://earth.esa.int/eogateway/instruments/siral/geographical-mode-mask
# Downloaded kml files of each version during the pre-ICESat-2 era of the CryoSat-2 mission (version 3.1 to 3.9)
# and converted to geojson files using terminal commands, e.g.:
# !ogr2ogr -f GeoJSON Cryosat2_mode_mask_v3.1.geojson Cryosat2_mode_mask_v3.1.kml

In [None]:
# Collect one GeoDataFrame per mode-mask version, keyed as 'gdf_3.<version>'
gdfs = {}
mask_dir = 'input/CryoSat2_mode_masks'

# Versions 3.1 through 3.9 only, because they temporally cover the pre-ICESat-2 era
for version in range(1, 10):
    gdfs[f'gdf_3.{version}'] = gpd.read_file(f'{mask_dir}/Cryosat2_mode_mask_v3.{version}.geojson')

# Preview the first GeoDataFrame to confirm the files were read correctly
gdfs['gdf_3.1'].head()

In [None]:
# Mode-mask descriptions to keep
descrip_1 = 'LRM over Antarctica ice sheet'  # Interior Antarctica LRM limit
descrip_2 = 'SARIn in Antarctica'  # Coastal Antarctic SARIn mode limit

# Skip the work if the first GeoDataFrame is already in EPSG:3031
if gdfs['gdf_3.1'].crs != 'EPSG:3031':
    for key in list(gdfs):
        source_gdf = gdfs[key]

        # Keep only the rows describing the LRM and SARIn limits (explicit copy)
        keep_rows = source_gdf['description'].isin([descrip_1, descrip_2])
        projected_gdf = source_gdf.loc[keep_rows].copy()

        # Reproject every geometry vertex by vertex, then relabel the CRS to match
        projected_gdf['geometry'] = projected_gdf['geometry'].apply(transform_geometry)
        projected_gdf.set_crs("EPSG:3031", inplace=True, allow_override=True)

        # Replace the dictionary entry with the filtered, reprojected version
        gdfs[key] = projected_gdf

# Preview the first GeoDataFrame to confirm the transformation worked
gdfs['gdf_3.1'].head()

In [None]:
# Plot all versions of CryoSat-2 mode mask to see how it's changed

# Magic function
%matplotlib inline

# One outline color per mode-mask version
colors = ['red', 'green', 'blue', 'orange', 'purple', 'brown', 'pink', 'gray', 'cyan']

fig, ax = plt.subplots()

# Draw each version's outline and collect a matching legend patch
legend_patches = []
for color, (key, gdf) in zip(colors, gdfs.items()):
    gdf.plot(ax=ax, edgecolor=color, facecolor='none')
    legend_patches.append(mpatches.Patch(color=color, label=key))

# Overlay previously identified lake locations, with their own legend entry
stationary_lakes_gdf.boundary.plot(ax=ax, color='blue')
legend_patches.append(mpatches.Patch(color='blue', label='stationary lakes'))

# Attach the hand-built legend and render the figure
ax.legend(handles=legend_patches)
plt.show()

In [None]:
# The LRM mode limit migrated during the CryoSat-2 era before the ICESat-2 era (2010-2018),
# so the area of the LRM polygon can reveal when the mode mask shifted.
# Planar area is sufficient here because only relative areas are compared.
for gdf in gdfs.values():
    # The column is added in place on the GeoDataFrame already stored in the dictionary
    gdf['area'] = gdf.geometry.area

In [None]:
# Use IPython's display when it can be imported; otherwise fall back to print
try:
    from IPython.display import display
except ImportError:
    interactive_environment = False
else:
    interactive_environment = True

# The description to filter by
filter_description = 'LRM over Antarctica ice sheet'

# Report the LRM polygon area stored in each mode-mask version
for key, gdf in gdfs.items():
    print(f"GeoDataFrame Key: {key} - Rows with Description: '{filter_description}'")

    filtered_gdf = gdf[gdf['description'] == filter_description]

    if filtered_gdf.empty:
        print("No rows match the specified description.")
    elif interactive_environment:
        # Nicer rendering inside a notebook
        display(filtered_gdf['area'])
    else:
        print(filtered_gdf['area'])
    print("\n")  # Add a newline for better separation

According to the geodataframes of the SARIn mode masks, version 3.6 (October 2014 according to https://earth.esa.int/eogateway/instruments/siral/geographical-mode-mask) is when the LRM mask shrank to a smaller area, adding a bit more SARIn coverage in the continental interior of Antarctica.

In [None]:
# Requires larger 64 GB server instance

# Plot CS2 counts with the SARIn mode mask

os.makedirs(OUTPUT_DIR + '/CS2_counts', exist_ok=True)

# Set x, y min, max bounds for plotting
x_min=CryoSat2_SARIn['x'].min()
x_max=CryoSat2_SARIn['x'].max()
y_min=CryoSat2_SARIn['y'].min()
y_max=CryoSat2_SARIn['y'].max()

# Get total number of time steps for status tracking
time_values = CryoSat2_SARIn['time'].values
total_time_steps = len(time_values)

# Time steps before this datetime (2014.75) get the inland buffer drawn on mask
# version 3.1; later time steps get it drawn on version 3.6
mask_switch_time = np.datetime64('2014-10-01T18:00:00.000000000')

for i, time_value in enumerate(time_values, 1):
    # Print status update
    print(f"Plotting time step {i} of {total_time_steps}: {time_value}")

    # A NaT time compares False against any datetime, so there is nothing to plot;
    # skip it rather than reach the colorbar code without an image
    if np.isnat(time_value):
        print('time_value doesn\'t match')
        continue

    fig, ax = plt.subplots(figsize=(10,10))

    # Access the part of the dataset corresponding to this time
    selected_data = CryoSat2_SARIn.sel(time=time_value)
    m = ax.imshow(selected_data['data_count'][:,:],
        extent=[x_min, x_max, y_min, y_max],
        cmap='viridis',
        vmax=1,  # Set colormapping max to 1 to highlight data-poor regions
        origin='lower')

    # Mode mask polygons do not change within versions 3.1-3.5 or within 3.6-3.9,
    # so versions 3.1 and 3.6 are drawn on every plot
    buffered_version = '3.1' if time_value < mask_switch_time else '3.6'
    for version, color in (('3.1', 'red'), ('3.6', 'blue')):
        mask_gdf = gdfs[f'gdf_{version}']
        mask_gdf.plot(ax=ax, edgecolor=color, facecolor='none', zorder=2, label=f'SARIn mode mask {version}')
        if version == buffered_version:
            # Plotting inland buffer to show how far data_counts extends inland of CS2 SARIn mode mask boundary
            # to include edge case lakes Byrd_s1, Foundation_2, and Mac3 when including CS2 data into evolving outline search
            mask_gdf.buffer(-15e3).plot(ax=ax, edgecolor='purple', facecolor='none', zorder=2,
                                        label=f'SARIn mode mask {version} - 15 km inland buffer')

    # Plot subglacial lake outlines
    stationary_lakes_gdf.plot(ax=ax, edgecolor='blue', facecolor='none', zorder=3, label='active subglacial lake')

    # Add colorbar
    divider = make_axes_locatable(ax)
    cax = divider.append_axes('right', size='5%', pad=0.2)
    cbar = fig.colorbar(m, cax=cax, extend='max')
    cbar.set_label('count')

    # Creating custom legend entries as lines
    line_3_1 = mlines.Line2D([], [], color='red', linewidth=2, label='v3.1')
    line_3_6 = mlines.Line2D([], [], color='blue', linewidth=2, label='v3.6')

    # Adding the custom legend to the plot with the created lines
    ax.legend(handles=[line_3_1, line_3_6], title='CryoSat-2 SARIn mode mask')

    # Add title
    ax.set_title('CryoSat-2 SARIn counts quarter starting {}'.format(time_value))

    # Save and close fig
    fig.savefig(OUTPUT_DIR + '/CS2_counts/CS2_counts_{}.png'.format(time_value))
    plt.close(fig)

    # Clear output
    clear_output(wait=True)

print(f"Completed plotting all {total_time_steps} time steps.")

Examining data_count plots at each time slice with SARIn mode boundaries saved in output directory, we observe that CryoSat-2's SARIn mode expanded inland starting 2013.75 and not the release date on mode mask version 3.6, October 2014 (2014.75).

In [None]:
# Read the CSV with date parsing (same datetime columns as when the file was first verified)
cyc_dates = pd.read_csv('output/cycle_dates.csv',
                        parse_dates=['cyc_start_datetimes', 'cyc_end_datetimes', 'mid_pt_datetimes'])

# Add a new "note" column with default empty strings
cyc_dates['note'] = ''

# Add the specific note to the matching row
target_date = pd.Timestamp('2013-10-01T18:00:00.000000000')
is_target_cycle = cyc_dates['cyc_start_datetimes'] == target_date
if not is_target_cycle.any():
    # The match is an exact timestamp comparison, so report a miss instead of
    # silently writing a file without the note
    print(f'Warning: no cycle starts at {target_date}; no note was added')
cyc_dates.loc[is_target_cycle, 'note'] = 'CryoSat-2 SARIn mode mask expands'

# Save it back to CSV
cyc_dates.to_csv('output/cycle_dates.csv', index=False)

# Pre-process CS2 SARIn mode mask for plotting in Fig. 1

In [None]:
# CS2 graphical mode mask in versions 3.6 and later of the original kml have line to pole
gdfs['gdf_3.6'].boundary.plot()
plt.show()

In [None]:
# Investigate aberrant line to near pole
# Extract vertices of polygons
points_list = []

lrm_rows_3_6 = gdfs['gdf_3.6'][gdfs['gdf_3.6']['description'] == 'LRM over Antarctica ice sheet']
for geometry in lrm_rows_3_6.geometry:
    if geometry.geom_type == 'Polygon':
        parts = [geometry]
    elif geometry.geom_type == 'MultiPolygon':
        # Iterate via .geoms; iterating a MultiPolygon directly fails on shapely 2
        parts = list(geometry.geoms)
    else:
        parts = []
    for part in parts:
        points_list.extend(Point(c) for c in part.exterior.coords)

# Create a new GeoDataFrame with these points
points_gdf = gpd.GeoDataFrame(geometry=points_list)

# Plot points
fig, ax = plt.subplots()
points_gdf.plot(ax=ax, marker='o', color='red', markersize=5)
plt.show()

# Delete temp gdf
del points_gdf

In [None]:
# Create function to remove the polygon vertex closest to a specified point
def remove_closest_point_to_centroid(polygon, point_of_interest=None):
    """
    Return a copy of a polygon without the exterior vertex closest to a reference point.

    Despite the function name, the reference point is not the polygon's centroid:
    by default it is the coordinate origin (0, 0).

    Parameters:
    -----------
    polygon : shapely.geometry.Polygon
        Polygon whose exterior ring is edited.
    point_of_interest : shapely.geometry.Point, optional
        Reference point; defaults to Point(0, 0).

    Returns:
    --------
    shapely.geometry.Polygon
        New polygon with one exterior vertex removed; interior rings are kept.
    """
    if point_of_interest is None:
        point_of_interest = Point(0, 0)

    # Exclude the last exterior coordinate because it repeats the first
    coords = list(polygon.exterior.coords)[:-1]

    # Find the index of the vertex closest to the reference point
    closest_point_index = int(np.argmin([Point(c).distance(point_of_interest) for c in coords]))

    # Remove the closest vertex; Polygon closes the ring again on construction
    new_coords = coords[:closest_point_index] + coords[closest_point_index + 1:]

    # Carry interior rings over so editing the exterior does not fill in holes
    holes = [list(ring.coords) for ring in polygon.interiors]

    return Polygon(new_coords, holes)

In [None]:
# Strip the vertex closest to the origin from every polygon in the version 3.6
# GeoDataFrame. The removal is applied twice because a single pass did not clear
# the aberrant line on all polygons.
for _ in range(2):
    gdfs['gdf_3.6']['geometry'] = gdfs['gdf_3.6']['geometry'].apply(remove_closest_point_to_centroid)

In [None]:
gdfs['gdf_3.6'].boundary.plot()
plt.show()

In [None]:
# Now to find the area difference between the two boundaries for plotting purposes
# Split each mask version into its SARIn row(s) and its LRM row(s)
gdf_SARIn_3_1 = gdfs['gdf_3.1'][gdfs['gdf_3.1']['description'] == 'SARIn in Antarctica']
gdf_LRM_3_1 = gdfs['gdf_3.1'][gdfs['gdf_3.1']['description'] == 'LRM over Antarctica ice sheet']
gdf_SARIn_3_6 = gdfs['gdf_3.6'][gdfs['gdf_3.6']['description'] == 'SARIn in Antarctica']
gdf_LRM_3_6 = gdfs['gdf_3.6'][gdfs['gdf_3.6']['description'] == 'LRM over Antarctica ice sheet']

# Calculate the difference (area between the circles)
# Only the first matching row of each selection is used (.iloc[0]): the SARIn polygon
# minus the LRM polygon for that version
difference_area_3_1 = gdf_SARIn_3_1['geometry'].iloc[0].difference(gdf_LRM_3_1['geometry'].iloc[0])
difference_area_3_6 = gdf_SARIn_3_6['geometry'].iloc[0].difference(gdf_LRM_3_6['geometry'].iloc[0])

# Compute the symmetric difference of the two SARIn masks to show area that SARIn mode increased
symmetric_diff = difference_area_3_1.symmetric_difference(difference_area_3_6)

# Delete variables no longer needed
del gdf_LRM_3_1 
del gdf_LRM_3_6

# Create a new GeoDataFrame with the difference area
# Note: this rebinds gdf_SARIn_3_1 / gdf_SARIn_3_6 from the row selections above to
# single-row GeoDataFrames that hold only the difference geometry
gdf_SARIn_3_1 = gpd.GeoDataFrame([{'geometry': difference_area_3_1}], crs='EPSG:3031')
gdf_SARIn_3_6 = gpd.GeoDataFrame([{'geometry': difference_area_3_6}], crs='EPSG:3031')

# Create a new GeoDataFrame with the symmetric difference
gdf_SARIn_3_1_3_6_diff = gpd.GeoDataFrame([{'geometry': symmetric_diff}], crs='EPSG:3031')

In [None]:
# Plot the two SARIn masks and their difference to check that the operation worked
# (figure handle is named fig; the earlier name 'fix' was a misspelling)
fig, ax = plt.subplots()
gdf_SARIn_3_1.plot(ax=ax, alpha=0.5, color='blue')
gdf_SARIn_3_6.plot(ax=ax, alpha=0.25, facecolor='none', edgecolor='k', hatch=r'\\')
gdf_SARIn_3_1_3_6_diff.plot(ax=ax, alpha=0.5, color='red')
plt.show()

In [None]:
# Difference of two mode masks has many small polygons 
gdf_SARIn_3_1_3_6_diff.plot()
plt.show()

In [None]:
# Let's remove those to get a cleaner looking plot
# Define function to do the job
def keep_largest_polygon(geometry):
    """
    Return the largest-area member of a MultiPolygon.

    Any geometry that is not a MultiPolygon is returned unchanged.
    """
    # Anything other than a MultiPolygon passes straight through
    if not isinstance(geometry, MultiPolygon):
        return geometry

    # Use .geoms to list the member polygons explicitly, then pick the first
    # member with the maximum area
    parts = list(geometry.geoms)
    areas = [part.area for part in parts]
    return parts[areas.index(max(areas))]

# Apply the function to each geometry in the GeoDataFrame
gdf_SARIn_3_1_3_6_diff['geometry'] = gdf_SARIn_3_1_3_6_diff['geometry'].apply(keep_largest_polygon)

In [None]:
gdf_SARIn_3_1_3_6_diff.plot()
plt.show()

In [None]:
# Plot again after keeping only the largest difference polygon to check the cleanup worked
# (figure handle is named fig; the earlier name 'fix' was a misspelling)
fig, ax = plt.subplots()
gdf_SARIn_3_1.plot(ax=ax, alpha=0.5, color='blue')
gdf_SARIn_3_6.plot(ax=ax, alpha=0.25, facecolor='none', edgecolor='k', hatch=r'\\')
gdf_SARIn_3_1_3_6_diff.plot(ax=ax, alpha=0.5, color='red')
plt.show()

In [None]:
# Export the geodataframes to use in Fig. 1
# Create the output directory first so the writes do not fail on a fresh checkout
mode_mask_dir = 'output/CryoSat2_SARIn_mode_masks'
os.makedirs(mode_mask_dir, exist_ok=True)
gdf_SARIn_3_1.to_file(f'{mode_mask_dir}/gdf_SARIn_3_1.geojson', driver='GeoJSON')
gdf_SARIn_3_1_3_6_diff.to_file(f'{mode_mask_dir}/gdf_SARIn_3_1_3_6_diff.geojson', driver='GeoJSON')
gdf_SARIn_3_6.to_file(f'{mode_mask_dir}/gdf_SARIn_3_6.geojson', driver='GeoJSON')