In [1]:
# Code for Fig. S1 of Sauthoff and others, 2024
# This code runs continental-scale operations on multiple datasets and
# requires a 64 GB server or local memory
#
# Written 2023-11-11 by W. Sauthoff (wsauthoff.github.io)

In [3]:
# Import libraries
import datetime
# import earthaccess
import geopandas as gpd
# from IPython.display import clear_output
import matplotlib
# import matplotlib.cm as cm
from matplotlib.collections import LineCollection
import matplotlib.colors as colors
import matplotlib.dates as mdates
# from matplotlib.patches import Rectangle
# from matplotlib.legend_handler import HandlerTuple
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
# from mpl_toolkits.axes_grid1 import make_axes_locatable
# from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import numpy as np
import os
# from os import path
import pandas as pd
# from pyproj import CRS, Transformer
import rioxarray
# from rioxarray.exceptions import NoDataInBounds
# from shapely.geometry import box, Polygon
# from shapely.ops import unary_union
# from skimage import measure
# import xarray as xr

# from IPython.display import Audio, display
# def play_sound():
#     display(Audio(url="http://codeskulptor-demos.commondatastorage.googleapis.com/pang/pop.mp3", autoplay=True))

# Define data directories dependent on home environment
# Replace with your directory file paths, or set the DATA_DIR, SCRIPT_DIR and
# OUTPUT_DIR environment variables before starting the notebook
if os.getenv('HOME') == '/home/jovyan':
    DATA_DIR = '/home/jovyan/data'
    SCRIPT_DIR = '/home/jovyan/repos_my/script_dir'
    # NOTE(review): this path ends in a notebook file name rather than a directory — confirm it is intended
    OUTPUT_DIR = '/home/jovyan/1_outlines_candidates/output/Fig2_lake_reexamination.ipynb'
else:
    # Outside the /home/jovyan environment the three names were previously left
    # undefined, so later cells failed with a NameError. Fall back to environment
    # variables of the same name, then to locations under the working directory.
    DATA_DIR = os.getenv('DATA_DIR', os.path.join(os.getcwd(), 'data'))
    SCRIPT_DIR = os.getenv('SCRIPT_DIR', os.getcwd())
    OUTPUT_DIR = os.getenv('OUTPUT_DIR', os.path.join(os.getcwd(), 'output'))

# # Define constants and coordinate transforms for the geodesic area calculation
# CRS_LL = "EPSG:4326" # wgs84 in lon,lat
# GEOD = CRS(CRS_LL).get_geod() # geod object for calculating geodesic area on defined ellipsoid
# CRS_XY = "EPSG:3031" # Antarctic Polar Stereographic in x, y
# XY_TO_LL = Transformer.from_crs(CRS_XY, CRS_LL, always_xy = True) # make coord transformer

# # Change default font to increase font size
# plt.rcParams.update({'font.size': 8})

# Functions

# Import datasets

In [4]:
# Import subglacial lake outlines
# The script's source is executed in this notebook's namespace, so the names it
# defines become available to later cells
# NOTE(review): later cells use names such as Smith2009_outlines that are not
# defined in the notebook itself — presumably they come from this script
# The context manager closes the file handle once the source has been read
with open(SCRIPT_DIR + '/Sauthoff2024_outlines.py') as outlines_script:
    exec(outlines_script.read())

In [5]:
# MODIS Mosaic of Antarctica (MOA) surface imagery
# Source: https://nsidc.org/data/nsidc-0730/versions/1
# Place the downloaded GeoTIFF beneath DATA_DIR before running this cell
# Alternative moa750 product, kept for reference:
# moa_lowres = DATA_DIR + '/surface_imagery/MODIS_MOA/2014/moa750_2014_hp1_v01.tif'
# moa_lowres_da = rioxarray.open_rasterio(moa_lowres)

# Build the path to the moa125 GeoTIFF and open it with rioxarray
moa_highres = f'{DATA_DIR}/surface_imagery/MODIS_MOA/2014/moa125_2014_hp1_v01.tif'
moa_highres_da = rioxarray.open_rasterio(moa_highres)



In [6]:
# Read the cycle start, mid-cycle and end dates, parsing all three columns as datetimes
cycle_date_columns = ['cyc_start_dates', 'midcyc_dates', 'cyc_end_dates']
cyc_dates = pd.read_csv('output/cycle_dates.csv', parse_dates=cycle_date_columns)

# Fig. 2

In [None]:
# Create the 3 x 4 grid of panels
fig, ax = plt.subplots(nrows=3, ncols=4, figsize=(15, 12))

# Colors and dash patterns shared by every panel for the two static outline inventories
S09_color, SF18_color = 'cyan', 'darkcyan'
S09_linestyle = (0, (1, 2))
SF18_linestyle = (0, (1, 1))

# Stand-alone line artists carrying those styles, for use as legend entries
Smith2009 = plt.Line2D((0, 1), (0, 0), linewidth=2, linestyle=S09_linestyle, color=S09_color)
SiegfriedFricker2018 = plt.Line2D((0, 1), (0, 0), linewidth=2, linestyle=SF18_linestyle, color=SF18_color)

# Panel - evolving outlines

# Select the static outline of the lake of interest from each inventory
S09_lake_gdf = Smith2009_outlines.loc[Smith2009_outlines['Name'] == 'Whillans_4']
SF18_lake_gdf = SiegfriedFricker2018_outlines.loc[SiegfriedFricker2018_outlines['name'] == 'ConwaySubglacialLake']

# Read the evolving outlines stored under the lake's name
lake_name = SF18_lake_gdf['name'].values[0]
outlines_gdf = gpd.read_file(f'outlines/evolving_outlines/{lake_name}.geojson')

# Bounding box of all evolving outlines, padded on each side by a fraction of its size
x_min, y_min, x_max, y_max = outlines_gdf.total_bounds
buffer_frac = 0.2
x_buffer = buffer_frac * abs(x_max-x_min)
y_buffer = buffer_frac * abs(y_max-y_min)
x_lo, x_hi = x_min-x_buffer, x_max+x_buffer
y_lo, y_hi = y_min-y_buffer, y_max+y_buffer

# Crop the MOA imagery to the padded bounding box and draw it as the panel background
mask_x = (moa_highres_da.x >= x_lo) & (moa_highres_da.x <= x_hi)
mask_y = (moa_highres_da.y >= y_lo) & (moa_highres_da.y <= y_hi)
moa_highres_da_subset = moa_highres_da.where(mask_x & mask_y, drop=True)
ax[0,0].imshow(moa_highres_da_subset[0,:,:], cmap="gray", clim=[14000, 17000], extent=[x_lo, x_hi, y_lo, y_hi])

# Sample the continuous colormap into one color fewer than the number of mid-cycle dates
colormap = 'plasma'
continuous_cmap = matplotlib.colormaps[colormap]
n_colors = len(cyc_dates['midcyc_dates'])-1
discrete_cmap = colors.ListedColormap(continuous_cmap(np.linspace(0, 1, n_colors)))

# Normalize between the first and last mid-cycle dates
norm = plt.Normalize(vmin=mdates.date2num(cyc_dates['midcyc_dates'].iloc[0]),
                     vmax=mdates.date2num(cyc_dates['midcyc_dates'].iloc[-1]))

# For every time slice: keep a line artist in that slice's color for the legend,
# then draw the boundaries of the outlines belonging to the slice
lines = []  # line artists to be used for the legend
for idx, dt in enumerate(cyc_dates['midcyc_dates']):
    # Color of this time slice
    slice_color = discrete_cmap(norm(mdates.date2num(dt)))

    # Single-point placeholder line that only serves as a legend handle
    x, y = 1, 1
    line, = ax[0,0].plot(x, y, color=slice_color, linewidth=3)
    lines.append(line)

    # Outlines whose mid-cycle datetime matches the current time slice
    outlines_gdf_dt_sub = outlines_gdf.loc[outlines_gdf['midcyc_datetime'] == dt]
    outlines_gdf_dt_sub.boundary.plot(ax=ax[0,0], color=slice_color, linewidth=0.75)

# Limit the axes to the padded bounding box
ax[0,0].set(xlim=(x_lo, x_hi), ylim=(y_lo, y_hi))

# Panel - da/dt
# Horizontal reference lines at the areas of the static outlines (m^2 converted to km^2),
# drawn with the dash patterns defined for each inventory so they match the legend lines
ax[0,1].axhline(np.divide(S09_lake_gdf['Area (m^2)'], 1e6).values, color=S09_color, linestyle=S09_linestyle, linewidth=3)
ax[0,1].axhline(np.divide(SF18_lake_gdf['area (m^2)'], 1e6).values, color=SF18_color, linestyle=SF18_linestyle, linewidth=3)

# Group by the 'datetime' column and sum the 'area (m^2)' column
# groupby() returns its groups sorted by key, so the dates are taken from the grouped
# index; pd.unique() keeps order of first appearance and could pair dates with the
# wrong sums when the rows are not sorted by date
# NOTE(review): gdf_subset is not defined in the visible cells — confirm where it is created
area_by_date = gdf_subset.groupby('datetime')['area (m^2)'].sum()
grouped_data = list(area_by_date)
grouped_data_dates = list(area_by_date.index)

# Plot multi-colored line
x=mdates.date2num(grouped_data_dates)
y=np.divide(grouped_data, 1e6)
# Pair consecutive (x, y) points into line segments
points = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
# Create a continuous norm to map from data points to colors
norm = plt.Normalize(x.min(), x.max())
lc = LineCollection(segments, cmap=colormap, norm=norm, linestyle=(0,(1,1)))
# Set the values used for colormapping
lc.set_array(x)
lc.set_linewidth(2)
line = ax[0,1].add_collection(lc)

# Concise date tick labels on the time axis
locator = mdates.AutoDateLocator(minticks=1, maxticks=7)
formatter = mdates.ConciseDateFormatter(locator)
ax[0,1].xaxis.set_major_locator(locator)
ax[0,1].xaxis.set_major_formatter(formatter)
# min_area = min(np.divide(sum(lake_S09.area), 1e6), 
#     # np.divide(sum(lake_SF18.area), 1e6), 
#     min(np.divide(areas_var, 1e6)))
# max_area = max(np.divide(sum(lake_S09.area), 1e6), 
#     # np.divide(sum(lake_SF18.area), 1e6), 
#     max(np.divide(areas_var,1e6)))

# Mark each data point, colored by its date
ax[0,1].scatter(x, y, c=x, cmap=colormap)

# NOTE(review): midcyc_dates is not defined in the visible cells — confirm its source
ax[0,1].set(xlim=(midcyc_dates[0], midcyc_dates[-1]),
           # ylim=((min_area - (max_area - min_area)*0.1), 
           #       (max_area + (max_area - min_area)*0.1))
          )
ax[0,1].set_ylabel('wetted area [km$^2$]', size=17.5, labelpad=8)
ax[0,1].set_yticks(np.arange(0, 150, 25))



# Panel C - dh/dt
# Zero line for reference
ax[0,2].axhline(0, color='k', linestyle='solid', linewidth=1)
# ax[2].axhline(np.divide(ROI['area (m^2)'], 1e6).values, color=S09_color, linestyle=(0, (1, 2)), linewidth=3)

# Plot static outline time series as a cumulative sum
# NOTE(review): midcyc_dates and lkavgdhdt_S09 are not defined in the visible cells — confirm their source
ax[0,2].plot(midcyc_dates, np.cumsum(lkavgdhdt_S09), color=S09_color, linestyle=S09_linestyle, linewidth=3)

# Group by the 'datetime' column and sum the 'dh (m)' column
# groupby() returns its groups sorted by key, so the dates are taken from the grouped
# index; pd.unique() keeps order of first appearance and could pair dates with the
# wrong sums when the rows are not sorted by date
# NOTE(review): gdf_subset is not defined in the visible cells — confirm where it is created
dh_by_date = gdf_subset.groupby('datetime')['dh (m)'].sum()
grouped_data = list(dh_by_date)
grouped_data_dates = list(dh_by_date.index)

# Plot multi-colored line of the cumulative height change
cmap_str='plasma'
x=mdates.date2num(grouped_data_dates)
y=np.cumsum(grouped_data)
# Pair consecutive (x, y) points into line segments
points = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
# Create a continuous norm to map from data points to colors
norm = plt.Normalize(x.min(), x.max())
lc = LineCollection(segments, cmap=cmap_str, norm=norm, linestyle=(0,(1,1)))
# Set the values used for colormapping
lc.set_array(x)
lc.set_linewidth(2)
line = ax[0,2].add_collection(lc)

# Concise date tick labels on the time axis
locator = mdates.AutoDateLocator(minticks=1, maxticks=7)
formatter = mdates.ConciseDateFormatter(locator)
ax[0,2].xaxis.set_major_locator(locator)
ax[0,2].xaxis.set_major_formatter(formatter)
# min_area = min(np.divide(sum(lake_S09.area), 1e6), 
#     # np.divide(sum(lake_SF18.area), 1e6), 
#     min(np.divide(areas_var, 1e6)))
# max_area = max(np.divide(sum(lake_S09.area), 1e6), 
#     # np.divide(sum(lake_SF18.area), 1e6), 
#     max(np.divide(areas_var,1e6)))

# Mark each data point, colored by its date
ax[0,2].scatter(x, y, c=x, cmap=cmap_str)

ax[0,2].set(xlim=(midcyc_dates[0], midcyc_dates[-1]),
           # ylim=((min_area - (max_area - min_area)*0.1), 
           #       (max_area + (max_area - min_area)*0.1))
          )
ax[0,2].set_ylabel('cumulative\nheight change [m]', size=17.5, labelpad=8)
# ax[2].set_yticks(np.arange(0, 150, 25))



# Panel D - dv/dt
# Zero line for reference
ax[0,3].axhline(0, color='k', linestyle='solid', linewidth=1)
# ax[2].axhline(np.divide(ROI['area (m^2)'], 1e6).values, color=S09_color, linestyle=(0, (1, 2)), linewidth=3)

# Plot static outline time series as a cumulative sum, scaled by 1e9
# NOTE(review): midcyc_dates and vols_S09 are not defined in the visible cells — confirm their source
ax[0,3].plot(midcyc_dates, np.divide(np.cumsum(vols_S09), 1e+9), color=S09_color, linestyle=S09_linestyle, linewidth=3)

# Group by the 'datetime' column and sum the 'vol (m^3)' column
# groupby() returns its groups sorted by key, so the dates are taken from the grouped
# index; pd.unique() keeps order of first appearance and could pair dates with the
# wrong sums when the rows are not sorted by date
# NOTE(review): gdf_subset is not defined in the visible cells — confirm where it is created
vol_by_date = gdf_subset.groupby('datetime')['vol (m^3)'].sum()
grouped_data = list(vol_by_date)
grouped_data_dates = list(vol_by_date.index)

# Calc bias and plot: difference between the two cumulative series, element by element
# zip() stops at the shorter of the two series
S09_S24_bias = [a_i - b_i for a_i, b_i in zip(np.cumsum(grouped_data), np.cumsum(vols_S09))]
ax[0,3].plot(grouped_data_dates, np.divide(S09_S24_bias, 1e+9), color='red', linestyle='solid', linewidth=2)

# Plot multi-colored line of the cumulative sum, scaled by 1e9
x=mdates.date2num(grouped_data_dates)
y=np.divide(np.cumsum(grouped_data), 1e9)
# Pair consecutive (x, y) points into line segments
points = np.array([x, y]).T.reshape(-1, 1, 2)
segments = np.concatenate([points[:-1], points[1:]], axis=1)
# Create a continuous norm to map from data points to colors
norm = plt.Normalize(x.min(), x.max())
# cmap_str is assigned in the dh/dt section of this cell
lc = LineCollection(segments, cmap=cmap_str, norm=norm, linestyle=(0,(1,1)))
# Set the values used for colormapping
lc.set_array(x)
lc.set_linewidth(2)
line = ax[0,3].add_collection(lc)









# # Plot static and evolving outlines onto MOA surface imagery
# # Subset dataset to region of interest for plotting
# # lake_gdf = SiegfriedFricker2018_outlines[SiegfriedFricker2018_outlines['name'] == 'Whillans_7']
# lake_gdf = SiegfriedFricker2018_outlines[SiegfriedFricker2018_outlines['name'] == 'Foundation_5']
# outlines_gdf = gpd.read_file('outlines/evolving_outlines/{}.geojson'.format(lake_gdf['name'].values[0]))
# x_min, y_min, x_max, y_max = outlines_gdf.total_bounds
# buffer_frac = 0.2
# x_buffer = abs(x_max-x_min)*buffer_frac
# y_buffer = abs(y_max-y_min)*buffer_frac
# mask_x = (moa_highres_da.x >= x_min-buffer) & (moa_highres_da.x <= x_max+buffer)
# mask_y = (moa_highres_da.y >= y_min-buffer) & (moa_highres_da.y <= y_max+buffer)
# moa_highres_da_subset = moa_highres_da.where(mask_x & mask_y, drop=True)
# ax[1,0].imshow(moa_highres_da_subset[0,:,:], cmap="gray", clim=[14000, 17000], extent=[x_min-x_buffer, x_max+x_buffer, y_min-y_buffer, y_max+y_buffer])

# # Use for loop to plot each outline in the geopandas geodataframe and color by date
# for idx, dt in enumerate(cyc_dates['midcyc_dates'], 0):
#     # Filter rows that match the current time slice
#     outlines_gdf_dt_sub = outlines_gdf[outlines_gdf['midcyc_datetime'] == dt]

#     # Plotting the subset
#     outlines_gdf_dt_sub.boundary.plot(ax=ax[1,0], color=discrete_cmap(norm(mdates.date2num(cyc_dates['midcyc_dates'][idx]))), linewidth=0.75)

# # Set axes limit
# ax[1,0].set(xlim=(x_min-x_buffer, x_max+x_buffer), ylim=(y_min-y_buffer, y_max+y_buffer))

# # Plot static and evolving outlines onto MOA surface imagery
# # Subset dataset to region of interest for plotting
# lake_gdf = SiegfriedFricker2018_outlines[SiegfriedFricker2018_outlines['name'] == 'Slessor_23']
# outlines_gdf = gpd.read_file('outlines/evolving_outlines/{}.geojson'.format(lake_gdf['name'].values[0]))
# x_min, y_min, x_max, y_max = outlines_gdf.total_bounds
# buffer_frac = 0.4
# x_buffer = abs(x_max-x_min)*buffer_frac
# y_buffer = abs(y_max-y_min)*buffer_frac
# mask_x = (moa_highres_da.x >= x_min-buffer) & (moa_highres_da.x <= x_max+buffer)
# mask_y = (moa_highres_da.y >= y_min-buffer) & (moa_highres_da.y <= y_max+buffer)
# moa_highres_da_subset = moa_highres_da.where(mask_x & mask_y, drop=True)
# ax[2,0].imshow(moa_highres_da_subset[0,:,:], cmap="gray", clim=[14000, 17000], extent=[x_min-x_buffer, x_max+x_buffer, y_min-y_buffer, y_max+y_buffer])

# # Use for loop to plot each outline in the geopandas geodataframe and color by date
# for idx, dt in enumerate(cyc_dates['midcyc_dates']):
#     # Filter rows that match the current time slice
#     outlines_gdf_dt_sub = outlines_gdf[outlines_gdf['midcyc_datetime'] == dt]

#     # Plotting the subset
#     # outlines_gdf_dt_sub.boundary.plot(ax=ax[2,0], color=discrete_cmap(norm(mdates.date2num(cyc_dates['midcyc_dates'][idx]))), linewidth=0.75)
#     outlines_gdf_dt_sub.geometry.centroid.plot(ax=ax[2,0], color=discrete_cmap(norm(mdates.date2num(cyc_dates['midcyc_dates'][idx]))), linewidth=1, marker='+')

# # Set axes limit
# ax[2,0].set(xlim=(x_min-x_buffer, x_max+x_buffer), ylim=(y_min-y_buffer, y_max+y_buffer))

# for i in ax: 
#     Smith2009_outlines.boundary.plot(ax=i, edgecolor=S09_color, facecolor='none', linestyle=(0, (1, 2)), linewidth=3, alpha=1, zorder=0)
#     SiegfriedFricker2018_SF18outlines.boundary.plot(ax=i, edgecolor=SF18_color, facecolor='none', linestyle=(0, (1, 1)), linewidth=3, alpha=1, zorder=0)

# # Plot inset map
# axIns = ax.inset_axes([0.01, 0.01, 0.3, 0.3]) # [left, bottom, width, height] (fractional axes coordinates)
# axIns.set_aspect('equal')
# moa_2014_coastline.plot(ax=axIns, color='gray', edgecolor='k', linewidth=0.1, zorder=3)
# moa_2014_groundingline.plot(ax=axIns, color='ghostwhite', edgecolor='k', linewidth=0.1, zorder=3)
# axIns.axis('off')

# # Plot black rectangle to indicate location
# rect = Rectangle((x_min, y_min), (x_max-x_min), (y_max-y_min), fill=False, linewidth=2, color='k', zorder=3)
# axIns.add_artist(rect)

# Label axes
ax[2,0].set_xlabel('x [km]', size=16)
ax[1,0].set_ylabel('y [km]', size=16)

# Change polar stereographic m to km
km_scale = 1e3

def _format_km(value, pos):
    """Return the tick label for coordinate `value` divided by km_scale; `pos` is unused."""
    return '{0:g}'.format(value/km_scale)

for subplot in [ax[0,0], ax[1,0], ax[2,0]]:
    # Plot previous static outline inventories
    Smith2009_outlines.boundary.plot(ax=subplot, edgecolor=S09_color, facecolor='none', linestyle=S09_linestyle, linewidth=3, alpha=1, zorder=0)
    # SF18_linestyle is the dash pattern defined at the top of this cell; the previous
    # name S18_linestyle is not defined anywhere and raised a NameError
    # NOTE(review): the lake selection earlier in this cell uses SiegfriedFricker2018_outlines;
    # confirm SiegfriedFricker2018_SF18outlines is also defined by the outlines script
    SiegfriedFricker2018_SF18outlines.boundary.plot(ax=subplot, edgecolor=SF18_color, facecolor='none', linestyle=SF18_linestyle, linewidth=3, alpha=1, zorder=0)

    # Each axis gets its own formatter instance built from the shared function
    subplot.xaxis.set_major_formatter(ticker.FuncFormatter(_format_km))
    subplot.yaxis.set_major_formatter(ticker.FuncFormatter(_format_km))

plt.show()

In [None]:
# Display Smith2009_outlines for inspection
# NOTE(review): presumably defined by the outlines script executed near the top of the notebook — confirm
Smith2009_outlines

In [None]:
# Read the Smith2009 lake outlines from the KML file beneath DATA_DIR
smith2009_kml_path = f'{DATA_DIR}/boundaries/Smith2009_subglacial_lakes/Antarctic_lakes.kml'
Smith2009_outlines_lonlat = gpd.read_file(smith2009_kml_path, driver='KML')

# Reproject to EPSG:3031 and display the result
Smith2009_outlines_test = Smith2009_outlines_lonlat.to_crs(3031)
Smith2009_outlines_test

In [7]:
# Print every mid-cycle date next to its zero-based position
midcyc_series = cyc_dates['midcyc_dates']
for idx, dt in zip(range(len(midcyc_series)), midcyc_series):
    print(idx, dt)

0 2010-08-17 06:45:00
1 2010-11-16 11:15:00
2 2011-02-15 15:45:00
3 2011-05-17 23:15:00
4 2011-08-17 06:45:00
5 2011-11-16 11:15:00
6 2012-02-15 15:45:00
7 2012-05-16 23:15:00
8 2012-08-16 06:45:00
9 2012-11-15 23:15:00
10 2013-02-15 15:45:00
11 2013-05-17 23:15:00
12 2013-08-17 06:45:00
13 2013-11-16 11:15:00
14 2014-02-15 15:45:00
15 2014-05-17 23:15:00
16 2014-08-17 06:45:00
17 2014-11-16 11:15:00
18 2015-02-15 15:45:00
19 2015-05-17 23:15:00
20 2015-08-17 06:45:00
21 2015-11-16 11:15:00
22 2016-02-15 15:45:00
23 2016-05-16 23:15:00
24 2016-08-16 06:45:00
25 2016-11-15 23:15:00
26 2017-02-15 15:45:00
27 2017-05-17 23:15:00
28 2017-08-17 06:45:00
29 2017-11-16 11:15:00
30 2018-02-15 15:45:00
31 2018-05-17 23:15:00
32 2018-08-17 06:45:00
33 2018-11-16 14:15:00
34 2019-02-15 21:45:00
35 2019-05-18 05:15:00
36 2019-08-17 12:45:00
37 2019-11-16 20:15:00
38 2020-02-16 03:45:00
39 2020-05-17 11:15:00
40 2020-08-16 18:45:00
41 2020-11-16 02:15:00
42 2021-02-15 09:45:00
43 2021-05-17 17:15:0

In [9]:
# Print all but the last mid-cycle date next to its zero-based position
# Less one because cycle-to-cycle differencing reduces length by one
for idx, mid_date in enumerate(cyc_dates['midcyc_dates'].iloc[:-1]):
    print(idx, mid_date)

0 2010-08-17 06:45:00
1 2010-11-16 11:15:00
2 2011-02-15 15:45:00
3 2011-05-17 23:15:00
4 2011-08-17 06:45:00
5 2011-11-16 11:15:00
6 2012-02-15 15:45:00
7 2012-05-16 23:15:00
8 2012-08-16 06:45:00
9 2012-11-15 23:15:00
10 2013-02-15 15:45:00
11 2013-05-17 23:15:00
12 2013-08-17 06:45:00
13 2013-11-16 11:15:00
14 2014-02-15 15:45:00
15 2014-05-17 23:15:00
16 2014-08-17 06:45:00
17 2014-11-16 11:15:00
18 2015-02-15 15:45:00
19 2015-05-17 23:15:00
20 2015-08-17 06:45:00
21 2015-11-16 11:15:00
22 2016-02-15 15:45:00
23 2016-05-16 23:15:00
24 2016-08-16 06:45:00
25 2016-11-15 23:15:00
26 2017-02-15 15:45:00
27 2017-05-17 23:15:00
28 2017-08-17 06:45:00
29 2017-11-16 11:15:00
30 2018-02-15 15:45:00
31 2018-05-17 23:15:00
32 2018-08-17 06:45:00
33 2018-11-16 14:15:00
34 2019-02-15 21:45:00
35 2019-05-18 05:15:00
36 2019-08-17 12:45:00
37 2019-11-16 20:15:00
38 2020-02-16 03:45:00
39 2020-05-17 11:15:00
40 2020-08-16 18:45:00
41 2020-11-16 02:15:00
42 2021-02-15 09:45:00
43 2021-05-17 17:15:0

In [10]:
# Display the cycle dates table read from output/cycle_dates.csv
cyc_dates

Unnamed: 0,cyc_start_dates,midcyc_dates,cyc_end_dates
0,2010-07-02 15:00:00,2010-08-17 06:45:00,2010-10-01 22:30:00
1,2010-10-01 22:30:00,2010-11-16 11:15:00,2011-01-01 00:00:00
2,2011-01-01 00:00:00,2011-02-15 15:45:00,2011-04-02 07:30:00
3,2011-04-02 07:30:00,2011-05-17 23:15:00,2011-07-02 15:00:00
4,2011-07-02 15:00:00,2011-08-17 06:45:00,2011-10-01 22:30:00
5,2011-10-01 22:30:00,2011-11-16 11:15:00,2012-01-01 00:00:00
6,2012-01-01 00:00:00,2012-02-15 15:45:00,2012-04-01 07:30:00
7,2012-04-01 07:30:00,2012-05-16 23:15:00,2012-07-01 15:00:00
8,2012-07-01 15:00:00,2012-08-16 06:45:00,2012-09-30 22:30:00
9,2012-09-30 22:30:00,2012-11-15 23:15:00,2013-01-01 00:00:00
