In [None]:
# TODO
# Perhaps change date format to CS2_dh instead of just storing it to midcyc_dates list (or do so in repos_my/scripts/Smith_CS2.py)
# Up-down flip CS2_dh so like ATL14 (already code in repos_my/scripts/Smith_CS2.py); turn on to see if works or could try something like this: ds2_clipped = ds2_clipped.isel(y=slice(None, None, -1))
# Make time series of actual ice-surface heights to get ice thicknesses, in order to export GeoJSON of evolving subglacial pathways

# Set up computing environment

In [1]:
# Import libraries
import datetime
import earthaccess
import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import os
from scipy.ndimage import zoom
import xarray as xr

# Define data directories dependent on home environment
# On the JupyterHub image (HOME == '/home/jovyan') the hard-coded paths below are used.
# Anywhere else, set the DATA_DIR / SCRIPT_DIR environment variables or edit the
# fallbacks; without this else-branch both names would be undefined and every later
# cell that builds a path from DATA_DIR would fail with a NameError
if os.getenv('HOME') == '/home/jovyan':
    DATA_DIR = '/home/jovyan/data'
    SCRIPT_DIR = '/home/jovyan/repos/scripts'
else:
    _home_dir = os.path.expanduser('~')
    DATA_DIR = os.environ.get('DATA_DIR', os.path.join(_home_dir, 'data'))
    SCRIPT_DIR = os.environ.get('SCRIPT_DIR', os.path.join(_home_dir, 'repos', 'scripts'))

# Functions

# Import datasets

## Import the ATL14 DEM

In [10]:
# Log into NASA Earthdata to search for datasets
earthaccess.login()

<earthaccess.auth.Auth at 0x7ff03d387850>

In [11]:
# Find ICESat-2 ATL14 data granules (version 003, cloud-hosted copies only)
# NOTE(review): the lower-left longitude (1) is larger than the upper-right longitude (-1);
# this search returned the four quadrant granules when run, but confirm the box is as intended
results = earthaccess.search_data(
    short_name='ATL14',
    version='003',
    cloud_hosted=True,
    bounding_box=(1, -89, -1, -89)  # (lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)
)

Granules found: 4


In [12]:
# Open data granules as s3 files to stream
files = earthaccess.open(results)

Opening 4 granules, approx size: 10.78 GB
using provider: NSIDC_CPRD


QUEUEING TASKS | :   0%|          | 0/4 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/4 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/4 [00:00<?, ?it/s]

In [13]:
files

[<File-like object S3FileSystem, nsidc-cumulus-prod-protected/ATLAS/ATL14/003/ATL14_A3_0318_100m_003_01.nc>,
 <File-like object S3FileSystem, nsidc-cumulus-prod-protected/ATLAS/ATL14/003/ATL14_A2_0318_100m_003_01.nc>,
 <File-like object S3FileSystem, nsidc-cumulus-prod-protected/ATLAS/ATL14/003/ATL14_A4_0318_100m_003_01.nc>,
 <File-like object S3FileSystem, nsidc-cumulus-prod-protected/ATLAS/ATL14/003/ATL14_A1_0318_100m_003_01.nc>]

In [14]:
# Open each file; the files are quadrants in polar stereographic coordinates around the Geographic South Pole
# Quadrants are picked by granule name instead of by list position, because the
# order in which the search returns granules is not guaranteed to stay the same
def _select_granule(file_objs, *name_fragments):
    """Return the single entry of `file_objs` whose string form contains every fragment.

    Raises ValueError when zero or several entries match, so a changed search
    result cannot silently load the wrong quadrant.
    """
    matches = [f for f in file_objs if all(fragment in str(f) for fragment in name_fragments)]
    if len(matches) != 1:
        raise ValueError(
            f'Expected exactly one granule matching {name_fragments}, found {len(matches)}'
        )
    return matches[0]

ATL14_A1 = xr.open_dataset(_select_granule(files, 'ATL14_A1_'))
ATL14_A2 = xr.open_dataset(_select_granule(files, 'ATL14_A2_'))
ATL14_A3 = xr.open_dataset(_select_granule(files, 'ATL14_A3_'))
ATL14_A4 = xr.open_dataset(_select_granule(files, 'ATL14_A4_'))

In [15]:
# Specify the variables to keep
variables_to_keep = ['x', 'y', 'h']

# List of xarray datasets
datasets = [ATL14_A1, ATL14_A2, ATL14_A3, ATL14_A4]

# Function to drop variables not in variables_to_keep from a dataset
def drop_unwanted_variables(dataset, keep=None):
    """Return `dataset` without the variables whose names are not in `keep`.

    Parameters
    ----------
    dataset : object exposing `.variables` and `.drop_vars` (e.g. an xarray Dataset)
        The dataset to trim.
    keep : iterable of str, optional
        Names to retain. When omitted, the notebook-level `variables_to_keep`
        list is used, which preserves the original call signature.

    Returns
    -------
    The result of `dataset.drop_vars(...)` for every name not in `keep`.
    """
    if keep is None:
        # Fall back to the notebook global so existing one-argument calls still work
        keep = variables_to_keep
    keep = set(keep)
    variables_to_drop = [var for var in dataset.variables if var not in keep]
    return dataset.drop_vars(variables_to_drop)

# Apply the function to each dataset
ATL14_A1, ATL14_A2, ATL14_A3, ATL14_A4 = [drop_unwanted_variables(ds) for ds in datasets]

In [16]:
# Stitch quadrants A2 and A1 together along x with xarray concatenation
# The last x column of A2 is dropped first because (per the original note) it
# duplicates the x=0 column that A1 also contains
ATL14_A12 = xr.concat(
    [ATL14_A2.isel(x=slice(None, -1)), ATL14_A1],
    dim='x',
)

In [17]:
# Delete quadrants no longer needed
del ATL14_A1, ATL14_A2

In [18]:
# Stitch quadrants A3 and A4 together along x with xarray concatenation
# The last x column of A3 is dropped first because (per the original note) it
# duplicates the x=0 column that A4 also contains
ATL14_A34 = xr.concat(
    [ATL14_A3.isel(x=slice(None, -1)), ATL14_A4],
    dim='x',
)

In [19]:
# Delete quadrants no longer needed
del ATL14_A3, ATL14_A4

In [20]:
# Stitch the two half-mosaics (A34 and A12) together along y with xarray concatenation
# The last y row of A34 is dropped first so the row shared with A12 is not duplicated
ATL14 = xr.concat(
    [ATL14_A34.isel(y=slice(None, -1)), ATL14_A12],
    dim='y',
)

In [21]:
# Delete quadrants no longer needed
del ATL14_A12, ATL14_A34

In [22]:
ATL14

In [39]:
# Find ICESat-2 ATL15 r003 data granules, looked up by DOI
# (the commented-out short_name/version pair is the alternative way to identify the product)
# NOTE(review): the lower-left longitude (1) is larger than the upper-right longitude (-1);
# this search returned 16 granules when run, but confirm the box is as intended
results = earthaccess.search_data(
    doi='10.5067/ATLAS/ATL15.003',
    # short_name='ATL15',
    # version='003',
    bounding_box=(1, -89, -1, -88),  # (lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)
    cloud_hosted=True,
)

Granules found: 16


In [40]:
# Open data granules as s3 files to stream
files = earthaccess.open(results)
files

Opening 16 granules, approx size: 5.05 GB


QUEUEING TASKS | :   0%|          | 0/16 [00:00<?, ?it/s]

PROCESSING TASKS | :   0%|          | 0/16 [00:00<?, ?it/s]

COLLECTING RESULTS | :   0%|          | 0/16 [00:00<?, ?it/s]

[<File-like object HTTPFileSystem, https://n5eil01u.ecs.nsidc.org/DP5/ATLAS/ATL15.003/2019.03.29/ATL15_A4_0318_40km_003_01.nc>,
 <File-like object HTTPFileSystem, https://n5eil01u.ecs.nsidc.org/DP5/ATLAS/ATL15.003/2019.03.29/ATL15_A4_0318_10km_003_01.nc>,
 <File-like object HTTPFileSystem, https://n5eil01u.ecs.nsidc.org/DP5/ATLAS/ATL15.003/2019.03.29/ATL15_A4_0318_20km_003_01.nc>,
 <File-like object HTTPFileSystem, https://n5eil01u.ecs.nsidc.org/DP5/ATLAS/ATL15.003/2019.03.29/ATL15_A2_0318_01km_003_01.nc>,
 <File-like object HTTPFileSystem, https://n5eil01u.ecs.nsidc.org/DP5/ATLAS/ATL15.003/2019.03.29/ATL15_A3_0318_20km_003_01.nc>,
 <File-like object HTTPFileSystem, https://n5eil01u.ecs.nsidc.org/DP5/ATLAS/ATL15.003/2019.03.29/ATL15_A2_0318_20km_003_01.nc>,
 <File-like object HTTPFileSystem, https://n5eil01u.ecs.nsidc.org/DP5/ATLAS/ATL15.003/2019.03.29/ATL15_A2_0318_40km_003_01.nc>,
 <File-like object HTTPFileSystem, https://n5eil01u.ecs.nsidc.org/DP5/ATLAS/ATL15.003/2019.03.29/ATL15_A

In [41]:
# After viewing files, index the files you wish to open
print(files[15])
print(files[3])
print(files[9])
print(files[11])

<File-like object HTTPFileSystem, https://n5eil01u.ecs.nsidc.org/DP5/ATLAS/ATL15.003/2019.03.29/ATL15_A1_0318_01km_003_01.nc>
<File-like object HTTPFileSystem, https://n5eil01u.ecs.nsidc.org/DP5/ATLAS/ATL15.003/2019.03.29/ATL15_A2_0318_01km_003_01.nc>
<File-like object HTTPFileSystem, https://n5eil01u.ecs.nsidc.org/DP5/ATLAS/ATL15.003/2019.03.29/ATL15_A3_0318_01km_003_01.nc>
<File-like object HTTPFileSystem, https://n5eil01u.ecs.nsidc.org/DP5/ATLAS/ATL15.003/2019.03.29/ATL15_A4_0318_01km_003_01.nc>


In [42]:
# Open each file; the files are quadrants in polar stereographic coordinates around the Geographic South Pole
# The 1 km granule of each quadrant is picked by name instead of by list position,
# because the order in which the search returns the 16 granules is not guaranteed
def _select_granule(file_objs, *name_fragments):
    """Return the single entry of `file_objs` whose string form contains every fragment.

    Raises ValueError when zero or several entries match, so a changed search
    result cannot silently load the wrong quadrant or resolution.
    """
    matches = [f for f in file_objs if all(fragment in str(f) for fragment in name_fragments)]
    if len(matches) != 1:
        raise ValueError(
            f'Expected exactly one granule matching {name_fragments}, found {len(matches)}'
        )
    return matches[0]

ATL15_A1 = xr.open_dataset(_select_granule(files, 'ATL15_A1_', '_01km_'), group='delta_h')
ATL15_A2 = xr.open_dataset(_select_granule(files, 'ATL15_A2_', '_01km_'), group='delta_h')
ATL15_A3 = xr.open_dataset(_select_granule(files, 'ATL15_A3_', '_01km_'), group='delta_h')
ATL15_A4 = xr.open_dataset(_select_granule(files, 'ATL15_A4_', '_01km_'), group='delta_h')

In [43]:
# Specify the variables to keep
variables_to_keep = ['time', 'y', 'x', 'delta_h']

# List of xarray datasets
datasets = [ATL15_A1, ATL15_A2, ATL15_A3, ATL15_A4]

# Function to drop variables not in variables_to_keep from a dataset
def drop_unwanted_variables(dataset, keep=None):
    """Return `dataset` without the variables whose names are not in `keep`.

    Parameters
    ----------
    dataset : object exposing `.variables` and `.drop_vars` (e.g. an xarray Dataset)
        The dataset to trim.
    keep : iterable of str, optional
        Names to retain. When omitted, the notebook-level `variables_to_keep`
        list is used, which preserves the original call signature.

    Returns
    -------
    The result of `dataset.drop_vars(...)` for every name not in `keep`.
    """
    if keep is None:
        # Fall back to the notebook global so existing one-argument calls still work
        keep = variables_to_keep
    keep = set(keep)
    variables_to_drop = [var for var in dataset.variables if var not in keep]
    return dataset.drop_vars(variables_to_drop)

# Apply the function to each dataset
ATL15_A1, ATL15_A2, ATL15_A3, ATL15_A4 = [drop_unwanted_variables(ds) for ds in datasets]

In [None]:
# Stitch quadrants A2 and A1 together along x with xarray concatenation
# The last x column of A2 is dropped first because (per the original note) it
# duplicates the x=0 column that A1 also contains
ATL15_A12 = xr.concat(
    [ATL15_A2.isel(x=slice(None, -1)), ATL15_A1],
    dim='x',
)

In [45]:
# Stitch quadrants A3 and A4 together along x with xarray concatenation
# The last x column of A3 is dropped first because (per the original note) it
# duplicates the x=0 column that A4 also contains
ATL15_A34 = xr.concat(
    [ATL15_A3.isel(x=slice(None, -1)), ATL15_A4],
    dim='x',
)

In [46]:
# Stitch the two half-mosaics (A34 and A12) together along y with xarray concatenation
# The last y row of A34 is dropped first so the row shared with A12 is not duplicated
ATL15_dh = xr.concat(
    [ATL15_A34.isel(y=slice(None, -1)), ATL15_A12],
    dim='y',
)

In [47]:
# Delete variables to reduce memory consumption
del ATL15_A1, ATL15_A12, ATL15_A2, ATL15_A3, ATL15_A34, ATL15_A4

In [48]:
# Add datasets attributes
ATL15_dh.attrs['identifier_product_DOI'] = '10.5067/ATLAS/ATL15.003'
ATL15_dh.attrs['shortName'] = 'ATL15'

## Import the Smith and others, 2017, TC method CryoSat-2 SARIn gridded DEM and dh data

In [2]:
# Import Smith and others, 2017, TC method CryoSat-2 SARIn DEM data (closed source acquired from Ben Smith)
# The file is read from under DATA_DIR; the bare name on the last line displays the dataset
CS2_DEM = xr.open_dataset(DATA_DIR + '/altimetry/CryoSat2/CS2_SARIn_Smith2017method/mos_z0_2016.0.nc')
CS2_DEM

In [3]:
# Construct the DEM from z0, SMB_a and FAC
# z0 is stored corrected for SMB_a and FAC, so:
#   actual surface elevation = z0 + SMB_a + FAC
#   FAC-free elevation       = z0 + SMB_a
# Here the actual surface elevation is stored as the new 'DEM' variable
CS2_DEM = CS2_DEM.assign(
    DEM=lambda ds: ds['z0'] + ds['SMB_a'] + ds['FAC']
)
CS2_DEM

In [4]:
# Import Smith and others, 2017, TC method CryoSat-2 SARIn delta height data (closed source acquired from Ben Smith)
# The file is read from under DATA_DIR; the bare name on the last line displays the dataset
CS2_dh = xr.open_dataset(DATA_DIR + '/altimetry/CryoSat2/CS2_SARIn_Smith2017method/mos_2010.5_2021.5.nc')
CS2_dh

In [5]:
# Work out the per-axis scale factors that bring the DEM grid to the dh grid size:
# delta_h axes 1 and 2 are compared against DEM axes 0 and 1
dh_rows, dh_cols = CS2_dh.delta_h.shape[1:3]
dem_rows, dem_cols = CS2_DEM.DEM.shape[:2]
zoom_factor_y = dh_rows / dem_rows
zoom_factor_x = dh_cols / dem_cols

# Resample the DEM onto the dh grid size
# scipy.ndimage.zoom defaults to spline interpolation of order 3; pass order=0 for
# nearest-neighbor or order=1 for bilinear if a different scheme is wanted
# NOTE(review): confirm the DEM holds no NaNs, which a spline would spread to neighbouring cells
CS2_DEM_agg = zoom(CS2_DEM.DEM, (zoom_factor_y, zoom_factor_x))

In [6]:
# Build actual heights by combining each dh slice with the resampled 2016 DEM
CS2_dh = CS2_dh.assign(
    h=lambda ds: ds['delta_h'] + CS2_DEM_agg
)

# Record what the new 'h' data variable represents
CS2_dh['h'].attrs['description'] = 'absolute heights (relative to what?)'

CS2_dh

In [7]:
del CS2_DEM_agg

In [8]:
# Delete delta_h data variable so we can replace with delta_h relative to ATL14 DEM
CS2_dh = CS2_dh.drop_vars('delta_h')

In [23]:
# Recompute delta heights as the CryoSat-2 absolute heights minus the ATL14 DEM heights
CS2_dh = CS2_dh.assign(
    delta_h=lambda ds: ds['h'] - ATL14['h']
)

# Record what the new 'delta_h' data variable represents
CS2_dh['delta_h'].attrs['description'] = 'Height change relative to the ATL14 datum (Jan 1, 2020) surface'

CS2_dh

In [24]:
# Change time to match the ICESat-2 time format
# Each Smith and others, 2017 CryoSat-2 time value is treated as a decimal year:
# the integer part gives Jan 1 of that year and the fractional part is scaled by
# 365.25 days, then the result is converted to numpy.datetime64
decimal_years = CS2_dh['time'].values
cyc_dates = [
    np.datetime64(
        datetime.datetime(int(decimal_year), 1, 1)
        + datetime.timedelta(days=(decimal_year % 1) * 365.25)
    )
    for decimal_year in decimal_years
]
CS2_dh = CS2_dh.assign_coords(time=cyc_dates)

# Describe the replaced 'time' coordinate
CS2_dh.coords['time'].attrs['description'] = 'Time for each node'

# Drop the temporaries that are no longer needed
del cyc_dates, decimal_years

CS2_dh

In [33]:
# Keep only the time slices that will be used, to conserve memory when the file is loaded later
# The window ends one quarter into the ICESat-2 era so cycle start/end accounting has overlap;
# use np.datetime64('2018-07-02T15:00:00.000000000') as the end instead for no temporal overlap
start_date = np.datetime64('2010-07-02T15:00:00.000000000')
end_date = np.datetime64('2018-10-01T22:30:00.000000000')

time_window = slice(start_date, end_date)
CS2_dh_subset = CS2_dh.sel(time=time_window)

CS2_dh_subset

In [None]:
CS2_dh_subset.to_netcdf(DATA_DIR + '/altimetry/CryoSat2/CS2_SARIn_Smith2017method/mos_2010.5_2019.0_relative_to_ATL14.nc')

# Analysis

In [57]:
# Import Smith and others, 2017, TC method CryoSat-2 SARIn height and dheight data (closed source acquired from Ben Smith)
# NOTE(review): this reads 'mos_2010.5_2021.5_relative_to_ATL14.nc', whereas the export cell in
# this notebook writes 'mos_2010.5_2019.0_relative_to_ATL14.nc' — confirm which file is intended
CS2_Smith2017 = xr.open_dataset(DATA_DIR + '/altimetry/CryoSat2/CS2_SARIn_Smith2017method/mos_2010.5_2021.5_relative_to_ATL14.nc')
CS2_Smith2017

In [None]:
# Find start, end, and mid-cycle dates of combined CryoSat-2 and ICESat-2 data
# Every mid-cycle date is stored as numpy.datetime64 so that CryoSat-2 and ICESat-2
# entries in midcyc_dates share one type

# Define datasets
dataset1 = CS2_dh
# The ICESat-2 time series is ATL15_dh: it is the dataset that keeps 'time' and
# 'delta_h' and carries the identifier_product_DOI attribute tested below
# (the ATL14 DEM keeps only x, y and h)
dataset2 = ATL15_dh

# Look the identifying attributes up once; .attrs.get returns None for a missing
# attribute instead of raising AttributeError
dataset1_doi = dataset1.attrs.get('identifier_product_DOI')
dataset1_title = dataset1.attrs.get('Title')
dataset2_doi = dataset2.attrs.get('identifier_product_DOI')

# Create empty lists to store data
cyc_start_dates = []
cyc_end_dates = []
midcyc_dates = []

# Use at most the first 33 CryoSat-2 cycles (presumably the pre-ICESat-2 era — TODO confirm)
# A cycle needs time steps idx and idx+1, so never iterate past the second-to-last step
n_cs2_cycles = min(33, len(dataset1.delta_h) - 1)
for idx in range(n_cs2_cycles):
    # Smith and others, 2017 method CryoSat-2 SARIn data
    if dataset1_doi == 'doi:10.5194/tc-11-451-2017':
        # NOTE(review): assumes dataset1.time still holds decimal years; if the time
        # coordinate was already converted to datetime64, this branch needs adapting
        start_year = dataset1.time.values[idx]
        end_year = dataset1.time.values[idx + 1]
        cyc_start_date = datetime.datetime(int(start_year), 1, 1) + datetime.timedelta(days=(start_year % 1) * 365.25)
        cyc_end_date = datetime.datetime(int(end_year), 1, 1) + datetime.timedelta(days=(end_year % 1) * 365.25)
    # Cryo-TEMPO-EOLIS Swath Thematic Gridded Product
    elif dataset1_title == 'Land Ice Elevation Thematic Gridded Product':
        # Time values are treated as seconds since 1970-01-01
        epoch = datetime.datetime(1970, 1, 1)
        cyc_start_date = epoch + datetime.timedelta(seconds=dataset1.time.values[idx].astype(float))
        cyc_end_date = epoch + datetime.timedelta(seconds=dataset1.time.values[idx + 1].astype(float))
    else:
        # Unrecognized dataset: contribute no cycles
        continue
    midcyc_date = cyc_start_date + (cyc_end_date - cyc_start_date) / 2
    cyc_start_dates += [cyc_start_date]
    cyc_end_dates += [cyc_end_date]
    midcyc_dates += [np.datetime64(midcyc_date)]  # Convert to numpy.datetime64 format

for idx in range(len(dataset2.delta_h) - 1):
    # ICESat-2 ATL15 r003 (time values are used as stored, without conversion)
    if dataset2_doi == '10.5067/ATLAS/ATL15.003':
        cyc_start_date = dataset2.time.values[idx]
        cyc_end_date = dataset2.time.values[idx + 1]
        midcyc_date = cyc_start_date + (cyc_end_date - cyc_start_date) / 2
        cyc_start_dates += [cyc_start_date]
        cyc_end_dates += [cyc_end_date]
        midcyc_dates += [midcyc_date]

midcyc_dates

In [None]:
import numpy as np
import datetime

# Assumes CS2_dh['time'] holds decimal-year values (e.g. 2010.5)
times = CS2_dh['time'].values

# Split each decimal year into its integer year and its fraction expressed in days
years = np.floor(times).astype(int)
fractional_years = (times % 1) * 365.25


def _year_and_days_to_datetime(year, days):
    """Return Jan 1 of `year` advanced by `days` days."""
    return datetime.datetime(year, 1, 1) + datetime.timedelta(days=days)


# One date per time step; a cycle's end date is the following time step's date
start_dates = [
    _year_and_days_to_datetime(year, days)
    for year, days in zip(years, fractional_years)
]
end_dates = [
    _year_and_days_to_datetime(year, days)
    for year, days in zip(years[1:], fractional_years[1:])
]

# Mid-cycle date is halfway between start and end; zip stops at the shorter list,
# so the last start date (which has no end date) is skipped
midcyc_dates = [
    cycle_start + (cycle_end - cycle_start) / 2
    for cycle_start, cycle_end in zip(start_dates, end_dates)
]
midcyc_dates_np_dt = np.array(midcyc_dates, dtype='datetime64')

# Final per-cycle outputs: starts exclude the last time step, which has no matching end
cyc_start_dates = start_dates[:-1]
cyc_end_dates = end_dates
midcyc_dates = midcyc_dates_np_dt