**Import the data from ITS_LIVE**

In [3]:

import xarray as xr
import os
import sys
from dask.diagnostics import ProgressBar
import numpy as np
import pandas as pd

# Select Threshold quality (higher the threshold, the least data has to be downloaded)
threshold = 0

sdate = '2022-01-01'
edate = '2025-04-31'
cubes = ['http://its-live-data.s3.amazonaws.com/datacubes/v02/N60W140/ITS_LIVE_vel_EPSG3413_G0120_X-3250000_Y350000.zarr',
 'http://its-live-data.s3.amazonaws.com/datacubes/v02/N50W130/ITS_LIVE_vel_EPSG3413_G0120_X-3350000_Y250000.zarr',
 'http://its-live-data.s3.amazonaws.com/datacubes/v02/N60W130/ITS_LIVE_vel_EPSG3413_G0120_X-3250000_Y250000.zarr',
 'http://its-live-data.s3.amazonaws.com/datacubes/v02/N50W140/ITS_LIVE_vel_EPSG3413_G0120_X-3350000_Y350000.zarr']

xmin =-3340199.841439601
xmax =-3266124.863581888
ymin =273869.3657119706
ymax =364857.29614944384



# Create path to the files
pathsave = 'Datacubes/Subyearly/Alaska/'
os.makedirs(pathsave, exist_ok = True)
variables_to_keep = ['v', 'date_dt', 'acquisition_date_img1', 'acquisition_date_img2']

def load_datacubes(datacube_address, sdate, edate, xmax, xmin, ymax, ymin, variables_drop):

    print('Loading cube....')
    # Grab the time values
    t = xr.open_dataset(datacube_address, engine='zarr').mid_date.values
    # Load indices of slices above the quality threshold
    valid = xr.open_dataset(datacube_address, engine='zarr').roi_valid_percentage.values

    # Create a time mask, based on the validity of layers and the custom date-range
    t_mask = np.logical_and(valid>threshold, np.logical_and(t>np.datetime64(sdate), t<np.datetime64(edate)))
                    

    # Load datacube according to prerequisites (time, space and variables)
    ds = xr.open_zarr(datacube_address,
                chunks=({'mid_date': -1, 'y': 100, 'x': 100}),
                drop_variables=variables_drop
                ).sel(  mid_date = t_mask,
                        x=slice(xmin, xmax),
                        y=slice(ymax, ymin)).drop_duplicates(dim='mid_date')
    ds.attrs={}
    return ds

variables_to_keep += ['mid_date', 'x', 'y']

# List of variables to drop for the download (we drop everything but the variables written below)
variables_drop = [ele for ele in list(
        xr.open_dataset(cubes[0], engine='zarr').variables
        ) if ele not in variables_to_keep
]

datacubes = [load_datacubes(datacube_address, sdate, edate, xmax, xmin, ymax, ymin, variables_drop).sortby(['mid_date']) for datacube_address in cubes]


Loading cube....
Loading cube....
Loading cube....


    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


Loading cube....


**Define the inverse function**

In [4]:
#This function inputs a 1D tensor, aka a pixel from the 3D ITS_LIVE tensor. It returns a smaller 1D tensor
def invert_vel(pt):

    # get indices of non_nan values in the timeseries
    non_nan_idx = np.flatnonzero(~np.isnan(pt))

    # Calculate the length once to avoid repeated calculations
    non_nan_len = len(non_nan_idx)

    # Pickup the dates that don't have NaNs
    dt_start_non_nan = dt_start[non_nan_idx]
    dt_end_non_nan = dt_end[non_nan_idx]

    # Initialize matrix
    A = np.zeros((non_nan_len, dates.shape[0]))

    # Find the indices for dt_start and dt_end in the dates array
    start_indices = np.searchsorted(dates, dt_start_non_nan)
    end_indices = np.searchsorted(dates, dt_end_non_nan, side='right') - 1

    # Fill-in the design matrix 
    for j in range(non_nan_len):
        start = start_indices[j]
        end = end_indices[j]

        if end == A.shape[1] - 2:  # If the mid_date is at the end of the array (acquisition im2 equals last mid_date)
            A[j, start:] = 1 / (1 + A.shape[1] - start)
        else:  # If the measurement is in A's bounds temporally
            A[j, start:end + 1] = 1 / (1 + end - start)

    # --------------- INVERSION --------------- #

    # Calculate the weights and norms
    M = A.shape[1]

    # Simpson rule weights
    dg = np.ones(M) * 1 / 3
    dg[1:M - 1] += 1 / 3
    dg[1:M - 2:2] += 2 / 3
    W = np.sqrt(np.diag(dg))

    # Differencing matrix
    delta = np.diag(np.ones(M)) - np.diag(np.ones(M - 1), -1)
    delta[0, 0] = 0

    # Roughness norm
    R = W @ delta

    # Expression from differencing the minimizing functional with respect to v_i
    LHS = A.T @ A + 1 / lamb * R.T @ R

    # Retrieve velocities
    po, res = nnls(LHS, A.T @ pt[non_nan_idx])
    
    return po



In [5]:

import scipy.sparse as sp
import xarray as xr
import numpy as np
from scipy.optimize import nnls
import time
from datetime import timedelta, date

# Extract the important variables from the datacube and create the template time-interval on which the inverse function will be applied to
def build_output_arrays(dataset, day_interval):
    
    # Round all time arrays to days, because it doesn't make sense to have nanoseconds
    mid_date = dataset.mid_date.values
    
    # Create the date array with the new interval dates
    dates_nonum = np.arange(mid_date[0], mid_date[-1], timedelta(days=day_interval),dtype='datetime64[ns]')

    # Convert to numerical
    dates = pd.to_numeric(dates_nonum)
    dt_start = pd.to_numeric(dataset.acquisition_date_img1.values)
    dt_end = pd.to_numeric(dataset.acquisition_date_img2.values)

    
    return dt_start, dt_end, dates, dates_nonum

In [None]:
# List to store the inverted datacubes
inverted_cubes = []

# For each datacube:
for i in range(len(datacubes)):
    # Print the statement before each datacube computation
    print(f"Computing datacube {i+1} of {len(datacubes)}")
    
    # Grab the important variables and create the inverse timeframe
    dt_start, dt_end, dates, dates_nonum = build_output_arrays(datacubes[i], 5)
    lamb = 5
    
    # Apply the function using apply_ufunc
    output_data = xr.apply_ufunc(invert_vel, datacubes[i].v,
                                 vectorize=True,
                                 input_core_dims=[['mid_date']],
                                 output_core_dims=[['time']],
                                 dask='parallelized',
                                 output_dtypes=[float],
                                 dask_gufunc_kwargs={'output_sizes': {'time': len(dates)}})
    
    # Run in parallel for each chunk the inverse function
    with ProgressBar():
        cube = output_data.compute().assign_coords(
                time=dates_nonum, dims='time').transpose(
                'time', 'y', 'x').chunk(
                    {'time':-1, 'y':100,'x':100})
        
        # Store the results in the list
        inverted_cubes.append(cube.reindex(y=cube['y'], x=cube['x'], time=cube['time']))



Computing datacube 1 of 4
[########################################] | 100% Completed | 114m 28s
Computing datacube 2 of 4
[#                                       ] | 3% Completed | 3.23 s ms

  A[j, start:end + 1] = 1 / (1 + end - start)


[########################################] | 100% Completed | 50m 3ss
Computing datacube 3 of 4
[##                                      ] | 5% Completed | 8.34 s ms

  A[j, start:end + 1] = 1 / (1 + end - start)


[########################################] | 100% Completed | 31m 1ss
Computing datacube 4 of 4
[#############                           ] | 34% Completed | 13m 3sss

  A[j, start:end + 1] = 1 / (1 + end - start)


[##########################              ] | 65% Completed | 58m 42s

In [None]:
import functools
# Combine the data arrays using `combine_first()`
combined_data = functools.reduce(lambda a, b: a.combine_first(b), inverted_cubes).chunk({'time':-1, 'y':100,'x':100})

write_job = combined_data.to_netcdf("/home/jovyan/ITS-LIVE-Downloader-Tracker/test.nc",compute=False)
with ProgressBar():
    print(f"Writing to {pathsave}")
    write_job = write_job.compute()  
    
test = xr.open_dataset(f'/home/jovyan/ITS-LIVE-Downloader-Tracker/test_{threshold}.nc')
plt.figure()
plt.imshow(np.nanmean(test.v.values, axis = 0), origin='lower')