### Compute and store filtered and demodulated velocity fields at each grid point

In [1]:
import numpy as np
import geopandas as gpd

import xarray as xr
from matplotlib import pyplot as plt
%matplotlib inline

from xhistogram.xarray import histogram
import dask.dataframe as dd
#import cartopy.crs as ccrs
#import cartopy.feature as cfeature

import mitequinox.utils as ut
from mitequinox.plot import *
import mitequinox.parcels as pa
from xmitgcm import llcreader

from scipy import signal
import scipy.ndimage as im

from sympy import Symbol, pi, atan, factor, lambdify


import mitequinox.plot as pl
import mitequinox.sigp as sp

from fsspec.implementations.local import LocalFileSystem

In [2]:
from dask.distributed import Client, LocalCluster
#
#cluster = LocalCluster()
#
from dask_jobqueue import PBSCluster
cluster = PBSCluster(processes=1, cores=1,walltime='04:00:00') #processes=7, cores=7
w = cluster.scale(jobs=10
                 )
client = Client(cluster)

  from distributed.utils import tmpfile


In [3]:
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: http://10.148.1.31:8787/status,

0,1
Dashboard: http://10.148.1.31:8787/status,Workers: 7
Total threads: 7,Total memory: 782.32 GiB

0,1
Comm: tcp://10.148.1.31:48453,Workers: 7
Dashboard: http://10.148.1.31:8787/status,Total threads: 7
Started: Just now,Total memory: 782.32 GiB

0,1
Comm: tcp://10.148.0.68:54379,Total threads: 1
Dashboard: http://10.148.0.68:42689/status,Memory: 111.76 GiB
Nanny: tcp://10.148.0.68:59535,
Local directory: /dev/shm/pbs.9153178.datarmor0/dask-worker-space/worker-buwn9i8s,Local directory: /dev/shm/pbs.9153178.datarmor0/dask-worker-space/worker-buwn9i8s

0,1
Comm: tcp://10.148.0.67:52263,Total threads: 1
Dashboard: http://10.148.0.67:56161/status,Memory: 111.76 GiB
Nanny: tcp://10.148.0.67:40656,
Local directory: /dev/shm/pbs.9153172.datarmor0/dask-worker-space/worker-ktr7jheq,Local directory: /dev/shm/pbs.9153172.datarmor0/dask-worker-space/worker-ktr7jheq

0,1
Comm: tcp://10.148.0.64:45708,Total threads: 1
Dashboard: http://10.148.0.64:50072/status,Memory: 111.76 GiB
Nanny: tcp://10.148.0.64:41778,
Local directory: /dev/shm/pbs.9153176.datarmor0/dask-worker-space/worker-7whg3pxd,Local directory: /dev/shm/pbs.9153176.datarmor0/dask-worker-space/worker-7whg3pxd

0,1
Comm: tcp://10.148.1.30:33374,Total threads: 1
Dashboard: http://10.148.1.30:44044/status,Memory: 111.76 GiB
Nanny: tcp://10.148.1.30:58120,
Local directory: /dev/shm/pbs.9153181.datarmor0/dask-worker-space/worker-leqxfvq5,Local directory: /dev/shm/pbs.9153181.datarmor0/dask-worker-space/worker-leqxfvq5

0,1
Comm: tcp://10.148.1.24:40298,Total threads: 1
Dashboard: http://10.148.1.24:53515/status,Memory: 111.76 GiB
Nanny: tcp://10.148.1.24:33431,
Local directory: /dev/shm/pbs.9153180.datarmor0/dask-worker-space/worker-dfim71jn,Local directory: /dev/shm/pbs.9153180.datarmor0/dask-worker-space/worker-dfim71jn

0,1
Comm: tcp://10.148.0.66:41583,Total threads: 1
Dashboard: http://10.148.0.66:51112/status,Memory: 111.76 GiB
Nanny: tcp://10.148.0.66:54388,
Local directory: /dev/shm/pbs.9153177.datarmor0/dask-worker-space/worker-v9iq6bew,Local directory: /dev/shm/pbs.9153177.datarmor0/dask-worker-space/worker-v9iq6bew

0,1
Comm: tcp://10.148.0.60:54454,Total threads: 1
Dashboard: http://10.148.0.60:46611/status,Memory: 111.76 GiB
Nanny: tcp://10.148.0.60:41321,
Local directory: /dev/shm/pbs.9153174.datarmor0/dask-worker-space/worker-o9bs2wdg,Local directory: /dev/shm/pbs.9153174.datarmor0/dask-worker-space/worker-o9bs2wdg


In [4]:
def convolve(x, h=None, hilbert=False):
    """ Convolve an input signal with a kernel
    Optionaly compute the Hilbert transform of the resulting time series
    
    Parameters
    x : input signal
    h : filter 
    hilbert : True for Hilbert transform to be applied to the filtered signal
    
    Returns
    x_f : filtered signal or hilbert transform of the filtered signal
    """
    x_f = signal.filtfilt(h, [1], x, axis=-1,padlen=0)#
    if hilbert:
        return signal.hilbert(x_f)
    else:
        return x_f

def filt(v, h, hilbert=False):
    
    output_dtype = complex if hilbert else float
    gufunc_kwargs = dict(output_sizes={'time': len(v.time)})
    return xr.apply_ufunc(convolve, v, kwargs={'h': h, 'hilbert': hilbert},
                    dask='parallelized', output_dtypes=[output_dtype],
                    input_core_dims=[['time']],
                    output_core_dims=[['time']],
                    dask_gufunc_kwargs = gufunc_kwargs,
                         )

##### First step : Filter and demodulate for each face

In [5]:
# Load Eulerian fields
ds =  xr.open_zarr(ut.work_data_dir+'rechunked/SSU_rot.zarr',consolidated=False)
grd = ut.load_grd(consolidated=False)[['XC', 'YC', 'Depth']]#.persist()

In [5]:
#isel=dict(face=12) #face
#V = ['SSV_rot']#'zonal_velocity','meridional_velocity'
path = '/home1/datawork/zcaspar/mit4320/filtered_itide/'
dsel = 200

In [6]:
dt = 1/24 # time step in days

tidal_omega = sp.get_tidal_frequencies("M2", "K2","S2","N2")
omega_M2,omega_S2,omega_N2,omega_K2, domega, name = tidal_omega["M2"],tidal_omega["S2"],tidal_omega["N2"],tidal_omega["K2"], .2, "semidiurnal"
omega = (omega_M2+omega_S2)/2#center frequency
Tw = 30 #filter length
dband = 0.2 # half-bandwidth
V = ['SSU_rot']

In [8]:
#dt = 1. # in hours
#T = 20
#omega = 1/30
#h = signal.firwin(T*24, cutoff=[omega], pass_zero=True, nyq=1./2/dt, scale=True)
#V = 'SSU_rot'

In [7]:
import dask
def wrap_filter(_ds,dt=dt):
    h = sp.generate_filter(om, T=Tw, dt=dt, bandwidth=dband, normalized_bandwidth=None)
    time = np.arange(0,_ds['time'].values.size*dt,dt)
    exp = np.exp(-1j*om*2*np.pi*time)
#    _ds = ut._reset_chunk_encoding(_ds)
    _ds = _ds.chunk(dict(time=-1,i=100,j=100))
    ds_hat={}
    for v in V:
        ds_hat[v+'_hat'] = filt(_ds[v], h,hilbert=False)#.persist()
        ds_hat[v+'_hat_real'] = ds_hat[v+'_hat'].real
        ds_hat[v+'_hat_imag'] = ds_hat[v+'_hat'].imag
#        ds_hat[v+'_hat'] = ds_hat[v+'_hat'].assign_coords({'lon':ds_hat[v+'_hat'].lon,'lat':ds_hat[v+'_hat'].lat})
#        ds_hat[v+'_demodulated'] = ds_hat[v+'_hat']*exp
#        ds_hat[v+'_demodulated_real'] = ds_hat[v+'_demodulated'].real
#        ds_hat[v+'_demodulated_imag'] = ds_hat[v+'_demodulated'].imag
    ds_hat = xr.merge([ds_hat[v].rename(v) for v in ds_hat.keys()])#.drop([v+'_hat',v+'_demodulated'])
#    ds_hat = ds_hat.chunk(dict(i=100)) #,j=4320//4
    ds_hat = ut._reset_chunk_encoding(ds_hat)
    return ds_hat#.drop(['XC','YC'])

def wrap_filter_low(_ds,dt=dt):
    h = signal.firwin(T*24, cutoff=[omega], pass_zero=True, nyq=1./2/dt, scale=True)
    ds_hat = filt(_ds[v], h,hilbert=False)#.persist()
    ds_hat = ds_hat.rename(v+'_hat').to_dataset()
#    ds_hat = ds_hat.chunk(dict(i=4320//4,j=4320//4))
#    ds_hat = ut._reset_chunk_encoding(ds_hat)
    return ds_hat

In [24]:
def wrap_demodulation(ds_hat,dt=dt):
#    time = np.arange(0,ds_hat['time'].values.size*dt,dt)
    exp = np.exp(-1j*om*2*np.pi*time)
#    _ds = ut._reset_chunk_encoding(_ds)
#    _ds = _ds.chunk(dict(time=16,i=100,j=100))
#    ds_hat={}
    for v in V:
#        ds_hat[v+'_hat'] = ds_hat[v+'_hat'].assign_coords({'lon':ds_hat[v+'_hat'].lon,'lat':ds_hat[v+'_hat'].lat})
        ds_hat[v+'_demodulated'] = ds_hat[v+'_hat']*exp
        ds_hat[v+'_demodulated_real'] = ds_hat[v+'_demodulated'].real
        ds_hat[v+'_demodulated_imag'] = ds_hat[v+'_demodulated'].imag
    ds_hat = xr.merge([ds_hat[v].rename(v) for v in ds_hat.keys()]).drop([v+'_demodulated',v+'_hat'])
 #   ds_hat = ds_hat.chunk(dict(i=100)) #,j=4320//4
    
    return ds_hat#.drop(['XC','YC'])

In [31]:
#Select and rechunk (empirical) original dataset
isel = dict(j=slice(2000,3000))#j=slice(0,4320//10))
dsp = ds.isel(**isel)#.isel(i=slice(4,None,4),j=slice(4, None,4))#.isel(face=0)#.sel(**sel)#.persist()
dsp = dsp.chunk({"time": -1,'i':100})#.persist()#,'i':100,'j':70
dsp

Unnamed: 0,Array,Chunk
Bytes,214.23 MiB,328.12 kiB
Shape,"(13, 1000, 4320)","(1, 840, 100)"
Count,2679 Tasks,1144 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 214.23 MiB 328.12 kiB Shape (13, 1000, 4320) (1, 840, 100) Count 2679 Tasks 1144 Chunks Type float32 numpy.ndarray",4320  1000  13,

Unnamed: 0,Array,Chunk
Bytes,214.23 MiB,328.12 kiB
Shape,"(13, 1000, 4320)","(1, 840, 100)"
Count,2679 Tasks,1144 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,214.23 MiB,328.12 kiB
Shape,"(13, 1000, 4320)","(1, 840, 100)"
Count,2679 Tasks,1144 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 214.23 MiB 328.12 kiB Shape (13, 1000, 4320) (1, 840, 100) Count 2679 Tasks 1144 Chunks Type float32 numpy.ndarray",4320  1000  13,

Unnamed: 0,Array,Chunk
Bytes,214.23 MiB,328.12 kiB
Shape,"(13, 1000, 4320)","(1, 840, 100)"
Count,2679 Tasks,1144 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,214.23 MiB,328.12 kiB
Shape,"(13, 1000, 4320)","(1, 840, 100)"
Count,2679 Tasks,1144 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 214.23 MiB 328.12 kiB Shape (13, 1000, 4320) (1, 840, 100) Count 2679 Tasks 1144 Chunks Type float32 numpy.ndarray",4320  1000  13,

Unnamed: 0,Array,Chunk
Bytes,214.23 MiB,328.12 kiB
Shape,"(13, 1000, 4320)","(1, 840, 100)"
Count,2679 Tasks,1144 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.77 TiB,632.81 MiB
Shape,"(8640, 13, 1000, 4320)","(8640, 1, 192, 100)"
Count,139621 Tasks,3432 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.77 TiB 632.81 MiB Shape (8640, 13, 1000, 4320) (8640, 1, 192, 100) Count 139621 Tasks 3432 Chunks Type float32 numpy.ndarray",8640  1  4320  1000  13,

Unnamed: 0,Array,Chunk
Bytes,1.77 TiB,632.81 MiB
Shape,"(8640, 13, 1000, 4320)","(8640, 1, 192, 100)"
Count,139621 Tasks,3432 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,214.23 MiB,328.12 kiB
Shape,"(13, 1000, 4320)","(1, 840, 100)"
Count,2679 Tasks,1144 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 214.23 MiB 328.12 kiB Shape (13, 1000, 4320) (1, 840, 100) Count 2679 Tasks 1144 Chunks Type float32 numpy.ndarray",4320  1000  13,

Unnamed: 0,Array,Chunk
Bytes,214.23 MiB,328.12 kiB
Shape,"(13, 1000, 4320)","(1, 840, 100)"
Count,2679 Tasks,1144 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,214.23 MiB,328.12 kiB
Shape,"(13, 1000, 4320)","(1, 840, 100)"
Count,2679 Tasks,1144 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 214.23 MiB 328.12 kiB Shape (13, 1000, 4320) (1, 840, 100) Count 2679 Tasks 1144 Chunks Type float32 numpy.ndarray",4320  1000  13,

Unnamed: 0,Array,Chunk
Bytes,214.23 MiB,328.12 kiB
Shape,"(13, 1000, 4320)","(1, 840, 100)"
Count,2679 Tasks,1144 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,214.23 MiB,328.12 kiB
Shape,"(13, 1000, 4320)","(1, 840, 100)"
Count,2679 Tasks,1144 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 214.23 MiB 328.12 kiB Shape (13, 1000, 4320) (1, 840, 100) Count 2679 Tasks 1144 Chunks Type float32 numpy.ndarray",4320  1000  13,

Unnamed: 0,Array,Chunk
Bytes,214.23 MiB,328.12 kiB
Shape,"(13, 1000, 4320)","(1, 840, 100)"
Count,2679 Tasks,1144 Chunks
Type,float32,numpy.ndarray


In [25]:
dsp =  xr.open_zarr(ut.work_data_dir+'filtered_itide/SSU_filtered_2000.zarr',consolidated=False).isel(face=slice(1,None))
dsp

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,659.18 MiB
Shape,"(13, 1000, 4320, 8640)","(1, 100, 100, 8640)"
Count,11881 Tasks,5720 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.53 TiB 659.18 MiB Shape (13, 1000, 4320, 8640) (1, 100, 100, 8640) Count 11881 Tasks 5720 Chunks Type float64 numpy.ndarray",13  1  8640  4320  1000,

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,659.18 MiB
Shape,"(13, 1000, 4320, 8640)","(1, 100, 100, 8640)"
Count,11881 Tasks,5720 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,659.18 MiB
Shape,"(13, 1000, 4320, 8640)","(1, 100, 100, 8640)"
Count,11881 Tasks,5720 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.53 TiB 659.18 MiB Shape (13, 1000, 4320, 8640) (1, 100, 100, 8640) Count 11881 Tasks 5720 Chunks Type float64 numpy.ndarray",13  1  8640  4320  1000,

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,659.18 MiB
Shape,"(13, 1000, 4320, 8640)","(1, 100, 100, 8640)"
Count,11881 Tasks,5720 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,659.18 MiB
Shape,"(13, 1000, 4320, 8640)","(1, 100, 100, 8640)"
Count,11881 Tasks,5720 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.53 TiB 659.18 MiB Shape (13, 1000, 4320, 8640) (1, 100, 100, 8640) Count 11881 Tasks 5720 Chunks Type float64 numpy.ndarray",13  1  8640  4320  1000,

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,659.18 MiB
Shape,"(13, 1000, 4320, 8640)","(1, 100, 100, 8640)"
Count,11881 Tasks,5720 Chunks
Type,float64,numpy.ndarray


In [26]:
time = np.arange(0,dsp['time'].values.size*dt,dt)

In [23]:
dsp.isel(face=0)[V[0]+'_hat']

Unnamed: 0,Array,Chunk
Bytes,278.09 GiB,659.18 MiB
Shape,"(1000, 4320, 8640)","(100, 100, 8640)"
Count,12321 Tasks,440 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 278.09 GiB 659.18 MiB Shape (1000, 4320, 8640) (100, 100, 8640) Count 12321 Tasks 440 Chunks Type float64 numpy.ndarray",8640  4320  1000,

Unnamed: 0,Array,Chunk
Bytes,278.09 GiB,659.18 MiB
Shape,"(1000, 4320, 8640)","(100, 100, 8640)"
Count,12321 Tasks,440 Chunks
Type,float64,numpy.ndarray


In [27]:
om,Tw,dband,V = omega,Tw,dband,V #define parameters needed in wrap_filter

In [28]:
# Apply wrap_filter on the dataset and store the result for each face (seperated)
import os
#zarr_main = os.path.join(ut.root_data_dir, "filtered_itide/SSU_filtered_face4.zarr")
with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ds_out, zarr = ut.custom_distribute(dsp, 
                                        wrap_demodulation,
                                        overwrite=True,
                                        suffix="SSU_demodulated_2000.zarr",
                                        tmp_dir=os.path.join(ut.root_data_dir, "filtered_itide"),
                                        append=True,
                                        face=1
#                                        j=200
                                       )

13it [1:14:05, 341.96s/it]


In [38]:
ds_out

Unnamed: 0,Array,Chunk
Bytes,2.35 TiB,26.37 MiB
Shape,"(4320, 4320, 8640)","(100, 1080, 16)"
Count,95041 Tasks,95040 Chunks
Type,complex128,numpy.ndarray
"Array Chunk Bytes 2.35 TiB 26.37 MiB Shape (4320, 4320, 8640) (100, 1080, 16) Count 95041 Tasks 95040 Chunks Type complex128 numpy.ndarray",8640  4320  4320,

Unnamed: 0,Array,Chunk
Bytes,2.35 TiB,26.37 MiB
Shape,"(4320, 4320, 8640)","(100, 1080, 16)"
Count,95041 Tasks,95040 Chunks
Type,complex128,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,2.35 TiB,26.37 MiB
Shape,"(4320, 4320, 8640)","(100, 1080, 16)"
Count,95041 Tasks,95040 Chunks
Type,complex128,numpy.ndarray
"Array Chunk Bytes 2.35 TiB 26.37 MiB Shape (4320, 4320, 8640) (100, 1080, 16) Count 95041 Tasks 95040 Chunks Type complex128 numpy.ndarray",8640  4320  4320,

Unnamed: 0,Array,Chunk
Bytes,2.35 TiB,26.37 MiB
Shape,"(4320, 4320, 8640)","(100, 1080, 16)"
Count,95041 Tasks,95040 Chunks
Type,complex128,numpy.ndarray


In [29]:
ds_out

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,659.18 MiB
Shape,"(13, 1000, 4320, 8640)","(1, 100, 100, 8640)"
Count,5721 Tasks,5720 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.53 TiB 659.18 MiB Shape (13, 1000, 4320, 8640) (1, 100, 100, 8640) Count 5721 Tasks 5720 Chunks Type float64 numpy.ndarray",13  1  8640  4320  1000,

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,659.18 MiB
Shape,"(13, 1000, 4320, 8640)","(1, 100, 100, 8640)"
Count,5721 Tasks,5720 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,659.18 MiB
Shape,"(13, 1000, 4320, 8640)","(1, 100, 100, 8640)"
Count,5721 Tasks,5720 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.53 TiB 659.18 MiB Shape (13, 1000, 4320, 8640) (1, 100, 100, 8640) Count 5721 Tasks 5720 Chunks Type float64 numpy.ndarray",13  1  8640  4320  1000,

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,659.18 MiB
Shape,"(13, 1000, 4320, 8640)","(1, 100, 100, 8640)"
Count,5721 Tasks,5720 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,659.18 MiB
Shape,"(13, 1000, 4320, 8640)","(1, 100, 100, 8640)"
Count,5721 Tasks,5720 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.53 TiB 659.18 MiB Shape (13, 1000, 4320, 8640) (1, 100, 100, 8640) Count 5721 Tasks 5720 Chunks Type float64 numpy.ndarray",13  1  8640  4320  1000,

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,659.18 MiB
Shape,"(13, 1000, 4320, 8640)","(1, 100, 100, 8640)"
Count,5721 Tasks,5720 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,659.18 MiB
Shape,"(13, 1000, 4320, 8640)","(1, 100, 100, 8640)"
Count,5721 Tasks,5720 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.53 TiB 659.18 MiB Shape (13, 1000, 4320, 8640) (1, 100, 100, 8640) Count 5721 Tasks 5720 Chunks Type float64 numpy.ndarray",13  1  8640  4320  1000,

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,659.18 MiB
Shape,"(13, 1000, 4320, 8640)","(1, 100, 100, 8640)"
Count,5721 Tasks,5720 Chunks
Type,float64,numpy.ndarray


In [30]:
#client.restart()
cluster.close()

In [24]:
ds_hat = xr.open_zarr(ut.work_data_dir+'filtered_itide/SSU_filtered.zarr',consolidated=False)

In [25]:
ds_hat['SSU_rot_hat'] = ds_hat['SSU_rot_hat_real']+1j*ds_hat['SSU_rot_hat_imag']

In [26]:
ds_hat

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,1.24 GiB
Shape,"(13, 1000, 4320, 8640)","(1, 192, 100, 8640)"
Count,3433 Tasks,3432 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.53 TiB 1.24 GiB Shape (13, 1000, 4320, 8640) (1, 192, 100, 8640) Count 3433 Tasks 3432 Chunks Type float64 numpy.ndarray",13  1  8640  4320  1000,

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,1.24 GiB
Shape,"(13, 1000, 4320, 8640)","(1, 192, 100, 8640)"
Count,3433 Tasks,3432 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,1.24 GiB
Shape,"(13, 1000, 4320, 8640)","(1, 192, 100, 8640)"
Count,3433 Tasks,3432 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 3.53 TiB 1.24 GiB Shape (13, 1000, 4320, 8640) (1, 192, 100, 8640) Count 3433 Tasks 3432 Chunks Type float64 numpy.ndarray",13  1  8640  4320  1000,

Unnamed: 0,Array,Chunk
Bytes,3.53 TiB,1.24 GiB
Shape,"(13, 1000, 4320, 8640)","(1, 192, 100, 8640)"
Count,3433 Tasks,3432 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.06 TiB,2.47 GiB
Shape,"(13, 1000, 4320, 8640)","(1, 192, 100, 8640)"
Count,13730 Tasks,3432 Chunks
Type,complex128,numpy.ndarray
"Array Chunk Bytes 7.06 TiB 2.47 GiB Shape (13, 1000, 4320, 8640) (1, 192, 100, 8640) Count 13730 Tasks 3432 Chunks Type complex128 numpy.ndarray",13  1  8640  4320  1000,

Unnamed: 0,Array,Chunk
Bytes,7.06 TiB,2.47 GiB
Shape,"(13, 1000, 4320, 8640)","(1, 192, 100, 8640)"
Count,13730 Tasks,3432 Chunks
Type,complex128,numpy.ndarray


In [27]:
import os
#zarr_main = os.path.join(ut.root_data_dir, "filtered_itide/SSU_filtered_face4.zarr")
with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ds_out, zarr = ut.custom_distribute(ds_hat, 
                                        wrap_demodulation,
                                        overwrite=True,
                                        suffix="SSU_demodulated.zarr",
                                        tmp_dir=os.path.join(ut.root_data_dir, "filtered_itide"),
                                        append=True,
#                                        face=1
                                        j=200
                                       )

0it [00:00, ?it/s]


KeyError: 'SSU_rot_hat'

##### Second step : Concat filtered and demodulated fields for each face

In [2]:
from dask.distributed import Client, LocalCluster
#
#cluster = LocalCluster()

#
from dask_jobqueue import PBSCluster
cluster = PBSCluster(processes=2, cores=2, walltime='04:00:00') #processes=7, cores=7 ,walltime='03:00:00'
w = cluster.scale(jobs=13
                 )
client = Client(cluster)

  from distributed.utils import tmpfile


In [3]:
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.PBSCluster
Dashboard: http://10.148.0.246:8787/status,

0,1
Dashboard: http://10.148.0.246:8787/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://10.148.0.246:43985,Workers: 0
Dashboard: http://10.148.0.246:8787/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [3]:
ds0 = xr.open_zarr(ut.work_data_dir+'filtered_itide/SSU_filtered_0.zarr').sel(face=0)
ds1 = xr.open_zarr(ut.work_data_dir+'filtered_itide/SSU_filtered_1.zarr').sel(face=0)

In [7]:
xr.combine_by_coords([ds0,ds1])

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the

ValueError: Resulting object does not have monotonic global indexes along dimension i

In [None]:
import dask
#with dask.config.set(**{'array.slicing.split_large_chunks': False}):
ds = xr.merge([ds0,ds1])

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': Fa

In [None]:
ds

In [27]:
from dask.distributed import wait
path = '/home/datawork-lops-osi/equinox/mit4320/filtered_itide/'
ds = wait(xr.merge([xr.open_zarr(ut.work_data_dir+'filtered_itide/SSU_filtered_%s.zarr'%i) for i in np.arange(0,4)],compat='no_conflicts'))
#ds = xr.concat([xr.open_zarr(ut.work_data_dir+'filtered_itide/SSV_filtered_face%s.zarr'%i
#                           ) for i in np.arange(11,12)],dim='face')#.persist()
#ds = xr.concat([xr.open_zarr(ut.work_data_dir+'filtered_itide/SSV_filtered_%s.zarr'%i
#                           ) for i in np.arange(0,5)],dim='face')
#ds = xr.concat([xr.open_zarr(ut.work_data_dir+'filtered_itide/SSU_filtered_%s.zarr'%i
#                           ) for i in ['00','01']],dim='face')

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]
  result = blockwise(
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]
  result = blockwise(
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  value = value[(slice(None),) * axis + (subkey,)]
  result = blockwise(
    

Traceback (most recent call last):
  File "/home1/datahome/zcaspar/miniconda3/envs/croco/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3457, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/dev/shm/pbs.8931328.datarmor0/ipykernel_7470/147414552.py", line 3, in <module>
    ds = wait(xr.merge([xr.open_zarr(ut.work_data_dir+'filtered_itide/SSU_filtered_%s.zarr'%i) for i in np.arange(0,4)],compat='no_conflicts'))
  File "/home1/datahome/zcaspar/miniconda3/envs/croco/lib/python3.8/site-packages/xarray/core/merge.py", line 905, in merge
    merge_result = merge_core(
  File "/home1/datahome/zcaspar/miniconda3/envs/croco/lib/python3.8/site-packages/xarray/core/merge.py", line 640, in merge_core
    variables, out_indexes = merge_collected(
  File "/home1/datahome/zcaspar/miniconda3/envs/croco/lib/python3.8/site-packages/xarray/core/merge.py", line 242, in merge_collected
    merged_vars[name] = unique_variable(name, variables, compat)
  File "/

TypeError: object of type 'NoneType' has no len()

In [22]:
def concat(ds):
#    _ds = xr.concat([xr.open_zarr(ut.work_data_dir+'filtered_itide/SSV_filtered_%s.zarr'%i
#                           ) for i in np.arange(0,5)],dim='face')
#    _ds = ds#xr.concat(ds,dim='face')
#    l = [ds]
#    l.append(xr.open_zarr(ut.work_data_dir+'filtered_itide/SSU_filtered_%s.zarr'%i) for i in np.arange(1,4))
#    _ds = xr.merge([xr.open_zarr(ut.work_data_dir+'filtered_itide/SSU_filtered_%s.zarr'%i) for i in np.arange(0,4)],compat='no_conflicts')#
    ds['SSU_rot_demodulated_real'] = ds['SSU_rot_demodulated'].real
    ds['SSU_rot_demodulated_imag'] = ds['SSU_rot_demodulated'].imag
    ds['SSU_rot_hat_real'] = ds['SSU_rot_hat'].real
    ds['SSU_rot_hat_imag'] = ds['SSU_rot_hat'].imag
    _ds = ds.drop(['SSU_rot_demodulated','SSU_rot_hat'])
#    _ds = ds.chunk({'i':-1,'j':-1})
#    _ds = ds.chunk(dict(time=16,i=4320//4,j=4320//4)) #,j=4320//4
    _ds = ut._reset_chunk_encoding(_ds)
    return _ds

In [5]:
def merge(ds):
#    _ds = ds.chunk({'i':-1,'j':-1})
    _ds = ds.chunk(dict(time=16,i=4320//4,j=4320//4)) #,j=4320//4
    _ds = ut._reset_chunk_encoding(_ds)
    return _ds

In [6]:
ds0

Unnamed: 0,Array,Chunk
Bytes,1.91 TiB,911.25 MiB
Shape,"(13, 1080, 1080, 8640)","(1, 48, 144, 8640)"
Count,2393 Tasks,2392 Chunks
Type,complex128,numpy.ndarray
"Array Chunk Bytes 1.91 TiB 911.25 MiB Shape (13, 1080, 1080, 8640) (1, 48, 144, 8640) Count 2393 Tasks 2392 Chunks Type complex128 numpy.ndarray",13  1  8640  1080  1080,

Unnamed: 0,Array,Chunk
Bytes,1.91 TiB,911.25 MiB
Shape,"(13, 1080, 1080, 8640)","(1, 48, 144, 8640)"
Count,2393 Tasks,2392 Chunks
Type,complex128,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.91 TiB,911.25 MiB
Shape,"(13, 1080, 1080, 8640)","(1, 48, 144, 8640)"
Count,2393 Tasks,2392 Chunks
Type,complex128,numpy.ndarray
"Array Chunk Bytes 1.91 TiB 911.25 MiB Shape (13, 1080, 1080, 8640) (1, 48, 144, 8640) Count 2393 Tasks 2392 Chunks Type complex128 numpy.ndarray",13  1  8640  1080  1080,

Unnamed: 0,Array,Chunk
Bytes,1.91 TiB,911.25 MiB
Shape,"(13, 1080, 1080, 8640)","(1, 48, 144, 8640)"
Count,2393 Tasks,2392 Chunks
Type,complex128,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.95 TiB,455.62 MiB
Shape,"(13, 1080, 1080, 8640)","(1, 48, 144, 8640)"
Count,2393 Tasks,2392 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.95 TiB 455.62 MiB Shape (13, 1080, 1080, 8640) (1, 48, 144, 8640) Count 2393 Tasks 2392 Chunks Type float64 numpy.ndarray",13  1  8640  1080  1080,

Unnamed: 0,Array,Chunk
Bytes,0.95 TiB,455.62 MiB
Shape,"(13, 1080, 1080, 8640)","(1, 48, 144, 8640)"
Count,2393 Tasks,2392 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.95 TiB,455.62 MiB
Shape,"(13, 1080, 1080, 8640)","(1, 48, 144, 8640)"
Count,2393 Tasks,2392 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 0.95 TiB 455.62 MiB Shape (13, 1080, 1080, 8640) (1, 48, 144, 8640) Count 2393 Tasks 2392 Chunks Type float64 numpy.ndarray",13  1  8640  1080  1080,

Unnamed: 0,Array,Chunk
Bytes,0.95 TiB,455.62 MiB
Shape,"(13, 1080, 1080, 8640)","(1, 48, 144, 8640)"
Count,2393 Tasks,2392 Chunks
Type,float64,numpy.ndarray


In [9]:
#Apply concat function on ds
face = 1
import os, dask
with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ds_out, zarr = ut.custom_distribute(ds1,#xr.merge([ds0,ds1]), 
                                        merge,
                                        overwrite=True,
                                        suffix="SSU_filtered_1.zarr",
                                        tmp_dir=os.path.join(ut.root_data_dir, "filtered_itide"),
                                        append=True,
                                        face=face
#                                        j=20
                                       )

13it [43:45, 201.93s/it]


In [7]:
ds_out

Unnamed: 0,Array,Chunk
Bytes,7.04 TiB,18.46 MiB
Shape,"(3, 4320, 4320, 8640)","(1, 70, 1080, 16)"
Count,401761 Tasks,401760 Chunks
Type,complex128,numpy.ndarray
"Array Chunk Bytes 7.04 TiB 18.46 MiB Shape (3, 4320, 4320, 8640) (1, 70, 1080, 16) Count 401761 Tasks 401760 Chunks Type complex128 numpy.ndarray",3  1  8640  4320  4320,

Unnamed: 0,Array,Chunk
Bytes,7.04 TiB,18.46 MiB
Shape,"(3, 4320, 4320, 8640)","(1, 70, 1080, 16)"
Count,401761 Tasks,401760 Chunks
Type,complex128,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,7.04 TiB,18.46 MiB
Shape,"(3, 4320, 4320, 8640)","(1, 70, 1080, 16)"
Count,401761 Tasks,401760 Chunks
Type,complex128,numpy.ndarray
"Array Chunk Bytes 7.04 TiB 18.46 MiB Shape (3, 4320, 4320, 8640) (1, 70, 1080, 16) Count 401761 Tasks 401760 Chunks Type complex128 numpy.ndarray",3  1  8640  4320  4320,

Unnamed: 0,Array,Chunk
Bytes,7.04 TiB,18.46 MiB
Shape,"(3, 4320, 4320, 8640)","(1, 70, 1080, 16)"
Count,401761 Tasks,401760 Chunks
Type,complex128,numpy.ndarray


In [28]:
#client.restart()
cluster.close()

distributed.client - ERROR - Failed to reconnect to scheduler after 10.00 seconds, closing client
_GatheringFuture exception was never retrieved
future: <_GatheringFuture finished exception=CancelledError()>
asyncio.exceptions.CancelledError
