In [1]:
import os
from glob import glob
import numpy as np
import dask
import xarray as xr
import xgcm
from xgcm.autogenerate import generate_grid_ds
from cmocean import cm

from matplotlib import pyplot as plt
%matplotlib inline

from mitequinox.utils import *

In [2]:
if True:
    from dask.distributed import Client, LocalCluster
    from dask_jobqueue import PBSCluster
    #cluster = PBSCluster()
    cluster = PBSCluster(cores=6, processes=12) # necessary?
    w = cluster.scale(jobs=6)
    # 3 needed for lagrangian lon/lat binning
    # 15 for eulerian binning
    client = Client(cluster)
    client
else:
    from dask.distributed import Client, LocalCluster
    #
    cluster = LocalCluster()
    client = Client(cluster)
    client

In [4]:
client

0,1
Client  Scheduler: tcp://10.148.0.35:39492  Dashboard: http://10.148.0.35:8787/status,Cluster  Workers: 14  Cores: 14  Memory: 223.58 GiB


________________
### Read first: 
In this notebook, some basic information (e.g., bathymetry of the model domain, record-mean/max/min mixed layer depth, snapshots of u,v,w,b) are given in section 2. 

Section 3 displays some estimates based on the model output such as Rossby number, lateral buoyancy gradient, PV, Frontogenesis function. 

Raw outputs and 1-day low-pass filtered data are used in Section 3.

_________
# 1. Read dataset



In [5]:
out_dir_zarr = '/home1/datawork/xyu/OSMOSIS_llc4320/data_rechunk/'

grid = xr.open_dataset(out_dir_zarr+'Grid.nc', decode_coords=True)
ds_U = xr.open_zarr(out_dir_zarr+'U_total_rechunk.zarr')
ds_V = xr.open_zarr(out_dir_zarr+'V_total_rechunk.zarr')
ds_W = xr.open_zarr(out_dir_zarr+'W_total_rechunk.zarr')
ds_B = xr.open_zarr(out_dir_zarr+'Buoy_total_rechunk.zarr')
ds_MLD = xr.open_zarr(out_dir_zarr+'mld_filter.zarr')
ds_Eta = xr.open_zarr(out_dir_zarr+'Surface_variable/Eta_total_rechunk.zarr')

ds = xr.merge([ds_U,ds_V,ds_W,ds_B,ds_MLD,ds_Eta,grid])
ds = generate_grid_ds(ds, {'Z':'depth'})

# define (real) time
def iters_to_date(iters, delta_t=3600.):
    t0 = datetime(2011,9,13,0)    
    ltime = delta_t * (np.array(iters))
    dtime = [t0+dateutil.relativedelta.relativedelta(seconds=t) for t in ltime]    
    return dtime

time_day = iters_to_date(np.arange(9415))

ds = ds.assign_coords(time=time_day) 
print(ds)
print('\n data size: %.1f GB' %(ds.nbytes / 1e9))

<xarray.Dataset>
Dimensions:     (depth: 52, depth_left: 52, lat: 201, lat_g: 201, lon: 177, lon_g: 177, time: 9415)
Coordinates: (12/22)
  * lat         (lat) float64 47.4 47.42 47.43 47.44 ... 49.95 49.96 49.98 49.99
  * lon_g       (lon_g) float64 -18.04 -18.02 -18.0 ... -14.42 -14.4 -14.38
  * depth       (depth) float64 0.5 1.57 2.79 4.185 ... 855.8 900.1 945.6 992.3
  * time        (time) datetime64[ns] 2011-09-13 ... 2012-10-09T06:00:00
  * lat_g       (lat_g) float64 47.4 47.41 47.42 47.44 ... 49.96 49.97 49.98
  * lon         (lon) float64 -18.03 -18.01 -17.99 ... -14.41 -14.39 -14.36
    ...          ...
    dyG         (lat, lon_g) float32 ...
    rA          (lat, lon) float32 ...
    rAz         (lat_g, lon_g) float32 ...
    rAw         (lat, lon_g) float32 ...
    rAs         (lat_g, lon) float32 ...
  * depth_left  (depth_left) float64 -0.035 1.035 2.18 ... 878.0 922.9 968.9
Data variables:
    U           (time, depth, lat, lon_g) float32 dask.array<chunksize=(9415, 1,

____
## U

In [27]:
ds_sel = ds.U.isel(lat=slice(86,110), lon_g=slice(80,100))
ds_sel

Unnamed: 0,Array,Chunk
Bytes,896.45 MiB,735.55 kiB
Shape,"(9415, 52, 24, 20)","(9415, 1, 1, 20)"
Count,11701 Tasks,1248 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 896.45 MiB 735.55 kiB Shape (9415, 52, 24, 20) (9415, 1, 1, 20) Count 11701 Tasks 1248 Chunks Type float32 numpy.ndarray",9415  1  20  24  52,

Unnamed: 0,Array,Chunk
Bytes,896.45 MiB,735.55 kiB
Shape,"(9415, 52, 24, 20)","(9415, 1, 1, 20)"
Count,11701 Tasks,1248 Chunks
Type,float32,numpy.ndarray


In [30]:
ds_sel = np.real(ds_U_sel).rename('U')
ds_sel = ds_sel.to_dataset()
print(ds_sel)
print('\n data size: %.1f GB' %(ds_sel.nbytes / 1e9))

# save
file_out = out_dir_zarr+'/U_subdomain.zarr'
%time ds_sel.to_zarr(file_out, mode='w',safe_chunks=False)   

<xarray.Dataset>
Dimensions:  (depth: 52, lat: 24, lon_g: 20, time: 9415)
Coordinates:
  * lat      (lat) float64 48.53 48.54 48.56 48.57 ... 48.79 48.8 48.81 48.83
  * lon_g    (lon_g) float64 -16.38 -16.35 -16.33 -16.31 ... -16.02 -16.0 -15.98
  * depth    (depth) float64 0.5 1.57 2.79 4.185 ... 855.8 900.1 945.6 992.3
  * time     (time) datetime64[ns] 2011-09-13 ... 2012-10-09T06:00:00
    dxC      (lat, lon_g) float32 dask.array<chunksize=(1, 1), meta=np.ndarray>
    dyG      (lat, lon_g) float32 dask.array<chunksize=(1, 1), meta=np.ndarray>
    rAw      (lat, lon_g) float32 dask.array<chunksize=(1, 1), meta=np.ndarray>
Data variables:
    U        (time, depth, lat, lon_g) float32 dask.array<chunksize=(9415, 52, 1, 1), meta=np.ndarray>

 data size: 0.9 GB
CPU times: user 15.1 s, sys: 704 ms, total: 15.8 s
Wall time: 1min 59s


<xarray.backends.zarr.ZarrStore at 0x2aabdc92c8e0>

In [33]:
ds_U_sub = xr.open_zarr(out_dir_zarr+'/U_subdomain.zarr')
ds_U_sub

Unnamed: 0,Array,Chunk
Bytes,1.88 kiB,4 B
Shape,"(24, 20)","(1, 1)"
Count,481 Tasks,480 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.88 kiB 4 B Shape (24, 20) (1, 1) Count 481 Tasks 480 Chunks Type float32 numpy.ndarray",20  24,

Unnamed: 0,Array,Chunk
Bytes,1.88 kiB,4 B
Shape,"(24, 20)","(1, 1)"
Count,481 Tasks,480 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.88 kiB,4 B
Shape,"(24, 20)","(1, 1)"
Count,481 Tasks,480 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.88 kiB 4 B Shape (24, 20) (1, 1) Count 481 Tasks 480 Chunks Type float32 numpy.ndarray",20  24,

Unnamed: 0,Array,Chunk
Bytes,1.88 kiB,4 B
Shape,"(24, 20)","(1, 1)"
Count,481 Tasks,480 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.88 kiB,4 B
Shape,"(24, 20)","(1, 1)"
Count,481 Tasks,480 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.88 kiB 4 B Shape (24, 20) (1, 1) Count 481 Tasks 480 Chunks Type float32 numpy.ndarray",20  24,

Unnamed: 0,Array,Chunk
Bytes,1.88 kiB,4 B
Shape,"(24, 20)","(1, 1)"
Count,481 Tasks,480 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,896.45 MiB,735.55 kiB
Shape,"(9415, 52, 24, 20)","(9415, 1, 1, 20)"
Count,1249 Tasks,1248 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 896.45 MiB 735.55 kiB Shape (9415, 52, 24, 20) (9415, 1, 1, 20) Count 1249 Tasks 1248 Chunks Type float32 numpy.ndarray",9415  1  20  24  52,

Unnamed: 0,Array,Chunk
Bytes,896.45 MiB,735.55 kiB
Shape,"(9415, 52, 24, 20)","(9415, 1, 1, 20)"
Count,1249 Tasks,1248 Chunks
Type,float32,numpy.ndarray


In [34]:
file_out = out_dir_zarr+'/U_subdomain.nc'
%time ds_U_sub.to_netcdf(file_out, mode='w')

KeyboardInterrupt: 

distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/home1/datahome/xyu/.miniconda3/envs/equinox/lib/python3.8/site-packages/distributed/core.py", line 497, in handle_comm
    result = handler(comm, **msg)
  File "/home1/datahome/xyu/.miniconda3/envs/equinox/lib/python3.8/site-packages/distributed/scheduler.py", line 3647, in heartbeat_worker
    ws._executing = {
  File "/home1/datahome/xyu/.miniconda3/envs/equinox/lib/python3.8/site-packages/distributed/scheduler.py", line 3648, in <dictcomp>
    parent._tasks[key]: duration for key, duration in executing.items()
KeyError: "('store-8e01d04e-e8f1-11ed-803e-0cc47a3f7877', 6, 12)"
distributed.core - ERROR - Exception while handling op heartbeat_worker
Traceback (most recent call last):
  File "/home1/datahome/xyu/.miniconda3/envs/equinox/lib/python3.8/site-packages/distributed/core.py", line 497, in handle_comm
    result = handler(comm, **msg)
  File "/home1/datahome/xyu/.m

In [37]:
cluster.close()