# Generate Test Data

In [1]:
import xarray as xr
import numpy as np
import dask
import intake
from getpass import getuser
from pathlib import Path

import marEx
import marEx.helper as hpc

In [2]:
# Lustre scratch directory and the output directory for the generated test data.
# Both follow the '<root>/<first letter of user>/<user>' layout.
username = getuser()
scratch_dir = Path('/scratch') / username[0] / username
save_dir = Path('/home') / username[0] / username / 'opt' / 'marEx' / 'tests' / 'data'

In [3]:
# Start Dask Cluster
# The 'clients' folder on scratch is the temporary directory handed to dask.
client = hpc.start_local_cluster(
    n_workers=32,
    threads_per_worker=1,
    scratch_dir=scratch_dir / 'clients',
)

Dask Scratch: '/scratch/b/b382615/clients/tmpydvm_1d5'
Memory per Worker: 15.74 GB
Hostname: l40201
Forward Port: l40201:8787
Dashboard Link: localhost:8787/status


# Raw SST Data

In [4]:
# Open the EERIE intake catalogue from its YAML definition on GitHub
catalog_url = "https://raw.githubusercontent.com/eerie-project/intake_catalogues/main/eerie.yaml"
cat = intake.open_catalog(catalog_url)

In [5]:
# Import 40 years of OSTIA Observations
dat_regrid = cat['dkrz.disk.observations.OSTIA.OSTIA.daily']

# Lazily open the 'sst' variable, cast to float32, average over 5x5 lat/lon
# blocks, and rename the result to 'to'.
sst_regrid = (
    dat_regrid(chunks={})
    .to_dask()
    .sst
    .astype(np.float32)
    .coarsen(lat=5, lon=5)
    .mean()
    .rename('to')
)
sst_regrid

Unnamed: 0,Array,Chunk
Bytes,57.01 GiB,3.30 MiB
Shape,"(14761, 720, 1440)","(10, 240, 360)"
Dask graph,17724 chunks in 6 graph layers,17724 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 57.01 GiB 3.30 MiB Shape (14761, 720, 1440) (10, 240, 360) Dask graph 17724 chunks in 6 graph layers Data type float32 numpy.ndarray",1440  720  14761,

Unnamed: 0,Array,Chunk
Bytes,57.01 GiB,3.30 MiB
Shape,"(14761, 720, 1440)","(10, 240, 360)"
Dask graph,17724 chunks in 6 graph layers,17724 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [6]:
# Import daily ICON data (ref. EERIE project); the 40-year window is selected further down
dat_native = cat['dkrz.disk.model-output.icon-esm-er.eerie-control-1950.v20240618']['ocean']['native']

# Take the first depth level of 'to', drop the auxiliary variables, and
# rechunk to 32 time steps per chunk.
sst_native = (
    dat_native['2d_daily_mean'](chunks={})
    .to_dask()
    .to
    .isel(depth=0)
    .drop_vars({'depth', 'cell_sea_land_mask'})
    .chunk({'time': 32})
)
sst_native

Unnamed: 0,Array,Chunk
Bytes,0.99 TiB,1.77 GiB
Shape,"(18262, 14886338)","(32, 14886338)"
Dask graph,571 chunks in 4 graph layers,571 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 0.99 TiB 1.77 GiB Shape (18262, 14886338) (32, 14886338) Dask graph 571 chunks in 4 graph layers Data type float32 numpy.ndarray",14886338  18262,

Unnamed: 0,Array,Chunk
Bytes,0.99 TiB,1.77 GiB
Shape,"(18262, 14886338)","(32, 14886338)"
Dask graph,571 chunks in 4 graph layers,571 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,113.57 MiB,113.57 MiB
Shape,"(14886338,)","(14886338,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 113.57 MiB 113.57 MiB Shape (14886338,) (14886338,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",14886338  1,

Unnamed: 0,Array,Chunk
Bytes,113.57 MiB,113.57 MiB
Shape,"(14886338,)","(14886338,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,113.57 MiB,113.57 MiB
Shape,"(14886338,)","(14886338,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 113.57 MiB 113.57 MiB Shape (14886338,) (14886338,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",14886338  1,

Unnamed: 0,Array,Chunk
Bytes,113.57 MiB,113.57 MiB
Shape,"(14886338,)","(14886338,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [7]:
# Define subset
# Time windows (label-based) for each dataset
time_slice_ostia = slice('1982-01-01', '2022-01-01')
time_slice_icon = slice('1991-01-01', '2031-01-01')

# Spatial selections: a lat/lon box for the gridded data,
# and a positional range of cells for the native (unstructured) data
regrid_slice_ostia = {'lat': slice(35, 40), 'lon': slice(-40, -30)}
native_slice_icon = slice(0, 1000)

In [8]:
# Cut the OSTIA data down to the chosen time window and lat/lon box,
# then rechunk: 30 time steps per chunk, lat/lon each in a single chunk.
ostia_subset = sst_regrid.sel(time=time_slice_ostia, **regrid_slice_ostia)
sst_regrid_slice = ostia_subset.chunk({'time': 30, 'lat': -1, 'lon': -1})
sst_regrid_slice

Unnamed: 0,Array,Chunk
Bytes,44.59 MiB,93.75 kiB
Shape,"(14611, 20, 40)","(30, 20, 40)"
Dask graph,488 chunks in 8 graph layers,488 chunks in 8 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 44.59 MiB 93.75 kiB Shape (14611, 20, 40) (30, 20, 40) Dask graph 488 chunks in 8 graph layers Data type float32 numpy.ndarray",40  20  14611,

Unnamed: 0,Array,Chunk
Bytes,44.59 MiB,93.75 kiB
Shape,"(14611, 20, 40)","(30, 20, 40)"
Dask graph,488 chunks in 8 graph layers,488 chunks in 8 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [9]:
# Cut the ICON data down to the chosen time window and cell range,
# then drop the 'lat'/'lon' variables.
icon_window = sst_native.sel(time=time_slice_icon)
icon_cells = icon_window.isel(ncells=native_slice_icon)
sst_native_slice = icon_cells.drop_vars({'lat', 'lon'})
sst_native_slice

Unnamed: 0,Array,Chunk
Bytes,55.74 MiB,125.00 kiB
Shape,"(14611, 1000)","(32, 1000)"
Dask graph,457 chunks in 6 graph layers,457 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 55.74 MiB 125.00 kiB Shape (14611, 1000) (32, 1000) Dask graph 457 chunks in 6 graph layers Data type float32 numpy.ndarray",1000  14611,

Unnamed: 0,Array,Chunk
Bytes,55.74 MiB,125.00 kiB
Shape,"(14611, 1000)","(32, 1000)"
Dask graph,457 chunks in 6 graph layers,457 chunks in 6 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [10]:
# Save to Zarr
# mode='w' overwrites any store already present at the target path.
gridded_store = save_dir / 'sst_gridded.zarr'
unstructured_store = save_dir / 'sst_unstructured.zarr'

sst_regrid_slice.to_zarr(gridded_store, mode='w')
sst_native_slice.to_zarr(unstructured_store, mode='w')

<xarray.backends.zarr.ZarrStore at 0x154eb0314b80>

# Pre-processed Data

In [4]:
# Open the pre-processed extremes store from scratch, keep only the
# selected time window, and drop the variables not needed here.
time_slice = slice('2010-01-01', '2010-02-01')
extremes_store = scratch_dir / 'mhws' / 'extremes_binary_gridded_shifting_hobday.zarr'
extremes_ds = (
    xr.open_zarr(extremes_store, chunks={})
    .sel(time=time_slice)
    .drop_vars({'thresholds', 'dat_anomaly', 'dayofyear'})
)
extremes_ds

Unnamed: 0,Array,Chunk
Bytes,31.64 MiB,22.74 MiB
Shape,"(32, 720, 1440)","(23, 720, 1440)"
Dask graph,2 chunks in 3 graph layers,2 chunks in 3 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 31.64 MiB 22.74 MiB Shape (32, 720, 1440) (23, 720, 1440) Dask graph 2 chunks in 3 graph layers Data type bool numpy.ndarray",1440  720  32,

Unnamed: 0,Array,Chunk
Bytes,31.64 MiB,22.74 MiB
Shape,"(32, 720, 1440)","(23, 720, 1440)"
Dask graph,2 chunks in 3 graph layers,2 chunks in 3 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,0.99 MiB
Shape,"(720, 1440)","(720, 1440)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 0.99 MiB 0.99 MiB Shape (720, 1440) (720, 1440) Dask graph 1 chunks in 2 graph layers Data type bool numpy.ndarray",1440  720,

Unnamed: 0,Array,Chunk
Bytes,0.99 MiB,0.99 MiB
Shape,"(720, 1440)","(720, 1440)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,bool numpy.ndarray,bool numpy.ndarray


In [5]:
# Coarsen extremes_ds (lat & lon)
# Each 4x4 block collapses with any(); edge cells that do not fill a block are
# trimmed. The result is rechunked (2 time steps per chunk) and persisted.
extremes_ds_coarsen = (
    extremes_ds
    .coarsen(lat=4, lon=4, boundary='trim')
    .any()
    .chunk({'time': 2, 'lat': -1, 'lon': -1})
    .persist()
)
extremes_ds_coarsen

Unnamed: 0,Array,Chunk
Bytes,1.98 MiB,126.56 kiB
Shape,"(32, 180, 360)","(2, 180, 360)"
Dask graph,16 chunks in 1 graph layer,16 chunks in 1 graph layer
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 1.98 MiB 126.56 kiB Shape (32, 180, 360) (2, 180, 360) Dask graph 16 chunks in 1 graph layer Data type bool numpy.ndarray",360  180  32,

Unnamed: 0,Array,Chunk
Bytes,1.98 MiB,126.56 kiB
Shape,"(32, 180, 360)","(2, 180, 360)"
Dask graph,16 chunks in 1 graph layer,16 chunks in 1 graph layer
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,63.28 kiB,63.28 kiB
Shape,"(180, 360)","(180, 360)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 63.28 kiB 63.28 kiB Shape (180, 360) (180, 360) Dask graph 1 chunks in 1 graph layer Data type bool numpy.ndarray",360  180,

Unnamed: 0,Array,Chunk
Bytes,63.28 kiB,63.28 kiB
Shape,"(180, 360)","(180, 360)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,bool numpy.ndarray,bool numpy.ndarray


In [6]:
# Clear encoding so we can write to Zarr
# Reset the dataset-level encoding first, then each data variable's encoding.
extremes_ds_coarsen.encoding = {}
for var_name in extremes_ds_coarsen.data_vars:
    extremes_ds_coarsen[var_name].encoding = {}

In [7]:
# Write the coarsened extremes to the test-data directory (overwriting any existing store)
extremes_target = save_dir / 'extremes_gridded.zarr'
extremes_ds_coarsen.to_zarr(extremes_target, mode='w')

<xarray.backends.zarr.ZarrStore at 0x15507dbc92d0>