In [1]:
import gc
import shutil
import tempfile
from pathlib import Path
from getpass import getuser

import dask.array as dsa
import numpy as np
import pandas as pd
import pytest
import xarray as xr
from dask import delayed, persist
from dask.distributed import as_completed

import marEx
import marEx.helper as hpc

In [2]:
# Per-user Lustre scratch directory: /scratch/<first letter of user name>/<user name>
username = getuser()
scratch_dir = Path("/scratch", username[0], username)

In [3]:
# Start a local Dask cluster. The "clients" sub-folder of the scratch
# directory is passed as the temporary scratch directory for Dask to use.
client = hpc.start_local_cluster(
    n_workers=4,
    threads_per_worker=1,
    scratch_dir=scratch_dir / "clients",
    memory_limit="512MB",
)

Perhaps you already have a cluster running?
Hosting the HTTP server on port 43015 instead


Hostname: l20543
Forward Port: l20543:43015
Dashboard Link: localhost:43015/status


In [4]:
# NOTE(review): absolute, user-specific path — consider making it configurable.
test_data_path = "/home/b/b382615/opt/marEx/tests/data/sst_gridded.zarr"

# Open the zarr store, persist the dataset, then take the `to` variable
# restricted to the first 1400 time steps.
ds = xr.open_zarr(test_data_path, chunks={})
ds = ds.persist()
sst_data = ds["to"].isel(time=slice(0, 1400))

# Mapping from marEx's dimension roles to the names used in this gridded dataset
dimensions = dict(time="time", xdim="lon", ydim="lat")

sst_data

Unnamed: 0,Array,Chunk
Bytes,4.27 MiB,93.75 kiB
Shape,"(1400, 20, 40)","(30, 20, 40)"
Dask graph,47 chunks in 2 graph layers,47 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 4.27 MiB 93.75 kiB Shape (1400, 20, 40) (30, 20, 40) Dask graph 47 chunks in 2 graph layers Data type float32 numpy.ndarray",40  20  1400,

Unnamed: 0,Array,Chunk
Bytes,4.27 MiB,93.75 kiB
Shape,"(1400, 20, 40)","(30, 20, 40)"
Dask graph,47 chunks in 2 graph layers,47 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [5]:
# Work on a deep copy of the SST subset rather than on sst_data itself.
da_new = sst_data.copy(deep=True)
da_new  # last expression: display the copy


Unnamed: 0,Array,Chunk
Bytes,4.27 MiB,93.75 kiB
Shape,"(1400, 20, 40)","(30, 20, 40)"
Dask graph,47 chunks in 2 graph layers,47 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 4.27 MiB 93.75 kiB Shape (1400, 20, 40) (30, 20, 40) Dask graph 47 chunks in 2 graph layers Data type float32 numpy.ndarray",40  20  1400,

Unnamed: 0,Array,Chunk
Bytes,4.27 MiB,93.75 kiB
Shape,"(1400, 20, 40)","(30, 20, 40)"
Dask graph,47 chunks in 2 graph layers,47 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [6]:
# Rebuild the array with dimension names ('T', 'y', 'x') that differ from the
# names of the coordinates attached to them ('Time', 'latitude', 'longitude').
# `.values` hands the data and coordinate values over as NumPy arrays.
da_re = xr.DataArray(
    data=da_new.values,
    dims=("T", "y", "x"),
    coords=dict(
        Time=("T", da_new.time.values),
        latitude=("y", da_new.lat.values),
        longitude=("x", da_new.lon.values),
    ),
    attrs=da_new.attrs,
)
da_re

In [7]:
# Select index 10 along the 'T' dimension.
da_re.isel({"T": 10})

In [8]:
# 'Time' is the name of a coordinate, not of a dimension (the dimension is 'T'),
# so isel() rejects it. The error is the point of this cell: catch and show it
# so that "Restart Kernel -> Run All" does not stop here.
try:
    da_re.isel(Time=10)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: Dimensions {'Time'} do not exist. Expected one or more of ('T', 'y', 'x')

In [9]:
# Look up the 'Time' coordinate (attached to dimension 'T') by its name.
da_re["Time"]

In [10]:
# Look up by the dimension name 'T'; no coordinate called 'T' was attached
# when da_re was constructed (its coordinate on that dimension is 'Time').
da_re["T"]

In [None]:
# Run the marEx preprocessing on the SST subset, with settings scaled down
# for the small test dataset.
extremes_ds = marEx.preprocess_data(
    sst_data,
    method_anomaly="shifting_baseline",
    method_extreme="hobday_extreme",
    threshold_percentile=85,  # lower threshold for the test data
    window_year_baseline=1,  # reduced to fit the test data duration
    smooth_days_baseline=11,  # reduced smoothing window
    window_days_hobday=5,  # reduced hobday window
    dimensions=dimensions,
    dask_chunks={"time": 15},
)