# Global Daily SST Analysis: Identifying Marine Extremes with `MarEx-Detect`

In [1]:
import xarray as xr
import numpy as np
import dask
import intake
from getpass import getuser
from pathlib import Path

import marEx
import marEx.helper as hpc

In [2]:
# Zarr store holding the unstructured-grid SST sample used in this notebook.
# NOTE(review): absolute, user-specific path -- adjust when running elsewhere.
test_data_path = "/home/b/b382615/opt/marEx/tests/data/sst_unstructured.zarr"

# Open the store as dask-backed arrays (chunks={} defers to the store's own
# chunking, per the xarray docs), then persist so the data is held in memory
# for the following cells.
ds = (
    xr.open_zarr(test_data_path, chunks={})
    .persist()
)

In [3]:
# Select the data variable named "to" from the dataset.
# NOTE(review): presumably the model's sea-temperature field -- confirm.
sst_data = ds["to"]
sst_data

Unnamed: 0,Array,Chunk
Bytes,54.34 MiB,125.00 kiB
Shape,"(14246, 1000)","(32, 1000)"
Dask graph,446 chunks in 1 graph layer,446 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 54.34 MiB 125.00 kiB Shape (14246, 1000) (32, 1000) Dask graph 446 chunks in 1 graph layer Data type float32 numpy.ndarray",1000  14246,

Unnamed: 0,Array,Chunk
Bytes,54.34 MiB,125.00 kiB
Shape,"(14246, 1000)","(32, 1000)"
Dask graph,446 chunks in 1 graph layer,446 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [4]:
# Chunk size along the time dimension, passed to preprocess_data below.
dask_chunks = {'time': 25}

# Dimension-name mapping passed to preprocess_data below.
dimensions = {
    'time': 'time',
    'xdim': 'ncells',  # Note: no 'ydim' indicates unstructured grid
}

# Number of grid cells: use the 'ncells' dimension, else 'cell', else 1000.
ncells = sst_data.sizes.get('ncells', sst_data.sizes.get('cell', 1000))

# Seeded generator so the mock connectivity is identical on every
# Restart & Run All (the unseeded global RNG gave different values each run).
MOCK_SEED = 0
mock_rng = np.random.default_rng(MOCK_SEED)

# Mock connectivity: 3 random neighbour indices in [0, ncells) for each cell.
mock_neighbours = xr.DataArray(
    mock_rng.integers(0, ncells, size=(3, ncells)),
    dims=['nv', 'ncells'],
)

# Mock cell areas in m²: every cell gets the same constant area.
mock_cell_areas = xr.DataArray(
    np.full(ncells, 1000.0),
    dims=['ncells'],
)

In [5]:
# Run the marEx preprocessing on the SST field, using the mock grid
# information built in the previous cell.
extremes_ds = marEx.preprocess_data(
    sst_data,
    # Anomaly / extreme-detection methods and threshold
    method_anomaly='shifting_baseline',
    method_extreme='hobday_extreme',
    threshold_percentile=95,
    # Window settings -- all three reduced for test data
    window_year_baseline=5,
    smooth_days_baseline=11,
    window_days_hobday=5,
    # Grid description and chunking
    dimensions=dimensions,
    dask_chunks=dask_chunks,
    neighbours=mock_neighbours,
    cell_areas=mock_cell_areas,
)

In [6]:
# Display the dataset returned by preprocess_data; the repr lists its dask-backed variables.
extremes_ds

Unnamed: 0,Array,Chunk
Bytes,54.34 MiB,97.66 kiB
Shape,"(14246, 1000)","(25, 1000)"
Dask graph,570 chunks in 2 graph layers,570 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 54.34 MiB 97.66 kiB Shape (14246, 1000) (25, 1000) Dask graph 570 chunks in 2 graph layers Data type float32 numpy.ndarray",1000  14246,

Unnamed: 0,Array,Chunk
Bytes,54.34 MiB,97.66 kiB
Shape,"(14246, 1000)","(25, 1000)"
Dask graph,570 chunks in 2 graph layers,570 chunks in 2 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,0.98 kiB,0.98 kiB
Shape,"(1000,)","(1000,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 0.98 kiB 0.98 kiB Shape (1000,) (1000,) Dask graph 1 chunks in 1 graph layer Data type bool numpy.ndarray",1000  1,

Unnamed: 0,Array,Chunk
Bytes,0.98 kiB,0.98 kiB
Shape,"(1000,)","(1000,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,13.59 MiB,24.41 kiB
Shape,"(14246, 1000)","(25, 1000)"
Dask graph,570 chunks in 1 graph layer,570 chunks in 1 graph layer
Data type,bool numpy.ndarray,bool numpy.ndarray
"Array Chunk Bytes 13.59 MiB 24.41 kiB Shape (14246, 1000) (25, 1000) Dask graph 570 chunks in 1 graph layer Data type bool numpy.ndarray",1000  14246,

Unnamed: 0,Array,Chunk
Bytes,13.59 MiB,24.41 kiB
Shape,"(14246, 1000)","(25, 1000)"
Dask graph,570 chunks in 1 graph layer,570 chunks in 1 graph layer
Data type,bool numpy.ndarray,bool numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,11.72 kiB,11.72 kiB
Shape,"(3, 1000)","(3, 1000)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,int32 numpy.ndarray,int32 numpy.ndarray
"Array Chunk Bytes 11.72 kiB 11.72 kiB Shape (3, 1000) (3, 1000) Dask graph 1 chunks in 1 graph layer Data type int32 numpy.ndarray",1000  3,

Unnamed: 0,Array,Chunk
Bytes,11.72 kiB,11.72 kiB
Shape,"(3, 1000)","(3, 1000)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.91 kiB,3.91 kiB
Shape,"(1000,)","(1000,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 3.91 kiB 3.91 kiB Shape (1000,) (1000,) Dask graph 1 chunks in 1 graph layer Data type float32 numpy.ndarray",1000  1,

Unnamed: 0,Array,Chunk
Bytes,3.91 kiB,3.91 kiB
Shape,"(1000,)","(1000,)"
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [7]:
# Ask dask to compute the lazy preprocessing graph now and keep the results
# as dask-backed arrays, rebinding the name to the persisted dataset.
extremes_ds = extremes_ds.persist()