# Pre-Process global _daily_ SST using `hot_to_blOb` to extract binary features

## Steps:
1. Compute Normalised Detrended Anomaly (cf. `hot_to_blOb.py::compute_normalised_anomaly()`)
2. Identify Extreme Values (i.e. values above the 95th percentile)

N.B.: Exploits parallelised `Dask` operations with optimised chunking using `flox` \
N.N.B.: This example, which uses 40 years of daily output at 0.25° resolution, takes ~4 minutes on 128 cores

In [None]:
import xarray as xr
import dask
import intake
from getpass import getuser
from pathlib import Path

import spot_the_blOb.hot_to_blOb as hot
import spot_the_blOb.helper as hpc

In [None]:
# Spin up the local Dask cluster: 64 workers x 2 threads each
client = hpc.StartLocalCluster(
    n_workers=64,
    n_threads=2,
)

In [None]:
# Open the EERIE intake catalogue and select 40 years of daily ICON output
cat = intake.open_catalog(
    "https://raw.githubusercontent.com/eerie-project/intake_catalogues/main/eerie.yaml"
)

# Keys used to walk down the catalogue hierarchy (listed in lookup order)
model = 'icon-esm-er'
expid = 'eerie-control-1950'
version = 'v20231106'
gridspec = 'gr025'

# One nested lookup per hierarchy level, ending at the ocean entries on the chosen grid
dat = (
    cat['dkrz.disk.model-output']
    [model]
    [expid]
    [version]
    ['ocean']
    [gridspec]
)

In [None]:
# Load the data directly into optimal chunks (the source is opened twice)

def _open_surface_to(chunks):
    """Open `2d_daily_mean` as a Dask-backed dataset and return `to` at depth index 0.

    The `depth` variable is dropped after the selection.
    """
    ds = dat['2d_daily_mean'](chunks=chunks).to_dask()
    return ds.to.isel(depth=0).drop_vars('depth')


# Pass 1: open with `chunks={}` only to derive the chunking.
# `chunks[0]` holds the chunk sizes along the first dimension (presumably time -- TODO confirm).
da_predictor = _open_surface_to({})
time_chunk = hot.rechunk_for_cohorts(da_predictor).chunks[0]

# Pass 2: re-open with that chunking requested for the 'time' dimension at load time
sst = _open_surface_to({'time': time_chunk})

In [None]:
# Run the `hot_to_blOb` preprocessing helper on the SST field

extreme_events_ds = hot.preprocess_data(
    sst,
    # Detrend with Linear + Quadratic polynomial (+ mean & seasonal & subseasonal harmonics)
    detrend_orders=[1, 2],
    # Don't re-normalise based on a 30-day rolling STD
    std_normalise=False,
    # Percentile threshold used to identify extremes
    threshold_percentile=95,
)
# Bare expression so the notebook renders the resulting dataset
extreme_events_ds

In [None]:
# Write the result as a `zarr` store for more efficient parallel I/O

# Target: /scratch/<first letter of user name>/<user name>/mhws/extreme_events_binary.zarr
file_name = Path('/scratch', getuser()[0], getuser(), 'mhws', 'extreme_events_binary.zarr')
# mode='w' overwrites any existing store at this path
extreme_events_ds.to_zarr(file_name, mode='w')

## Test the De-trended Data

In [None]:
from spot_the_blOb.plotter import *

In [None]:
# Re-open the store written earlier in this notebook and pull out the detrended field.
# NOTE: the file name must match the one passed to `to_zarr`
# ('extreme_events_binary.zarr'); any other name points at a store this notebook never writes.
file_name = Path('/scratch') / getuser()[0] / getuser() / 'mhws' / 'extreme_events_binary.zarr'
# `chunks={}` keeps the arrays Dask-backed using the chunking stored on disk.
# Assumes the saved dataset contains a `dat_detrend` variable -- TODO confirm against `preprocess_data`.
dat_detrend = xr.open_zarr(file_name, chunks={}).dat_detrend

In [None]:
# Sanity check: the time-mean of the detrended field should be identically 0
# (`.plotter` is presumably the accessor provided by `spot_the_blOb.plotter` -- TODO confirm)
fig, ax = (
    dat_detrend
    .mean(dim='time')
    .plotter.pplot(var_units='Mean Anomaly', cmap='RdBu_r')
)

In [None]:
# Sanity check: the spatially-averaged series should show no trend (i.e. stay at 0)
dat_detrend.mean(dim=['lat', 'lon']).plot()