# Full domain QDM analysis: preprocessing

This notebook is for preprocessing everything for the full spatial domain quantile delta mapping analysis. This consists of running the QDM bias adjustment algorithm for the full spatial domain of the 4km WRF-downscaled ERA5 data, for multiple values of the following parameters:
* `window`: size of grouping window in days
* `adapt_freq_thresh`: frequency adaptation threshold, i.e. the threshold used for "frequency adaptation" as described by [Themeßl et al. 2012](https://doi.org/10.1007/s10584-011-0224-4)
* `nquantiles`: number of quantiles to use in the adjustment

All other parameters will be held fixed, with default values for `window`, `adapt_freq_thresh`, and `nquantiles` being 31, 0.254 mm/day, and 50, respectively. 

In [None]:
# config cell
import warnings
from pathlib import Path
import cftime
import dask
import numpy as np
import pandas as pd
import seaborn as sns
import xarray as xr
import matplotlib.pyplot as plt
from pyproj import Proj
from xclim import units, sdba, indices
from dask.distributed import Client
from dask_jobqueue import SLURMCluster
import hvplot.xarray
import panel as pn
import shutil
from scipy.stats import cramervonmises_2samp

pn.extension(comms="vscode")
# baeda is under active development, so it is reloaded every time this cell runs.
# The reload has to happen BEFORE the `from baeda import ...` below: names pulled
# out with `from ... import` are bound to whatever objects the module held at
# that moment and are not refreshed by a later reload().
from importlib import reload

import baeda

reload(baeda)

from baeda import (
    tmp_window_fn,
    tmp_adapt_freq_fn,
    tmp_nquantiles_fn,
    window_sizes,
    adapt_freq_threhsolds,
    n_quantiles_list,
)


# NOTE(review): presumably the boundary between historical and scenario data;
# it is not referenced anywhere else in this notebook - confirm it is still needed
cutoff_time = cftime.DatetimeNoLeap(2015, 1, 1, 0, 0, 0, 0, 0)

# input zarr stores (CMIP6 model data and the ERA5 reference)
zarr_dir = Path("/center1/CMIP6/kmredilla/cmip6_4km_downscaling/cmip6_zarr")
era5_dir = Path("/center1/CMIP6/kmredilla/cmip6_4km_downscaling/era5_zarr")

# tmp dir for writing downscaled data
tmp_dir = Path("/center1/CMIP6/kmredilla/downscaling/eda")

# era5 precip indicators file
era5_idx_fp = tmp_dir / "pridx_era5.zarr"


# default QDM parameter values, keyed by variable id so that
# more variables can be added later
default_params = dict(
    pr=dict(
        window=31,
        adapt_freq_thresh="0.254 mm d-1",
        nquantiles=50,
        jitter_under_thresh_value="0.01 mm d-1",
        kind="*",
    ),
    tasmax=dict(
        window=31,
        nquantiles=50,
        kind="+",
    ),
    dtr=dict(
        window=31,
        nquantiles=50,
        jitter_under_thresh_value="1e-4 K",
        kind="*",
    ),
)


# human-readable indicator names, keyed by variable id then indicator id
index_name_lu = dict(
    pr=dict(
        rx1day="Max 1-day precip",
        rx5day="Max 5-day precip",
        cdd="Consecutive dry Days",
        cwd="Consecutive wet days",
    )
)

# Processing

This section is for running the downscaling and computing indicators. It saves the output to a temporary directory and so need not be run if outputs for the visualization section already exist. 

In [2]:
# alternative without a job scheduler:
# client = Client(n_workers=12, threads_per_worker=2)

# SLURM-backed dask cluster, for interactive nb testing
cluster = SLURMCluster(
    # scheduler / accounting
    account="cmip6",
    queue="t2small",  # alternative: queue="debug"
    walltime="12:00:00",  # alternative: walltime="01:00:00"
    # resources per job
    cores=28,
    processes=14,  # alternative: n_workers=14
    memory="128GB",
    # networking and logs
    interface="ib0",
    log_directory="/beegfs/CMIP6/kmredilla/tmp/dask_jobqueue_logs",
)
client = Client(cluster)

In [3]:
# big cluster for fast processing!
# NOTE(review): presumably `n` counts dask workers, so with processes=14 per job
# (set where the cluster is created) this requests about 10 SLURM jobs - confirm
cluster.scale(n=140)

# lil cluster for testing
# cluster.scale(n=28)

In [4]:
# connect to ERA5 data: one zarr store per variable id, all under era5_dir
era5_stores = dict(
    t2max=era5_dir / "t2max_era5.zarr",
    pr=era5_dir / "pr_era5.zarr",
)

era5_ds = baeda.open_era5_dataset(era5_stores)

In [None]:
# run ERA5 indicators
def run_era5_indicators(var_id="pr"):
    """Compute the ERA5 indicators, (re)write them to ``era5_idx_fp`` and return them.

    Any existing store at ``era5_idx_fp`` is removed before writing. The result
    is re-opened from the freshly written store so the caller gets the same
    kind of object as when an existing store is reused.

    Args:
        var_id: variable in ``era5_ds`` to compute the indicators from.

    Returns:
        The indicators dataset opened from ``era5_idx_fp``.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # not able to silence large graph threshold warnings
        idx = baeda.run_indicators(era5_ds[var_id], indices=["rx1day", "dpi"])
        shutil.rmtree(era5_idx_fp, ignore_errors=True)
        idx.to_zarr(era5_idx_fp)

    return xr.open_zarr(era5_idx_fp)


var_id = "pr"
# era5_idx must be bound on every branch: the Cramer-von Mises cells need it
if era5_idx_fp.exists():
    if (
        input(f"Delete existing ERA5 indicators data store ({era5_idx_fp})? (y/n)")
        == "y"
    ):
        era5_idx = run_era5_indicators(var_id)
    else:
        print(f"Using existing ERA5 indicators data store ({era5_idx_fp})")
        era5_idx = xr.open_zarr(era5_idx_fp)
else:
    era5_idx = run_era5_indicators(var_id)

Using existing ERA5 indicators data store (/center1/CMIP6/kmredilla/downscaling/eda/pridx_era5.zarr)


Train and adjust for each of the window sizes in `window_sizes`:

In [None]:
# window profiling funcs
def run_window_adjustments(
    model, scenario, var_id, zarr_dir, era5_ds, tmp_dir, window_sizes, no_clobber=True
):
    """Train QDM and adjust the historical data once per grouping-window size.

    For every window size, a QuantileDeltaMapping is trained against the ERA5
    reference, applied to the historical series, loaded into memory and written
    to a zarr store under ``tmp_dir`` (named with ``tmp_window_fn``, scenario
    fixed to "historical"). All other training parameters come from
    ``default_params[var_id]``.

    Args:
        model: model name, used for data extraction and in the output file name.
        scenario: scenario passed to ``baeda.extract_time_series_from_zarr``.
        var_id: variable id to adjust.
        zarr_dir: directory holding the model zarr stores.
        era5_ds: dataset holding the reference data for ``var_id``.
        tmp_dir: directory the adjusted stores are written to.
        window_sizes: iterable of window sizes to train with.
        no_clobber: if truthy, window sizes whose output store already exists
            are skipped; otherwise the existing store is deleted and rewritten.
    """
    print(
        "Running historical adjustments for different window sizes for model: ", model
    )
    # only the historical series is adjusted here; the second return value is unused
    hist, _ = baeda.extract_time_series_from_zarr(
        zarr_dir, model, scenario, var_id, coords=None
    )

    # QDM: train the adjustment
    # rechunking to allow for more workers
    chunk_kwargs = {"time": -1, "x": 10, "y": 10}
    hist_chunked = hist.chunk(**chunk_kwargs)
    train_kwargs = dict(
        ref=era5_ds[var_id].chunk(**chunk_kwargs),
        # think having experiment coordinate may quietly prevent
        # adjustment of data with different coordinates (e.g. ssp's)
        hist=hist_chunked.isel(Method=0, experiment=0).drop_vars(
            ["Method", "experiment"]
        ),
        group="time.dayofyear",
        **default_params[var_id],
    )

    for window in window_sizes:
        out_fp = tmp_dir.joinpath(
            tmp_window_fn.format(
                qm_window=window,
                var_id=var_id,
                model=model,
                scenario="historical",
            )
        )
        # skipping if exists for now
        if out_fp.exists() and no_clobber:
            print("Skipping existing zarr store: ", out_fp)
            continue

        # override the default window with the value under test
        train_kwargs.update(window=window)
        print(f"Training with window: {window}")

        qdm_train = sdba.QuantileDeltaMapping.train(**train_kwargs)

        hist_adj = (
            qdm_train.adjust(
                hist_chunked,
                extrapolation="constant",
                interp="nearest",
            )
            .isel(Method=0, drop=True)
            # tag the result with the window size as a length-1 leading dimension
            .assign_coords(window=window)
            .expand_dims("window")
            .transpose("window", "experiment", "time", "y", "x")
        )
        hist_adj.name = var_id

        # compute
        hist_adj = hist_adj.load()

        # write (only reached with an existing store when no_clobber is falsy)
        if out_fp.exists():
            print("Deleting existing zarr store: ", out_fp)
            shutil.rmtree(out_fp, ignore_errors=True)
        hist_adj.to_dataset().to_zarr(out_fp)


def run_historical_window_indicators(
    model, var_id, tmp_dir, window_sizes, no_clobber=True
):
    """Compute indicators from the window-size adjusted historical stores.

    For every window size, the adjusted store written by the window adjustment
    step is opened, the indicators are computed with ``baeda.run_indicators``
    and the result is written next to it with ``"idx"`` appended to the
    variable id in the file name.

    Args:
        model: model name used in the store file names.
        var_id: variable id of the adjusted data.
        tmp_dir: directory holding the adjusted stores and receiving the output.
        window_sizes: iterable of window sizes to process.
        no_clobber: if truthy, existing indicator stores are skipped; otherwise
            they are deleted and rewritten.
    """
    print("Running historical indicators for different window sizes for model: ", model)

    def _store_fp(window, store_var_id):
        # adjusted-data and indicator stores differ only in the var_id slot
        return tmp_dir.joinpath(
            tmp_window_fn.format(
                qm_window=window,
                var_id=store_var_id,
                model=model,
                scenario="historical",
            )
        )

    for window in window_sizes:
        hist_idx_fp = _store_fp(window, var_id + "idx")
        if hist_idx_fp.exists() and no_clobber:
            print("Skipping existing zarr store: ", hist_idx_fp)
            continue

        hist_adj = xr.open_zarr(_store_fp(window, var_id))

        hist_idx = baeda.run_indicators(
            hist_adj[var_id].load(), indices=["rx1day", "dpi"]
        )

        if hist_idx_fp.exists():
            print("Deleting existing zarr store: ", hist_idx_fp)
            shutil.rmtree(hist_idx_fp, ignore_errors=True)
        hist_idx.to_zarr(hist_idx_fp)

In [8]:
# historical QDM adjustments for every window size, one model at a time
scenario = "ssp585"
var_id = "pr"

# a single warnings context covers the whole sweep
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for model in ("GFDL-ESM4", "NorESM2-MM", "EC-Earth3-Veg"):
        run_window_adjustments(
            model=model,
            scenario=scenario,
            var_id=var_id,
            zarr_dir=zarr_dir,
            era5_ds=era5_ds,
            tmp_dir=tmp_dir,
            window_sizes=window_sizes,
        )

Running historical adjustments for different window sizes for model:  GFDL-ESM4
Skipping existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/qmw31_pr_GFDL-ESM4_historical.zarr
Skipping existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/qmw45_pr_GFDL-ESM4_historical.zarr
Skipping existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/qmw61_pr_GFDL-ESM4_historical.zarr
Skipping existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/qmw91_pr_GFDL-ESM4_historical.zarr
Running historical adjustments for different window sizes for model:  NorESM2-MM
Skipping existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/qmw31_pr_NorESM2-MM_historical.zarr
Skipping existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/qmw45_pr_NorESM2-MM_historical.zarr
Skipping existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/qmw61_pr_NorESM2-MM_historical.zarr
Skipping existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/qmw91_pr_NorESM

In [None]:
# indicators for every window-size adjusted store; existing outputs are rewritten
scenario = "ssp585"
var_id = "pr"

# a single warnings context covers the whole sweep
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for model in ("GFDL-ESM4", "NorESM2-MM", "EC-Earth3-Veg"):
        run_historical_window_indicators(
            model=model,
            var_id=var_id,
            tmp_dir=tmp_dir,
            window_sizes=window_sizes,
            no_clobber=False,
        )

Running historical indicators for different window sizes for model:  GFDL-ESM4
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/qmw31_pridx_GFDL-ESM4_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/qmw45_pridx_GFDL-ESM4_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/qmw61_pridx_GFDL-ESM4_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/qmw91_pridx_GFDL-ESM4_historical.zarr
Running historical indicators for different window sizes for model:  NorESM2-MM
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/qmw31_pridx_NorESM2-MM_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/qmw45_pridx_NorESM2-MM_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/qmw61_pridx_NorESM2-MM_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/

In [80]:
def run_adapt_freq_adjustments(
    model,
    scenario,
    var_id,
    zarr_dir,
    era5_ds,
    tmp_dir,
    adapt_freq_threhsolds,
    no_clobber=True,
):
    """Train QDM and adjust the historical data once per frequency-adaptation threshold.

    For every threshold, a QuantileDeltaMapping is trained against the ERA5
    reference, applied to the historical series, loaded into memory and written
    to a zarr store under ``tmp_dir`` (named with ``tmp_adapt_freq_fn``,
    scenario fixed to "historical"). All other training parameters come from
    ``default_params[var_id]``.

    Args:
        model: model name, used for data extraction and in the output file name.
        scenario: scenario passed to ``baeda.extract_time_series_from_zarr``.
        var_id: variable id to adjust.
        zarr_dir: directory holding the model zarr stores.
        era5_ds: dataset holding the reference data for ``var_id``.
        tmp_dir: directory the adjusted stores are written to.
        adapt_freq_threhsolds: iterable of threshold strings (e.g.
            "0.254 mm d-1"); spaces are stripped for use in the file name.
        no_clobber: if truthy, thresholds whose output store already exists are
            skipped; otherwise the existing store is deleted and rewritten.
    """
    print("Running historical adjustments for adapt_freq thresholds for model: ", model)
    # only the historical series is adjusted here; the second return value is unused
    hist, _ = baeda.extract_time_series_from_zarr(
        zarr_dir, model, scenario, var_id, coords=None
    )

    # QDM: train the adjustment
    # rechunking to allow for more workers
    chunk_kwargs = {"time": -1, "x": 10, "y": 10}
    hist_chunked = hist.chunk(**chunk_kwargs)
    train_kwargs = dict(
        ref=era5_ds[var_id].chunk(**chunk_kwargs),
        # think having experiment coordinate may quietly prevent
        # adjustment of data with different coordinates (e.g. ssp's)
        hist=hist_chunked.isel(Method=0, experiment=0).drop_vars(
            ["Method", "experiment"]
        ),
        group="time.dayofyear",
        **default_params[var_id],
    )

    for thresh in adapt_freq_threhsolds:
        out_fp = tmp_dir.joinpath(
            tmp_adapt_freq_fn.format(
                adapt_freq=thresh.replace(" ", ""),
                var_id=var_id,
                model=model,
                scenario="historical",
            )
        )
        if out_fp.exists() and no_clobber:
            print("Skipping existing zarr store: ", out_fp)
            continue

        # override the default threshold with the value under test
        train_kwargs.update(adapt_freq_thresh=thresh)
        print(f"Training with adapt_freq threhsold of: {thresh}")

        qdm_train = sdba.QuantileDeltaMapping.train(**train_kwargs)

        hist_adj = (
            qdm_train.adjust(
                hist_chunked,
                extrapolation="constant",
                interp="nearest",
            )
            .isel(Method=0, drop=True)
            # tag the result with the threshold as a length-1 leading dimension
            .assign_coords(adapt_freq=thresh)
            .expand_dims("adapt_freq")
            .transpose("adapt_freq", "experiment", "time", "y", "x")
        )
        hist_adj.name = var_id

        # compute
        hist_adj = hist_adj.load()

        # write (only reached with an existing store when no_clobber is falsy)
        if out_fp.exists():
            print("Deleting existing zarr store: ", out_fp)
            shutil.rmtree(out_fp, ignore_errors=True)
        hist_adj.to_dataset().to_zarr(out_fp)


def run_historical_adapt_freq_indicators(
    model, var_id, tmp_dir, adapt_freq_threhsolds, no_clobber=True
):
    """Compute indicators from the adapt_freq-threshold adjusted historical stores.

    For every threshold, the adjusted store written by the adapt_freq
    adjustment step is opened, the indicators are computed with
    ``baeda.run_indicators`` and the result is written next to it with
    ``"idx"`` appended to the variable id in the file name.

    Args:
        model: model name used in the store file names.
        var_id: variable id of the adjusted data.
        tmp_dir: directory holding the adjusted stores and receiving the output.
        adapt_freq_threhsolds: iterable of threshold strings; spaces are
            stripped for use in the file names.
        no_clobber: if truthy, existing indicator stores are skipped; otherwise
            they are deleted and rewritten.
    """
    print("Running historical indicators for adapt_freq thresholds for model: ", model)

    def _store_fp(adapt_freq, store_var_id):
        # adjusted-data and indicator stores differ only in the var_id slot
        return tmp_dir.joinpath(
            tmp_adapt_freq_fn.format(
                adapt_freq=adapt_freq.replace(" ", ""),
                var_id=store_var_id,
                model=model,
                scenario="historical",
            )
        )

    for adapt_freq in adapt_freq_threhsolds:
        hist_idx_fp = _store_fp(adapt_freq, var_id + "idx")
        if hist_idx_fp.exists() and no_clobber:
            print("Skipping existing zarr store: ", hist_idx_fp)
            continue

        hist_adj = xr.open_zarr(_store_fp(adapt_freq, var_id))
        hist_idx = baeda.run_indicators(
            hist_adj[var_id].load(), indices=["rx1day", "dpi"]
        )

        if hist_idx_fp.exists():
            print("Deleting existing zarr store: ", hist_idx_fp)
            shutil.rmtree(hist_idx_fp, ignore_errors=True)
        hist_idx.to_zarr(hist_idx_fp)

In [None]:
# historical QDM adjustments for every adapt_freq threshold; existing outputs are rewritten
scenario = "ssp585"
var_id = "pr"

# a single warnings context covers the whole sweep
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for model in ("GFDL-ESM4", "NorESM2-MM", "EC-Earth3-Veg"):
        run_adapt_freq_adjustments(
            model=model,
            scenario=scenario,
            var_id=var_id,
            zarr_dir=zarr_dir,
            era5_ds=era5_ds,
            tmp_dir=tmp_dir,
            adapt_freq_threhsolds=adapt_freq_threhsolds,
            no_clobber=False,
        )

Running historical adjustments for adapt_freq thresholds for model:  GFDL-ESM4
Training with adapt_freq threhsold of: 0.05 mm d-1
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/0.05mmd-1_pr_GFDL-ESM4_historical.zarr
Training with adapt_freq threhsold of: 0.254 mm d-1
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/0.254mmd-1_pr_GFDL-ESM4_historical.zarr
Training with adapt_freq threhsold of: 1 mm d-1
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/1mmd-1_pr_GFDL-ESM4_historical.zarr
Training with adapt_freq threhsold of: 2 mm d-1
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/2mmd-1_pr_GFDL-ESM4_historical.zarr
Running historical adjustments for adapt_freq thresholds for model:  NorESM2-MM
Training with adapt_freq threhsold of: 0.05 mm d-1
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/0.05mmd-1_pr_NorESM2-MM_historical.zarr
Training with adapt_freq threhsold of: 0

In [82]:
# indicators for every adapt_freq adjusted store; existing outputs are rewritten
scenario = "ssp585"
var_id = "pr"

# a single warnings context covers the whole sweep
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for model in ("GFDL-ESM4", "NorESM2-MM", "EC-Earth3-Veg"):
        run_historical_adapt_freq_indicators(
            model=model,
            var_id=var_id,
            tmp_dir=tmp_dir,
            adapt_freq_threhsolds=adapt_freq_threhsolds,
            no_clobber=False,
        )

Running historical indicators for adapt_freq thresholds for model:  GFDL-ESM4
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/0.05mmd-1_pridx_GFDL-ESM4_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/0.254mmd-1_pridx_GFDL-ESM4_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/1mmd-1_pridx_GFDL-ESM4_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/2mmd-1_pridx_GFDL-ESM4_historical.zarr
Running historical indicators for adapt_freq thresholds for model:  NorESM2-MM
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/0.05mmd-1_pridx_NorESM2-MM_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/0.254mmd-1_pridx_NorESM2-MM_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/1mmd-1_pridx_NorESM2-MM_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmr

In [28]:
def run_quantile_adjustments(
    model,
    scenario,
    var_id,
    zarr_dir,
    era5_ds,
    tmp_dir,
    n_quantiles_list,
    no_clobber=True,
):
    """Train QDM and adjust the historical data once per number of quantiles.

    For every quantile count, a QuantileDeltaMapping is trained against the
    ERA5 reference, applied to the historical series, loaded into memory and
    written to a zarr store under ``tmp_dir`` (named with
    ``tmp_nquantiles_fn``, scenario fixed to "historical"). All other training
    parameters come from ``default_params[var_id]``.

    Args:
        model: model name, used for data extraction and in the output file name.
        scenario: scenario passed to ``baeda.extract_time_series_from_zarr``.
        var_id: variable id to adjust.
        zarr_dir: directory holding the model zarr stores.
        era5_ds: dataset holding the reference data for ``var_id``.
        tmp_dir: directory the adjusted stores are written to.
        n_quantiles_list: iterable of quantile counts to train with.
        no_clobber: if truthy, quantile counts whose output store already
            exists are skipped; otherwise the store is deleted and rewritten.
    """
    print(
        "Running historical adjustments for different numbers of quantiles for model: ",
        model,
    )
    # only the historical series is adjusted here; the second return value is unused
    hist, _ = baeda.extract_time_series_from_zarr(
        zarr_dir, model, scenario, var_id, coords=None
    )

    # QDM: train the adjustment
    # rechunking to allow for more workers
    chunk_kwargs = {"time": -1, "x": 10, "y": 10}
    hist_chunked = hist.chunk(**chunk_kwargs)
    train_kwargs = dict(
        ref=era5_ds[var_id].chunk(**chunk_kwargs),
        # think having experiment coordinate may quietly prevent
        # adjustment of data with different coordinates (e.g. ssp's)
        hist=hist_chunked.isel(Method=0, experiment=0).drop_vars(
            ["Method", "experiment"]
        ),
        group="time.dayofyear",
        **default_params[var_id],
    )

    for nquantiles in n_quantiles_list:
        out_fp = tmp_dir.joinpath(
            tmp_nquantiles_fn.format(
                nquantiles=nquantiles,
                var_id=var_id,
                model=model,
                scenario="historical",
            )
        )
        if out_fp.exists() and no_clobber:
            print("Skipping existing zarr store: ", out_fp)
            continue

        # override the default quantile count with the value under test
        train_kwargs.update(nquantiles=nquantiles)
        print(f"Training with nquantiles: {nquantiles}")

        qdm_train = sdba.QuantileDeltaMapping.train(**train_kwargs)

        hist_adj = (
            qdm_train.adjust(
                hist_chunked,
                extrapolation="constant",
                interp="nearest",
            )
            .isel(Method=0, drop=True)
            # tag the result with the quantile count as a length-1 leading dimension
            .assign_coords(nquantiles=nquantiles)
            .expand_dims("nquantiles")
            .transpose("nquantiles", "experiment", "time", "y", "x")
        )
        hist_adj.name = var_id

        # compute
        hist_adj = hist_adj.load()

        # write (only reached with an existing store when no_clobber is falsy)
        if out_fp.exists():
            print("Deleting existing zarr store: ", out_fp)
            shutil.rmtree(out_fp, ignore_errors=True)
        hist_adj.to_dataset().to_zarr(out_fp)


def run_historical_quantile_indicators(
    model, var_id, tmp_dir, n_quantiles_list, no_clobber=True
):
    """Compute indicators from the nquantiles adjusted historical stores.

    For every quantile count, the adjusted store written by the quantile
    adjustment step is opened, the indicators are computed with
    ``baeda.run_indicators`` and the result is written next to it with
    ``"idx"`` appended to the variable id in the file name.

    Args:
        model: model name used in the store file names.
        var_id: variable id of the adjusted data.
        tmp_dir: directory holding the adjusted stores and receiving the output.
        n_quantiles_list: iterable of quantile counts to process.
        no_clobber: if truthy, existing indicator stores are skipped; otherwise
            they are deleted and rewritten.
    """
    print("Running historical indicators for different nquantiles for model: ", model)

    def _store_fp(nquantiles, store_var_id):
        # adjusted-data and indicator stores differ only in the var_id slot
        return tmp_dir.joinpath(
            tmp_nquantiles_fn.format(
                nquantiles=nquantiles,
                var_id=store_var_id,
                model=model,
                scenario="historical",
            )
        )

    for nquantiles in n_quantiles_list:
        hist_idx_fp = _store_fp(nquantiles, var_id + "idx")
        if hist_idx_fp.exists() and no_clobber:
            print("Skipping existing zarr store: ", hist_idx_fp)
            continue

        hist_adj = xr.open_zarr(_store_fp(nquantiles, var_id))
        # NOTE(review): the window and adapt_freq variants call .load() on the
        # adjusted data before computing indicators; this one passes it lazily.
        # Confirm whether the difference is intentional.
        hist_idx = baeda.run_indicators(hist_adj[var_id], indices=["rx1day", "dpi"])

        if hist_idx_fp.exists():
            print("Deleting existing zarr store: ", hist_idx_fp)
            shutil.rmtree(hist_idx_fp, ignore_errors=True)
        hist_idx.to_zarr(hist_idx_fp)

In [14]:
# historical QDM adjustments for every quantile count, one model at a time
scenario = "ssp585"
var_id = "pr"

# a single warnings context covers the whole sweep
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for model in ("GFDL-ESM4", "NorESM2-MM", "EC-Earth3-Veg"):
        run_quantile_adjustments(
            model=model,
            scenario=scenario,
            var_id=var_id,
            zarr_dir=zarr_dir,
            era5_ds=era5_ds,
            tmp_dir=tmp_dir,
            n_quantiles_list=n_quantiles_list,
        )

Running historical adjustments for different numbers of quantiles for model:  GFDL-ESM4
Skipping existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/nq50_pr_GFDL-ESM4_historical.zarr
Skipping existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/nq100_pr_GFDL-ESM4_historical.zarr
Skipping existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/nq150_pr_GFDL-ESM4_historical.zarr
Skipping existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/nq200_pr_GFDL-ESM4_historical.zarr
Skipping existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/nq250_pr_GFDL-ESM4_historical.zarr
Running historical adjustments for different numbers of quantiles for model:  NorESM2-MM
Skipping existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/nq50_pr_NorESM2-MM_historical.zarr
Skipping existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/nq100_pr_NorESM2-MM_historical.zarr
Skipping existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/nq

In [None]:
# indicators for every nquantiles adjusted store; existing outputs are rewritten
scenario = "ssp585"
var_id = "pr"

# a single warnings context covers the whole sweep
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for model in ("GFDL-ESM4", "NorESM2-MM", "EC-Earth3-Veg"):
        run_historical_quantile_indicators(
            model=model,
            var_id=var_id,
            tmp_dir=tmp_dir,
            n_quantiles_list=n_quantiles_list,
            no_clobber=False,
        )

Running historical indicators for different nquantiles for model:  GFDL-ESM4
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/nq50_pridx_GFDL-ESM4_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/nq100_pridx_GFDL-ESM4_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/nq150_pridx_GFDL-ESM4_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/nq200_pridx_GFDL-ESM4_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/nq250_pridx_GFDL-ESM4_historical.zarr
Running historical indicators for different nquantiles for model:  NorESM2-MM
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/nq50_pridx_NorESM2-MM_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/nq100_pridx_NorESM2-MM_historical.zarr
Deleting existing zarr store:  /center1/CMIP6/kmredilla/downscaling/eda/nq1

## Cramer-von Mises calculations

Conduct the pixelwise Cramer-von Mises tests for all adjusted datasets and write to the `tmp_dir`. 

In [None]:
# run the pixelwise Cramer-von Mises tests between the adjusted historical
# indicators and the ERA5 indicators, for every value of one swept QDM
# parameter and every model, and combine the p-values into one dataset


def run_cvm(iter, keyname, models, var_id, tmp_dir, tmp_fn, era5_idx, indicators):
    """Run two-sample Cramer-von Mises tests of adjusted indicators against ERA5.

    For every parameter value in ``iter`` and every model, the historical
    indicator store is opened and each indicator is tested against the
    matching ERA5 indicator with ``scipy.stats.cramervonmises_2samp``. The
    p-values are merged into one dataset, which is written to ``tmp_dir``
    (replacing any existing store) and returned.

    Args:
        iter: values of the swept parameter (window sizes, thresholds, ...).
            The name shadows the builtin but is kept because callers pass it
            by keyword.
        keyname: name of the swept parameter's field in ``tmp_fn``.
        models: model names to process.
        var_id: variable id the indicators were derived from.
        tmp_dir: directory holding the indicator stores and receiving the output.
        tmp_fn: file name template with ``keyname``, ``var_id``, ``model`` and
            ``scenario`` fields.
        era5_idx: dataset of ERA5 indicators.
        indicators: names of the indicators to test.

    Returns:
        Dataset with one ``<indicator>_pval`` variable per indicator.

    Raises:
        ValueError: if ``iter`` or ``models`` is empty.
    """
    param_values = list(iter)
    models = list(models)
    if not param_values or not models:
        # previously this surfaced as an UnboundLocalError after the loops
        raise ValueError("run_cvm requires at least one parameter value and one model")

    # the ERA5 samples are the same for every parameter value and model,
    # so read them from the store once instead of once per test
    era5_samples = {
        indicator: era5_idx[indicator].values.squeeze() for indicator in indicators
    }

    cvm_datasets = []
    for value in param_values:
        for model in models:
            hist_idx_fp = tmp_dir.joinpath(
                tmp_fn.format(
                    **{
                        keyname: str(value).replace(" ", ""),
                        "var_id": var_id + "idx",
                        "model": model,
                        "scenario": "historical",
                    }
                )
            )
            hist_idx = xr.open_zarr(hist_idx_fp)
            hist_idx.load()
            hist_idx = hist_idx.assign_coords(model=model).expand_dims("model")

            # every dimension except time is kept on the p-value array
            dims = {dim: size for dim, size in hist_idx.sizes.items() if dim != "time"}

            for indicator in indicators:
                cvm = cramervonmises_2samp(
                    hist_idx[indicator].values.squeeze(),
                    era5_samples[indicator],
                    method="asymptotic",
                )

                new_ds = xr.Dataset(
                    {
                        f"{indicator}_pval": (
                            dims.keys(),
                            # NOTE(review): assumes the three leading non-time
                            # dims are length 1 (presumably model, the swept
                            # parameter and experiment) - confirm
                            np.expand_dims(cvm.pvalue, axis=[0, 1, 2]),
                        ),
                    },
                    coords={dim: hist_idx.coords[dim] for dim in dims.keys()},
                )
                cvm_datasets.append(new_ds)

    cvm_ds = xr.merge(cvm_datasets)

    # combined store: the parameter slot holds the parameter NAME, not a value
    cvm_fp = tmp_dir.joinpath(
        tmp_fn.format(
            **{
                keyname: keyname,
                "var_id": var_id + "idx_cvm",
                "model": "allmodels",
                "scenario": "historical",
            }
        )
    )
    if cvm_fp.exists():
        print("Deleting existing zarr store: ", cvm_fp)
        shutil.rmtree(cvm_fp, ignore_errors=True)
    cvm_ds.to_zarr(cvm_fp)

    return cvm_ds

In [None]:
# window sizes Cramer-von mises tests
# (the parameter values are passed straight to run_cvm rather than through a
# global named `iter`, which would shadow the builtin for the rest of the session)
fn_key = "qm_window"
tmp_fn = tmp_window_fn
models = ["GFDL-ESM4", "NorESM2-MM", "EC-Earth3-Veg"]
indicators = ["rx1day", "dpi"]


window_cvm = run_cvm(
    iter=window_sizes,
    keyname=fn_key,
    models=models,
    var_id=var_id,
    tmp_dir=tmp_dir,
    tmp_fn=tmp_fn,
    era5_idx=era5_idx,
    indicators=indicators,
)

In [None]:
# frequency adaptation Cramer-von mises tests
# (the parameter values are passed straight to run_cvm rather than through a
# global named `iter`, which would shadow the builtin for the rest of the session)
fn_key = "adapt_freq"
tmp_fn = tmp_adapt_freq_fn
models = ["GFDL-ESM4", "NorESM2-MM", "EC-Earth3-Veg"]
indicators = ["rx1day", "dpi"]


adapt_freq_cvm = run_cvm(
    iter=adapt_freq_threhsolds,
    keyname=fn_key,
    models=models,
    var_id=var_id,
    tmp_dir=tmp_dir,
    tmp_fn=tmp_fn,
    era5_idx=era5_idx,
    indicators=indicators,
)

In [None]:
# nquantiles Cramer-von mises tests
# (the parameter values are passed straight to run_cvm rather than through a
# global named `iter`, which would shadow the builtin for the rest of the session)
fn_key = "nquantiles"
tmp_fn = tmp_nquantiles_fn
models = ["GFDL-ESM4", "NorESM2-MM", "EC-Earth3-Veg"]
indicators = ["rx1day", "dpi"]


nquantiles_cvm = run_cvm(
    iter=n_quantiles_list,
    keyname=fn_key,
    models=models,
    var_id=var_id,
    tmp_dir=tmp_dir,
    tmp_fn=tmp_fn,
    era5_idx=era5_idx,
    indicators=indicators,
)

In [None]:
# Closing the client does not shut down a cluster that was created separately
# and passed in, so the cluster is closed explicitly to release its SLURM jobs.
client.close()
cluster.close()