In [None]:
import xarray as xr
import proplot as pplt
import pandas as pd
import nwp_ensemble as ens
from utils.files import OIFSEnsemblePreprocessor, NEMOEnsemblePreprocessor
import numpy as np
import user_context as context

In [None]:
# Both preprocessors are configured with the same -7 h offset.
# NOTE(review): the meaning of the offset is defined in utils.files — confirm.
ensemble_time_shift = pd.Timedelta(-7, "h")
oifs_preprocessor = OIFSEnsemblePreprocessor(ensemble_time_shift)
nemo_preprocessor = NEMOEnsemblePreprocessor(ensemble_time_shift)

In [None]:
# Output directory for every figure saved by this notebook.
plotting_dir = context.plotting_dir / "ensemble" / "forecast_winners"
# parents=True: also create the intermediate "ensemble" directory, so the
# notebook runs on a fresh checkout where it does not exist yet.
plotting_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Collect every entry three levels below the ensemble root
# (date / initialization / experiment, going by the loop variable names).
experiment_directories = [
    experiment_dir
    for date_dir in ens.ensemble_directory.glob("*")
    for init_dir in date_dir.glob("*")
    for experiment_dir in init_dir.glob("*")
]

In [None]:
# Atmosphere output: one `progvar.nc` path per experiment directory.
progvars_ensemble = [
    experiment_dir / "progvar.nc" for experiment_dir in experiment_directories
]

# Ocean output: the first file in each experiment directory that matches
# `*_grid_T.nc`.
nemo_t_ensemble = []
for experiment_dir in experiment_directories:
    grid_t_file = next(experiment_dir.glob("*_grid_T.nc"), None)
    if grid_t_file is None:
        # Name the offending directory instead of surfacing a bare
        # StopIteration from next().
        raise FileNotFoundError(
            f"No '*_grid_T.nc' file found in {experiment_dir}"
        )
    nemo_t_ensemble.append(grid_t_file)

In [None]:
# One row per coupling scheme: (selection key, display label, colour, marker).
# The four parallel lists below are derived from this table, so index i of
# each list refers to the same scheme.
_scheme_styles = [
    ("parallel", "parallel", "m", "."),
    ("atm-first", "atmosphere-first", "c", "x"),
    ("oce-first", "ocean-first", "y", "1"),
    ("converged SWR", "converged SWR", "k", "+"),
]
cpl_schemes, labels, colors, markers = (
    list(column) for column in zip(*_scheme_styles)
)

# OpenIFS

In [None]:
# Open all atmosphere `progvar.nc` files as a single dataset; every file is
# passed through the OpenIFS preprocessor before the files are combined.
prog_ensemble = xr.open_mfdataset(
    paths=progvars_ensemble,
    preprocess=oifs_preprocessor.preprocess_ensemble,
)

In [None]:
# Keep only the last time step of every run and relabel each run by
# `start_date + time`, stored under the new coordinate name `end_date`.
# NOTE(review): assumes `time` is an offset relative to `start_date` — confirm.
prog_last_step = prog_ensemble.isel(time=-1)
prog_forecast = prog_last_step.assign_coords(
    start_date=prog_last_step.start_date + prog_last_step.time
).rename(start_date="end_date")

In [None]:
# Difference of every coupling scheme to the "converged SWR" run, which is
# used as the reference throughout this notebook.
prog_swr_reference = prog_forecast.sel(coupling_scheme="converged SWR")
prog_forecast_diff = prog_forecast - prog_swr_reference

In [None]:
def vector_norm(x, dim, ord=None):
    """Reduce ``x`` along ``dim`` with ``numpy.linalg.norm``.

    Parameters
    ----------
    x : xarray object
        Input passed to ``xr.apply_ufunc``.
    dim : str
        Name of the dimension the norm is taken over; it is declared as the
        only core dimension and is therefore absent from the result.
    ord : optional
        Forwarded unchanged as the ``ord`` argument of ``numpy.linalg.norm``.

    Returns
    -------
    The object returned by ``xr.apply_ufunc``, i.e. ``x`` without ``dim``.
    """
    # apply_ufunc moves core dimensions to the last axis, hence axis=-1.
    norm_kwargs = {"ord": ord, "axis": -1}
    return xr.apply_ufunc(
        np.linalg.norm,
        x,
        input_core_dims=[[dim]],
        kwargs=norm_kwargs,
    )

## Temperature in Boundary Layer

In [None]:
# Model levels 51..60 are used as the boundary layer in this notebook.
pbl_levels = np.arange(51, 61)
# Temperature difference to the reference; the reference itself is dropped
# because its difference to itself carries no information.
t_pbl_diff = (
    prog_forecast_diff["t"]
    .sel(nlev=pbl_levels)
    .drop_sel(coupling_scheme="converged SWR")
)
# L2 norm over the selected levels, then the index of the coupling scheme
# with the smallest norm for every remaining coordinate.
t_pbl_diff_l2 = vector_norm(t_pbl_diff.load(), "nlev", 2)
argmin_t_pbl_diff = t_pbl_diff_l2.argmin("coupling_scheme")

In [None]:
# Show the order of the coupling schemes: argmin returns integer positions
# along `coupling_scheme`, and this is the lookup for those positions.
t_pbl_diff_l2.coupling_scheme.data

In [None]:
# Count how often each coupling-scheme index has the smallest temperature
# error norm.
argmin_t_pbl_diff.groupby(argmin_t_pbl_diff).count().load()

In [None]:
# Scatter plot of the boundary-layer temperature error norm for every
# coupling scheme and initial condition.
fig, ax = pplt.subplots(width="50em", height="30em")

# (coupling scheme, colour) in plotting and legend order.
scheme_colors = [("atm-first", "c"), ("oce-first", "y"), ("parallel", "m")]
initial_conditions = ["atm", "oce", "par"]

ims = []
for scheme, color in scheme_colors:
    for initial_condition in initial_conditions:
        # Only the first series of each scheme is labelled and kept as a
        # legend handle, so the legend has exactly one entry per scheme.
        is_first = initial_condition == initial_conditions[0]
        im = ax.scatter(
            t_pbl_diff_l2.sel(
                coupling_scheme=scheme, initial_condition=initial_condition
            ),
            marker=".",
            label=scheme if is_first else None,
            color=color,
        )
        if is_first:
            ims.append(im)
ax.format(
    title=r"$||T-T_{{SWR}}||_2$ in Boundary Layer after 2 Days",
    ylabel="Temperature [°C]",
    xlabel="Time of Forecast",
    xrotation=30,
)
fig.legend(ims, frame=False, loc="b", ncols=3)
fig.savefig(plotting_dir / "t_differences.pdf")

In [None]:
# Largest temperature error norm over all experiments. The norm is used
# (not the signed per-level differences) because the bin edges in the next
# cell are applied to `t_pbl_diff_l2`.
t_pbl_diff_l2.max().load()

In [None]:
# Bin edges are 0.01, 0.1, 0.2, 0.5 and 1 times 5.2.
# NOTE(review): groupby_bins uses right-closed intervals by default, so values
# of exactly 0 and values above 5.2 fall into no bin — confirm this is intended.
t_diff_bins = [0, 0.052, 0.52, 1.04, 2.6, 5.2]
error_ranges = ["A", "B", "C", "D", "E"]
grouped_t_diffs = t_pbl_diff_l2.groupby_bins(
    t_pbl_diff_l2, t_diff_bins, labels=error_ranges
)

# Number of experiments per (error range, coupling scheme). The column order
# must match the `coupling_scheme` coordinate of the dataset below.
binned_schemes = ["atm-first", "oce-first", "parallel"]
array = np.array(
    [
        [
            grouped_t_diffs[error_range].sel(coupling_scheme=scheme).shape[0]
            for scheme in binned_schemes
        ]
        for error_range in error_ranges
    ]
)

binned_t_diffs = xr.Dataset(
    {"t_error": (("error_range", "coupling_scheme"), array)},
    coords={
        "error_range": error_ranges,
        "coupling_scheme": ["atmosphere-first", "ocean-first", "parallel"],
    },
)

In [None]:
# Grouped bar chart: number of experiments per error range, one bar per
# coupling scheme.
fig, ax = pplt.subplots(width="40em", height="30em")

cycle = pplt.Cycle(["c", "y", "m"])
im = ax.bar(binned_t_diffs.t_error, cycle=cycle, width=0.5)
ax.format(
    # Raw strings: "\l" is not a valid escape sequence in a normal string.
    xticklabels=[
        r"$\leq 0.01$",
        "$[0.01, 0.1]$",
        "$[0.1, 0.2]$",
        "$[0.2, 0.5]$",
        "$[0.5, 1]$",
    ],
    ylabel="Number of Experiments",
    xlabel="Relative Error Range",
)
fig.legend(im, frame=False, ncols=3, loc="b", title="Coupling Scheme")
fig.savefig(plotting_dir / "t_bar.pdf")

In [None]:
# Normalise every error norm by the largest norm among the coupling schemes
# at the same coordinates, then show the mean over end dates and initial
# conditions.
t_scheme_max = t_pbl_diff_l2.max("coupling_scheme")
max_weighted_t_diff = t_pbl_diff_l2 / t_scheme_max
max_weighted_t_diff.mean(["end_date", "initial_condition"]).load()

In [None]:
# Median of the max-normalised temperature error, reduced over all end dates
# and initial conditions.
max_weighted_t_diff.median(["end_date", "initial_condition"]).load()

In [None]:
# Maximum of the max-normalised temperature error, reduced over all end dates
# and initial conditions.
max_weighted_t_diff.max(["end_date", "initial_condition"]).load()

In [None]:
# Minimum of the max-normalised temperature error, reduced over all end dates
# and initial conditions.
max_weighted_t_diff.min(["end_date", "initial_condition"]).load()

## Humidity in Boundary Layer

In [None]:
# Model levels 51..60 are used as the boundary layer in this notebook.
pbl_levels = np.arange(51, 61)
# Humidity difference to the reference, scaled by 1e3
# (presumably kg/kg -> g/kg, matching the axis label of the scatter plot).
q_pbl_diff = prog_forecast_diff["q"].sel(nlev=pbl_levels) * 1e3
q_pbl_diff = q_pbl_diff.drop_sel(coupling_scheme="converged SWR")
# L2 norm over the selected levels, then the index of the coupling scheme
# with the smallest norm for every remaining coordinate.
q_pbl_diff_l2 = vector_norm(q_pbl_diff.load(), "nlev", 2)
argmin_q_pbl_diff = q_pbl_diff_l2.argmin("coupling_scheme")

In [None]:
# Show the order of the coupling schemes: argmin returns integer positions
# along `coupling_scheme`, and this is the lookup for those positions.
q_pbl_diff_l2.coupling_scheme.data

In [None]:
# Count how often each coupling-scheme index has the smallest humidity
# error norm.
argmin_q_pbl_diff.groupby(argmin_q_pbl_diff).count().load()

In [None]:
# Scatter plot of the boundary-layer humidity error norm for every coupling
# scheme and initial condition.
fig, ax = pplt.subplots(width="50em", height="30em")

# (coupling scheme, colour) in plotting and legend order.
scheme_colors = [("atm-first", "c"), ("oce-first", "y"), ("parallel", "m")]
initial_conditions = ["atm", "oce", "par"]

ims = []
for scheme, color in scheme_colors:
    for initial_condition in initial_conditions:
        # Only the first series of each scheme is labelled and kept as a
        # legend handle, so the legend has exactly one entry per scheme.
        is_first = initial_condition == initial_conditions[0]
        im = ax.scatter(
            q_pbl_diff_l2.sel(
                coupling_scheme=scheme, initial_condition=initial_condition
            ),
            marker=".",
            label=scheme if is_first else None,
            color=color,
        )
        if is_first:
            ims.append(im)
ax.format(
    title=r"$||q-q_{{SWR}}||_2$ in Boundary Layer after 2 Days",
    ylabel=r"Humidity $[g \; kg^{{-1}}]$",
    xlabel="Time of Forecast",
    xrotation=30,
)
fig.legend(ims, frame=False, loc="b", ncols=3)
fig.savefig(plotting_dir / "q_differences.pdf")

In [None]:
# Largest humidity error norm over all experiments.
q_pbl_diff_l2.max().load()

In [None]:
# Bin edges are roughly 0.01, 0.1, 0.2, 0.5 and 1 times 3.3.
# NOTE(review): 1.63 is slightly below 0.5 * 3.3 = 1.65 — confirm the edge.
# NOTE(review): groupby_bins uses right-closed intervals by default, so values
# of exactly 0 and values above 3.3 fall into no bin — confirm this is intended.
q_diff_bins = [0, 0.033, 0.33, 0.66, 1.63, 3.3]
error_ranges = ["A", "B", "C", "D", "E"]
grouped_q_diffs = q_pbl_diff_l2.groupby_bins(
    q_pbl_diff_l2, q_diff_bins, labels=error_ranges
)

# Number of experiments per (error range, coupling scheme). The column order
# must match the `coupling_scheme` coordinate of the dataset below.
binned_schemes = ["atm-first", "oce-first", "parallel"]
array = np.array(
    [
        [
            grouped_q_diffs[error_range].sel(coupling_scheme=scheme).shape[0]
            for scheme in binned_schemes
        ]
        for error_range in error_ranges
    ]
)

binned_q_diffs = xr.Dataset(
    {"q_error": (("error_range", "coupling_scheme"), array)},
    coords={
        "error_range": error_ranges,
        "coupling_scheme": ["atmosphere-first", "ocean-first", "parallel"],
    },
)

In [None]:
# Grouped bar chart: number of experiments per error range, one bar per
# coupling scheme.
fig, ax = pplt.subplots(width="40em", height="30em")

cycle = pplt.Cycle(["c", "y", "m"])
im = ax.bar(binned_q_diffs.q_error, cycle=cycle, width=0.5)
ax.format(
    # Raw strings: "\l" is not a valid escape sequence in a normal string.
    xticklabels=[
        r"$\leq 0.01$",
        "$[0.01, 0.1]$",
        "$[0.1, 0.2]$",
        "$[0.2, 0.5]$",
        "$[0.5, 1]$",
    ],
    ylabel="Number of Experiments",
    xlabel="Relative Error Range",
)
fig.legend(im, frame=False, ncols=3, loc="b", title="Coupling Scheme")
fig.savefig(plotting_dir / "q_bar.pdf")

In [None]:
# Normalise every error norm by the largest norm among the coupling schemes
# at the same coordinates, then show the mean over end dates and initial
# conditions.
q_scheme_max = q_pbl_diff_l2.max("coupling_scheme")
max_weighted_q_diff = q_pbl_diff_l2 / q_scheme_max
max_weighted_q_diff.mean(["end_date", "initial_condition"]).load()

In [None]:
# Median of the max-normalised humidity error, reduced over all end dates and
# initial conditions.
max_weighted_q_diff.median(["end_date", "initial_condition"]).load()

In [None]:
# Minimum of the max-normalised humidity error, reduced over all end dates and
# initial conditions.
max_weighted_q_diff.min(["end_date", "initial_condition"]).load()

In [None]:
# Maximum of the max-normalised humidity error, reduced over all end dates and
# initial conditions.
max_weighted_q_diff.max(["end_date", "initial_condition"]).load()

# NEMO

In [None]:
# Open all ocean `*_grid_T.nc` files as a single dataset; every file is
# passed through the NEMO preprocessor before the files are combined.
nemo_ensemble = xr.open_mfdataset(
    paths=nemo_t_ensemble,
    preprocess=nemo_preprocessor.preprocess_ensemble,
)

In [None]:
# Keep only the last time step of every run and relabel each run by
# `start_date + time`, stored under the new coordinate name `end_date`.
# NOTE(review): assumes `time` is an offset relative to `start_date` — confirm.
nemo_last_step = nemo_ensemble.isel(time=-1)
nemo_forecast = nemo_last_step.assign_coords(
    start_date=nemo_last_step.start_date + nemo_last_step.time
).rename(start_date="end_date")

In [None]:
# Absolute difference of the `sosstsst` field to the "converged SWR"
# reference run; the reference itself is dropped afterwards.
nemo_swr_reference = nemo_forecast.sel(coupling_scheme="converged SWR")
sst_diff = np.abs(nemo_forecast - nemo_swr_reference)["sosstsst"].drop_sel(
    coupling_scheme="converged SWR"
)
# Index of the coupling scheme with the smallest absolute difference.
argmin_sst_diff = sst_diff.argmin("coupling_scheme")

In [None]:
# Show the order of the coupling schemes: argmin returns integer positions
# along `coupling_scheme`, and this is the lookup for those positions.
sst_diff.coupling_scheme.data

In [None]:
# Count how often each coupling-scheme index has the smallest absolute SST
# difference.
argmin_sst_diff.groupby(argmin_sst_diff).count().load()

In [None]:
# Scatter plot of the absolute SST difference for every coupling scheme and
# initial condition.
fig, ax = pplt.subplots(width="50em", height="30em")

# Load the data into memory once (load() works in place) before the repeated
# selections below.
sst_diff.load()

# (coupling scheme, colour) in plotting and legend order.
scheme_colors = [("atm-first", "c"), ("oce-first", "y"), ("parallel", "m")]
initial_conditions = ["atm", "oce", "par"]

ims = []
for scheme, color in scheme_colors:
    for initial_condition in initial_conditions:
        # Only the first series of each scheme is labelled and kept as a
        # legend handle, so the legend has exactly one entry per scheme.
        is_first = initial_condition == initial_conditions[0]
        im = ax.scatter(
            sst_diff.sel(
                coupling_scheme=scheme, initial_condition=initial_condition
            ),
            marker=".",
            label=scheme if is_first else None,
            color=color,
        )
        if is_first:
            ims.append(im)
ax.format(
    title="SST Difference w.r.t. SWR after 2 Days",
    ylabel="Temperature [°C]",
    xlabel="Time of Forecast",
    xrotation=30,
)
fig.legend(ims, loc="b", frame=False, ncols=3)
fig.savefig(plotting_dir / "sst_differences.pdf")

In [None]:
# Largest absolute SST difference over all experiments.
sst_diff.max().load()

In [None]:
# Bin edges are approximately 0.01, 0.1, 0.2, 0.5 and 1 times 0.272.
# NOTE(review): groupby_bins uses right-closed intervals by default, so values
# of exactly 0 and values above 0.272 fall into no bin — confirm this is
# intended.
sst_diff_bins = [0, 0.0027, 0.027, 0.054, 0.136, 0.272]
error_ranges = ["A", "B", "C", "D", "E"]
grouped_sst_diffs = sst_diff.groupby_bins(
    sst_diff, sst_diff_bins, labels=error_ranges
)

# Number of experiments per (error range, coupling scheme). The column order
# must match the `coupling_scheme` coordinate of the dataset below.
binned_schemes = ["atm-first", "oce-first", "parallel"]
array = np.array(
    [
        [
            grouped_sst_diffs[error_range].sel(coupling_scheme=scheme).shape[0]
            for scheme in binned_schemes
        ]
        for error_range in error_ranges
    ]
)

binned_sst_diffs = xr.Dataset(
    {"sst_error": (("error_range", "coupling_scheme"), array)},
    coords={
        "error_range": error_ranges,
        "coupling_scheme": ["atmosphere-first", "ocean-first", "parallel"],
    },
)

In [None]:
# Grouped bar chart: number of experiments per error range, one bar per
# coupling scheme.
fig, ax = pplt.subplots(width="40em", height="30em")

cycle = pplt.Cycle(["c", "y", "m"])
im = ax.bar(binned_sst_diffs.sst_error, cycle=cycle, width=0.5)
ax.format(
    # Raw strings: "\l" is not a valid escape sequence in a normal string.
    xticklabels=[
        r"$\leq 0.01$",
        "$[0.01, 0.1]$",
        "$[0.1, 0.2]$",
        "$[0.2, 0.5]$",
        "$[0.5, 1]$",
    ],
    ylabel="Number of Experiments",
    xlabel="Relative Error Range",
)
fig.legend(im, frame=False, ncols=3, loc="b", title="Coupling Scheme")
fig.savefig(plotting_dir / "sst_bar.pdf")

In [None]:
# Normalise every absolute difference by the largest one among the coupling
# schemes at the same coordinates, then show the mean over end dates and
# initial conditions.
sst_scheme_max = sst_diff.max("coupling_scheme")
max_weighted_sst_diff = sst_diff / sst_scheme_max
max_weighted_sst_diff.mean(["end_date", "initial_condition"]).load()

In [None]:
# Median of the max-normalised SST error, reduced over all end dates and
# initial conditions.
max_weighted_sst_diff.median(["end_date", "initial_condition"]).load()

In [None]:
# Minimum of the max-normalised SST error, reduced over all end dates and
# initial conditions.
max_weighted_sst_diff.min(["end_date", "initial_condition"]).load()

In [None]:
# Maximum of the max-normalised SST error, reduced over all end dates and
# initial conditions.
max_weighted_sst_diff.max(["end_date", "initial_condition"]).load()

# Bar Chart for Thesis

In [None]:
# Three-panel summary figure: the binned error counts for SST, atmospheric
# temperature and humidity side by side.
fig, axs = pplt.subplots(width="70em", height="25em", ncols=3)
cycle = pplt.Cycle(["c", "y", "m"])

# Tick labels shared by all panels. Raw string: "\l" is not a valid escape
# sequence in a normal string.
error_range_ticklabels = [
    r"$\leq 0.01$",
    "$[0.01, 0.1]$",
    "$[0.1, 0.2]$",
    "$[0.2, 0.5]$",
    "$[0.5, 1]$",
]
# (binned counts, panel title) in panel order.
panels = [
    (binned_sst_diffs.sst_error, "Sea Surface Temperature"),
    (binned_t_diffs.t_error, "Atmospheric Temperature"),
    (binned_q_diffs.q_error, "Humidity"),
]

for panel_index, (counts, panel_title) in enumerate(panels):
    ax = axs[panel_index]
    bars = ax.bar(counts, cycle=cycle, width=0.5)
    if panel_index == 0:
        # The bars of the first panel serve as legend handles for the whole
        # figure; every panel uses the same colour cycle.
        im = bars
    ax.format(
        xticklabels=error_range_ticklabels,
        ylabel="Number of Experiments",
        xlabel="Relative Error Range",
        title=panel_title,
    )
axs.format(abc="a)")
fig.legend(im, frame=False, ncols=3, loc="b", title="")
fig.savefig(plotting_dir / "relative_error_bar.pdf")