In [None]:
import numpy as np
import pandas as pd
import proplot as pplt
import xarray as xr
import nwp_ensemble as ens

import user_context as context
from utils.files import OIFSPreprocessor, OIFSEnsemblePreprocessor

In [None]:
# Directory that receives the figures saved by this notebook. It sits two
# levels below context.plotting_dir, so missing parent directories are created
# as well; without parents=True, mkdir raises FileNotFoundError when the
# intermediate "ensemble" directory does not exist yet.
plotting_dir = context.plotting_dir / "ensemble" / "spread"
plotting_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Preprocessor whose `preprocess` hook is applied to the input files opened below.
# NOTE(review): the timestamp is presumably the start of the simulated period;
# confirm against OIFSPreprocessor.
reference_time = pd.Timestamp("2014-07-01")
oifs_preprocessor = OIFSPreprocessor(reference_time)

In [None]:
# Paths of the three input files (suffixes par / atm / oce), all located in
# the same input directory.
ifs_input_dir = context.ifs_input_files_dir
input_file_par = ifs_input_dir / "papa_2014-07_par.nc"
input_file_atm = ifs_input_dir / "papa_2014-07_atm.nc"
input_file_oce = ifs_input_dir / "papa_2014-07_oce.nc"

In [None]:
# Open each input file through the shared preprocessing hook and tag the
# resulting dataset with a scalar "source" coordinate (par / atm / oce).
preprocess_input = oifs_preprocessor.preprocess
input_par = xr.open_mfdataset(
    input_file_par, preprocess=preprocess_input
).assign_coords(source="par")
input_atm = xr.open_mfdataset(
    input_file_atm, preprocess=preprocess_input
).assign_coords(source="atm")
input_oce = xr.open_mfdataset(
    input_file_oce, preprocess=preprocess_input
).assign_coords(source="oce")

In [None]:
# Standard deviation over time of height_f in the "atm" input, shown for the
# level labelled 51.
# NOTE(review): which physical level nlev=51 corresponds to is not visible here.
height_f_std = input_atm.height_f.std("time")
height_f_std.sel(nlev=51).load()

In [None]:
# Combine the three tagged datasets along the "source" dimension.
# NOTE(review): the name `input` shadows the Python builtin of the same name;
# it is kept here because the following cell reads it.
input_sources = [input_par, input_atm, input_oce]
input = xr.concat(input_sources, dim="source")

In [None]:
# Statistics across the three sources: absolute value of the mean, and the
# standard deviation.
source_mean = input.mean("source")
mean = np.abs(source_mean)
std = input.std("source")

In [None]:
# Coefficient of variation (std relative to |mean|), summed over nlev. It only
# feeds the alternative spread definitions kept as comments below; the spread
# actually used is the standard deviation of t summed over nlev.
cov = std / mean
cov_vsum = cov.sum("nlev")
# initial_spread = cov_vsum.u + cov_vsum.t + cov_vsum.v + cov_vsum.q
# initial_spread = cov_vsum.t
initial_spread = std.sum("nlev").t
# Remove the 6-hourly time steps of 1-2 July and 29-30 July 2014.
# The lowercase "h" alias is used because the uppercase "H" frequency alias is
# deprecated since pandas 2.2.
beginning = pd.date_range("2014-07-01 00:00", "2014-07-02 18:00", freq="6h")
end = pd.date_range("2014-07-29 00:00", "2014-07-30 18:00", freq="6h")
initial_spread = initial_spread.drop_sel(time=beginning).drop_sel(time=end)

In [None]:
# Scatter plot of the initial spread on a logarithmic y axis, saved as a PDF
# into plotting_dir.
initial_spread.load()
fig, ax = pplt.subplots(width="50em", height="30em")
ax.scatter(initial_spread, marker=".", color="k")
initial_spread_format = dict(
    title="Initial Spread of T",
    xlabel="Time",
    ylabel="Initial Spread [-]",
    xrotation=30,
    yformatter="sci",
    yscale="log",
    ylim=[1e-3, 1e3],
)
ax.format(**initial_spread_format)
fig.savefig(plotting_dir / "initial_spread.pdf")

In [None]:
# Summary statistics of the initial spread over the time dimension; each
# reduction yields a 0-d array that is unwrapped to a scalar with [()].
initial_spread_mean = initial_spread.mean("time").data[()]
initial_spread_median = initial_spread.median("time").data[()]
initial_spread_max = initial_spread.max("time").data[()]
initial_spread_min = initial_spread.min("time").data[()]
print(f"Mean: {initial_spread_mean}")
print(f"Median: {initial_spread_median}")
print(f"Max: {initial_spread_max}")
print(f"Min: {initial_spread_min}")

In [None]:
# Count how many values of the initial spread fall into each of the two bins
# delimited by 0, 1 and 20.
initial_spread_bin_edges = [0, 1, 20]
initial_spread.groupby_bins(initial_spread, initial_spread_bin_edges).count()

## Final vs. Initial Spread

In [None]:
# Preprocessor for the ensemble output opened below, built with a -7 h offset.
# NOTE(review): this rebinds `oifs_preprocessor` to a different class, so
# re-running the input-loading cell afterwards would pick up this object
# instead of the original OIFSPreprocessor.
ensemble_time_offset = pd.Timedelta(-7, "h")
oifs_preprocessor = OIFSEnsemblePreprocessor(ensemble_time_offset)

In [None]:
# NOTE(review): max_iters is not read anywhere else in the visible notebook.
max_iters = ens.max_iters
# Collect every entry three levels below the ensemble root; the loop variable
# names suggest a <date>/<initialisation>/<experiment> layout.
experiment_directories = [
    experiment_dir
    for date_dir in ens.ensemble_directory.glob("*")
    for init_dir in date_dir.glob("*")
    for experiment_dir in init_dir.glob("*")
]
# One progvar.nc per experiment directory, all opened as a single dataset.
progvars_ensemble = [
    directory / "progvar.nc" for directory in experiment_directories
]
prog_ensemble = xr.open_mfdataset(
    progvars_ensemble,
    preprocess=oifs_preprocessor.preprocess_ensemble,
)

In [None]:
# Ensemble statistics across the initial conditions at the time coordinate
# equal to two days.
# NOTE(review): this is the mean of the absolute values, whereas the
# initial-condition statistics use the absolute value of the mean.
forecast_time = pd.Timedelta(2, "D")
progvar_forecast = prog_ensemble.sel(time=forecast_time)
progvar_forecast_abs_mean = np.abs(progvar_forecast).mean("initial_condition")
progvar_forecast_std = progvar_forecast.std("initial_condition")

In [None]:
# Coefficient of variation of the forecast (std relative to the mean absolute
# value), summed over nlev. It only feeds the alternative definitions kept as
# comments; the spread actually used is the std of t summed over nlev.
progvar_forecast_cov = progvar_forecast_std / progvar_forecast_abs_mean
cov_vsum = progvar_forecast_cov.sum("nlev")
# final_spread = cov_vsum.u + cov_vsum.t + cov_vsum.v + cov_vsum.q
# final_spread = cov_vsum.t
final_spread = progvar_forecast_std.sum("nlev").t

In [None]:
# Re-label the initial spread before dividing: shift its time axis by -7 h
# (the same offset that is passed to OIFSEnsemblePreprocessor) and rename the
# dimension to "start_date" (presumably the dimension final_spread is indexed
# by; confirm against the preprocessor).
# The guard keeps this cell re-runnable: once "time" has been renamed, an
# unguarded second execution would fail on the missing coordinate. A separate
# assign_coords with the unshifted values is not needed, because the shifted
# assignment replaces the coordinate entirely.
if "time" in initial_spread.dims:
    shifted_time = initial_spread.time.data + np.timedelta64(-7, "h")
    initial_spread = initial_spread.assign_coords(time=shifted_time).rename(
        time="start_date"
    )
spread_ratio = final_spread / initial_spread

In [None]:
# Spread ratio on a logarithmic y axis with one colour/marker pair per
# coupling scheme, plus a dashed reference line at a ratio of 1.
ratio_scheme_styles = [
    ("parallel", "m", "."),
    ("atm-first", "c", "x"),
    ("oce-first", "y", "1"),
    ("converged SWR", "k", "+"),
]
spread_ratio.load()
fig, ax = pplt.subplots(width="50em", height="30em")
ax.hlines(
    y=1,
    x1=np.datetime64("2014-07-02 00:00"),
    x2=np.datetime64("2014-07-29 04:00"),
    color="gray",
    ls="--",
)
for scheme, color, marker in ratio_scheme_styles:
    ax.scatter(spread_ratio.sel(coupling_scheme=scheme), color=color, marker=marker)
spread_ratio_format = dict(
    title="Ratio of Final and Initial Ensemble Spread",
    xlabel="Time",
    ylabel="Spread Ratio [-]",
    xrotation=30,
    yscale="log",
    yformatter="sci",
    ylim=[1e-3, 1e3],
)
ax.format(**spread_ratio_format)
fig.savefig(plotting_dir / "spread_ratio.pdf")

In [None]:
# Summary statistics of the spread ratio over start_date; the coupling scheme
# labels are printed first so the remaining entries can be matched to them.
coupling_schemes = spread_ratio.coupling_scheme.data
spread_ratio_mean = spread_ratio.mean("start_date").data
spread_ratio_median = spread_ratio.median("start_date").data
spread_ratio_max = spread_ratio.max("start_date").data
spread_ratio_min = spread_ratio.min("start_date").data
print(f"Coupling schemes: {coupling_schemes}")
print(f"Mean: {spread_ratio_mean}")
print(f"Median: {spread_ratio_median}")
print(f"Max: {spread_ratio_max}")
print(f"Min: {spread_ratio_min}")

In [None]:
# Count how many spread ratios fall into each of the two bins delimited by
# 0, 1 and 45, i.e. below versus above a ratio of 1.
spread_ratio_bin_edges = [0, 1, 45]
spread_ratio.groupby_bins(spread_ratio, spread_ratio_bin_edges).count()

## Both Plots in One

In [None]:
# Two-row figure: the initial spread of T on top, the ratio of final to
# initial spread per coupling scheme below. Both rows get the shared
# log-scaled formatting applied at the end.
combined_scheme_styles = {
    "parallel": dict(color="m", marker="."),
    "atm-first": dict(color="c", marker="x"),
    "oce-first": dict(color="y", marker="1"),
    "converged SWR": dict(color="k", marker="+"),
}
fig, axs = pplt.subplots(width="70em", height="40em", nrows=2, spany=False)
ax = axs[0]
ax.scatter(initial_spread, marker=".", color="k")
ax.format(title="Initial Spread of T", ylabel="Initial Spread")
ax = axs[1]
ax.hlines(
    y=1,
    x1=np.datetime64("2014-07-02 00:00"),
    x2=np.datetime64("2014-07-29 04:00"),
    color="gray",
    ls="--",
)
for scheme, style in combined_scheme_styles.items():
    # The legend label is the coupling scheme name itself.
    ax.scatter(spread_ratio.sel(coupling_scheme=scheme), label=scheme, **style)
ax.format(
    title="Ratio of Final and Initial Ensemble Spread",
    ylabel="Spread Ratio",
    ylim=[1e-1, 1e2],
)
ax.legend(alpha=1, ncols=4, loc="lower right")
shared_format = dict(
    xrotation=30,
    yscale="log",
    yformatter="sci",
    abc="a)",
    xlabel="Time",
)
axs.format(**shared_format)
fig.savefig(plotting_dir / "spread_plots.pdf")

# Which Method Leads to the Smallest Final Spread?

In [None]:
# Show the coupling scheme labels in coordinate order; the argmin indices
# computed in the next cell refer to positions in this array.
final_spread.coupling_scheme.data

In [None]:
# Position (along coupling_scheme) of the smallest final spread for every
# remaining entry, followed by how often each position occurs.
argmin_final_spread = final_spread.argmin(dim="coupling_scheme")
final_spread_argmin_counts = argmin_final_spread.groupby(argmin_final_spread).count()
final_spread_argmin_counts.load()

# Which Method Leads to the Smallest Spread Ratio?

In [None]:
# Show the coupling scheme labels in coordinate order; the argmin indices
# computed in the next cell refer to positions in this array.
spread_ratio.coupling_scheme.data

In [None]:
# Position (along coupling_scheme) of the smallest spread ratio for every
# remaining entry, followed by how often each position occurs.
argmin_spread_ratio = spread_ratio.argmin(dim="coupling_scheme")
spread_ratio_argmin_counts = argmin_spread_ratio.groupby(argmin_spread_ratio).count()
spread_ratio_argmin_counts.load()

In [None]:
# Normalise each spread ratio by the largest ratio among the coupling schemes,
# so the scheme with the largest ratio gets the value 1.
max_spread_ratio = spread_ratio.max("coupling_scheme")
relative_spread_ratio = spread_ratio / max_spread_ratio

In [None]:
# Mean of the relative spread ratio over start_date.
relative_spread_ratio.mean("start_date").load()

In [None]:
# Standard deviation of the relative spread ratio over start_date.
relative_spread_ratio.std("start_date").load()

In [None]:
# Median of the relative spread ratio over start_date.
relative_spread_ratio.median("start_date").load()

In [None]:
# Minimum of the relative spread ratio over start_date.
relative_spread_ratio.min("start_date").load()

In [None]:
# Maximum of the relative spread ratio over start_date.
relative_spread_ratio.max("start_date").load()