## Validating the Model

How well do the confidence-interval (CI) sizes estimated from the copy model match the CI sizes obtained by bootstrapping?

In [None]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# are reloaded before code is executed.
%reload_ext autoreload
%autoreload 2

In [None]:
import os
import sys

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Add the parent directory to the import path so that `utils` can be found.
sys.path.append(os.path.abspath(".."))
from utils import simulate_trials_from_copy_model, simulate_trials_exact, fast_cohen

In [None]:
# Simulation grid. For every (n_trials, ground-truth EC, accuracy) condition we
# collect two distributions of EC estimates:
#   * bootstrap:  resample (with replacement) one fixed set of simulated trials
#   * copy model: draw a fresh set of trials from the copy model every time
SEED = 0  # fixed so the bootstrap resampling is reproducible across runs
N_RESAMPLES = 1_000  # EC estimates drawn per condition and per method
TRIAL_COUNTS = [50, 100, 500, 1_000]
GT_ECS = [0, 0.25, 0.5, 0.75, 0.95]
ACCURACIES = [0.25, 0.5, 0.75, 0.95]

rng = np.random.default_rng(SEED)
# NOTE(review): the simulate_* helpers are not handed `rng`, so any randomness
# inside them is not controlled by SEED -- confirm in utils.

# Parallel lists: entry k of each list describes the k-th simulated estimate.
copy_ecs = []
hard_ecs = []
gt_ecs = []
trial_numbers = []
accs = []

for n_trials in TRIAL_COUNTS:
    for gt_ec in GT_ECS:
        for acc in ACCURACIES:

            # first, generate some real trials
            _, real_trials1, real_trials2 = simulate_trials_exact(
                kappa=gt_ec, acc1=acc, acc2=acc, n_trials=n_trials
            )
            # loop-invariant: every bootstrap sample has the size of the real data
            n_real = len(real_trials1)

            for _ in range(N_RESAMPLES):

                # sample from real trials; the same indices are applied to both
                # observers so that trials stay paired
                new_idx = rng.choice(n_real, size=n_real, replace=True)
                hard_ec = fast_cohen(real_trials1[new_idx], real_trials2[new_idx])

                # sample from copy model
                trials1, trials2 = simulate_trials_from_copy_model(
                    kappa=gt_ec, acc1=acc, acc2=acc, n_trials=n_trials
                )
                copy_ec = fast_cohen(trials1, trials2)

                copy_ecs.append(copy_ec)
                hard_ecs.append(hard_ec)
                gt_ecs.append(gt_ec)
                trial_numbers.append(n_trials)
                accs.append(acc)

# Wide format: one row per simulated estimate, with the copy-model and
# bootstrap EC values side by side next to the condition they belong to.
df = pd.DataFrame(
    {
        "EC from copy model": copy_ecs,
        "EC from bootstrap": hard_ecs,
        "GT": gt_ecs,
        "Trials": trial_numbers,
        "Accuracy": accs,
    }
)

# Long format: the two EC columns are stacked into a single "value" column,
# with "EC type" naming the column each value came from.
mdf = df.melt(id_vars=["GT", "Trials", "Accuracy"], var_name="EC type")

# Two EC columns were melted, so every wide row must appear exactly twice.
assert len(mdf) == 2 * len(df)
display(mdf)

In [None]:
# Overview: one figure per (trial count, accuracy) combination, comparing the
# two EC estimates side by side for every ground-truth EC value.
for trials, trial_df in mdf.groupby("Trials"):
    for accuracy, acc_df in trial_df.groupby("Accuracy"):
        fig, ax = plt.subplots(1, 1, figsize=(12, 5))
        ax.grid(axis="y")
        # draw on the axes created above instead of relying on the implicit
        # "current axes" state of pyplot
        sns.boxplot(
            data=acc_df, x="GT", y="value", hue="EC type", dodge=True, ax=ax
        )
        ax.set_title(f"{trials} trials at accuracy {accuracy}")
        ax.set_xlabel("Ground Truth EC")
        ax.set_ylabel("Error Consistency [Kappa]")
        sns.despine(fig=fig)

In [None]:
# Having established that it makes sense to show the different conditions in which this fails,
# we plot a 2x2 overview to show the convergence and the issues:
#   50 trials at accuracy 0.5   |    50 trials at accuracy 0.95
# 1000 trials at accuracy 0.5   |  1000 trials at accuracy 0.95

# Global font size applied through matplotlib's rc settings; `matplotlib`
# itself is imported with the other dependencies at the top of the notebook.
font = {"size": 16}
matplotlib.rc("font", **font)


def plot_sim(ax, df, trials, acc, position, legend=False):
    """Draw one boxplot panel of the validation figure onto ``ax``.

    Parameters
    ----------
    ax : axes object the panel is drawn on.
    df : data handed to ``sns.boxplot``; it is plotted with "GT" on the x-axis,
        "value" on the y-axis and "EC type" as hue.
    trials : value shown in the y-axis label.
    acc : value shown in the x-axis label.
    position : text annotated onto the panel (the callers pass a panel letter).
    legend : forwarded to ``sns.boxplot``; when truthy, the legend is also
        placed in the lower right corner.
    """
    x_label = f"Ground Truth EC (Accuracy: {acc})"
    y_label = f"EC [Kappa] (Trials: {trials})"

    ax.grid(axis="y")
    boxplot_options = dict(x="GT", y="value", hue="EC type", dodge=True, legend=legend)
    sns.boxplot(data=df, ax=ax, **boxplot_options)
    if legend:
        ax.legend(loc="lower right")

    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)

    # panel label at (-0.4, 0.85), using the default coordinates of annotate
    ax.annotate(position, xy=(-0.4, 0.85), color="black", fontsize=20)


# Compose the 2x2 validation figure (rows share the x-axis, columns the y-axis):
#   A:   50 trials, accuracy 0.5   |  B:   50 trials, accuracy 0.95
#   C: 1000 trials, accuracy 0.5   |  D: 1000 trials, accuracy 0.95
scale = 1.3
fig, axes = plt.subplots(
    2, 2, figsize=(scale * 12, scale * 7), sharey=True, sharex=True
)

# (panel label, number of trials, accuracy), listed in row-major order to
# match the iteration order of `axes.flat`
panels = [("A", 50, 0.5), ("B", 50, 0.95), ("C", 1000, 0.5), ("D", 1000, 0.95)]
for panel_ax, (panel_label, panel_trials, panel_acc) in zip(axes.flat, panels):
    panel_df = mdf[(mdf["Trials"] == panel_trials) & (mdf["Accuracy"] == panel_acc)]
    # an empty selection would silently produce a blank panel
    assert not panel_df.empty, (
        f"no simulated data for {panel_trials} trials at accuracy {panel_acc}"
    )
    # only the first panel carries the legend
    plot_sim(
        panel_ax,
        panel_df,
        panel_trials,
        panel_acc,
        panel_label,
        legend=(panel_label == "A"),
    )

sns.despine(fig=fig)
fig.tight_layout()

# savefig does not create missing directories, so make sure the target exists
os.makedirs("figures", exist_ok=True)
fig.savefig("figures/model_validation.pdf")