## Optimal CI size

Goal: find the value of accuracy 2 at which the confidence interval (CI) of the empirical EC is smallest, given fixed values of accuracy 1 and the true EC.

In [None]:
# Make sure that updates to imported files are picked up immediately, without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

sys.path.append(os.path.abspath(".."))
from utils import (
    fast_cohen,
    simulate_trials_from_copy_model,
    calc_accuracy_bounds_from_kappa,
)

In [None]:
def generate_more_data(
    acc1_values=(0.25, 0.5, 0.75, 0.95),
    kappas=(0, 0.25, 0.5, 0.75, 0.95),
    n_acc2=10,
    n_repeats=1000,
    n_trials=1000,
):
    """Simulate empirical EC values over a grid of accuracies and true EC values.

    For every combination of ``acc1`` and ``kappa``, the admissible range of
    the second accuracy is obtained from ``calc_accuracy_bounds_from_kappa``.
    ``n_acc2`` evenly spaced values are taken from that range, and for each of
    them ``simulate_trials_from_copy_model`` is called ``n_repeats`` times. The
    empirical EC of every simulated pair is computed with ``fast_cohen``.

    With the default arguments the function behaves exactly like the
    previously hard-coded version (4 * 5 * 10 * 1000 = 200,000 rows).

    Args:
        acc1_values: Iterable of values used for "Accuracy 1".
        kappas: Iterable of values used for "True EC".
        n_acc2: Number of "Accuracy 2" values sampled per (acc1, kappa) pair.
        n_repeats: Number of independent simulations per grid point.
        n_trials: Passed as the fourth positional argument to
            ``simulate_trials_from_copy_model`` (presumably the number of
            trials per simulation -- TODO confirm against ``utils``).

    Returns:
        pandas.DataFrame with one row per simulation and the columns
        "True EC", "Accuracy 1", "Accuracy 2", "Iteration" and "Empirical EC".
    """
    columns = ["True EC", "Accuracy 1", "Accuracy 2", "Iteration", "Empirical EC"]
    records = []

    for acc1 in acc1_values:
        for kappa in kappas:
            lower, upper = calc_accuracy_bounds_from_kappa(acc1, kappa)

            # Start slightly above the lower bound and exclude the upper bound
            # (endpoint=False), so both boundary values are never simulated.
            acc2_grid = np.linspace(lower + 0.001, upper, n_acc2, endpoint=False)

            for acc2 in acc2_grid:
                # Repeat the simulation to obtain an empirical distribution of
                # the EC estimate (fresh draws each time, not a resampling
                # bootstrap of a fixed data set).
                for i in range(n_repeats):
                    trials1, trials2 = simulate_trials_from_copy_model(
                        kappa, acc1, acc2, n_trials
                    )
                    records.append(
                        {
                            "True EC": kappa,
                            "Accuracy 1": acc1,
                            "Accuracy 2": acc2,
                            "Iteration": i,
                            "Empirical EC": fast_cohen(trials1, trials2),
                        }
                    )

    # Passing `columns` keeps the column order stable and yields a correctly
    # labelled (empty) frame even if the grid is empty.
    return pd.DataFrame(records, columns=columns)

In [None]:
# Run the full simulation grid once and keep the result for the plot further down.
# As called here the grid is 4 "Accuracy 1" values x 5 kappas x 10 "Accuracy 2"
# values x 1000 repetitions = 200,000 rows, so this cell may take a while.
more_df = generate_more_data()
display(more_df)

In [None]:
def plot_more_cis(df, save=False):
    """Plot the empirical EC per "Accuracy 1", split by "Accuracy 2".

    Draws a seaborn point plot with "Accuracy 1" on the x-axis,
    "Empirical EC" on the y-axis and one hue level per "Accuracy 2" value.
    The error bars are 95% percentile intervals (``errorbar=("pi", 95)``).

    Args:
        df: DataFrame containing at least the columns "Accuracy 1",
            "Accuracy 2" and "Empirical EC".
        save: If True, the figure is additionally written to
            ``figures/pointplot_more_cis.pdf`` (the directory is created if
            needed). Previously this flag was accepted but silently ignored.

    Returns:
        None. The figure is shown and then closed.
    """
    fig, ax = plt.subplots(1, 1, figsize=(12, 5))
    ax.grid(axis="y")

    sns.pointplot(
        data=df,
        errorbar=("pi", 95),
        capsize=0.1,
        x="Accuracy 1",
        y="Empirical EC",
        hue="Accuracy 2",
        dodge=0.5,
        linestyle="none",
        legend=True,
        log_scale=(False, False),
        native_scale=True,
        ax=ax,
    )
    ax.set_ylim(-0.4, 1.05)
    sns.despine()

    plt.tight_layout()
    # Applied after tight_layout on purpose: reserves extra room at the bottom.
    fig.subplots_adjust(bottom=0.25)

    if save:
        # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
        os.makedirs("figures", exist_ok=True)
        fig.savefig("figures/pointplot_more_cis.pdf")

    plt.show()
    # Close this specific figure so repeated calls do not accumulate figures.
    plt.close(fig)

In [None]:
plot_more_cis(more_df)
# TODO: fix this plotting.
# Interpretation (author's reading of the plot): the CI becomes smallest when
# the difference between the two accuracies is largest.