In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from ab.agent import AgentRevision
# from ab.git import make_agent_revision_from_repo_path
from ab.run import run_ab
from plumbum import LocalPath

In [None]:
# Root folder for A/B artifacts, located under the current working directory.
ab_data = LocalPath(".") / "ab_data"
# Sub-folder used below for the replays directory and the exported result CSV.
workdir = ab_data / "workdir"

In [None]:
# Disabled alternative: build both revisions with
# make_agent_revision_from_repo_path (its import is commented out as well).
# NOTE(review): these paths point under `workdir`, whereas the active
# definitions below use folders directly under `ab_data` — confirm which
# layout is intended before re-enabling.
# rev_a = make_agent_revision_from_repo_path(workdir / "sub_a")
# rev_b = make_agent_revision_from_repo_path(workdir / "sub_b")

# The two agent revisions to compare, each given by its entry script path
# and a revision tag ("A" / "B").
rev_a = AgentRevision(
    script_path=ab_data / "sub_a" / "main.py",
    revision="A"
)
rev_b = AgentRevision(
    script_path=ab_data / "sub_b" / "main.py",
    revision="B"
)

# Run AB

In [None]:
# Run the A/B comparison of rev_a against rev_b; the fourth argument points
# at workdir/replays.
# NOTE(review): range(10) is presumably the set of seeds/episodes to play and
# n_jobs=-1 presumably means "use all available workers" — confirm against
# ab.run.run_ab.
result = run_ab(rev_a, rev_b, range(10), workdir / "replays", n_jobs=-1)

In [None]:
# Pull the run's results out as a table; it is displayed and exported to CSV
# in the following cells.
df = result.get_result_df()

In [None]:
# Display the full result table as the cell output.
df

In [None]:
# Persist the table (without the index column) to workdir/result.csv; the
# "Compute Metrics" section reads it back from this file.
df.to_csv(str(workdir / "result.csv"), index=False)

# Compute Metrics

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as sps
from tqdm.auto import tqdm

In [None]:
def bootstrap_mean_distribution(values: np.ndarray, n_iter: int = 10_000, seed=None) -> np.ndarray:
    """Approximate the sampling distribution of the mean by bootstrap resampling.

    Each replicate draws ``len(values)`` observations from ``values`` with
    replacement and records their mean. Resampling is vectorised, so no
    progress bar is shown.

    Args:
        values: 1-D array-like of observations.
        n_iter: Number of bootstrap replicates to draw.
        seed: Optional seed (or ``np.random.Generator``) forwarded to
            ``np.random.default_rng``. Pass an int for reproducible output;
            ``None`` (the default) draws fresh entropy on every call.

    Returns:
        ``np.ndarray`` of shape ``(n_iter,)`` with one resampled mean per
        replicate. Every entry is NaN when ``values`` is empty, and the array
        is empty when ``n_iter <= 0``.

    Raises:
        ValueError: If ``values`` is not one-dimensional.
    """
    values = np.asarray(values)
    if values.ndim != 1:
        raise ValueError("values must be 1-dimensional")
    if n_iter <= 0:
        return np.empty(0, dtype=np.float64)

    n_obs = len(values)
    if n_obs == 0:
        # The mean of an empty resample is undefined; report NaN explicitly
        # instead of relying on numpy's "mean of empty slice" warning path.
        return np.full(n_iter, np.nan)

    rng = np.random.default_rng(seed)

    # Draw the resampling indices as (replicates x observations) matrices,
    # a bounded number of rows at a time so memory stays modest even for
    # long inputs.
    max_cells = 4_000_000
    rows_per_chunk = max(1, max_cells // n_obs)

    means = np.empty(n_iter, dtype=np.float64)
    for start in range(0, n_iter, rows_per_chunk):
        stop = min(start + rows_per_chunk, n_iter)
        picks = rng.integers(0, n_obs, size=(stop - start, n_obs))
        means[start:stop] = values[picks].mean(axis=1)
    return means

In [None]:
# Reload the exported table from workdir/result.csv.
# NOTE: this rebinds `result`, which earlier held the value returned by
# run_ab(); after this cell, re-running the `result.get_result_df()` cell
# would operate on this DataFrame instead.
result = pd.read_csv(str(workdir / "result.csv"))

## Win Rate

In [None]:
# Keep only rows whose tie_flag is false and take b_win_flag as a float64 array.
sample = result.loc[~result["tie_flag"], "b_win_flag"].to_numpy().astype(np.float64)

In [None]:
# Bootstrap the mean of `sample` with the default number of iterations.
# NOTE: `sample` and `mean_distr` are reused by the lichen-surplus section,
# so this cell must run right after the win-rate `sample` cell.
mean_distr = bootstrap_mean_distribution(sample)

In [None]:
# Jarque-Bera test on the bootstrapped means, printed above the figure.
print(sps.jarque_bera(mean_distr))

# Explicit figure/axes with labelled axes so the plot stands on its own.
fig, ax = plt.subplots()
ax.hist(mean_distr, bins=100, density=True)
ax.set(
    title="B win rate distribution",
    xlabel="Bootstrapped mean B win rate",
    ylabel="Density",
)
plt.show()

In [None]:
# One-sample t-test of H0: the mean of `sample` equals 0.5. This corresponds
# to B winning half of the non-tie games, provided `sample` still holds the
# b_win_flag values assigned in this section.
sps.ttest_1samp(sample, popmean=0.5)

## Mean Lichen Surplus

In [None]:
# Per-row surplus: B's final lichen count minus A's, stored as a new column.
result["lichen_surplus"] = result["b_final_n_lichen"].sub(result["a_final_n_lichen"])
# Restrict to rows where end_by_running_out_of_turns_flag is set; float64 array.
sample = (
    result.loc[result["end_by_running_out_of_turns_flag"], "lichen_surplus"]
    .to_numpy()
    .astype(np.float64)
)

In [None]:
# Bootstrap the mean of `sample` with the default number of iterations.
# NOTE: this overwrites the `mean_distr` computed in the win-rate section,
# and `sample` must be the lichen-surplus array assigned just above.
mean_distr = bootstrap_mean_distribution(sample)

In [None]:
# Jarque-Bera test on the bootstrapped means, printed above the figure.
print(sps.jarque_bera(mean_distr))

# Explicit figure/axes with labelled axes so the plot stands on its own.
fig, ax = plt.subplots()
ax.hist(mean_distr, bins=100, density=True)
ax.set(
    title="B mean lichen surplus distribution",
    xlabel="Bootstrapped mean lichen surplus (B - A)",
    ylabel="Density",
)
plt.show()

In [None]:
# One-sample t-test of H0: the mean of `sample` equals 0.0. This corresponds
# to no mean lichen surplus for B, provided `sample` still holds the
# lichen_surplus values assigned in this section.
sps.ttest_1samp(sample, popmean=0.0)