In [None]:
from utils import *
from plotly.subplots import make_subplots
from tqdm.auto import tqdm
import json
from loaders import *

In [None]:
# Load every table the notebook compares. The loader functions are not defined
# in this notebook; presumably they come from the star imports above
# (`loaders` / `utils`) -- TODO confirm.
#
# res_df: later cells read its "env", "score", "seed" and "path" columns.
# scalars: later cells call scalars.read(path) and filter the result by "tag".
res_df, scalars = final_benchmark_loader()
# ref_df: later joined on "task"; supplies the "random" and "human_gamer" columns.
ref_df = reference_loader()
# paper_scores is later joined on "Environment", paper_stats on "Statistic".
paper_scores, paper_stats = papers_loader()

In [None]:
# Display the table that is merged with our scores on "Environment" below.
paper_scores

In [None]:
# Mean score per environment over all of our runs, renamed so that it lines up
# with the column layout of paper_scores.
ours_by_env = (
    res_df.groupby("env")["score"]
    .mean()
    .reset_index()
    .rename(columns={"env": "Environment", "score": "Ours"})
)

# Default (inner) merge: only environments present in both tables are kept.
scores = paper_scores.merge(ours_by_env, on="Environment")
scores

In [None]:
# Normalised score of every run: 0 corresponds to the "random" reference score
# and 1 to the "human_gamer" reference score of the same task.
df = res_df.merge(ref_df, left_on="env", right_on="task")
df["norm"] = (df["score"] - df["random"]) / (df["human_gamer"] - df["random"])

# Average over the runs of each environment *before* aggregating. Taking the
# median/mean directly over individual runs would weight an environment by its
# number of runs and would not be a median over environments, which makes the
# "Ours" column incomparable with the per-environment means used for the
# scores table.
env_norm = df.groupby("env")["norm"].mean()

ours_df = pd.DataFrame.from_records(
    [
        {"Statistic": "Median", "Ours": env_norm.median()},
        {"Statistic": "Mean", "Ours": env_norm.mean()},
    ]
)
stats = paper_stats.merge(ours_df, on="Statistic")
stats

In [None]:
# Columns (methods) to include in the exported LaTeX tables, in display order.
selection = [
    "Random",
    "Human",
    "SimPLe",
    "TWM",
    "IRIS",
    "DreamerV3",
    "SR-SPR",
    "EfficientZero",
    "BBF",
    "DreamerV2",
    "Ours",
]

# Shortened header used in both tables.
short_names = {"EfficientZero": "EffZero"}

# Per-environment scores, printed without decimals.
sel_scores = scores[["Environment"] + selection].rename(columns=short_names)
print(sel_scores.to_latex(index=False, float_format="%.0f"))

# Aggregate statistics, printed with three decimals.
sel_stats = stats[["Statistic"] + selection].rename(columns=short_names)
print(sel_stats.to_latex(index=False, float_format="%.3f"))

In [None]:
# Flatten the logged validation returns of every run into one long table with
# one row per (task, seed, logged step).
records = []
for _, run in res_df.iterrows():
    run_scalars = scalars.read(run["path"])
    is_val = run_scalars["tag"] == "val/mean_ep_ret"
    records.extend(
        {
            "task": run["env"],
            "seed": run["seed"],
            "time": point["step"],
            "score": point["value"],
        }
        for _, point in run_scalars[is_val].iterrows()
    )

val_df = pd.DataFrame.from_records(records)

In [None]:
# Width of the step bins used to align validation points across runs.
bin_width = int(20e3)

# Floor every step to the start of its bin (stored on val_df as "time2").
val_df["time2"] = (val_df["time"] // bin_width) * bin_width

# Mean and standard deviation of the score per task and bin; the bin start is
# exposed as "time" in the result.
avg_df = (
    val_df.groupby(["task", "time2"])["score"]
    .agg(score_mean="mean", score_std="std")
    .reset_index()
    .rename(columns={"time2": "time"})
)
avg_df

In [None]:
# Learning curves: one subplot per task showing the mean validation score over
# time with a +/- 1 standard deviation band, laid out on a grid `cols` wide.
cols = 4
tasks = sorted(res_df["env"].unique())
rows = (len(tasks) + cols - 1) // cols  # ceil(len(tasks) / cols)

fig = make_subplots(
    rows=rows,
    cols=cols,
    subplot_titles=list(tasks),
    vertical_spacing=0.05,
)

# A single colour (the first one yielded by make_color_iter) for every subplot.
color = next(make_color_iter())

for i, task in enumerate(tasks):
    # Fill the grid row by row; plotly subplot coordinates are 1-based.
    row, col = divmod(i, cols)
    row, col = row + 1, col + 1

    task_df = avg_df[avg_df["task"] == task]
    x = task_df["time"].tolist()
    mean = task_df["score_mean"]
    # The sample std is NaN for bins that hold a single value; a NaN vertex
    # would break the band polygon, so treat those bins as having zero spread.
    std = task_df["score_std"].fillna(0.0)
    y_upper = (mean + std).tolist()
    y_lower = (mean - std).tolist()

    traces = [
        go.Scatter(
            x=x,
            y=mean.tolist(),
            mode="lines",
            line=dict(color=color),
            showlegend=False,
        ),
        # Band drawn as a closed polygon: upper edge left-to-right, then lower
        # edge right-to-left. "toself" fills the polygon itself, whereas
        # "tozerox" would additionally fill towards x = 0.
        go.Scatter(
            x=x + x[::-1],
            y=y_upper + y_lower[::-1],
            fill="toself",
            fillcolor=to_rgba(color),
            line=dict(color="rgba(255, 255, 255, 0)"),
            hoverinfo="skip",
            showlegend=False,
        ),
    ]
    for trace in traces:
        fig.add_trace(trace, row=row, col=col)

    # Only the left-most column carries a y-axis title. Addressing the axis by
    # (row, col) avoids tracking plotly's internal "yaxisN" numbering by hand.
    if col == 1:
        fig.update_yaxes(title_text="Score", row=row, col=col)


fig.update_layout(width=800, height=1100)

fig.write_image("../tex/assets/atari_100k.curves.pdf")
fig

In [None]:
# Alternative score per run ("score2"): the validation return at the step where
# the training episode return peaked, linearly interpolated between the logged
# validation points.
res_df2 = res_df.copy()
score2 = []
# The loop variable is deliberately not called `df`, so the notebook-level `df`
# built by the statistics cells is not overwritten as a side effect.
for _, run in res_df2.iterrows():
    run_scalars = scalars.read(run["path"])

    # Step at which the logged training return is highest (first one on ties).
    train_ep_ret = run_scalars[run_scalars["tag"] == "train/ep_ret"]
    best_step = train_ep_ret["step"].iloc[train_ep_ret["value"].argmax()]

    # np.interp requires increasing x-coordinates and does not check for it;
    # the stable sort is a no-op when the log is already ordered by step.
    val_scores = run_scalars[run_scalars["tag"] == "val/mean_ep_ret"].sort_values(
        "step", kind="stable"
    )
    score2.append(
        np.interp(
            best_step, val_scores["step"].to_numpy(), val_scores["value"].to_numpy()
        )
    )
res_df2["score2"] = score2

In [None]:
# Normalised "score2" of every run: 0 corresponds to the "random" reference
# score and 1 to the "human_gamer" reference score of the same task.
df = res_df2.merge(ref_df, left_on="env", right_on="task")
df["norm"] = (df["score2"] - df["random"]) / (df["human_gamer"] - df["random"])

# Average over the runs of each environment first, so that the median and mean
# are taken over environments rather than over individual runs (which would
# weight an environment by its number of runs).
env_norm = df.groupby("env")["norm"].mean()

ours_df = pd.DataFrame.from_records(
    [
        {"Statistic": "Median", "Ours": env_norm.median()},
        {"Statistic": "Mean", "Ours": env_norm.mean()},
    ]
)
ours_df