In [1]:
import json
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
from PIL import Image
import plotly.io as pio

# Set the Kaleido export scope's MathJax setting to None before any figure is
# written to disk.
# NOTE(review): presumably this avoids the "Loading [MathJax]" artefact that can
# show up in statically exported figures — confirm against the Kaleido version in use.
pio.kaleido.scope.mathjax = None

In [2]:
# --- Per-seed training curves ------------------------------------------------
# The JSON file is read as a list of runs; each run carries "task", "seed" and
# two parallel sequences "xs" / "ys" that are flattened to one row per point.
# JSON is read explicitly as UTF-8 so the result does not depend on the
# platform's default encoding.
with open("scores/atari-dreamerv2.json", "r", encoding="utf-8") as f:
    data = json.load(f)

records = [
    {
        "task": run["task"],
        "seed": int(run["seed"]),
        "time": step,
        "score": score,
    }
    for run in data
    for step, score in zip(run["xs"], run["ys"])
]

raw = pd.DataFrame.from_records(records)

# --- Per-game reference scores -----------------------------------------------
# The JSON object maps a game name to a dict of reference values; the game name
# is folded into each record under the "game" key.
with open("scores/baselines.json", "r", encoding="utf-8") as f:
    data = json.load(f)

records = [{"game": name, **values} for name, values in data.items()]

baselines = pd.DataFrame.from_records(records)

# pd.merge defaults to an inner join, so rows whose task has no matching
# baseline entry are dropped here.
raw = pd.merge(
    raw,
    baselines[["game", "random", "human_gamer", "human_record"]],
    left_on="task",
    right_on="game",
)
raw = raw.drop(columns=["game"])

# --- Normalised scores -------------------------------------------------------
# z is the score above the random baseline; "human" and "wr" rescale it by the
# gap between the random baseline and the human_gamer / human_record reference.
z = raw["score"] - raw["random"]
raw["human"] = z / (raw["human_gamer"] - raw["random"])
raw["wr"] = z / (raw["human_record"] - raw["random"])
raw["clip_wr"] = raw["wr"].clip(upper=1.0)

# --- Aggregate across seeds --------------------------------------------------
# One output row per (task, time); every metric gets mean/min/max/std columns
# named "<metric>_<stat>".
g = raw.groupby(["task", "time"])

columns = {"random": g["random"].mean()}
columns.update(
    {
        f"{col}_{func}": g[col].agg(func)
        for col in ("score", "human", "wr")
        for func in ("mean", "min", "max", "std")
    }
)

scores = pd.concat(columns, axis=1).reset_index()
scores

Unnamed: 0,task,time,random,score_mean,score_min,score_max,score_std,human_mean,human_min,human_max,human_std,wr_mean,wr_min,wr_max,wr_std
0,atari_alien,1000000.0,228.8,365.181818,268.000000,431.0,52.001573,0.019769,0.005682,0.029309,0.007538,0.000542,0.000156,0.000803,0.000207
1,atari_alien,2000000.0,228.8,894.090909,691.000000,1082.0,132.407292,0.096434,0.066996,0.123672,0.019193,0.002643,0.001836,0.003390,0.000526
2,atari_alien,3000000.0,228.8,1119.148760,823.636364,1575.0,241.020006,0.129057,0.086222,0.195133,0.034936,0.003538,0.002363,0.005349,0.000958
3,atari_alien,4000000.0,228.8,1349.272727,988.000000,2011.0,356.397556,0.162413,0.110047,0.258331,0.051660,0.004452,0.003016,0.007081,0.001416
4,atari_alien,5000000.0,228.8,1284.363636,965.000000,2028.0,305.911514,0.153005,0.106713,0.260795,0.044342,0.004194,0.002925,0.007149,0.001215
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10995,atari_zaxxon,196000000.0,32.5,48962.727273,28580.000000,65840.0,10514.139138,5.352948,3.123086,7.199315,1.150243,0.584818,0.341202,0.786536,0.125666
10996,atari_zaxxon,197000000.0,32.5,44340.000000,38160.000000,57460.0,6915.578067,4.847223,4.171134,6.282546,0.756562,0.529566,0.455703,0.686378,0.082655
10997,atari_zaxxon,198000000.0,32.5,45583.636364,30210.000000,59590.0,8938.096299,4.983277,3.301407,6.515568,0.977824,0.544430,0.360684,0.711836,0.106829
10998,atari_zaxxon,199000000.0,32.5,47744.545455,33590.000000,57100.0,7550.926253,5.219679,3.671178,6.243163,0.826068,0.570258,0.401082,0.682075,0.090249


In [3]:
# Task identifiers for the games in the ATARI_100k list. Each human-readable
# title is lower-cased, its words joined with underscores, and prefixed with
# "atari_" (e.g. "Bank Heist" -> "atari_bank_heist").
ATARI_100k = [
    f"atari_{'_'.join(title.lower().split())}"
    for title in (
        "Alien",
        "Amidar",
        "Assault",
        "Asterix",
        "Bank Heist",
        "Battle Zone",
        "Boxing",
        "Breakout",
        "Chopper Command",
        "Crazy Climber",
        "Demon Attack",
        "Freeway",
        "Frostbite",
        "Gopher",
        "Hero",
        "James bond",
        "Kangaroo",
        "Krull",
        "Kung Fu Master",
        "Ms Pacman",
        "Pong",
        "Private Eye",
        "Qbert",
        "Road Runner",
        "Seaquest",
        "Up N Down",
    )
]

In [4]:
# Per-seed score curves for the atari_assault task, restricted to rows whose
# "time" value is at most 10e6.
in_window = raw["time"] <= 10e6
is_assault = raw["task"] == "atari_assault"
df = raw.loc[in_window & is_assault]

fig = px.line(df, x="time", y="score", color="seed")
fig.show()

In [5]:
# Games shown in the figure, given as task-name suffixes (tasks are
# "atari_<env>"). Panel titles are derived from these so the two cannot drift.
envs = ["pong", "crazy_climber", "assault"]

# 2x4 grid in which every panel spans two columns: two panels side by side on
# the top row and one panel over the middle two columns of the bottom row.
fig = make_subplots(
    rows=2,
    cols=4,
    specs=[
        [dict(colspan=2), None, dict(colspan=2), None],
        [None, dict(colspan=2), None, None],
    ],
    subplot_titles=[env.replace("_", " ").title() for env in envs],
    horizontal_spacing=0.1,
    vertical_spacing=0.2,
)

# (row, col) grid anchor of each panel, in the same order as `envs`.
subplots = [(1, 1), (1, 3), (2, 2)]

for (iy, ix), env in zip(subplots, envs):
    df = raw[(raw["time"] <= 6e6) & (raw["task"] == f"atari_{env}")]

    # Build the per-seed lines with plotly express, then move each trace into
    # the target panel exactly once. (An extra add_trace after this loop used
    # to duplicate the last seed's line in every panel.)
    xpr = px.line(df, x="time", y="score", color="seed")
    for trace in xpr.data:
        fig.add_trace(trace, row=iy, col=ix)

    # Dotted horizontal reference line at the game's "random" baseline value,
    # spanning the plotted time range.
    rand = df["random"].iloc[0]
    fig.add_shape(
        type="line",
        x0=df["time"].min(),
        y0=rand,
        x1=df["time"].max(),
        y1=rand,
        line=dict(color="black", dash="dot"),
        row=iy,
        col=ix,
    )

# Fixed-size figure without legend; all three panels share the same axis titles.
fig.update_layout(
    showlegend=False,
    autosize=False,
    width=1024,
    height=768,
    **{f"xaxis{i+1}": {"title": "Env step"} for i in range(3)},
    **{f"yaxis{i+1}": {"title": "Score"} for i in range(3)},
)

fig.write_image("../tex/assets/perf_curves.pdf")

fig