# Train summary for a single episode

This notebook plots the data of a single episode from a PPO experiment in which one episode is played per training iteration.

## Experiment loading

In [None]:
# Common imports.
from pathlib import Path

%matplotlib widget
import base

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import ipywidgets

import dfaas_env
import dfaas_utils

In [None]:
# Base directory of the results and the sub-directory of the experiment to
# inspect. NOTE(review): this is an absolute, machine-specific path; adjust it
# when running the notebook elsewhere.
prefix_dir = Path("/home/emanuele/marl-dfaas/results/")
exp_dir = prefix_dir.joinpath("DFAAS-MA_2025-05-05_12-09-37_PPO_2_base")

# Parsed content of the experiment's "result.json" file.
raw_exp_data = dfaas_utils.parse_result_file(exp_dir.joinpath("result.json"))

# Environment obtained from the experiment directory, used as a reference.
env = base.get_env(exp_dir)

# "all" is a pseudo-agent placed in front of the environment's own agents.
agents = ["all"] + env.agents

print(f"Loaded experiment: {exp_dir.as_posix()!r}")
print(f"Agents: {agents}")

In [None]:
# Index of the iteration to inspect; the plotting code below uses it to index
# into raw_exp_data.
iter_idx = 0

## Reward

In [None]:
def make_reward_plot(raw_exp_data, env, iter_idx, agents):
    """Plot the reward per step of one episode, one figure per entry of ``agents``.

    Args:
        raw_exp_data: Experiment results indexed by iteration. Each entry must
            provide ``["env_runners"]["hist_stats"]["reward"]``, whose first
            element maps an agent name to its sequence of rewards.
        env: Reference environment. Only ``env.agents`` is read, to know which
            per-agent series are added up for the ``"all"`` pseudo-agent.
        iter_idx: Index into ``raw_exp_data`` of the iteration to plot.
        agents: Names to plot. The special name ``"all"`` plots the
            element-wise sum of the rewards of every agent in ``env.agents``;
            any other name plots that agent's own rewards.
    """
    for agent in agents:
        fig_name = f"reward_cum_{agent}"

        # Close a figure with the same name (if any) so that re-running the
        # cell starts from a fresh figure.
        plt.close(fig=fig_name)
        fig = plt.figure(num=fig_name, layout="constrained")
        fig.canvas.header_visible = False
        ax = fig.subplots()

        # Only the first recorded episode (index 0) of the iteration is used.
        episode_rewards = raw_exp_data[iter_idx]["env_runners"]["hist_stats"]["reward"][0]

        if agent == "all":
            # np.add is a binary ufunc, so unpacking the per-agent series into
            # it only works with exactly two agents. Summing along axis 0 gives
            # the same element-wise total for any number of agents.
            reward = np.sum([episode_rewards[name] for name in env.agents], axis=0)
        else:
            reward = episode_rewards[agent]

        ax.plot(reward)
        ax.set_title(f"Reward per step ({agent = })")
        ax.set_ylabel("Reward")
        ax.set_xlabel("Step")
        ax.yaxis.set_major_locator(ticker.MultipleLocator(0.2))
        ax.grid(axis="both")
        ax.set_axisbelow(True)


# Draw one reward figure for each entry of `agents` for the selected iteration.
make_reward_plot(raw_exp_data=raw_exp_data, env=env, iter_idx=iter_idx, agents=agents)