# Train Summary Plots/Analysis

In [None]:
# Common imports.
from pathlib import Path

%matplotlib ipympl
import base

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import dfaas_env
import dfaas_utils

Global options for the notebook.

* `exp_dir`: the full path for the experiment directory.

In [None]:
# The experiment directory can be overridden by setting the DFAAS_EXP_DIR
# environment variable before starting the notebook. When the variable is
# unset (or empty) the original hard-coded path is used.
import os

exp_dir = Path(
    os.environ.get("DFAAS_EXP_DIR")
    or "/home/emanuele/marl-dfaas/results/DFAAS-MA_2024-12-20_11-49-29_500_SYN"
)

## Reward

In [None]:
# Common functions for average reward data.


def _average_reward_step(iter, agent):
    """Returns the average reward per step for the given iteration and agent."""
    episodes = iter["env_runners"]["episodes_this_iter"]

    tmp = np.empty(episodes, dtype=np.float32)
    for epi_idx in range(episodes):
        tmp[epi_idx] = np.average(
            iter["env_runners"]["hist_stats"]["reward"][epi_idx][agent]
        )

    return np.average(tmp)


def _get_data(exp_dir):
    """Load the average reward data of the experiment stored in `exp_dir`.

    The iterations are parsed from `exp_dir / "result.json"` and the agents
    are taken from the environment returned by `base.get_env(exp_dir)`.

    Returns a dictionary with the following keys:

    * "agents": the agents of the environment.
    * "iterations": the number of parsed iterations.
    * "episodes": the "episodes_this_iter" value of the first iteration.
    * "reward_total_avg": for "all" and for each agent, an array with one
      value per iteration holding the average total reward per episode.
    * "reward_step_avg": for each agent, an array with one value per
      iteration holding the average reward per step.

    Entries of iterations that do not appear in the result file are NaN.
    """
    # Read data from experiment directory.
    iterations = dfaas_utils.parse_result_file(exp_dir / "result.json")
    agents = base.get_env(exp_dir).agents
    n_iterations = len(iterations)

    def _nan_series():
        # Start from NaN instead of uninitialized memory (np.empty): if an
        # iteration is missing from the log its slot stays NaN, which
        # matplotlib draws as a gap, rather than an arbitrary value.
        return np.full(n_iterations, np.nan, dtype=np.float32)

    data = {
        "agents": agents,
        "iterations": n_iterations,
        "episodes": iterations[0]["env_runners"]["episodes_this_iter"],
    }

    reward_total_avg = {"all": _nan_series()}  # Average total reward per episode.
    reward_step_avg = {}  # Average reward per step.
    for agent in agents:
        reward_total_avg[agent] = _nan_series()
        reward_step_avg[agent] = _nan_series()

    # For each iteration, get the average reward, since there are multiple
    # episodes played in each iteration.
    for iteration in iterations:
        # Index starts from one in log files, but Python list from zero.
        iter_idx = iteration["training_iteration"] - 1
        hist_stats = iteration["env_runners"]["hist_stats"]

        reward_total_avg["all"][iter_idx] = np.average(hist_stats["episode_reward"])

        for agent in agents:
            reward_total_avg[agent][iter_idx] = np.average(
                hist_stats[f"policy_policy_{agent}_reward"]
            )
            reward_step_avg[agent][iter_idx] = _average_reward_step(
                iteration, agent
            )

    data["reward_total_avg"] = reward_total_avg
    data["reward_step_avg"] = reward_step_avg

    return data

Get the `data` and `env` variables, used by the subsequent plots.

In [None]:
# Reward data of the experiment: the dictionary built by `_get_data` (agents,
# number of iterations and episodes, per-iteration average rewards).
data = _get_data(exp_dir)
# Environment of the experiment; the plots below read its `reward_range` and
# `max_steps` attributes to fix the limits of the y axis.
env = base.get_env(exp_dir)

### Average reward per episode (all agents)

In [None]:
fig = plt.figure()
fig.canvas.header_visible = False
ax = fig.subplots()

# Bounds of the total reward of one episode for a single agent: the reward
# range multiplied by the number of steps in an episode.
# NOTE(review): assumes `env.reward_range` is the per-step range of a single
# agent -- confirm against the environment.
bottom, top = env.reward_range
bottom_total = bottom * env.max_steps
top_total = top * env.max_steps

# This plot shows the reward of all agents together, so both limits of the
# y axis are scaled by the number of agents. Scaling only the upper limit
# would clip the curve whenever the lower bound of the reward is negative.
n_agents = len(data["agents"])

ax.plot(data["reward_total_avg"]["all"])
ax.set_ylim(bottom=bottom_total * n_agents, top=top_total * n_agents)
ax.set_title("Average reward per episode (all agents)")
ax.yaxis.set_major_locator(ticker.MultipleLocator(50))

ax.set_ylabel("Reward per episode")

ax.set_xlabel("Iteration")
ax.xaxis.set_major_locator(
    ticker.MultipleLocator(50)
)  # Show x-axis ticks every 50 iterations.

ax.grid(axis="both")
ax.set_axisbelow(True)  # By default the axis is over the content.

### Average reward per episode (single agents)

In [None]:
for agent in data["agents"]:
    # A separate figure for each agent.
    fig, ax = plt.subplots()
    fig.canvas.header_visible = False

    # Limits of the y axis: the reward range scaled by the number of steps
    # in an episode.
    bottom, top = env.reward_range
    bottom_total, top_total = bottom * env.max_steps, top * env.max_steps

    ax.plot(data["reward_total_avg"][agent])
    ax.set(
        title=f"Average reward per episode ({agent = })",
        xlabel="Iteration",
        ylabel="Reward per episode",
        ylim=(bottom_total, top_total),
    )

    # Major ticks every 50 units on both axes (reward and iterations).
    ax.xaxis.set_major_locator(ticker.MultipleLocator(50))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(50))

    # Draw the grid below the plotted line (by default it is drawn above).
    ax.set_axisbelow(True)
    ax.grid(axis="both")