# Train Summary Plots/Analysis

In [None]:
# Common imports.
from pathlib import Path

%matplotlib widget
import base

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import ipywidgets

import dfaas_env
import dfaas_utils

Select one or more experiments to view.

**WARNING**: If multiple experiments are selected, they must share the same number of training iterations!

In [None]:
# Get the list of current experiments to be selected by the user.
# NOTE: the results directory is a hardcoded absolute local path; adjust it
# when running the notebook on another machine.
exp_prefix = Path("/home/emanuele/marl-dfaas/results")
exps = []
for exp in exp_prefix.iterdir():
    # Experiments are directories (they are later read as
    # `exp_dir / "result.json"`): skip stray files, otherwise the nested
    # iterdir() below raises NotADirectoryError.
    if not exp.is_dir():
        continue

    if exp.name.startswith("DFAAS"):  # A single experiment.
        exps.append((exp.name, exp))
        continue

    # The experiment is a directory with sub-experiments, add
    # each experiment individually.
    for sub_exp in exp.iterdir():
        if sub_exp.is_dir():
            exps.append((f"{exp.name}/{sub_exp.name}", sub_exp))
exps = sorted(exps, reverse=True)

# Show the name as the portion of the path after "results",
# but anyway the values are full Path objects.
exp_select = ipywidgets.SelectMultiple(
    options=exps,
    # Preselect the first entry, but only if there is one: index 0 is not a
    # valid selection for an empty list of options.
    index=[0] if exps else [],
    description="Experiment(s):",
    style={"description_width": "initial"},
    layout=ipywidgets.Layout(width="70%"),
)

ipywidgets.AppLayout(center=exp_select)

## Experiment selection

This section must be run before any of the following sections to load the selected experiments.

In [None]:
# Read the experiments currently selected in the `exp_select` widget. The
# following sections work on `exps_dir`, so this cell must be re-run after
# changing the selection.
exps_dir = exp_select.value
assert len(exps_dir) > 0, "must select at least one experiment"

# Echo the selection, to make visible which experiments are going to be loaded.
print("Selected experiments:")
for exp_dir in exps_dir:
    print(f"  - {exp_dir.name}")

## Reward

In [None]:
# Common functions for average reward data.


def _average_reward_step(iter, agent):
    """Returns the average reward per step for the given iteration and agent."""
    episodes = iter["env_runners"]["episodes_this_iter"]

    tmp = np.empty(episodes, dtype=np.float32)
    for epi_idx in range(episodes):
        tmp[epi_idx] = np.average(
            iter["env_runners"]["hist_stats"]["reward"][epi_idx][agent]
        )

    return np.average(tmp)


def _get_data(exps_dir):
    """Load the reward statistics of every given experiment.

    For each experiment directory, ``result.json`` is parsed and the rewards
    of each training iteration are averaged over the episodes played in that
    iteration.

    Args:
        exps_dir: Iterable of experiment directories (objects supporting the
            ``/`` operator, such as ``pathlib.Path``).

    Returns:
        A dictionary keyed by experiment directory. Each value is a dictionary
        with the keys ``"agents"``, ``"iterations"``, ``"episodes"``,
        ``"reward_total_avg"`` and ``"reward_step_avg"``. The two reward
        entries map each agent to a float32 array with one slot per iteration;
        ``"reward_total_avg"`` has the additional key ``"all"``.
    """
    loaded = {}

    for exp_dir in exps_dir:
        # Raw per-iteration results and the agents of this experiment.
        results = dfaas_utils.parse_result_file(exp_dir / "result.json")
        agents = base.get_env(exp_dir).agents

        n_iters = len(results)
        # The number of episodes is read from the first iteration only.
        n_episodes = results[0]["env_runners"]["episodes_this_iter"]

        # Average total reward per episode ("all" plus one entry per agent)
        # and average reward per step (one entry per agent). The arrays are
        # allocated uninitialized and filled by the loop below.
        total_avg = {"all": np.empty(n_iters, dtype=np.float32)}
        step_avg = {}
        for agent in agents:
            total_avg[agent] = np.empty(n_iters, dtype=np.float32)
            step_avg[agent] = np.empty(n_iters, dtype=np.float32)

        # Several episodes are played in each iteration: reduce them to a
        # single average value per iteration.
        for result in results:
            # "training_iteration" counts from one, the arrays from zero.
            slot = result["training_iteration"] - 1
            hist_stats = result["env_runners"]["hist_stats"]

            total_avg["all"][slot] = np.average(hist_stats["episode_reward"])

            for agent in agents:
                total_avg[agent][slot] = np.average(
                    hist_stats[f"policy_policy_{agent}_reward"]
                )
                step_avg[agent][slot] = _average_reward_step(result, agent)

        loaded[exp_dir] = {
            "agents": agents,
            "iterations": n_iters,
            "episodes": n_episodes,
            "reward_total_avg": total_avg,
            "reward_step_avg": step_avg,
        }

    return loaded

Get the `data` and `env` variables, used by the subsequent plots.

In [None]:
# Reward data of every selected experiment, keyed by experiment directory.
data = _get_data(exps_dir)
# Environment of the first selected experiment: the plots below read its
# `reward_range`, `max_steps` and `agents` attributes for all the experiments.
env = base.get_env(exps_dir[0])

### Average reward per episode (all agents)

In [None]:
fig = plt.figure(layout="constrained")
fig.canvas.header_visible = False
ax = fig.subplots()

# Limits for the y axis: the reward range of the environment scaled by the
# maximum number of steps gives the per-episode limits.
bottom, top = env.reward_range
bottom_total = bottom * env.max_steps
top_total = top * env.max_steps

# One curve per selected experiment.
for exp_dir in exps_dir:
    ax.plot(data[exp_dir]["reward_total_avg"]["all"], label=exp_dir.name)

# The plotted reward covers all agents, so the limits are scaled by the number
# of agents. This applies to the lower limit too: otherwise, with a negative
# lower bound of the reward range, the curve could fall outside the axis.
ax.set_ylim(
    bottom=bottom_total * len(env.agents),
    top=top_total * len(env.agents),
)
ax.set_title("Average reward per episode (all agents)")
ax.yaxis.set_major_locator(ticker.MultipleLocator(50))

ax.set_ylabel("Reward per episode")

ax.set_xlabel("Iteration")
ax.xaxis.set_major_locator(
    ticker.MultipleLocator(50)
)  # Show x-axis ticks every 50 iterations.

ax.legend(loc="lower center")
ax.grid(axis="both")
ax.set_axisbelow(True)  # By default the axis is over the content.

### Average reward per episode (single agents)

In [None]:
# Per-episode limits for the y axis: the reward range of the environment
# scaled by the maximum number of steps.
bottom, top = env.reward_range
bottom_total, top_total = bottom * env.max_steps, top * env.max_steps

# One figure per agent, each with one curve per selected experiment.
for agent in env.agents:
    fig, ax = plt.subplots(layout="constrained")
    fig.canvas.header_visible = False

    for exp_dir in exps_dir:
        ax.plot(data[exp_dir]["reward_total_avg"][agent], label=exp_dir.name)

    # Fixed y range, so the figures of different agents are comparable.
    ax.set_ylim(bottom=bottom_total, top=top_total)

    # Title and axis labels.
    ax.set_title(f"Average reward per episode ({agent = })")
    ax.set_xlabel("Iteration")
    ax.set_ylabel("Reward per episode")

    # Major ticks every 50 units on both axes (one locator instance per axis).
    ax.xaxis.set_major_locator(ticker.MultipleLocator(50))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(50))

    ax.legend(loc="lower center")
    ax.grid(axis="both")
    # Draw the grid below the plotted lines (by default it is over the content).
    ax.set_axisbelow(True)

## Processed requests

In [None]:
# Common functions for processed requests data.


def _processable_requests_episode(iters, iter_idx, epi_idx):
    """Work in progress: presumably meant to return the processable requests
    of one episode (inferred from the name, TODO confirm).

    FIXME: the body is still a copy of `_average_reward_step` (it averages the
    "reward" entries, not requests) and does not use its own parameters:

    - `iters` and `iter_idx` are never read, and `epi_idx` is overwritten by
      the loop variable below;
    - `iter` is neither a parameter nor a local: it is looked up as a notebook
      global and, if there is none, resolves to the builtin `iter`, which
      cannot be subscripted;
    - `agent` is neither a parameter nor a local: it is looked up as a notebook
      global, such as the one left by a top-level `for agent in ...` loop of a
      previously executed cell.
    """
    # FIXME: `iter` is not defined in this function (see the docstring).
    episodes = iter["env_runners"]["episodes_this_iter"]

    tmp = np.empty(episodes, dtype=np.float32)
    # NOTE: this loop variable shadows the `epi_idx` parameter.
    for epi_idx in range(episodes):
        # FIXME: `agent` is not defined in this function (see the docstring).
        tmp[epi_idx] = np.average(
            iter["env_runners"]["hist_stats"]["reward"][epi_idx][agent]
        )

    return np.average(tmp)


def _get_data(exps_dir):
    """Collect the general information of every given experiment.

    NOTE: this function has the same name as the `_get_data` defined in the
    "Reward" section and replaces it once this cell is executed. The
    dictionaries returned here do not contain the reward entries.

    Args:
        exps_dir: Iterable of experiment directories (objects supporting the
            ``/`` operator, such as ``pathlib.Path``).

    Returns:
        A dictionary keyed by experiment directory. Each value is a dictionary
        with the keys ``"agents"``, ``"iterations"`` (number of parsed
        iterations) and ``"episodes"`` (read from the first iteration).
    """
    collected = {}

    for exp_dir in exps_dir:
        # Raw per-iteration results of this experiment.
        results = dfaas_utils.parse_result_file(exp_dir / "result.json")

        collected[exp_dir] = {
            "agents": base.get_env(exp_dir).agents,
            "iterations": len(results),
            "episodes": results[0]["env_runners"]["episodes_this_iter"],
        }

    return collected

In [None]:
# WIP TEST
# NOTE: the result file is a hardcoded absolute local path of one specific
# experiment, independent from the experiments selected above.
iters = dfaas_utils.parse_result_file(
    "/home/emanuele/marl-dfaas/results/DFAAS-MA_2025-01-17_18-02-33_SYNT_NEW_MODEL/result.json"
)

# The helper of this section is defined as `_processable_requests_episode`:
# calling it with a `_get_` prefix raises NameError on a fresh kernel.
_processable_requests_episode(iters, 0, 0)

Get the `data` and `env` variables, used by the subsequent plots.

In [None]:
# Rebuild `data` for the selected experiments. NOTE: `data` and `env` reuse the
# names assigned in the "Reward" section, so this cell overwrites them.
data = _get_data(exps_dir)
# Environment of the first selected experiment.
env = base.get_env(exps_dir[0])

### Average processed requests per episode (all agents)