# Train summary for a single episode

This notebook presents plots related to a single episode. It can display plots about an episode from two sources:

1. **Training iterations**: each iteration plays the same number of episodes, but the episodes are different as they use randomly generated seeds.

2. **Evaluation iterations**: each iteration also plays the same number of episodes, but they use the same list of seeds across iterations, so that the agents can be evaluated on identical episodes over time.

## Experiment loading

In [None]:
# Common imports.
from pathlib import Path

%matplotlib widget
import base

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import dfaas_env
import dfaas_utils

In [None]:
# Base directory where the experiments are located.
# NOTE(review): this is an absolute, user-specific path; adjust it to the
# local checkout before running the notebook.
prefix_dir = Path("/home/emanuele/marl-dfaas/results/")

# Experiment directory.
exp_dir = prefix_dir / "DF_20250618_142445_PPO_linear_growth"

# Which training statistics to show: from training iterations or evaluation
# iterations.
mode = "eval"  # Or "train"

if mode == "train":
    # The result file is a JSON with each entry separated by newline.
    raw_exp_data = dfaas_utils.parse_result_file(exp_dir / "result.json")
elif mode == "eval":
    # The evaluation file is instead a normal JSON.
    raw_exp_data = dfaas_utils.json_to_dict(exp_dir / "evaluation.json")
else:
    # Any other value is a configuration mistake: stop before later cells
    # fail with a less obvious error.
    raise ValueError(f"Invalid {mode = }")

# Reference environment.
env = base.get_env(exp_dir)

In [None]:
# Summary of the loaded experiment. The last line is labelled according to the
# selected mode (training iterations or evaluations).
print(
    f"Experiment prefix dir: {prefix_dir.as_posix()!r}",
    f"Experiment name:       {exp_dir.name!r}",
    f"Agents:                {env.agents} ({len(env.agents)})",
    f"Mode:                  {mode}",
    (
        f"Iterations:            {len(raw_exp_data)}"
        if mode == "train"
        else f"Evaluations:           {len(raw_exp_data)}"
    ),
    sep="\n",
)

In [None]:
# Index of the iteration (training or evaluation) to inspect.
iteration_idx = 19

# Index of the episode to inspect within the selected iteration.
episode_idx = 0

# Fail early on an out-of-range selection instead of hitting an IndexError
# in one of the plotting cells below.
assert 0 <= iteration_idx < len(raw_exp_data), "iteration_idx must be a valid iteration index!"

assert (
    0 <= episode_idx <= raw_exp_data[iteration_idx]["env_runners"]["num_episodes"] - 1
), "episode_idx must be a valid episode index!"

In [None]:
# Echo the selection so it is recorded in the cell output.
print(
    f"Selected iteration:    {iteration_idx}",
    f"Selected episode:      {episode_idx}",
    sep="\n",
)

## Reward

In [None]:
def make_reward_plot(raw_exp_data, env, iteration_idx, episode_idx):
    """Plot the reward per step of one episode.

    One figure is created for each agent in ``env.agents`` plus an extra
    "all" figure showing the element-wise sum of the agents' rewards.

    Args:
        raw_exp_data: Parsed experiment data, indexed by iteration.
        env: Reference environment; only ``env.agents`` is read.
        iteration_idx: Index of the iteration to read.
        episode_idx: Index of the episode inside the iteration.
    """
    episode_reward = raw_exp_data[iteration_idx]["env_runners"]["hist_stats"]["reward"][episode_idx]

    for agent in ["all"] + env.agents:
        fig_name = f"reward_{agent}"
        # Close a figure with the same name left by a previous run of the
        # cell, then create a fresh one.
        plt.close(fig=fig_name)
        fig = plt.figure(num=fig_name, layout="constrained")
        fig.canvas.header_visible = False
        ax = fig.subplots()

        if agent == "all":
            # Sum over the agents axis. np.add(*series) is not usable here:
            # it only accepts two operands and treats a third positional
            # argument as the `out` array.
            reward = np.sum([episode_reward[name] for name in env.agents], axis=0)
        else:
            reward = episode_reward[agent]

        ax.plot(reward)
        ax.set_title(f"Reward per step ({agent = })")
        ax.set_ylabel("Reward")
        ax.set_xlabel("Step")

        ax.grid(axis="both")
        ax.set_axisbelow(True)  # Draw the grid below the plotted data.


make_reward_plot(raw_exp_data, env, iteration_idx, episode_idx)

## Input rate

In [None]:
def make_input_rate_plot(raw_exp_data, env, iteration_idx, episode_idx):
    """Plot the input rate per step of one episode.

    One figure is created for each agent in ``env.agents`` plus an extra
    "all" figure showing the element-wise sum of the agents' input rates.

    Args:
        raw_exp_data: Parsed experiment data, indexed by iteration.
        env: Reference environment; only ``env.agents`` is read.
        iteration_idx: Index of the iteration to read.
        episode_idx: Index of the episode inside the iteration.
    """
    episode_input_rate = raw_exp_data[iteration_idx]["env_runners"]["hist_stats"]["observation_input_rate"][
        episode_idx
    ]

    for agent in ["all"] + env.agents:
        fig_name = f"input_rate_{agent}"
        # Close a figure with the same name left by a previous run of the
        # cell, then create a fresh one.
        plt.close(fig=fig_name)
        fig = plt.figure(num=fig_name, layout="constrained")
        fig.canvas.header_visible = False
        ax = fig.subplots()

        if agent == "all":
            # Sum over the agents axis. np.add(*series) is not usable here:
            # it only accepts two operands and treats a third positional
            # argument as the `out` array.
            input_rate = np.sum([episode_input_rate[name] for name in env.agents], axis=0)
        else:
            input_rate = episode_input_rate[agent]

        ax.plot(input_rate)
        ax.set_title(f"Input rate per step ({agent = })")
        ax.set_ylabel("Input rate")
        ax.set_xlabel("Step")
        ax.xaxis.set_major_locator(ticker.MultipleLocator(25))
        ax.grid(axis="both")
        ax.set_axisbelow(True)  # Draw the grid below the plotted data.


make_input_rate_plot(raw_exp_data, env, iteration_idx, episode_idx)

## Absolute Action

In [None]:
def get_absolute_action(raw_exp_data, env, iteration_idx, episode_idx):
    """Collect the per-step input rate, actions and rejects of one episode.

    Args:
        raw_exp_data: Parsed experiment data, indexed by iteration.
        env: Reference environment; only ``env.agents`` is read.
        iteration_idx: Index of the iteration to read.
        episode_idx: Index of the episode inside the iteration.

    Returns:
        A dict with one entry per agent plus an "all" entry. Each entry maps
        "input_rate", "action_local", "action_forward", "action_reject" and
        "reject_rate" to a NumPy array with one value per step. "reject_rate"
        is the sum of forward rejects, local rejects and the reject action.
        The "all" entry is the element-wise sum over the agents.
    """
    hist_stats = raw_exp_data[iteration_idx]["env_runners"]["hist_stats"]

    def series(key, agent):
        # Per-step values of one metric for one agent in the selected episode.
        return np.array(hist_stats[key][episode_idx][agent])

    data = {}
    for agent in env.agents:
        agent_data = {
            "input_rate": series("observation_input_rate", agent),
            "action_local": series("action_local", agent),
            "action_forward": series("action_forward", agent),
            "action_reject": series("action_reject", agent),
        }

        forward_reject = series("forward_reject_rate", agent)
        local_reject = series("incoming_rate_local_reject", agent)
        agent_data["reject_rate"] = forward_reject + local_reject + agent_data["action_reject"]

        data[agent] = agent_data

    # Sum over the agents axis. np.add(*arrays) must not be used: it only adds
    # two operands, and a third positional array is taken as `out`, which
    # would overwrite that agent's data with the sum of the first two.
    metrics = ("input_rate", "action_local", "action_forward", "action_reject", "reject_rate")
    data["all"] = {metric: np.sum([data[agent][metric] for agent in env.agents], axis=0) for metric in metrics}

    return data


# Computed once for the selected iteration/episode; the plotting cell below
# consumes this dict.
absolute_action = get_absolute_action(raw_exp_data, env, iteration_idx, episode_idx)

In [None]:
def make_absolute_action_plot(absolute_action):
    """Plot, for every entry of ``absolute_action``, the absolute actions per step.

    Each figure shows the input rate and the reject rate as lines, and the
    local/forward/reject actions as a stacked area.

    Args:
        absolute_action: Dict as returned by ``get_absolute_action``, mapping
            an agent name (or "all") to its per-step series.
    """
    for agent in sorted(absolute_action):
        series = absolute_action[agent]

        fig_name = f"absolute_action_{agent}"
        plt.close(fig=fig_name)
        fig = plt.figure(num=fig_name, layout="constrained")
        fig.canvas.header_visible = False
        ax = fig.subplots()

        input_rate = series["input_rate"]
        x_steps = np.arange(len(input_rate))

        # Keep this drawing order: it determines the colours and legend order.
        ax.plot(input_rate, label="Input rate")
        ax.stackplot(
            x_steps,
            series["action_local"],
            series["action_forward"],
            series["action_reject"],
            labels=["Action Local", "Action Forward", "Action Reject"],
            alpha=0.5,
        )
        ax.plot(series["reject_rate"], label="Reject rate")

        ax.set_title(f"Action distribution per episode ({agent = })")
        ax.set_ylabel("Requests")
        ax.set_xlabel("Step")

        ax.legend()
        ax.grid(axis="both")
        # By default the axis is drawn over the content.
        ax.set_axisbelow(True)


make_absolute_action_plot(absolute_action)

## Action

In [None]:
def make_action_distribution_plot(raw_exp_data, env, iteration_idx, episode_idx):
    """Plot the action distribution (local/forward/reject) per step as percentages.

    One figure is created for each agent in ``env.agents`` plus an "all"
    figure built from the sum of the agents' actions. The percentages are
    relative to the sum of the three actions at each step.

    Args:
        raw_exp_data: Parsed experiment data, indexed by iteration.
        env: Reference environment; ``env.agents`` and ``env.max_steps`` are read.
        iteration_idx: Index of the iteration to read.
        episode_idx: Index of the episode inside the iteration.
    """
    hist_stats = raw_exp_data[iteration_idx]["env_runners"]["hist_stats"]

    def as_percentage(values, total):
        # Steps where the total is zero are reported as 0% instead of
        # dividing by zero.
        return np.divide(values, total, out=np.zeros(env.max_steps), where=total != 0) * 100

    # Create the figures up front with "all" first (presumably so it is shown
    # first), while the data loop below must process "all" last.
    for agent in ["all"] + env.agents:
        plt.close(fig=f"action_distribution_{agent}")
        fig = plt.figure(num=f"action_distribution_{agent}", layout="constrained")
        fig.canvas.header_visible = False

    action_local_tmp, action_forward_tmp, action_reject_tmp = [], [], []
    for agent in env.agents + ["all"]:
        fig = plt.figure(num=f"action_distribution_{agent}")
        ax = fig.subplots()

        steps = np.arange(env.max_steps)

        if agent == "all":  # Always executed at the end.
            # Sum all columns (one row for each agent).
            action_local = np.array(action_local_tmp).sum(axis=0)
            action_forward = np.array(action_forward_tmp).sum(axis=0)
            action_reject = np.array(action_reject_tmp).sum(axis=0)
        else:
            action_local = np.array(hist_stats["action_local"][episode_idx][agent])
            action_forward = np.array(hist_stats["action_forward"][episode_idx][agent])
            action_reject = np.array(hist_stats["action_reject"][episode_idx][agent])

            # Keep the absolute values: they are needed for the "all" figure.
            action_local_tmp.append(action_local)
            action_forward_tmp.append(action_forward)
            action_reject_tmp.append(action_reject)

        input_rate = action_local + action_forward + action_reject

        # Convert to ratios, avoiding division by zero (when input_rate is zero).
        action_local = as_percentage(action_local, input_rate)
        action_forward = as_percentage(action_forward, input_rate)
        action_reject = as_percentage(action_reject, input_rate)

        ax.stackplot(
            steps, action_local, action_forward, action_reject, labels=["Local", "Forward", "Reject"], alpha=0.8
        )

        ax.set_title(f"Action distribution per step ({agent = })")
        ax.set_ylabel("Action")
        ax.set_xlabel("Step")
        ax.yaxis.set_major_formatter(ticker.PercentFormatter())
        ax.xaxis.set_major_locator(ticker.MultipleLocator(25))
        ax.legend()
        ax.grid(axis="both")
        ax.set_axisbelow(True)


make_action_distribution_plot(raw_exp_data, env, iteration_idx, episode_idx)

## Rejection

In [None]:
def make_reject_rate_plot(raw_exp_data, env, iteration_idx, episode_idx=0):
    """Plot the total reject rate per step as a percentage of the input rate.

    The total reject is the sum of the reject action, the local rejects and
    the forward rejects. One figure is created for each agent in
    ``env.agents`` plus an "all" figure built from the sums over the agents.

    Args:
        raw_exp_data: Parsed experiment data, indexed by iteration.
        env: Reference environment; ``env.agents`` and ``env.max_steps`` are read.
        iteration_idx: Index of the iteration to read.
        episode_idx: Index of the episode inside the iteration. Defaults to 0,
            which was previously the hard-coded episode.
    """
    hist_stats = raw_exp_data[iteration_idx]["env_runners"]["hist_stats"]

    # Create the figures up front with "all" first (presumably so it is shown
    # first), while the data loop below must process "all" last.
    for agent in ["all"] + env.agents:
        plt.close(fig=f"reject_rate_{agent}")
        fig = plt.figure(num=f"reject_rate_{agent}", layout="constrained")
        fig.canvas.header_visible = False

    total_reject_tmp, input_rate_tmp = [], []
    for agent in env.agents + ["all"]:
        fig = plt.figure(num=f"reject_rate_{agent}")
        ax = fig.subplots()

        if agent == "all":  # Always at the end.
            # Sum all columns (one row for each agent).
            input_rate = np.array(input_rate_tmp).sum(axis=0)
            total_reject = np.array(total_reject_tmp).sum(axis=0)
        else:
            # NOTE(review): this reads "observation_input_requests" while the
            # input rate and absolute action cells read
            # "observation_input_rate"; confirm which key the result file has.
            input_rate = np.array(hist_stats["observation_input_requests"][episode_idx][agent])
            action_reject = np.array(hist_stats["action_reject"][episode_idx][agent])
            excess_local = np.array(hist_stats["incoming_rate_local_reject"][episode_idx][agent])
            excess_forward = np.array(hist_stats["forward_reject_rate"][episode_idx][agent])
            total_reject = action_reject + excess_local + excess_forward

            # Keep the absolute values: they are needed for the "all" figure.
            total_reject_tmp.append(total_reject)
            input_rate_tmp.append(input_rate)

        # Convert to a percentage; steps with zero input rate are reported as 0%.
        total_reject = np.divide(total_reject, input_rate, out=np.zeros(env.max_steps), where=input_rate != 0) * 100

        ax.plot(total_reject)

        ax.set_title(f"Reject rate¹ (as % over input rate) per step ({agent = })")
        # Footnote placed below the axes, in axes coordinates.
        ax.text(
            0.5,
            -0.2,
            "¹reject rate = action reject + local reject + forward reject",
            fontsize=10,
            ha="center",
            transform=ax.transAxes,
        )
        ax.set_ylabel("Reject rate")
        ax.set_xlabel("Step")
        ax.yaxis.set_major_formatter(ticker.PercentFormatter())
        ax.xaxis.set_major_locator(ticker.MultipleLocator(25))
        ax.grid(axis="both")
        ax.set_axisbelow(True)


make_reject_rate_plot(raw_exp_data, env, iteration_idx, episode_idx)

## A single step

In [None]:
# Print observation, action, additional rejects and reward of every agent for
# the listed steps of the selected iteration and episode.
#
# NOTE(review): the "observation_*_requests" / "observation_*_rejects" keys
# used here differ from the "observation_input_rate" key read by the plotting
# cells above; confirm which keys the result file has.
hist_stats = raw_exp_data[iteration_idx]["env_runners"]["hist_stats"]

for step in [174]:
    print(f"Iteration nr. {iteration_idx}  Step nr {step}")
    for agent in env.agents:
        # Use the selected episode (episode_idx) rather than always the
        # first one, so this cell matches the plots above.
        observation_input_rate = hist_stats["observation_input_requests"][episode_idx][agent][step]
        observation_prev_local_rate = hist_stats["observation_prev_local_requests"][episode_idx][agent][step]
        observation_prev_local_reject = hist_stats["observation_prev_local_rejects"][episode_idx][agent][step]
        observation_prev_forward_rate = hist_stats["observation_prev_forward_requests"][episode_idx][agent][step]
        observation_prev_forward_reject = hist_stats["observation_prev_forward_rejects"][episode_idx][agent][step]

        action_local = hist_stats["action_local"][episode_idx][agent][step]
        action_forward = hist_stats["action_forward"][episode_idx][agent][step]
        action_reject = hist_stats["action_reject"][episode_idx][agent][step]

        excess_local = hist_stats["incoming_rate_local_reject"][episode_idx][agent][step]

        forward_reject = hist_stats["forward_reject_rate"][episode_idx][agent][step]

        reward = hist_stats["reward"][episode_idx][agent][step]

        print(f"\n--- Agent: {agent} ---")
        print("-- Observation")
        print(f"Input rate:           {observation_input_rate:.1f}")
        print(f"Prev. local rate:     {observation_prev_local_rate:.1f}")
        print(f"Prev. local reject:   {observation_prev_local_reject:.1f}")
        print(f"Prev. forward rate:   {observation_prev_forward_rate:.1f}")
        print(f"Prev. forward reject: {observation_prev_forward_reject:.1f}")

        print("-- Action")
        print(f"Local:   {action_local:.1f}")
        print(f"Forward: {action_forward:.1f}")
        print(f"Reject:  {action_reject:.1f}")

        print("-- Additional rejects")
        print(f"Local reject rate:   {excess_local:.1f}")
        print(f"Forward reject rate: {forward_reject:.1f}")

        print(f"-- Reward: {reward:.1f}")