# Training summary for a single episode

This notebook displays graphs and data about a single episode in the training process.

## Experiment loading

In [None]:
# Common imports.
import os
from pathlib import Path

%matplotlib widget
import base

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import ipywidgets

import dfaas_env
import dfaas_utils

In [None]:
# Root directory that contains the experiment result folders. The default is
# the original author's location; set the DFAAS_RESULTS_DIR environment
# variable to run the notebook against results stored somewhere else.
prefix_dir = Path(os.environ.get("DFAAS_RESULTS_DIR", "/home/emanuele/marl-dfaas/results/"))

exp_dir = prefix_dir / "DFAAS-MA_2025-05-13_17-49-24_PPO_2_constant_rate_200_iters"

# Fail early with a readable message when the experiment is not where expected,
# instead of surfacing an error from inside the parsing helper.
result_file = exp_dir / "result.json"
if not result_file.is_file():
    raise FileNotFoundError(f"Experiment result file not found: {result_file.as_posix()!r}")

# Raw data parsed from "result.json"; the cells below index it by iteration.
raw_exp_data = dfaas_utils.parse_result_file(result_file)

# Reference environment.
env = base.get_env(exp_dir)

# Agent names preceded by the "all" pseudo-agent used for aggregated views.
agents = ["all"] + env.agents

In [None]:
# Zero-based index of the training iteration inspected by the rest of the
# notebook; it must address an existing entry of raw_exp_data.
iter_idx = 199
assert iter_idx in range(len(raw_exp_data)), "iter_idx must be a valid iteration index!"

In [None]:
# Recap of what is being analysed, printed one item per line.
print(
    f"Experiment prefix dir: {prefix_dir.as_posix()!r}",
    f"Experiment name:       {exp_dir.name!r}",
    f"Agents:                {env.agents} ({len(env.agents)})",
    f"Selected iteration:    {iter_idx}",
    sep="\n",
)

## Reward

In [None]:
def make_reward_plot(raw_exp_data, env, iter_idx):
    """Plot the per-step reward of one iteration, one figure per agent.

    A figure named ``reward_cum_<agent>`` is (re)created for the pseudo-agent
    "all" (element-wise sum of the reward series of every agent in
    ``env.agents``) and then for each individual agent.

    Args:
        raw_exp_data: Indexable collection of iteration results. Only
            ``raw_exp_data[iter_idx]["env_runners"]["hist_stats"]["reward"][0]``
            is read; it must map each agent name to its reward series.
        env: Object exposing the agent names as ``env.agents`` (a list).
        iter_idx: Index of the iteration to plot.
    """
    # NOTE(review): index 0 presumably selects the first recorded episode of
    # the iteration -- confirm against the code that writes result.json.
    reward_by_agent = raw_exp_data[iter_idx]["env_runners"]["hist_stats"]["reward"][0]

    for agent in ["all"] + env.agents:
        # Close any figure with the same name first, so that the figure
        # requested below is a fresh one rather than a reused one.
        plt.close(fig=f"reward_cum_{agent}")
        fig = plt.figure(num=f"reward_cum_{agent}", layout="constrained")
        fig.canvas.header_visible = False
        ax = fig.subplots()

        if agent == "all":
            # Element-wise sum across agents. np.sum over the stacked series
            # works for any number of agents, whereas np.add(*series) is a
            # binary ufunc: a third series would be taken as its ``out``
            # argument and a single series would be an arity error.
            reward = np.sum([reward_by_agent[name] for name in env.agents], axis=0)
        else:
            reward = reward_by_agent[agent]

        ax.plot(reward)
        ax.set_title(f"Reward per step ({agent = })")
        ax.set_ylabel("Reward")
        ax.set_xlabel("Step")
        ax.yaxis.set_major_locator(ticker.MultipleLocator(0.2))
        ax.grid(axis="both")
        ax.set_axisbelow(True)


# Draw the reward figures for the iteration selected by iter_idx.
make_reward_plot(raw_exp_data, env, iter_idx)

## Input rate

In [None]:
def make_input_rate_plot(raw_exp_data, env, iter_idx):
    """Plot the per-step input rate of one iteration, one figure per agent.

    A figure named ``input_rate_<agent>`` is (re)created for the pseudo-agent
    "all" (element-wise sum of the input rate series of every agent in
    ``env.agents``) and then for each individual agent.

    Args:
        raw_exp_data: Indexable collection of iteration results. Only
            ``raw_exp_data[iter_idx]["env_runners"]["hist_stats"]
            ["observation_input_requests"][0]`` is read; it must map each
            agent name to its input rate series.
        env: Object exposing the agent names as ``env.agents`` (a list).
        iter_idx: Index of the iteration to plot.
    """
    # NOTE(review): index 0 presumably selects the first recorded episode of
    # the iteration -- confirm against the code that writes result.json.
    input_rate_by_agent = raw_exp_data[iter_idx]["env_runners"]["hist_stats"]["observation_input_requests"][0]

    for agent in ["all"] + env.agents:
        # Close any figure with the same name first, so that the figure
        # requested below is a fresh one rather than a reused one.
        plt.close(fig=f"input_rate_{agent}")
        fig = plt.figure(num=f"input_rate_{agent}", layout="constrained")
        fig.canvas.header_visible = False
        ax = fig.subplots()

        if agent == "all":
            # Element-wise sum across agents. np.sum over the stacked series
            # works for any number of agents, whereas np.add(*series) is a
            # binary ufunc: a third series would be taken as its ``out``
            # argument and a single series would be an arity error.
            input_rate = np.sum([input_rate_by_agent[name] for name in env.agents], axis=0)
        else:
            input_rate = input_rate_by_agent[agent]

        ax.plot(input_rate)
        ax.set_title(f"Input rate per step ({agent = })")
        ax.set_ylabel("Input rate")
        ax.set_xlabel("Step")
        ax.xaxis.set_major_locator(ticker.MultipleLocator(25))
        ax.grid(axis="both")
        ax.set_axisbelow(True)


# Draw the input rate figures for the iteration selected by iter_idx.
make_input_rate_plot(raw_exp_data, env, iter_idx)

## Action

In [None]:
def make_action_distribution_plot(raw_exp_data, env, iter_idx):
    """Plot, per step, the share of requests assigned to each action.

    For every agent in ``env.agents`` and for the pseudo-agent "all" (sum over
    all agents) a stacked area chart named ``action_distribution_<agent>``
    shows the local / forward / reject amounts as ratios of their sum. Steps
    where that sum is zero are drawn as all-zero ratios.

    Args:
        raw_exp_data: Indexable collection of iteration results. The keys
            ``action_local``, ``action_forward`` and ``action_reject`` of
            ``raw_exp_data[iter_idx]["env_runners"]["hist_stats"]`` are read;
            element ``[0][agent]`` of each must be that agent's per-step series.
        env: Object exposing the agent names as ``env.agents`` (a list).
        iter_idx: Index of the iteration to plot.
    """
    # Figures are created up front with "all" first, while the data loop below
    # must handle "all" last because it aggregates the per-agent arrays.
    # NOTE(review): presumably done so the aggregated figure is shown first.
    for agent in ["all"] + env.agents:
        plt.close(fig=f"action_distribution_{agent}")
        fig = plt.figure(num=f"action_distribution_{agent}", layout="constrained")
        fig.canvas.header_visible = False

    hist_stats = raw_exp_data[iter_idx]["env_runners"]["hist_stats"]

    action_local_tmp, action_forward_tmp, action_reject_tmp = [], [], []
    for agent in env.agents + ["all"]:
        fig = plt.figure(num=f"action_distribution_{agent}")
        ax = fig.subplots()

        if agent == "all":  # Always executed at the end.
            # Sum all columns (one row for each agent).
            action_local = np.array(action_local_tmp).sum(axis=0)
            action_forward = np.array(action_forward_tmp).sum(axis=0)
            action_reject = np.array(action_reject_tmp).sum(axis=0)
        else:
            action_local = np.array(hist_stats["action_local"][0][agent])
            action_forward = np.array(hist_stats["action_forward"][0][agent])
            action_reject = np.array(hist_stats["action_reject"][0][agent])

            action_local_tmp.append(action_local)
            action_forward_tmp.append(action_forward)
            action_reject_tmp.append(action_reject)

        input_rate = action_local + action_forward + action_reject
        has_input = input_rate != 0

        # The x axis and the output buffers are sized from the recorded data
        # rather than from env.max_steps, so a series whose length differs
        # from env.max_steps no longer raises a broadcasting error.
        steps = np.arange(input_rate.size)

        def to_ratio(amount):
            # A fresh zero buffer per call: entries where input_rate is zero
            # keep the value 0 instead of being divided.
            return np.divide(amount, input_rate, out=np.zeros(input_rate.shape), where=has_input)

        action_local = to_ratio(action_local)
        action_forward = to_ratio(action_forward)
        action_reject = to_ratio(action_reject)

        ax.stackplot(
            steps, action_local, action_forward, action_reject, labels=["Local", "Forward", "Reject"], alpha=0.8
        )

        ax.set_title(f"Action distribution per step ({agent = })")
        ax.set_ylabel("Action")
        ax.set_xlabel("Step")
        ax.yaxis.set_major_locator(ticker.MultipleLocator(0.1))
        ax.xaxis.set_major_locator(ticker.MultipleLocator(25))
        ax.legend()
        ax.grid(axis="both")
        ax.set_axisbelow(True)


# Draw the action distribution figures for the iteration selected by iter_idx.
make_action_distribution_plot(raw_exp_data, env, iter_idx)

## Rejection

In [None]:
def make_reject_rate_plot(raw_exp_data, env, iter_idx):
    """Plot, per step, the rejected requests as a percentage of the input rate.

    For every agent in ``env.agents`` and for the pseudo-agent "all" (sums
    over all agents) a figure named ``reject_rate_<agent>`` is (re)created.
    The total reject of an agent is the sum of its ``action_reject``,
    ``incoming_rate_local_reject`` and ``forward_reject_rate`` series. Steps
    with a zero input rate are drawn as 0%.

    Args:
        raw_exp_data: Indexable collection of iteration results. The keys
            ``observation_input_requests``, ``action_reject``,
            ``incoming_rate_local_reject`` and ``forward_reject_rate`` of
            ``raw_exp_data[iter_idx]["env_runners"]["hist_stats"]`` are read;
            element ``[0][agent]`` of each must be that agent's per-step series.
        env: Object exposing the agent names as ``env.agents`` (a list).
        iter_idx: Index of the iteration to plot.
    """
    # Figures are created up front with "all" first, while the data loop below
    # must handle "all" last because it aggregates the per-agent arrays.
    # NOTE(review): presumably done so the aggregated figure is shown first.
    for agent in ["all"] + env.agents:
        plt.close(fig=f"reject_rate_{agent}")
        fig = plt.figure(num=f"reject_rate_{agent}", layout="constrained")
        fig.canvas.header_visible = False

    hist_stats = raw_exp_data[iter_idx]["env_runners"]["hist_stats"]

    total_reject_tmp, input_rate_tmp = [], []
    for agent in env.agents + ["all"]:
        fig = plt.figure(num=f"reject_rate_{agent}")
        ax = fig.subplots()

        if agent == "all":  # Always at the end.
            # Sum all columns (one row for each agent).
            input_rate = np.array(input_rate_tmp).sum(axis=0)
            total_reject = np.array(total_reject_tmp).sum(axis=0)
        else:
            input_rate = np.array(hist_stats["observation_input_requests"][0][agent])
            action_reject = np.array(hist_stats["action_reject"][0][agent])
            excess_local = np.array(hist_stats["incoming_rate_local_reject"][0][agent])
            excess_forward = np.array(hist_stats["forward_reject_rate"][0][agent])
            total_reject = action_reject + excess_local + excess_forward

            total_reject_tmp.append(total_reject)
            input_rate_tmp.append(input_rate)

        # Convert to a percentage. The zero-filled output buffer is sized from
        # the recorded data rather than from env.max_steps, so a series whose
        # length differs from env.max_steps no longer raises a broadcasting
        # error; entries with a zero input rate keep the value 0.
        total_reject = (
            np.divide(total_reject, input_rate, out=np.zeros(input_rate.shape), where=input_rate != 0) * 100
        )

        ax.plot(total_reject)

        ax.set_title(f"Reject rate¹ (as % over input rate) per step ({agent = })")
        ax.text(
            0.5,
            -0.2,
            "¹reject rate = action reject + local reject + forward reject",
            fontsize=10,
            ha="center",
            transform=ax.transAxes,
        )
        ax.set_ylabel("Reject rate")
        ax.set_xlabel("Step")
        ax.yaxis.set_major_formatter(ticker.PercentFormatter())
        ax.xaxis.set_major_locator(ticker.MultipleLocator(25))
        ax.grid(axis="both")
        ax.set_axisbelow(True)


# Draw the reject rate figures for the iteration selected by iter_idx.
make_reject_rate_plot(raw_exp_data, env, iter_idx)

## A single step

In [None]:
# Print observation, action, extra rejects and reward of every agent for each
# step listed below (all values come from the iteration selected by iter_idx).
for step in [71]:
    print(f"Iteration nr. {iter_idx}  Step nr {step}")
    for agent in env.agents:
        # Every metric is stored under the same path and differs only by key.
        step_stats = raw_exp_data[iter_idx]["env_runners"]["hist_stats"]

        def value_of(metric):
            """Return the value of `metric` for the current agent and step."""
            return step_stats[metric][0][agent][step]

        # Observation.
        observation_input_rate = value_of("observation_input_requests")
        observation_prev_local_rate = value_of("observation_prev_local_requests")
        observation_prev_local_reject = value_of("observation_prev_local_rejects")
        observation_prev_forward_rate = value_of("observation_prev_forward_requests")
        observation_prev_forward_reject = value_of("observation_prev_forward_rejects")

        # Action.
        action_local = value_of("action_local")
        action_forward = value_of("action_forward")
        action_reject = value_of("action_reject")

        # Rejects recorded in addition to the explicit reject action.
        excess_local = value_of("incoming_rate_local_reject")
        forward_reject = value_of("forward_reject_rate")

        reward = value_of("reward")

        print(f"\n--- Agent: {agent} ---")
        print("-- Observation")
        print(f"Input rate:           {observation_input_rate:.1f}")
        print(f"Prev. local rate:     {observation_prev_local_rate:.1f}")
        print(f"Prev. local reject:   {observation_prev_local_reject:.1f}")
        print(f"Prev. forward rate:   {observation_prev_forward_rate:.1f}")
        print(f"Prev. forward reject: {observation_prev_forward_reject:.1f}")

        print("-- Action")
        print(f"Local:   {action_local:.1f}")
        print(f"Forward: {action_forward:.1f}")
        print(f"Reject:  {action_reject:.1f}")

        print("-- Additional rejects")
        print(f"Local reject rate:   {excess_local:.1f}")
        print(f"Forward reject rate: {forward_reject:.1f}")

        print(f"-- Reward: {reward:.1f}")