# Evaluation summary of a single experiment

This notebook contains plots and analyses of evaluations performed during an experiment's training process. Note that the evaluation is done after some training iterations.

## Experiment loading

In [None]:
# Common imports.
from pathlib import Path

%matplotlib widget
import base

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from scipy.signal import savgol_filter

import dfaas_env
import dfaas_utils

In [None]:
# Root directory that contains the experiment result folders.
# NOTE(review): this is a machine-specific absolute path; adjust it when running elsewhere.
prefix_dir = Path("/home/emanuele/marl-dfaas/results/")

# Directory of the single experiment analysed by this notebook.
exp_dir = prefix_dir / "DFAAS-MA_2025-05-21_17-45-58_PPO_constant_rate_det_5_100"

# Evaluation results read from the experiment's "evaluation.json" file. The
# cells below use it as a sequence with one entry per evaluation.
evaluation_data = dfaas_utils.json_to_dict(exp_dir / "evaluation.json")

# Reference environment obtained from the experiment directory. The cells
# below only read its `agents` attribute.
env = base.get_env(exp_dir)

In [None]:
# Print a short summary of the loaded experiment: where it lives, its name,
# the agents of the reference environment and the number of evaluations.
print(f"Experiment prefix dir: {prefix_dir.as_posix()!r}")
print(f"Experiment name:       {exp_dir.name!r}")
print(f"Agents:                {env.agents} ({len(env.agents)})")
print(f"Evaluations:           {len(evaluation_data)}")

## Reward

In [None]:
def get_reward_data_avg(eval_data, env):
    """Return the mean/min/max cumulative reward per evaluation.

    Args:
        eval_data: Sequence of evaluation results, one entry per evaluation.
            Only the "env_runners" mapping of each entry is read.
        env: Object exposing `agents`, the agent identifiers.

    Returns:
        A dict with the keys "mean", "min" and "max". Each value is a dict
        that maps "all" and every agent to a NumPy array holding one value
        per evaluation. The arrays are empty when `eval_data` is empty.

    Raises:
        AssertionError: If an evaluation reports a number of episodes that
            differs from the one reported by the first evaluation.
    """
    stats = ("mean", "min", "max")
    n_evals = len(eval_data)

    reward_avg = {stat: {name: np.empty(n_evals) for name in ["all", *env.agents]} for stat in stats}

    # Nothing to fill in: return the (empty) arrays instead of failing on
    # eval_data[0] below.
    if not eval_data:
        return reward_avg

    episodes_per_eval = eval_data[0]["env_runners"]["num_episodes"]

    for eval_idx, evaluation in enumerate(eval_data):
        runners = evaluation["env_runners"]

        # Sanity check kept as an assertion, as in the rest of the notebook.
        assert (
            runners["episodes_this_iter"] == episodes_per_eval
        ), "Episodes per eval must be the same for all evaluations!"

        for stat in stats:
            # Per-agent values are stored under the agent's policy name.
            per_policy = runners[f"policy_reward_{stat}"]
            for agent in env.agents:
                reward_avg[stat][agent][eval_idx] = per_policy[f"policy_{agent}"]

            # Aggregated value over all agents.
            reward_avg[stat]["all"][eval_idx] = runners[f"episode_reward_{stat}"]

    return reward_avg


# Compute the reward statistics once; the plotting cell below reuses them.
reward_data_avg = get_reward_data_avg(evaluation_data, env)

In [None]:
def make_cumulative_reward_plot(data, env):
    """Plot the average cumulative reward per evaluation.

    One figure is created for the "all" aggregate and one for each agent in
    `env.agents`. Each figure shows the mean reward, a Savitzky-Golay
    smoothed version of it and a shaded band between the min and max reward.

    Args:
        data: Dict with the keys "mean", "min" and "max"; each value maps
            "all" and every agent to an array with one value per evaluation.
        env: Object exposing `agents`, the list of agent identifiers.
    """
    max_window, polyorder = 15, 3  # Savitzky-Golay smoothing parameters.

    for agent in ["all"] + env.agents:
        fig_name = f"reward_cum_{agent}"

        # Close a figure left over from a previous run of the cell, so that
        # re-running does not draw on top of the old content.
        plt.close(fig=fig_name)
        fig = plt.figure(num=fig_name, layout="constrained")
        fig.canvas.header_visible = False
        ax = fig.subplots()

        reward = data["mean"][agent]
        reward_min = data["min"][agent]
        reward_max = data["max"][agent]

        eval_steps = np.arange(len(reward))

        ax.plot(eval_steps, reward, label="Original")

        # savgol_filter rejects a window longer than the series, so shrink
        # the window (keeping it odd) for short runs and skip the smoothed
        # curve entirely when too few points are left for the polynomial.
        n_points = len(reward)
        window = min(max_window, n_points if n_points % 2 == 1 else n_points - 1)
        if window > polyorder:
            smoothed = savgol_filter(reward, window, polyorder)
            ax.plot(eval_steps, smoothed, label="Smoothed")

        # Draw on this figure's axes explicitly instead of relying on
        # pyplot's notion of the current axes.
        ax.fill_between(eval_steps, reward_min, reward_max, alpha=0.2, color="blue", label="Min/Max")

        ax.set_title(f"Average cumulative reward per episode ({agent = })")
        ax.set_ylabel("Reward")
        ax.set_xlabel("Evaluation")

        ax.legend()
        ax.grid(axis="both")
        ax.set_axisbelow(True)  # By default the axis is over the content.


# Draw the reward figures: one for the "all" aggregate and one per agent.
make_cumulative_reward_plot(reward_data_avg, env)

## Agent rejection rate

In [None]:
def get_agent_reject_rate(eval_data, env):
    """Return the mean/min/max agent reject rate per evaluation.

    Two results are returned: one with absolute values and one with ratios
    over the input rate.

    Notes:
        1. The values are averages because multiple episodes are played for
           each evaluation.

        2. The agent reject rate of an episode is the sum of three parts:
           the rate rejected by the agent's action ("action_reject"), the
           locally routed rate that was rejected
           ("incoming_rate_local_reject") and the forwarded rate that was
           rejected ("forward_reject_rate").

        3. The ratio of an episode whose input rate is zero is reported as
           0 instead of NaN/inf, so that a single idle episode does not
           invalidate the statistics of the whole evaluation.

    Args:
        eval_data: Sequence of evaluation results, one entry per evaluation.
            Only the "env_runners" mapping of each entry is read.
        env: Object exposing `agents`, the agent identifiers.

    Returns:
        A tuple `(reject_avg, reject_ratio)`. Both are dicts with the keys
        "mean", "min" and "max"; each value maps "all" and every agent to a
        NumPy array with one value per evaluation. The arrays are empty when
        `eval_data` is empty.

    Raises:
        AssertionError: If an evaluation reports a number of episodes that
            differs from the one reported by the first evaluation.
    """
    stats = ("mean", "min", "max")
    names = ["all", *env.agents]
    n_evals = len(eval_data)

    reject_avg = {stat: {name: np.empty(n_evals) for name in names} for stat in stats}
    reject_ratio = {stat: {name: np.empty(n_evals) for name in names} for stat in stats}

    # Nothing to fill in: return the (empty) arrays instead of failing on
    # eval_data[0] below.
    if not eval_data:
        return reject_avg, reject_ratio

    episodes_per_eval = eval_data[0]["env_runners"]["num_episodes"]

    # hist_stats entries that are summed to obtain the reject rate.
    reject_keys = ("action_reject", "incoming_rate_local_reject", "forward_reject_rate")

    for eval_idx, evaluation in enumerate(eval_data):
        runners = evaluation["env_runners"]

        # Sanity check kept as an assertion, as in the rest of the notebook.
        assert (
            runners["episodes_this_iter"] == episodes_per_eval
        ), "Episodes per eval must be the same for all evaluations!"

        hist_stats = runners["hist_stats"]

        # Per-episode totals for each agent plus the "all" aggregate.
        reject_epi = {name: np.zeros(episodes_per_eval) for name in names}
        input_epi = {name: np.zeros(episodes_per_eval) for name in names}
        for epi_idx in range(episodes_per_eval):
            for agent in env.agents:
                input_rate = sum(hist_stats["observation_input_requests"][epi_idx][agent])
                input_epi[agent][epi_idx] = input_rate
                input_epi["all"][epi_idx] += input_rate

                reject_rate = sum(sum(hist_stats[key][epi_idx][agent]) for key in reject_keys)
                reject_epi[agent][epi_idx] = reject_rate
                reject_epi["all"][epi_idx] += reject_rate

        for name in names:
            reject_avg["mean"][name][eval_idx] = np.average(reject_epi[name])
            reject_avg["min"][name][eval_idx] = np.min(reject_epi[name])
            reject_avg["max"][name][eval_idx] = np.max(reject_epi[name])

            # Divide only where the input rate is non-zero; the remaining
            # positions keep the 0 they were initialised with.
            reject_ratio_eval = np.divide(
                reject_epi[name],
                input_epi[name],
                out=np.zeros_like(reject_epi[name]),
                where=input_epi[name] != 0,
            )
            reject_ratio["mean"][name][eval_idx] = np.average(reject_ratio_eval)
            reject_ratio["min"][name][eval_idx] = np.min(reject_ratio_eval)
            reject_ratio["max"][name][eval_idx] = np.max(reject_ratio_eval)

    return reject_avg, reject_ratio


# Compute the reject statistics once (absolute values and ratios over the
# input rate); the two plotting cells below reuse them.
agent_reject_rate_abs, agent_reject_rate_ratio = get_agent_reject_rate(evaluation_data, env)

### Absolute values

In [None]:
def make_average_agent_reject_rate_abs(data, env):
    """Plot the average agent reject rate per evaluation (absolute values).

    One figure is created for the "all" aggregate and one for each agent in
    `env.agents`. Each figure shows the mean reject rate, a Savitzky-Golay
    smoothed version of it and a shaded band between the min and max values.

    Args:
        data: Dict with the keys "mean", "min" and "max"; each value maps
            "all" and every agent to an array with one value per evaluation.
        env: Object exposing `agents`, the list of agent identifiers.
    """
    max_window, polyorder = 15, 3  # Savitzky-Golay smoothing parameters.

    for agent in ["all"] + env.agents:
        fig_name = f"average_agent_reject_rate_abs_{agent}"

        # Close a figure left over from a previous run of the cell, so that
        # re-running does not draw on top of the old content.
        plt.close(fig=fig_name)
        fig = plt.figure(num=fig_name, layout="constrained")
        fig.canvas.header_visible = False
        ax = fig.subplots()

        reject = data["mean"][agent]
        reject_min = data["min"][agent]
        reject_max = data["max"][agent]

        eval_steps = np.arange(len(reject))

        ax.plot(eval_steps, reject, label="Original")

        # savgol_filter rejects a window longer than the series, so shrink
        # the window (keeping it odd) for short runs and skip the smoothed
        # curve entirely when too few points are left for the polynomial.
        n_points = len(reject)
        window = min(max_window, n_points if n_points % 2 == 1 else n_points - 1)
        if window > polyorder:
            smoothed = savgol_filter(reject, window, polyorder)
            ax.plot(eval_steps, smoothed, label="Smoothed")

        # Draw on this figure's axes explicitly instead of relying on
        # pyplot's notion of the current axes.
        ax.fill_between(eval_steps, reject_min, reject_max, alpha=0.2, color="blue", label="Min/Max")

        ax.set_title(f"Average agent reject¹ rate ({agent = })\n(cumulative value per episode)")
        # Footnote placed below the axes, in axes coordinates.
        ax.text(
            0.5,
            -0.2,
            "¹reject rate = action reject + local reject + forward reject",
            fontsize=10,
            ha="center",
            transform=ax.transAxes,
        )
        ax.set_ylabel("Reject rate")
        ax.set_xlabel("Evaluation")

        ax.legend()
        ax.grid(axis="both")
        ax.set_axisbelow(True)  # By default the axis is over the content.


# Draw the absolute reject rate figures: one for "all" and one per agent.
make_average_agent_reject_rate_abs(agent_reject_rate_abs, env)

### Percent values

In [None]:
def make_average_agent_reject_rate_perc(data, env):
    """Plot the average agent reject rate per evaluation (percent values).

    One figure is created for the "all" aggregate and one for each agent in
    `env.agents`. The ratios in `data` are multiplied by 100 and shown as
    percentages: the mean, a Savitzky-Golay smoothed version of it and a
    shaded band between the min and max values.

    Args:
        data: Dict with the keys "mean", "min" and "max"; each value maps
            "all" and every agent to a NumPy array of ratios with one value
            per evaluation.
        env: Object exposing `agents`, the list of agent identifiers.
    """
    max_window, polyorder = 15, 3  # Savitzky-Golay smoothing parameters.

    for agent in ["all"] + env.agents:
        fig_name = f"average_agent_reject_rate_perc_{agent}"

        # Close a figure left over from a previous run of the cell, so that
        # re-running does not draw on top of the old content.
        plt.close(fig=fig_name)
        fig = plt.figure(num=fig_name, layout="constrained")
        fig.canvas.header_visible = False
        ax = fig.subplots()

        # Ratios are scaled to percentages to match the PercentFormatter
        # installed on the y axis below.
        reject = data["mean"][agent] * 100
        reject_min = data["min"][agent] * 100
        reject_max = data["max"][agent] * 100

        eval_steps = np.arange(len(reject))

        ax.plot(eval_steps, reject, label="Original")

        # savgol_filter rejects a window longer than the series, so shrink
        # the window (keeping it odd) for short runs and skip the smoothed
        # curve entirely when too few points are left for the polynomial.
        n_points = len(reject)
        window = min(max_window, n_points if n_points % 2 == 1 else n_points - 1)
        if window > polyorder:
            smoothed = savgol_filter(reject, window, polyorder)
            ax.plot(eval_steps, smoothed, label="Smoothed")

        # Draw on this figure's axes explicitly instead of relying on
        # pyplot's notion of the current axes.
        ax.fill_between(eval_steps, reject_min, reject_max, alpha=0.2, color="blue", label="Min/Max")

        ax.set_title(f"Average agent reject¹ rate ({agent = })\n(cumulative % value over the input rate per episode)")
        # Footnote placed below the axes, in axes coordinates.
        ax.text(
            0.5,
            -0.2,
            "¹reject rate = action reject + local reject + forward reject",
            fontsize=10,
            ha="center",
            transform=ax.transAxes,
        )
        ax.set_ylabel("Reject rate")
        ax.yaxis.set_major_formatter(ticker.PercentFormatter())
        ax.set_xlabel("Evaluation")

        ax.legend()
        ax.grid(axis="both")
        ax.set_axisbelow(True)  # By default the axis is over the content.


# Draw the percent reject rate figures: one for "all" and one per agent.
make_average_agent_reject_rate_perc(agent_reject_rate_ratio, env)