# Train summary of a single experiment

This notebook summarizes the training of an experiment run with PPO, with one episode played per iteration.

## Experiment loading

In [None]:
# Common imports.
from pathlib import Path

%matplotlib widget
import base

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import dfaas_env
import dfaas_utils

In [None]:
# NOTE(review): absolute, machine-specific results directory; it must be
# adjusted before running the notebook on another machine.
prefix_dir = Path("/home/emanuele/marl-dfaas/results/")

# Directory of the single experiment summarized by this notebook.
exp_dir = prefix_dir / "DF_20250612_163451_PPO_two_agents_constant_3000_no_exp"

# Raw data dictionary "result.json".
# The cells below index it by iteration number (raw_exp_data[iter_idx]).
raw_exp_data = dfaas_utils.parse_result_file(exp_dir / "result.json")

# Reference environment.
# The cells below read env.agents and env.max_steps from it.
env = base.get_env(exp_dir)

In [None]:
# Short overview of the loaded experiment: location, name, agents and number
# of iterations found in the result file.
print(f"Experiment prefix dir: {prefix_dir.as_posix()!r}")
print(f"Experiment name:       {exp_dir.name!r}")
print(f"Agents:                {env.agents} ({len(env.agents)})")
print(f"Iterations:            {len(raw_exp_data)}")

## Reward

In [None]:
def get_reward_data_step(raw_exp_data, env):
    """Compute the mean reward per step of every iteration.

    For each agent, the step rewards of all the episodes played in one
    iteration are pooled together and averaged. The "all" entry holds the mean
    of the per-agent averages of that iteration.

    Returns a dictionary that maps "all" and every agent in env.agents to a
    NumPy array with one value per iteration.
    """
    iters_n = len(raw_exp_data)

    # "all" stays the first key: callers that iterate over the dictionary see
    # it before the single agents.
    reward_step = {"all": np.empty(iters_n)}
    reward_step.update({agent: np.empty(iters_n) for agent in env.agents})

    for iter_idx in range(iters_n):
        runners = raw_exp_data[iter_idx]["env_runners"]
        episodes = runners["episodes_this_iter"]
        assert episodes > 0

        rewards_by_episode = runners["hist_stats"]["reward"]

        per_agent_means = []
        for agent in env.agents:
            # Pool the step rewards of every episode of this iteration.
            pooled_rewards = [
                step_reward for epi in range(episodes) for step_reward in rewards_by_episode[epi][agent]
            ]

            reward_step[agent][iter_idx] = np.average(pooled_rewards)
            per_agent_means.append(reward_step[agent][iter_idx])

        reward_step["all"][iter_idx] = np.average(per_agent_means)

    return reward_step


def get_reward_data_sum(raw_exp_data, env):
    """Compute the mean cumulative reward per episode of every iteration.

    For each iteration, the values stored under "episode_reward" (entry "all")
    and under "policy_policy_<agent>_reward" (one entry per agent) are averaged
    over the episodes played in that iteration.

    Returns a dictionary that maps every agent in env.agents and "all" to a
    NumPy array with one value per iteration.
    """
    iters_n = len(raw_exp_data)

    reward_sum = {}
    for agent in env.agents:
        reward_sum[agent] = np.empty(iters_n)
    reward_sum["all"] = np.empty(iters_n)

    for iter_idx in range(iters_n):
        runners = raw_exp_data[iter_idx]["env_runners"]
        episodes = runners["episodes_this_iter"]
        assert episodes > 0

        hist_stats = runners["hist_stats"]

        # Only the first `episodes` entries of each series are considered.
        episode_rewards = np.array(
            [hist_stats["episode_reward"][epi] for epi in range(episodes)], dtype=np.float64
        )
        reward_sum["all"][iter_idx] = np.average(episode_rewards)

        for agent in env.agents:
            policy_rewards = hist_stats[f"policy_policy_{agent}_reward"]
            agent_rewards = np.array([policy_rewards[epi] for epi in range(episodes)], dtype=np.float64)
            reward_sum[agent][iter_idx] = np.average(agent_rewards)

    return reward_sum


# Compute both reward summaries once; the two plotting cells below use them.
reward_sum = get_reward_data_sum(raw_exp_data, env)
reward_step = get_reward_data_step(raw_exp_data, env)

### Cumulative reward per episode

In [None]:
def make_cumulative_reward_plot(reward_sum):
    """Draw one line plot per entry of reward_sum (reward against iteration).

    Entries are processed in sorted key order. Every figure is registered under
    the name "reward_cum_<agent>"; a figure already open under that name is
    closed first, so re-running the cell does not pile up figures.
    """
    for agent in sorted(reward_sum):
        figure_name = f"reward_cum_{agent}"

        plt.close(fig=figure_name)
        fig = plt.figure(num=figure_name, layout="constrained")
        fig.canvas.header_visible = False
        ax = fig.subplots()

        ax.plot(reward_sum[agent])

        ax.set_title(f"Average cumulative reward per episode ({agent = })")
        ax.set(ylabel="Reward", xlabel="Iteration")

        ax.grid(axis="both")
        ax.set_axisbelow(True)  # Draw the grid below the plotted line.


# Render the cumulative reward figures (one per agent, plus "all").
make_cumulative_reward_plot(reward_sum)

### Average reward per step

In [None]:
def make_average_reward_step_plot(reward_step):
    """Draw one line plot per entry of reward_step (reward against iteration).

    Entries are processed in the dictionary's own order. Every figure is
    registered under the name "reward_step_<agent>"; a figure already open
    under that name is closed before the new one is created.
    """
    for agent in reward_step:
        figure_name = f"reward_step_{agent}"

        plt.close(fig=figure_name)
        fig = plt.figure(num=figure_name, layout="constrained")
        fig.canvas.header_visible = False
        ax = fig.subplots()

        ax.plot(reward_step[agent])

        ax.set_title(f"Average reward per step per episode ({agent = })")
        ax.set(ylabel="Reward", xlabel="Iteration")

        ax.grid(axis="both")
        ax.set_axisbelow(True)  # Draw the grid below the plotted line.


# Render the average reward per step figures ("all" plus one per agent).
make_average_reward_step_plot(reward_step)

## Action distribution

In [None]:
def get_normalized_action_iter(raw_exp_data, iter_idx, env, agent):
    """Return the average normalized action distribution of one agent for each episode of one iteration.

    For every step, the three action components ("action_local",
    "action_forward" and "action_reject") are divided by their sum, so that
    the shares of a step add up to 1. The shares are then averaged over the
    env.max_steps steps of each episode.

    Args:
        raw_exp_data: Parsed experiment results, indexed by iteration.
        iter_idx: Index of the iteration to process.
        env: Reference environment; only env.max_steps is read.
        agent: Identifier of the agent to process.

    Returns:
        A dictionary with the keys "local", "forward" and "reject"; each value
        is a NumPy array with one averaged share per episode.

    Raises:
        AssertionError: If the iteration has no episodes or if the actions of
            a step sum to zero (or less).
    """
    action_names = ("local", "forward", "reject")

    episodes = raw_exp_data[iter_idx]["env_runners"]["episodes_this_iter"]
    assert episodes > 0

    hist_stats = raw_exp_data[iter_idx]["env_runners"]["hist_stats"]

    action_dist_iter = {name: np.zeros(episodes) for name in action_names}

    for epi in range(episodes):
        action_dist_step = {name: np.zeros(env.max_steps) for name in action_names}

        # Before calculating the average, we need to normalize the steps.
        for step in range(env.max_steps):
            local = hist_stats["action_local"][epi][agent][step]
            forward = hist_stats["action_forward"][epi][agent][step]
            reject = hist_stats["action_reject"][epi][agent][step]

            action_sum = local + forward + reject
            assert action_sum > 0

            # Write at index `step`: rebinding the dictionary entry to the
            # scalar would discard every step but the last one.
            action_dist_step["local"][step] = local / action_sum
            action_dist_step["forward"][step] = forward / action_sum
            action_dist_step["reject"][step] = reject / action_sum

        for name in action_names:
            action_dist_iter[name][epi] = np.average(action_dist_step[name])

    return action_dist_iter


def get_action_distribution(raw_exp_data, env):
    """Compute the average normalized action distribution of every iteration.

    For each agent, the per-episode distribution returned by
    get_normalized_action_iter() is averaged over the episodes of the
    iteration. The "all" entry is the mean of the per-agent values.

    Returns a two-level dictionary: agent ("all" first, then env.agents) ->
    action ("local", "forward", "reject") -> NumPy array with one value per
    iteration.
    """
    iters_n = len(raw_exp_data)
    action_names = ("local", "forward", "reject")

    action_dist = {
        agent: {name: np.empty(iters_n) for name in action_names} for agent in ["all"] + env.agents
    }

    for iter_idx in range(iters_n):
        # Per-agent averages of this iteration, collected to build "all".
        per_agent = {name: [] for name in action_names}

        for agent in env.agents:
            # "all" is an aggregate entry, never a real agent to process.
            if agent == "all":
                continue

            action_dist_iter = get_normalized_action_iter(raw_exp_data, iter_idx, env, agent)

            for name in action_names:
                action_dist[agent][name][iter_idx] = np.average(action_dist_iter[name])
                per_agent[name].append(action_dist[agent][name][iter_idx])

        for name in action_names:
            action_dist["all"][name][iter_idx] = np.average(per_agent[name])

    return action_dist


# Compute the action distribution once; the plotting cell below uses it.
action_dist = get_action_distribution(raw_exp_data, env)

In [None]:
def make_action_distribution_plot(action_dist):
    """Draw one stacked area plot of the action distribution per entry of action_dist.

    Args:
        action_dist: Dictionary agent -> {"local", "forward", "reject"} ->
            sequence with one value per iteration.

    Every figure is registered under the name "action_dist_<agent>"; a figure
    already open under that name is closed before the new one is created.
    """
    for agent, dist in action_dist.items():
        plt.close(fig=f"action_dist_{agent}")
        fig = plt.figure(num=f"action_dist_{agent}", layout="constrained")
        fig.canvas.header_visible = False
        ax = fig.subplots()

        # The x axis is derived from the plotted series itself, so that the
        # function depends only on its argument and not on notebook globals.
        iterations = np.arange(len(dist["local"]))

        ax.stackplot(
            iterations,
            dist["local"],
            dist["forward"],
            dist["reject"],
            labels=["Local", "Forward", "Reject"],
            alpha=0.8,
        )

        ax.set_title(f"Average action distribution per step per episode ({agent = })")
        ax.set_ylabel("Action proportion")
        ax.set_xlabel("Iteration")

        ax.legend()
        ax.grid(axis="both")
        ax.set_axisbelow(True)  # By default the axis is over the content.


# Render the action distribution figures ("all" plus one per agent).
make_action_distribution_plot(action_dist)

## Rejections

Differences between reject rates:

* **Node reject rate**: it considers the reject rate of the incoming local rate and the _incoming forwarded rate from the neighbors_.
* **Agent reject rate**: it is the sum of the reject rate decided by the agent (`action_reject`), the rejected incoming local rate (`incoming_rate_local_reject`) and the rejected forwarded rate (`forward_reject_rate`).

### Node reject rate

In [None]:
def get_reject_data(raw_exp_data, env):
    """Compute the average node reject rate (as a percentage) of every iteration.

    For each step the rate is "incoming_rate_reject" / "incoming_rate" * 100;
    steps with a zero incoming rate count as 0. The step values are averaged
    over the env.max_steps steps of an episode, then over the episodes of the
    iteration. The "all" entry is the mean of the per-agent values.

    Returns a dictionary that maps every agent in env.agents and "all" to a
    NumPy array with one value per iteration.
    """
    iters_n = len(raw_exp_data)

    reject_data = {agent: np.empty(iters_n) for agent in env.agents}
    reject_data["all"] = np.empty(iters_n)

    for iter_idx in range(iters_n):
        runners = raw_exp_data[iter_idx]["env_runners"]
        episodes = runners["episodes_this_iter"]
        assert episodes > 0

        hist_stats = runners["hist_stats"]

        per_agent_means = []
        for agent in env.agents:
            episode_means = np.empty(episodes)

            for epi in range(episodes):
                incoming = hist_stats["incoming_rate"][epi][agent]
                rejected = hist_stats["incoming_rate_reject"][epi][agent]

                percent_reject = np.zeros(env.max_steps)
                for step in range(env.max_steps):
                    step_incoming, step_rejected = incoming[step], rejected[step]

                    # With no incoming rate the step keeps its initial 0.
                    if step_incoming != 0:
                        percent_reject[step] = step_rejected / step_incoming * 100

                episode_means[epi] = np.average(percent_reject)

            reject_data[agent][iter_idx] = np.average(episode_means)
            per_agent_means.append(reject_data[agent][iter_idx])

        reject_data["all"][iter_idx] = np.average(per_agent_means)

    return reject_data


# Compute the node reject rate once; the plotting cell below uses it.
reject_data = get_reject_data(raw_exp_data, env)

In [None]:
def make_average_reject_step_plot(reject_data):
    """Draw one line plot per entry of reject_data (node reject rate against iteration).

    Entries are processed in sorted key order and the y axis is formatted as a
    percentage. Every figure is registered under the name
    "average_reject_step_<agent>"; a figure already open under that name is
    closed before the new one is created.
    """
    footnote = "¹reject rate = local reject rate + incoming forwarded reject rate"

    for agent in sorted(reject_data):
        figure_name = f"average_reject_step_{agent}"

        plt.close(fig=figure_name)
        fig = plt.figure(num=figure_name, layout="constrained")
        fig.canvas.header_visible = False
        ax = fig.subplots()

        ax.plot(reject_data[agent])

        ax.set_title(f"Average node reject rate ({agent = })\n(average node reject rate¹ per step per episode)")
        # Footnote placed below the axes, in axes-relative coordinates.
        ax.text(0.5, -0.2, footnote, fontsize=10, ha="center", transform=ax.transAxes)

        ax.set(ylabel="Reject rate", xlabel="Iteration")
        ax.yaxis.set_major_formatter(ticker.PercentFormatter())

        ax.grid(axis="both")
        ax.set_axisbelow(True)  # Draw the grid below the plotted line.


# Render the node reject rate figures (one per agent, plus "all").
make_average_reject_step_plot(reject_data)

### Agent reject rate

In [None]:
def get_agent_reject_rate(raw_exp_data, env):
    """Compute the average agent reject rate (as a percentage) of every iteration.

    For each step the rate is ("action_reject" + "incoming_rate_local_reject"
    + "forward_reject_rate") / "observation_input_rate" * 100. The step values
    are averaged over the env.max_steps steps of an episode, then over the
    episodes of the iteration. The "all" entry is the mean of the per-agent
    values.

    Returns a dictionary that maps every agent in env.agents and "all" to a
    NumPy array with one value per iteration. An AssertionError is raised if a
    step has a non-positive input rate.
    """
    iters_n = len(raw_exp_data)

    reject_data = {agent: np.empty(iters_n) for agent in env.agents}
    reject_data["all"] = np.empty(iters_n)

    for iter_idx in range(iters_n):
        runners = raw_exp_data[iter_idx]["env_runners"]
        episodes = runners["episodes_this_iter"]
        assert episodes > 0

        hist_stats = runners["hist_stats"]

        per_agent_means = []
        for agent in env.agents:
            episode_means = np.empty(episodes)

            for epi in range(episodes):
                input_rates = hist_stats["observation_input_rate"][epi][agent]
                action_rejects = hist_stats["action_reject"][epi][agent]
                local_rejects = hist_stats["incoming_rate_local_reject"][epi][agent]
                forward_rejects = hist_stats["forward_reject_rate"][epi][agent]

                percent_reject = np.zeros(env.max_steps)
                for step in range(env.max_steps):
                    input_rate = input_rates[step]
                    reject_rate = action_rejects[step] + local_rejects[step] + forward_rejects[step]

                    assert input_rate > 0

                    percent_reject[step] = reject_rate / input_rate * 100

                episode_means[epi] = np.average(percent_reject)

            reject_data[agent][iter_idx] = np.average(episode_means)
            per_agent_means.append(reject_data[agent][iter_idx])

        reject_data["all"][iter_idx] = np.average(per_agent_means)

    return reject_data


# Compute the agent reject rate once; the plotting cell below uses it.
agent_reject_rate = get_agent_reject_rate(raw_exp_data, env)

In [None]:
def make_average_agent_reject_step_plot(reject_data):
    """Draw one line plot per entry of reject_data (agent reject rate against iteration).

    Entries are processed in sorted key order and the y axis is formatted as a
    percentage. Every figure is registered under the name
    "average_agent_reject_step_<agent>"; a figure already open under that name
    is closed before the new one is created.
    """
    footnote = "¹reject rate = action reject + local reject + forward reject"

    for agent in sorted(reject_data):
        figure_name = f"average_agent_reject_step_{agent}"

        plt.close(fig=figure_name)
        fig = plt.figure(num=figure_name, layout="constrained")
        fig.canvas.header_visible = False
        ax = fig.subplots()

        ax.plot(reject_data[agent])

        ax.set_title(f"Average agent reject rate ({agent = })\n(average agent reject rate¹ per step per episode)")
        # Footnote placed below the axes, in axes-relative coordinates.
        ax.text(0.5, -0.2, footnote, fontsize=10, ha="center", transform=ax.transAxes)

        ax.set(ylabel="Reject rate", xlabel="Iteration")
        ax.yaxis.set_major_formatter(ticker.PercentFormatter())

        ax.grid(axis="both")
        ax.set_axisbelow(True)  # Draw the grid below the plotted line.


# Render the agent reject rate figures (one per agent, plus "all").
make_average_agent_reject_step_plot(agent_reject_rate)

## Rejections by type

In [None]:
def get_reject_rate_dist_by_type(raw_exp_data, env):
    """Compute, for every iteration, how the rejected rate splits across the reject types.

    The reject types are "action" (from "action_reject"), "excess_local" (from
    "incoming_rate_local_reject") and "excess_forward" (from
    "forward_reject_rate"). For each agent and episode, the share of a type is
    its total over the episode divided by the total of all three types; an
    episode without rejects keeps a share of 0 for every type. The "all" entry
    of an episode pools the per-agent shares and renormalizes them so that they
    add up to 1. Each returned value is the average over the episodes of one
    iteration.

    Returns a two-level dictionary: agent ("all" first, then env.agents) ->
    reject type -> NumPy array with one value per iteration.
    """
    iters_n = len(raw_exp_data)

    reject_types = ["excess_local", "action", "excess_forward"]
    agent_keys = ["all"] + env.agents

    # The output dictionary has two level of nesting: agent -> reject type -> values.
    reject_data = {
        agent: {reject_type: np.zeros(iters_n) for reject_type in reject_types} for agent in agent_keys
    }

    for iter_idx in range(iters_n):
        runners = raw_exp_data[iter_idx]["env_runners"]
        episodes = runners["episodes_this_iter"]
        assert episodes > 0

        hist_stats = runners["hist_stats"]

        # Per-episode shares of this iteration, averaged at the end of the loop body.
        shares_iter = {
            agent: {reject_type: np.zeros(episodes) for reject_type in reject_types} for agent in agent_keys
        }

        for epi in range(episodes):
            for agent in env.agents:
                # Reject rate arrays (each index is one step).
                by_action = np.array(hist_stats["action_reject"][epi][agent])
                by_local = np.array(hist_stats["incoming_rate_local_reject"][epi][agent])
                by_forward = np.array(hist_stats["forward_reject_rate"][epi][agent])
                rejected_total = (by_action + by_local + by_forward).sum()

                if rejected_total > 0:
                    shares_iter[agent]["action"][epi] = by_action.sum() / rejected_total
                    shares_iter[agent]["excess_local"][epi] = by_local.sum() / rejected_total
                    shares_iter[agent]["excess_forward"][epi] = by_forward.sum() / rejected_total

            # "all" agent: pool the shares of the single agents for this episode.
            agents_action = np.array([shares_iter[agent]["action"][epi] for agent in env.agents])
            agents_local = np.array([shares_iter[agent]["excess_local"][epi] for agent in env.agents])
            agents_forward = np.array([shares_iter[agent]["excess_forward"][epi] for agent in env.agents])
            pooled_total = (agents_action + agents_local + agents_forward).sum()

            if pooled_total > 0:
                shares_iter["all"]["action"][epi] = agents_action.sum() / pooled_total
                shares_iter["all"]["excess_local"][epi] = agents_local.sum() / pooled_total
                shares_iter["all"]["excess_forward"][epi] = agents_forward.sum() / pooled_total

        # Average the statistics for a single iteration.
        for agent, shares in shares_iter.items():
            for reject_type, episode_values in shares.items():
                reject_data[agent][reject_type][iter_idx] = np.average(episode_values)

    return reject_data


# Compute the reject distribution by type once; the plotting cell below uses it.
reject_rate_dist_by_type = get_reject_rate_dist_by_type(raw_exp_data, env)

In [None]:
def make_reject_type_distribution_plot(reject_rate_dist_by_type):
    """Draw one stacked area plot of the reject type distribution per entry of the input.

    Args:
        reject_rate_dist_by_type: Dictionary agent -> {"action",
            "excess_local", "excess_forward"} -> sequence with one value per
            iteration.

    Every figure is registered under the name
    "reject_type_distribution_<agent>"; a figure already open under that name
    is closed before the new one is created.
    """
    for agent, dist in reject_rate_dist_by_type.items():
        plt.close(fig=f"reject_type_distribution_{agent}")
        fig = plt.figure(num=f"reject_type_distribution_{agent}", layout="constrained")
        fig.canvas.header_visible = False
        ax = fig.subplots()

        # The x axis is derived from the plotted series itself, so that the
        # function depends only on its argument and not on notebook globals.
        iter_n = np.arange(len(dist["action"]))

        ax.stackplot(
            iter_n,
            dist["action"],
            dist["excess_local"],
            dist["excess_forward"],
            labels=["By action", "Excess local", "Excess forward"],
            alpha=0.8,
        )

        ax.set_title(f"Average reject type per step per episode ({agent = })")
        ax.set_ylabel("Reject distribution")
        ax.set_xlabel("Iteration")

        ax.legend()
        ax.grid(axis="both")
        ax.set_axisbelow(True)  # By default the axis is over the content.


# Render the reject type distribution figures ("all" plus one per agent).
make_reject_type_distribution_plot(reject_rate_dist_by_type)