In [None]:
import json

import angorapy as ang
import angorapy.environments
import bokeh
import numpy as np
from bokeh.io import output_notebook, show

from angorapy.analysis.investigation import Investigator
from angorapy.common.const import PATH_TO_EXPERIMENTS
from angorapy.utilities.monitor import training_plots as plots
# Configure bokeh so that `show` renders figures inline in the notebook.
output_notebook()

# Restore a stored PPO agent from its saved state and wrap it in an
# Investigator, which is used for rendering episodes further below.
# NOTE(review): from_iteration="best" presumably selects the best-performing
# saved iteration -- confirm against the AngoraPy documentation.
AGENT_ID = 1653053413
agent = ang.agent.PPOAgent.from_agent_state(AGENT_ID, from_iteration="best")
investigator = Investigator.from_agent(agent)

## Learning Curves & Statistics
First things first, let us see how the training of this agent went. We can easily plot the reward over training cycles as follows:

In [None]:
# Read the progress file stored in this agent's experiment directory.
progress_path = f"{PATH_TO_EXPERIMENTS}/{agent.agent_id}/progress.json"
with open(progress_path, "r") as progress_file:
    progress_data = json.load(progress_file)

# Build the reward figure from the "rewards" entry and display it.
# The second positional argument is deliberately passed as an empty list.
reward_plot = plots.plot_reward_progress(progress_data["rewards"], [])
show(reward_plot)

We can do the same with the average episode length per cycle.

In [None]:
# Re-read the progress file so that this cell can be run on its own.
progress_path = f"{PATH_TO_EXPERIMENTS}/{agent.agent_id}/progress.json"
with open(progress_path, "r") as progress_file:
    progress_data = json.load(progress_file)

# Build the episode-length figure from the "lengths" entry and display it.
# The second positional argument is deliberately passed as an empty list.
length_plot = plots.plot_length_progress(progress_data["lengths"], [])
show(length_plot)

## Evaluation

The above plots already show how performance progressed throughout training. Note, however, that AngoraPy trains agents with stochastic policies and, during training, encourages exploration. Thus, the above performance at every cycle is not entirely representative of the agent's true potential. To evaluate its real performance, let us run 10 episodes without random exploration.

In [None]:
# Evaluate the agent for 10 episodes and keep the first element of the result.
# TODO: act confidently.
# NOTE(review): act_confidently is still False here, while the accompanying
# text announces an evaluation without random exploration -- presumably this
# should become True once supported; confirm before changing.
n_eval_episodes = 10
evaluation_output = agent.evaluate(n_eval_episodes, act_confidently=False)
stats_eval = evaluation_output[0]

In [None]:
# Report the mean reward over the evaluated episodes.
mean_eval_reward = np.mean(stats_eval.episode_rewards)
print(mean_eval_reward)

## Rendering
Lastly, let us qualitatively inspect the behaviour of the agent by rendering a few episodes.

In [None]:
# Render ten episodes in the agent's environment, one after another.
n_rendered_episodes = 10
for _ in range(n_rendered_episodes):
    investigator.render_episode(agent.env)