In [None]:
import gymnasium as gym
import matplotlib.pyplot as plt
import panda_gym
from agents.on_policy.a2c_agent import A2CAgent
from agents.off_policy.ddpg_agent import DDPGAgent
from agents.on_policy.a2c_discrete_agent import A2CDiscreteAgent
from agents.on_policy.a2c_n_step_ahead_agent import A2CNStepAheadAgent

In [None]:
# Training budget (episodes per agent) and the verbosity level forwarded to
# each agent's train_agent call.
episodes = 40000
verbose = 2

env = gym.make("PandaPushDense-v3")

try:
    # The observation space is indexed by key: the agents' input size is the
    # length of the 'observation' vector plus that of the 'desired_goal' vector.
    num_states = env.observation_space['observation'].shape[0] + env.observation_space['desired_goal'].shape[0]
    num_actions = env.action_space.shape[0]
    # Only the first component of the action bounds is read.
    # NOTE(review): assumes every action dimension shares the same limits — confirm.
    upper_bound = env.action_space.high[0]
    lower_bound = env.action_space.low[0]

    # The three agents are trained one after another on the same env instance,
    # each for `episodes` episodes; the value returned by train_agent is kept
    # for plotting later in the notebook.
    a2c_agent = A2CAgent(num_states, num_actions, lower_bound, upper_bound)
    rewards_a2c = a2c_agent.train_agent(env, episodes, verbose)

    discrete_agent = A2CDiscreteAgent(num_states, num_actions, lower_bound, upper_bound)
    rewards_discrete = discrete_agent.train_agent(env, episodes, verbose)

    ddpg_agent = DDPGAgent(num_states, num_actions, lower_bound, upper_bound)
    rewards_ddpg = ddpg_agent.train_agent(env, episodes, verbose)
finally:
    # Always release the environment, even when a training run raises or the
    # cell is interrupted by hand, so it does not linger in the kernel.
    env.close()

In [None]:
import pandas as pd

def plot_rewards_with_average(rewards_list, colors, labels_list, window_size=100, annotation_step=3001):
    """Plot the rolling-mean reward curve of several runs on one figure.

    Each run is smoothed with a rolling mean (``min_periods=1``, so the curve
    starts at the first point) and drawn against its 0-based position.
    Selected points of every curve are annotated with their value, formatted
    to two decimals.

    Args:
        rewards_list: Sequence of runs; each run is a sequence of numeric
            rewards. Runs may have different lengths and may be empty.
        colors: Matplotlib colours, indexed by run position. May contain more
            entries than there are runs.
        labels_list: Legend labels, indexed by run position.
        window_size: Rolling-mean window, also shown in the figure title.
        annotation_step: Spacing between annotated points. Points
            0, step, 2*step, ... are annotated, plus the final point of the run.

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Raises:
        ValueError: If ``annotation_step`` is smaller than 1.
    """
    if annotation_step < 1:
        raise ValueError("annotation_step must be a positive integer")

    fig, ax = plt.subplots(figsize=(10, 5))

    for run_idx, rewards in enumerate(rewards_list):
        color = colors[run_idx]
        # dtype=float keeps an empty run numeric so rolling().mean() still works.
        avg_rewards = pd.Series(rewards, dtype=float).rolling(window=window_size, min_periods=1).mean()
        n_points = len(avg_rewards)
        ax.plot(range(n_points), avg_rewards, color=color, label=labels_list[run_idx])

        if n_points == 0:
            # Nothing to annotate for an empty run.
            continue

        # The annotation grid is derived from THIS run's length, so every
        # position is guaranteed to be in range for this curve.
        marks = list(range(0, n_points, annotation_step))
        # Always annotate the final value, but only once.
        if marks[-1] != n_points - 1:
            marks.append(n_points - 1)

        for pos in marks:
            # Positional lookup: independent of whatever index the data carries.
            value = avg_rewards.iloc[pos]
            ax.annotate(f'{value:.2f}', (pos, value),
                        textcoords="offset points",
                        xytext=(0, 20),
                        ha='center',
                        color=color,
                        arrowprops=dict(arrowstyle='->', color=color))

    ax.set_title('Rewards and Rolling Average ({})'.format(window_size))
    ax.set_xlabel('Episodes')
    ax.set_ylabel('Rewards')
    ax.legend()
    plt.show()

# Compare the three training runs on a single chart, smoothing each curve
# with a 3000-point rolling mean.
rewards_list = [rewards_a2c, rewards_discrete, rewards_ddpg]
labels_list = ['A2C', 'Discrete A2C', 'DDPG']
# The palette has spare entries; the plotting function indexes it by run position.
colors = ['blue', 'orange', 'red', 'green', 'purple']
plot_rewards_with_average(rewards_list, colors, labels_list, window_size=3000)