# Loading an Agent for Inference

In [1]:
import os

import imageio
import gymnasium as gym
import numpy as np
import torch
from agilerl.algorithms.ppo import PPO

## Define the path where the model is saved

In [2]:
# Checkpoint file of the trained PPO agent; this path is passed to PPO.load below.
# Relative path, so it is resolved against the notebook's working directory.
save_path = "PPO_cartpole_trained_agent.pt"

In [3]:
# Choose where the agent runs: start from CPU and switch to the GPU only
# when torch reports that CUDA is usable on this machine.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

In [4]:
# Restore the trained agent from the checkpoint at `save_path`, asking for it
# to be placed on `device`.
# NOTE(review): checkpoint loaders commonly deserialise pickled data - only
# load files from a trusted source; confirm against the AgileRL docs.
ppo = PPO.load(save_path, device=device)

## Test loop for inference

In [5]:
# Roll out the loaded agent for a fixed number of episodes, recording the
# per-episode return in `rewards` and every rendered frame in `frames`
# (both are consumed by the gif/summary cell further down).
test_env = gym.make("CartPole-v1", render_mode="rgb_array")
rewards = []  # one total reward per finished episode
frames = []  # rendered frames from all episodes, in playback order
testing_eps = 7  # number of evaluation episodes
max_testing_steps = 1000  # hard cap on steps per episode
state_is_a_RGB_image = False  # set True when observations are channels-last images

# try/finally guarantees the environment is closed even if the agent or the
# environment raises part-way through an episode; previously close() was only
# reached when every episode completed.
try:
    with torch.no_grad():  # inference only - no gradients needed
        for ep in range(testing_eps):
            # reset() returns (observation, info); only the observation is used
            state = test_env.reset()[0]
            score = 0

            for step in range(max_testing_steps):
                # For image observations, move the last axis to third-from-last
                # (presumably channels-last -> channels-first for the network).
                if state_is_a_RGB_image:
                    state = np.moveaxis(state, [-1], [-3])

                # First return value is the action; the remaining ones are discarded
                action, *_ = ppo.get_action(state)
                # Drop singleton dimensions before handing the action to the env
                action = action.squeeze()

                # Capture the frame for the state the action was chosen in
                frame = test_env.render()
                frames.append(frame)

                # Advance the environment by one step
                state, reward, terminated, truncated, _ = test_env.step(action)

                # Accumulate the episode return
                score += reward

                # Stop this episode once the environment terminates or truncates
                if terminated or truncated:
                    break

            # Collect and print episodic reward
            rewards.append(score)
            print("-" * 15, f"Episode: {ep}", "-" * 15)
            print("Episodic Reward: ", rewards[-1])
finally:
    test_env.close()

--------------- Episode: 0 ---------------
Episodic Reward:  500.0
--------------- Episode: 1 ---------------
Episodic Reward:  500.0
--------------- Episode: 2 ---------------
Episodic Reward:  500.0
--------------- Episode: 3 ---------------
Episodic Reward:  500.0
--------------- Episode: 4 ---------------
Episodic Reward:  500.0
--------------- Episode: 5 ---------------
Episodic Reward:  500.0
--------------- Episode: 6 ---------------
Episodic Reward:  500.0


## Save and render episodes as a gif

In [6]:
# Export every recorded frame as one gif and compute the mean episodic reward.
gif_path = "./videos/"
os.makedirs(gif_path, exist_ok=True)  # no error if the directory already exists
# Build the output path from gif_path instead of repeating the literal, so the
# directory that is created and the directory that is written to cannot drift apart.
# loop=0 is forwarded to the gif writer (infinite looping per the imageio GIF docs).
imageio.mimwrite(os.path.join(gif_path, "ppo_agilerl_cartpole.gif"), frames, loop=0)
# Average of the per-episode rewards collected by the test loop
mean_fitness = np.mean(rewards)