In [2]:
from models.actor_critic import PPOActorCritic
import gymnasium as gym
import numpy as np
import torch
from itertools import count

## Model From Scratch

In [10]:
# Roll out the hand-written PPO actor-critic on Pendulum-v1 with on-screen rendering.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

env = gym.make('Pendulum-v1', render_mode='human')
ac = PPOActorCritic(in_features=4).to(device)
# map_location remaps tensors that were saved on another device (e.g. a CUDA
# checkpoint opened on a CPU-only machine) onto `device` instead of failing.
ac.load_state_dict(torch.load("saved_models/ac_model.pth", map_location=device))
ac.eval()

num_episodes = 1

try:
    for episode in range(num_episodes):
        states, info = env.reset()
        for t in count():
            # Model input = environment observation with the step index appended
            # as one extra feature (hence in_features=4 above).
            # NOTE(review): the TorchRL rollout in this notebook feeds t/200 rather
            # than the raw step index — confirm raw `t` is what this checkpoint
            # was trained on.
            obs_tensor = torch.from_numpy(states).float().to(device)
            t_feature = torch.tensor([t], dtype=torch.float32, device=device)
            model_input = torch.cat((obs_tensor, t_feature), dim=-1)

            # Inference only: no autograd graph is needed. Only `action` is used;
            # the other returned values are unpacked but ignored.
            with torch.no_grad():
                action, log_prob, pre_squashed_action, state_values = ac.get_action_and_value(
                    model_input, deterministic=True
                )

            new_states, rew, terminated, truncated, info = env.step(action.cpu().numpy())
            states = new_states

            if terminated or truncated:
                break
finally:
    # Always close the environment, even if the rollout raises, so it is not
    # left open when a later cell rebinds `env`.
    env.close()

## Model built with TorchRL

In [12]:
# Roll out the exported TorchRL policy on Pendulum-v1 with on-screen rendering.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

env = gym.make('Pendulum-v1', render_mode='human')
model = torch.export.load("saved_models/model.pt2").module().to(device)

num_episodes = 5

# Divisor used to scale the step index before it is appended to the observation.
# NOTE(review): presumably the episode step limit used during training — confirm.
TIME_FEATURE_SCALE = 200

try:
    for episode in range(num_episodes):
        states, info = env.reset()
        for t in count():
            # Model input = environment observation with the scaled step index
            # appended as one extra feature.
            obs_tensor = torch.from_numpy(states).float().to(device)
            t_feature = torch.tensor(
                [t / TIME_FEATURE_SCALE], dtype=torch.float32, device=device
            )
            model_input = torch.cat((obs_tensor, t_feature), dim=-1)

            # Inference only: skip autograd tracking so the result can be
            # converted to NumPy without carrying a graph.
            with torch.no_grad():
                action = model(observation=model_input)

            new_states, rew, terminated, truncated, info = env.step(action.cpu().numpy())
            states = new_states

            if terminated or truncated:
                break
finally:
    # Always close the environment, even if the rollout raises.
    env.close()

In [13]:
# Close the environment currently bound to `env` (the one created by the most
# recently executed rollout cell).
env.close()