In [1]:
import torch
import gymnasium as gym
from Dreamer import Dreamer
import matplotlib.pyplot as plt

In [2]:
# Pick the compute device: CUDA when PyTorch reports it as available,
# otherwise the CPU. Nothing here forces the CPU; the "debugging" wording
# only appears in the fallback message.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print({'cuda': "GPU selected", 'cpu': "CPU selected for debugging"}[device])

GPU selected


In [3]:
# All Dreamer hyperparameters live in one dict so they can be inspected or
# logged before the agent is built. Entries are grouped by the prefix of the
# keyword name; the meaning of each value is defined by Dreamer.__init__,
# which is not shown in this notebook.
dreamer_config = {
    # --- state / observation / action sizes ---
    "hidden_state_dims": 100,
    "latent_state_dims": (32, 32),
    "observation_dims": (96, 96),
    "action_dims": 3,
    # --- world-model optimiser settings, epochs and loss weights ---
    "world_model_lr": 2e-4,
    "world_model_betas": (0.9, 0.999),
    "world_model_eps": 1e-7,
    "WM_epochs": 100,
    "beta_prediction": 1.0,
    "beta_dynamics": 0.1,
    "beta_representation": 1.0,
    "critic_reward_buckets": 255,
    # --- encoder / decoder sizes ---
    "encoder_filter_num_1": 32,
    "encoder_filter_num_2": 64,
    "encoder_hidden_layer_nodes": 400,
    "decoder_filter_num_1": 64,
    "decoder_filter_num_2": 32,
    "decoder_hidden_layer_nodes": 100,
    # --- dynamics / reward / continue predictor sizes ---
    "dyn_pred_hidden_num_nodes_1": 100,
    "dyn_pred_hidden_num_nodes_2": 100,
    "rew_pred_hidden_num_nodes_1": 100,
    "rew_pred_hidden_num_nodes_2": 100,
    "cont_pred_hidden_num_nodes_1": 100,
    "cont_pred_hidden_num_nodes_2": 100,
    # --- actor / critic optimiser settings, epochs and layer sizes ---
    "actor_lr": 8e-5,
    "actor_betas": (0.9, 0.999),
    "actor_eps": 1e-7,
    "critic_lr": 8e-5,
    "critic_betas": (0.9, 0.999),
    "critic_eps": 1e-7,
    "AC_epochs": 10,
    "hidden_layer_actor_1_size": 100,
    "hidden_layer_actor_2_size": 100,
    "hidden_layer_critic_1_size": 100,
    "hidden_layer_critic_2_size": 100,
    # --- training-loop, replay and return settings ---
    "horizon": 15,
    "batch_size": 30,
    "training_iterations": 10,
    "random_iterations": 50,
    "nu": 0.995,
    "lambda_": 0.95,
    "gamma": 0.99,
    "buffer_size": 100000,
    "sequence_length": 30,
    # --- reproducibility / placement ---
    "seed": 42,
    "device": device,
}

dreamer_agent = Dreamer(**dreamer_config)

In [4]:
def count_parameters(module):
    """Return the total number of elements across ``module.parameters()``.

    Args:
        module: Any object exposing a ``parameters()`` iterable whose items
            provide ``numel()`` (e.g. a ``torch.nn.Module``).

    Returns:
        int: Sum of ``numel()`` over every parameter yielded by the module.
    """
    return sum(param.numel() for param in module.parameters())


# (label, module) pairs in the order they are reported. One shared helper
# replaces ten copies of the same summation expression, so a new component
# only needs one extra row here.
parameter_report = [
    ("WorldModel", dreamer_agent.world_model),
    ("Agent", dreamer_agent.agent),
    ("Actor", dreamer_agent.agent.actor),
    ("Critic", dreamer_agent.agent.critic),
    ("Encoder", dreamer_agent.world_model.encoder),
    ("Decoder", dreamer_agent.world_model.decoder),
    ("Sequence model", dreamer_agent.world_model.sequence_model),
    ("Dynamics predictor", dreamer_agent.world_model.dynamics_predictor),
    ("Reward predictor", dreamer_agent.world_model.reward_predictor),
    ("Continue predictor", dreamer_agent.world_model.continue_predictor),
]

for label, module in parameter_report:
    # Same line format as before: two spaces, dash, label, " params: ", count.
    print(f"  - {label} params: {count_parameters(module)}")

  - WorldModel params: 3319310
  - Agent params: 271561
  - Actor params: 123206
  - Critic params: 148355
  - Encoder params: 572816
  - Decoder params: 2013114
  - Sequence model params: 338700
  - Dynamics predictor params: 123624
  - Reward predictor params: 148355
  - Continue predictor params: 122701


In [4]:
# Build two separate instances of the same environment with identical
# settings: `env` is passed to train_dreamer as the first argument and
# `evaluation_env` as the second.
env_id = "CarRacing-v3"
env, evaluation_env = (gym.make(env_id, continuous=True) for _ in range(2))

In [5]:
# Run training; train_dreamer returns four sequences that the plotting cell
# below consumes.
# NOTE(review): the saved output of this cell shows the run aborting during
# the "Random Kickstart" phase with
#   RuntimeError: Tensors must have same number of dimensions: got 3 and 1
# The failing operation is inside Dreamer and is not visible from this
# notebook; until it is fixed, none of the four names below get defined.
(
    WM_loss_list,
    actor_loss_list,
    critic_loss_list,
    evaluation_list,
) = dreamer_agent.train_dreamer(env, evaluation_env)

Starting Training...
Starting Random Kickstart.


Kickstarting Dreamer Agent.:   0%|          | 0/50 [00:00<?, ?it/s]

                                                                   

RuntimeError: Tensors must have same number of dimensions: got 3 and 1

In [None]:
# Plot every series returned by train_dreamer in its own titled panel.
# Previously all four were drawn on one unlabeled axis, which made the lines
# indistinguishable and forced them onto a shared y-scale.
training_curves = [
    ("World-model loss", WM_loss_list),
    ("Actor loss", actor_loss_list),
    ("Critic loss", critic_loss_list),
    ("Evaluation", evaluation_list),
]

fig, axes = plt.subplots(2, 2, figsize=(12, 8))
for ax, (title, values) in zip(axes.flat, training_curves):
    ax.plot(values)
    # The x-axis is just the position in the returned list; what one entry
    # corresponds to (epoch, iteration, episode) is decided by train_dreamer.
    ax.set(title=title, xlabel="Logged entry (index)", ylabel="Value")

fig.tight_layout()
plt.show()