In [2]:
import traceback

import gymnasium as gym
import matplotlib.pyplot as plt
import torch

from Dreamer import Dreamer

In [3]:
# Debugging configuration: pin everything to the CPU so that memory issues on
# another device cannot be the cause of a failure.
device = "cpu"
print("CPU selected for debugging")

CPU selected for debugging


In [4]:
# Debug: build each Dreamer component on its own so that a failure can be
# attributed to a single class. Every stage (including its import) sits in its
# own try/except so one broken component does not stop the remaining checks.
print("Testing component creation to isolate the error...")

# Dimensions passed to more than one component below; defining them once keeps
# the WorldModel, Agent and Buffer checks from silently drifting apart.
HIDDEN_DIM = 200
LATENT_DIMS = (32, 32)
OBSERVATION_DIMS = (96, 96)
ACTION_DIM = 3


def count_params(module):
    """Return the total element count over ``module.parameters()``.

    ``module`` only needs a ``parameters()`` method that yields objects exposing
    ``numel()`` (for example a ``torch.nn.Module``).
    """
    return sum(p.numel() for p in module.parameters())


try:
    from WorldModel import WorldModel
    print("Testing WorldModel creation...")
    wm = WorldModel(
        hidden_dims=HIDDEN_DIM,
        latent_dims=LATENT_DIMS,
        observation_dims=OBSERVATION_DIMS,
        action_dims=ACTION_DIM,
        training_horizon=15,
        batch_size=50,
        WM_lr=2e-4,
        WM_betas=(0.9, 0.999),
        WM_eps=1e-7,
        beta_pred=1.0,
        beta_dyn=0.1,
        beta_rep=1.0,
        num_encoder_filters_1=32,
        num_encoder_filters_2=64,
        encoder_hidden_layer_nodes=400,
        num_decoder_filters_1=64,
        num_decoder_filters_2=32,
        decoder_hidden_layer_nodes=400,
        dyn_pred_hidden_num_nodes_1=400,
        dyn_pred_hidden_num_nodes_2=400,
        rew_pred_hidden_num_nodes_1=400,
        rew_pred_hidden_num_nodes_2=400,
        reward_buckets=255,
        cont_pred_hidden_num_nodes_1=400,
        cont_pred_hidden_num_nodes_2=400,
        device=device,
    )
    print(f"✓ WorldModel created successfully with {count_params(wm)} parameters")

    # Per-submodule breakdown. Attributes are looked up one at a time so that a
    # missing submodule still lets the earlier lines print before failing.
    for label, attr_name in (
        ("Encoder", "encoder"),
        ("Decoder", "decoder"),
        ("Sequence model", "sequence_model"),
        ("Dynamics predictor", "dynamics_predictor"),
        ("Reward predictor", "reward_predictor"),
        ("Continue predictor", "continue_predictor"),
    ):
        print(f"  - {label} params: {count_params(getattr(wm, attr_name))}")

except Exception as e:
    # Keep the one-line summary, but also dump the traceback: the message alone
    # does not say which line inside the component raised.
    print(f"✗ WorldModel failed: {e}")
    traceback.print_exc()

try:
    from Agent import Agent
    print("\nTesting Agent creation...")
    agent = Agent(
        action_dim=ACTION_DIM,
        latent_dims=LATENT_DIMS,
        hidden_state_dim=HIDDEN_DIM,
        HL_A1=400,
        HL_A2=400,
        HL_C1=400,
        HL_C2=400,
        critic_buckets=255,
        A_lr=8e-5,
        A_betas=(0.9, 0.999),
        A_eps=1e-7,
        C_lr=8e-5,
        C_betas=(0.9, 0.999),
        C_eps=1e-7,
        nu=0.995,
        lambda_=0.95,
        gamma=0.99,
        device=device,
    )
    print(f"✓ Agent created successfully with {count_params(agent)} parameters")
    print(f"  - Actor params: {count_params(agent.actor)}")
    print(f"  - Critic params: {count_params(agent.critic)}")

except Exception as e:
    print(f"✗ Agent failed: {e}")
    traceback.print_exc()

try:
    from Buffer import Buffer
    print("\nTesting Buffer creation...")
    buffer = Buffer(
        buffer_size=500000,
        sequence_length=50,
        # Buffer's keyword is `action_size`; WorldModel uses `action_dims` and
        # Agent uses `action_dim` for the same value.
        action_size=ACTION_DIM,
        observation_dims=OBSERVATION_DIMS,
        device=device,
    )
    print("✓ Buffer created successfully")

except Exception as e:
    print(f"✗ Buffer failed: {e}")
    traceback.print_exc()

print("\nComponent testing complete!")

Testing component creation to isolate the error...
Testing WorldModel creation...
✓ WorldModel created successfully with 11325310 parameters
  - Encoder params: 612816
  - Decoder params: 7920214
  - Sequence model params: 737400
  - Dynamics predictor params: 651424
  - Reward predictor params: 752655
  - Continue predictor params: 650801

Testing Agent creation...
✓ Agent created successfully with 1405461 parameters
  - Actor params: 652806
  - Critic params: 752655

Testing Buffer creation...
✓ Buffer created successfully

Component testing complete!

In [5]:
# Build the full Dreamer agent. All arguments are passed by keyword; they are
# grouped below by the component their parameter names refer to. The meaning of
# each value is defined by Dreamer.__init__, which is not part of this notebook.
dreamer_agent = Dreamer(
    # --- state / observation / action sizes ---
    hidden_state_dims=200,
    latent_state_dims=(32, 32),
    observation_dims=(96, 96),
    action_dims=3,
    # --- world model: optimizer, epochs and loss weights ---
    world_model_lr=2e-4,
    world_model_betas=(0.9, 0.999),
    world_model_eps=1e-7,
    WM_epochs=100,
    beta_prediction=1.0,
    beta_dynamics=0.1,
    beta_representation=1.0,
    # --- world model: encoder / decoder sizes ---
    encoder_filter_num_1=32,
    encoder_filter_num_2=64,
    encoder_hidden_layer_nodes=400,
    decoder_filter_num_1=64,
    decoder_filter_num_2=32,
    decoder_hidden_layer_nodes=400,
    # --- world model: dynamics / reward / continue predictor sizes ---
    dyn_pred_hidden_num_nodes_1=400,
    dyn_pred_hidden_num_nodes_2=400,
    rew_pred_hidden_num_nodes_1=400,
    rew_pred_hidden_num_nodes_2=400,
    cont_pred_hidden_num_nodes_1=400,
    cont_pred_hidden_num_nodes_2=400,
    # --- actor ---
    actor_lr=8e-5,
    actor_betas=(0.9, 0.999),
    actor_eps=1e-7,
    hidden_layer_actor_1_size=400,
    hidden_layer_actor_2_size=400,
    # --- critic ---
    critic_lr=8e-5,
    critic_betas=(0.9, 0.999),
    critic_eps=1e-7,
    hidden_layer_critic_1_size=400,
    hidden_layer_critic_2_size=400,
    critic_reward_buckets=255,
    # --- training schedule ---
    AC_epochs=100,
    horizon=15,
    batch_size=50,
    training_iterations=10,
    random_iterations=50,
    # --- remaining scalar hyperparameters (semantics live inside Dreamer) ---
    nu=0.995,
    lambda_=0.95,
    gamma=0.99,
    # --- replay buffer ---
    buffer_size=500_000,
    sequence_length=50,
    # --- reproducibility and placement ---
    seed=42,
    device=device,
)

In [6]:
# Create two separate CarRacing environments from the same id and options.
# `env` and `evaluation_env` are later passed, in that order, to
# `dreamer_agent.train_dreamer`.
env_id = "CarRacing-v3"
env, evaluation_env = (gym.make(env_id, continuous=True) for _ in range(2))

In [None]:
# Run training. `train_dreamer` returns four values, unpacked here in the order
# it returns them; they are plotted in the next cell.
(
    WM_loss_list,
    actor_loss_list,
    critic_loss_list,
    evaluation_list,
) = dreamer_agent.train_dreamer(env, evaluation_env)

In [None]:
# Plot each training series in its own titled, labelled panel. Putting all four
# curves on a single axis without a legend makes the lines indistinguishable
# and forces unrelated quantities onto one shared y-scale.
training_curves = {
    "World model loss": WM_loss_list,
    "Actor loss": actor_loss_list,
    "Critic loss": critic_loss_list,
    "Evaluation": evaluation_list,
}

fig, axes = plt.subplots(2, 2, figsize=(12, 8))
for ax, (title, series) in zip(axes.flat, training_curves.items()):
    ax.plot(series)
    # The x position is simply the index within the returned list; what one
    # entry corresponds to (epoch, iteration, episode) is decided inside
    # Dreamer.train_dreamer — TODO confirm and relabel accordingly.
    ax.set(title=title, xlabel="Index in returned list", ylabel="Value")

# Prevent titles and axis labels of neighbouring panels from overlapping.
fig.tight_layout()
plt.show()