In [None]:
import gymnasium as gym
import numpy as np
import os
from stable_baselines3 import DDPG
from stable_baselines3.common.callbacks import CheckpointCallback, BaseCallback
from stable_baselines3.common.noise import NormalActionNoise
from stable_baselines3.common.monitor import Monitor

# Custom callback to log rewards
class RewardLogger(BaseCallback):
    """Callback that writes one CSV row per finished episode.

    Each row is ``<cumulative episode steps>,<episode reward>``. Episode
    statistics are read from the ``"episode"`` entry of the per-environment
    ``info`` dicts, so the environment must be wrapped so that this entry is
    present (the script in this file uses ``Monitor``); episodes without it
    are silently skipped.

    Args:
        log_file_path: Destination CSV file. It is truncated on creation and
            its parent directory is created if needed.
        verbose: When greater than 0, every logged episode is also printed.
    """

    def __init__(self, log_file_path, verbose=0):
        super().__init__(verbose)
        # os.path.dirname() returns "" for a bare filename and
        # os.makedirs("") raises, so only create a directory when there is one.
        log_dir = os.path.dirname(log_file_path)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)
        self.log_file = open(log_file_path, "w")
        self.log_file.write("timestep,reward\n")  # CSV header
        self.episode_rewards = []
        self.episode_lengths = []
        # Sum of the lengths of all episodes logged so far.
        self.total_timesteps = 0

    def _on_step(self) -> bool:
        """Log every episode that ended on this step; always continue training."""
        dones = self.locals.get("dones")
        infos = self.locals.get("infos")
        # `dones` is an array: test for presence explicitly, because the truth
        # value of an array with more than one element is ambiguous and raises.
        if dones is None or infos is None:
            return True

        # Walk every sub-environment rather than only index 0 so that
        # multi-environment runs are logged too.
        for done, info in zip(dones, infos):
            if not done:
                continue
            episode_info = info.get("episode")
            if not episode_info:
                continue

            self.episode_rewards.append(episode_info["r"])
            self.episode_lengths.append(episode_info["l"])
            self.total_timesteps += episode_info["l"]

            self.log_file.write(f"{self.total_timesteps},{episode_info['r']}\n")
            # Flush per episode so the CSV stays usable if training is killed.
            self.log_file.flush()

            if self.verbose > 0:
                print(f"Timestep: {self.total_timesteps}, Episode Reward: {episode_info['r']}")
        return True

    def close(self):
        """Close the CSV file; calling this more than once is harmless."""
        if self.log_file is not None:
            self.log_file.close()


# Wrap the environment in Monitor so that finished episodes carry an
# "episode" entry in their info dict, which RewardLogger reads.
env = gym.make("HalfCheetah-v5")
env = Monitor(env)

# Gaussian exploration noise, one independent component per action dimension.
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

model = DDPG(
    "MlpPolicy",
    env,
    action_noise=action_noise,
    learning_rate=1e-3,
    buffer_size=700000,
    learning_starts=10000,
    batch_size=256,
    tau=0.005,
    gamma=0.99,
    train_freq=(1, "episode"),
    gradient_steps=-1,
    verbose=1,
)

# Directory for model checkpoints. The reward CSV goes to ./logs/, which
# RewardLogger creates itself.
os.makedirs("./DDPGlogs/", exist_ok=True)

# Initialize our custom callback to log rewards
reward_logger = RewardLogger(log_file_path="./logs/ddpg_rewards.csv", verbose=1)

# Create a callback that saves the model every 100,000 steps
checkpoint_callback = CheckpointCallback(
    save_freq=100000,
    save_path='./DDPGlogs/',
    name_prefix='ddpg_halfcheetah',
)

# Combine callbacks
callbacks = [checkpoint_callback, reward_logger]

try:
    # Train the model with the callbacks
    model.learn(total_timesteps=700000, callback=callbacks)

    # Save the final model
    model.save("ddpg_halfcheetah_final")
finally:
    # Release the CSV handle and the simulator even if training fails or is
    # interrupted, so the partial reward log is closed cleanly.
    reward_logger.close()
    env.close()

Using cuda device
Wrapping the env in a DummyVecEnv.
Timestep: 1000, Episode Reward: -365.123669
Timestep: 2000, Episode Reward: -332.606008
Timestep: 3000, Episode Reward: -83.56559
Timestep: 4000, Episode Reward: -272.759661
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -264     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 3523     |
|    time_elapsed    | 1        |
|    total_timesteps | 4000     |
---------------------------------
Timestep: 5000, Episode Reward: -224.622577
Timestep: 6000, Episode Reward: -274.958887
Timestep: 7000, Episode Reward: -312.808609
Timestep: 8000, Episode Reward: -241.482016
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -263     |
| time/              |          |
|    episodes        | 8        |
|    fps             | 3572     |
|    time_elapsed  

Collecting gymnasium
  Downloading Gymnasium-0.26.3-py3-none-any.whl (836 kB)
     |████████████████████████████████| 836 kB 5.3 MB/s            
[?25hCollecting gymnasium-notices>=0.0.1
  Downloading gymnasium_notices-0.0.1-py3-none-any.whl (2.8 kB)
Installing collected packages: gymnasium-notices, gymnasium
Successfully installed gymnasium-0.26.3 gymnasium-notices-0.0.1
