In [None]:
import gymnasium as gym
import numpy as np
import os
from stable_baselines3 import DDPG
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.noise import NormalActionNoise

class TrainingLogger(BaseCallback):
    """Callback that checkpoints the model and logs one row per finished episode.

    Every row appended to ``log_filepath`` has the columns
    ``timestep,reward,distance,control_cost``. Only index 0 of the
    ``rewards``/``infos``/``dones`` entries found in ``self.locals`` is read,
    so the statistics describe the first environment only.

    Args:
        log_filepath: Text file receiving the per-episode rows. It is
            overwritten with a fresh header when training starts.
        save_dir: Directory that receives the periodic checkpoints; created
            if it does not exist.
        save_freq: Minimum number of timesteps between two checkpoints.
        model_prefix: Checkpoint file-name prefix; the current timestep is
            appended after an underscore.
    """

    def __init__(self, log_filepath="halfcheetah_ddpg_training.txt", save_dir="model_checkpoints",
                 save_freq=50000, model_prefix="halfcheetah_ddpg"):
        super().__init__(verbose=0)
        self.log_filepath = log_filepath
        self.save_dir = save_dir
        self.save_freq = save_freq
        self.model_prefix = model_prefix
        # Running totals for the episode currently in progress.
        self.episode_total_reward = 0.0
        self.control_costs = 0.0
        # Timestep at which the most recent checkpoint was written.
        self.last_save = 0

        # exist_ok=True replaces the check-then-create sequence, which could
        # fail if the directory appeared between the check and the creation.
        os.makedirs(save_dir, exist_ok=True)

    def _on_step(self):
        """Checkpoint if due, accumulate step metrics, and log finished episodes.

        Returns:
            Always ``True``.
        """
        # Save model at regular intervals
        if self.num_timesteps >= self.last_save + self.save_freq:
            try:
                model_path = os.path.join(self.save_dir,
                                          f"{self.model_prefix}_{self.num_timesteps}")
                self.model.save(model_path)
                print(f"Model saved at {model_path}")
                self.last_save = self.num_timesteps
            except Exception as e:
                # Best effort: a failed checkpoint is reported but must not abort training.
                print(f"Error saving model at timestep {self.num_timesteps}: {e}")

        # Look each key up once; any of them may be absent from self.locals.
        rewards = self.locals.get("rewards")
        infos = self.locals.get("infos")
        dones = self.locals.get("dones")

        # Cast to a Python float so the running total does not inherit the
        # dtype of the reward entry (presumably a NumPy scalar -- TODO confirm).
        current_reward = float(rewards[0]) if rewards is not None else 0.0
        environment_info = infos[0] if infos is not None else {}
        episode_done = bool(dones[0]) if dones is not None else False

        self.episode_total_reward += current_reward

        # Track control costs as a positive magnitude
        if "reward_ctrl" in environment_info:
            self.control_costs += abs(float(environment_info["reward_ctrl"]))

        if episode_done:
            # Falls back to 0 when the info dict carries no "x_position".
            distance_traveled = environment_info.get("x_position", 0)
            with open(self.log_filepath, "a") as log_file:
                log_file.write(f"{self.num_timesteps},{self.episode_total_reward:.4f},{distance_traveled:.4f},{self.control_costs:.4f}\n")
            self.episode_total_reward = 0.0
            self.control_costs = 0.0
        return True

    def _on_training_start(self):
        """Overwrite the log file with the column header."""
        with open(self.log_filepath, "w") as log_file:
            log_file.write("timestep,reward,distance,control_cost\n")

def train_halfcheetah_with_ddpg():
    """Train a DDPG agent on ``HalfCheetah-v4`` and save the resulting model.

    Builds the environment, Gaussian action noise, the DDPG model and a
    ``TrainingLogger`` callback, trains for 1,000,000 timesteps and saves the
    final model as ``halfcheetah_ddpg_final``.

    Any ``Exception`` raised along the way is caught and reported on stdout
    instead of being propagated. The environment is closed in all cases.
    """
    cheetah_environment = None
    try:
        # Setup environment - using standard environment without reward modification
        print("Creating HalfCheetah environment...")
        cheetah_environment = gym.make("HalfCheetah-v4")

        # Action noise for exploration (important for DDPG)
        n_actions = cheetah_environment.action_space.shape[0]
        action_noise = NormalActionNoise(
            mean=np.zeros(n_actions),
            sigma=0.1 * np.ones(n_actions)
        )

        # Initialize DDPG model; the hyperparameters used are spelled out explicitly
        print("Initializing DDPG model...")

        training_model = DDPG(
            policy="MlpPolicy",
            env=cheetah_environment,
            action_noise=action_noise,
            buffer_size=100000,
            learning_rate=1e-3,
            batch_size=256,
            gamma=0.99,
            verbose=1
        )

        # Setup logger with checkpoint saving capability
        print("Setting up training logger...")
        progress_logger = TrainingLogger(
            log_filepath="halfcheetah_ddpg_training.txt",
            save_dir="model_checkpoints",
            save_freq=50000,
            model_prefix="halfcheetah_ddpg"
        )

        # Train the model
        print("Starting training for 1,000,000 timesteps...")
        training_model.learn(total_timesteps=1000000, callback=progress_logger)

        # Save the final model
        print("Saving final trained model...")
        training_model.save("halfcheetah_ddpg_final")

        print("Training completed successfully!")
        print(f"Training logs saved to {progress_logger.log_filepath}")
        print(f"Model checkpoints saved in {progress_logger.save_dir}")

    except Exception as e:
        print(f"Training error occurred: {str(e)}")
        print("Training was interrupted, but intermediate models should be saved.")
    finally:
        # Release the environment whether training finished or failed;
        # the None guard covers a failure inside gym.make itself.
        if cheetah_environment is not None:
            cheetah_environment.close()

# Entry point: start the training run when this code executes as the main module.
if __name__ == "__main__":
    train_halfcheetah_with_ddpg()

Creating HalfCheetah environment...


  logger.deprecation(


Initializing DDPG model...
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Setting up training logger...
Starting training for 1,000,000 timesteps...
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -478     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 13       |
|    time_elapsed    | 291      |
|    total_timesteps | 4000     |
| train/             |          |
|    actor_loss      | 0.357    |
|    critic_loss     | 0.224    |
|    learning_rate   | 0.001    |
|    n_updates       | 3899     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -430     |
| time/              |          |
|    episodes        | 8        |
|    fps             | 13       |
|    time_elapsed    | 590      |
|    total_timesteps 

In [None]:
import gymnasium as gym
import numpy as np
import os
from stable_baselines3 import DDPG
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.noise import NormalActionNoise

class TrainingLogger(BaseCallback):
    """Callback that checkpoints the model and logs one row per finished episode.

    Every row appended to ``log_filepath`` has the columns
    ``timestep,reward,distance,control_cost``. Only index 0 of the
    ``rewards``/``infos``/``dones`` entries found in ``self.locals`` is read,
    so the statistics describe the first environment only.

    Args:
        log_filepath: Text file receiving the per-episode rows. It is
            overwritten with a fresh header when training starts.
        save_dir: Directory that receives the periodic checkpoints; created
            if it does not exist.
        save_freq: Minimum number of timesteps between two checkpoints.
        model_prefix: Checkpoint file-name prefix; the current timestep is
            appended after an underscore.
    """

    def __init__(self, log_filepath="halfcheetah_ddpg_training2.txt", save_dir="model_checkpoints",
                 save_freq=50000, model_prefix="halfcheetah_ddpg2"):
        super().__init__(verbose=0)
        self.log_filepath = log_filepath
        self.save_dir = save_dir
        self.save_freq = save_freq
        self.model_prefix = model_prefix
        # Running totals for the episode currently in progress.
        self.episode_total_reward = 0.0
        self.control_costs = 0.0
        # Timestep at which the most recent checkpoint was written.
        self.last_save = 0

        # exist_ok=True replaces the check-then-create sequence, which could
        # fail if the directory appeared between the check and the creation.
        os.makedirs(save_dir, exist_ok=True)

    def _on_step(self):
        """Checkpoint if due, accumulate step metrics, and log finished episodes.

        Returns:
            Always ``True``.
        """
        # Save model at regular intervals
        if self.num_timesteps >= self.last_save + self.save_freq:
            try:
                model_path = os.path.join(self.save_dir,
                                          f"{self.model_prefix}_{self.num_timesteps}")
                self.model.save(model_path)
                print(f"Model saved at {model_path}")
                self.last_save = self.num_timesteps
            except Exception as e:
                # Best effort: a failed checkpoint is reported but must not abort training.
                print(f"Error saving model at timestep {self.num_timesteps}: {e}")

        # Look each key up once; any of them may be absent from self.locals.
        rewards = self.locals.get("rewards")
        infos = self.locals.get("infos")
        dones = self.locals.get("dones")

        # Cast to a Python float so the running total does not inherit the
        # dtype of the reward entry (presumably a NumPy scalar -- TODO confirm).
        current_reward = float(rewards[0]) if rewards is not None else 0.0
        environment_info = infos[0] if infos is not None else {}
        episode_done = bool(dones[0]) if dones is not None else False

        self.episode_total_reward += current_reward

        # Track control costs as a positive magnitude
        if "reward_ctrl" in environment_info:
            self.control_costs += abs(float(environment_info["reward_ctrl"]))

        if episode_done:
            # Falls back to 0 when the info dict carries no "x_position".
            distance_traveled = environment_info.get("x_position", 0)
            with open(self.log_filepath, "a") as log_file:
                log_file.write(f"{self.num_timesteps},{self.episode_total_reward:.4f},{distance_traveled:.4f},{self.control_costs:.4f}\n")
            self.episode_total_reward = 0.0
            self.control_costs = 0.0
        return True

    def _on_training_start(self):
        """Overwrite the log file with the column header."""
        with open(self.log_filepath, "w") as log_file:
            log_file.write("timestep,reward,distance,control_cost\n")

def train_halfcheetah_with_ddpg():
    """Train a DDPG agent on ``HalfCheetah-v4`` and save the resulting model.

    Builds the environment, Gaussian action noise, the DDPG model and a
    ``TrainingLogger`` callback, trains for 1,000,000 timesteps and saves the
    final model as ``halfcheetah_ddpg_final2``.

    Any ``Exception`` raised along the way is caught and reported on stdout
    instead of being propagated. The environment is closed in all cases.
    """
    cheetah_environment = None
    try:
        # Setup environment - using standard environment without reward modification
        print("Creating HalfCheetah environment...")
        cheetah_environment = gym.make("HalfCheetah-v4")

        # Action noise for exploration (important for DDPG)
        n_actions = cheetah_environment.action_space.shape[0]
        action_noise = NormalActionNoise(
            mean=np.zeros(n_actions),
            sigma=0.1 * np.ones(n_actions)
        )

        # Initialize DDPG model; the hyperparameters used are spelled out explicitly
        print("Initializing DDPG model...")

        training_model = DDPG(
            policy="MlpPolicy",
            env=cheetah_environment,
            action_noise=action_noise,
            buffer_size=100000,
            learning_rate=1e-3,
            batch_size=256,
            gamma=0.99,
            verbose=1
        )

        # Setup logger with checkpoint saving capability
        print("Setting up training logger...")
        progress_logger = TrainingLogger(
            log_filepath="halfcheetah_ddpg_training2.txt",
            save_dir="model_checkpoints",
            save_freq=50000,
            model_prefix="halfcheetah_ddpg2"
        )

        # Train the model
        print("Starting training for 1,000,000 timesteps...")
        training_model.learn(total_timesteps=1000000, callback=progress_logger)

        # Save the final model
        print("Saving final trained model...")
        training_model.save("halfcheetah_ddpg_final2")

        print("Training completed successfully!")
        print(f"Training logs saved to {progress_logger.log_filepath}")
        print(f"Model checkpoints saved in {progress_logger.save_dir}")

    except Exception as e:
        print(f"Training error occurred: {str(e)}")
        print("Training was interrupted, but intermediate models should be saved.")
    finally:
        # Release the environment whether training finished or failed;
        # the None guard covers a failure inside gym.make itself.
        if cheetah_environment is not None:
            cheetah_environment.close()

# Entry point: start the training run when this code executes as the main module.
if __name__ == "__main__":
    train_halfcheetah_with_ddpg()

In [1]:
import gymnasium as gym
import numpy as np
import os
from stable_baselines3 import DDPG
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.noise import NormalActionNoise

class TrainingLogger(BaseCallback):
    """Callback that checkpoints the model and logs one row per finished episode.

    Every row appended to ``log_filepath`` has the columns
    ``timestep,reward,distance,control_cost``. Only index 0 of the
    ``rewards``/``infos``/``dones`` entries found in ``self.locals`` is read,
    so the statistics describe the first environment only.

    Args:
        log_filepath: Text file receiving the per-episode rows. It is
            overwritten with a fresh header when training starts.
        save_dir: Directory that receives the periodic checkpoints; created
            if it does not exist.
        save_freq: Minimum number of timesteps between two checkpoints.
        model_prefix: Checkpoint file-name prefix; the current timestep is
            appended after an underscore.
    """

    def __init__(self, log_filepath="halfcheetah_ddpg_training3.txt", save_dir="model_checkpoints",
                 save_freq=50000, model_prefix="halfcheetah_ddpg3"):
        super().__init__(verbose=0)
        self.log_filepath = log_filepath
        self.save_dir = save_dir
        self.save_freq = save_freq
        self.model_prefix = model_prefix
        # Running totals for the episode currently in progress.
        self.episode_total_reward = 0.0
        self.control_costs = 0.0
        # Timestep at which the most recent checkpoint was written.
        self.last_save = 0

        # exist_ok=True replaces the check-then-create sequence, which could
        # fail if the directory appeared between the check and the creation.
        os.makedirs(save_dir, exist_ok=True)

    def _on_step(self):
        """Checkpoint if due, accumulate step metrics, and log finished episodes.

        Returns:
            Always ``True``.
        """
        # Save model at regular intervals
        if self.num_timesteps >= self.last_save + self.save_freq:
            try:
                model_path = os.path.join(self.save_dir,
                                          f"{self.model_prefix}_{self.num_timesteps}")
                self.model.save(model_path)
                print(f"Model saved at {model_path}")
                self.last_save = self.num_timesteps
            except Exception as e:
                # Best effort: a failed checkpoint is reported but must not abort training.
                print(f"Error saving model at timestep {self.num_timesteps}: {e}")

        # Look each key up once; any of them may be absent from self.locals.
        rewards = self.locals.get("rewards")
        infos = self.locals.get("infos")
        dones = self.locals.get("dones")

        # Cast to a Python float so the running total does not inherit the
        # dtype of the reward entry (presumably a NumPy scalar -- TODO confirm).
        current_reward = float(rewards[0]) if rewards is not None else 0.0
        environment_info = infos[0] if infos is not None else {}
        episode_done = bool(dones[0]) if dones is not None else False

        self.episode_total_reward += current_reward

        # Track control costs as a positive magnitude
        if "reward_ctrl" in environment_info:
            self.control_costs += abs(float(environment_info["reward_ctrl"]))

        if episode_done:
            # Falls back to 0 when the info dict carries no "x_position".
            distance_traveled = environment_info.get("x_position", 0)
            with open(self.log_filepath, "a") as log_file:
                log_file.write(f"{self.num_timesteps},{self.episode_total_reward:.4f},{distance_traveled:.4f},{self.control_costs:.4f}\n")
            self.episode_total_reward = 0.0
            self.control_costs = 0.0
        return True

    def _on_training_start(self):
        """Overwrite the log file with the column header."""
        with open(self.log_filepath, "w") as log_file:
            log_file.write("timestep,reward,distance,control_cost\n")

def train_halfcheetah_with_ddpg():
    """Train a DDPG agent on ``HalfCheetah-v4`` and save the resulting model.

    Builds the environment, Gaussian action noise, the DDPG model and a
    ``TrainingLogger`` callback, trains for 1,000,000 timesteps and saves the
    final model as ``halfcheetah_ddpg_final3``.

    Any ``Exception`` raised along the way is caught and reported on stdout
    instead of being propagated. The environment is closed in all cases.
    """
    cheetah_environment = None
    try:
        # Setup environment - using standard environment without reward modification
        print("Creating HalfCheetah environment...")
        cheetah_environment = gym.make("HalfCheetah-v4")

        # Action noise for exploration (important for DDPG)
        n_actions = cheetah_environment.action_space.shape[0]
        action_noise = NormalActionNoise(
            mean=np.zeros(n_actions),
            sigma=0.1 * np.ones(n_actions)
        )

        # Initialize DDPG model; the hyperparameters used are spelled out explicitly
        print("Initializing DDPG model...")

        training_model = DDPG(
            policy="MlpPolicy",
            env=cheetah_environment,
            action_noise=action_noise,
            buffer_size=100000,
            learning_rate=1e-3,
            batch_size=256,
            gamma=0.99,
            verbose=1
        )

        # Setup logger with checkpoint saving capability
        print("Setting up training logger...")
        progress_logger = TrainingLogger(
            log_filepath="halfcheetah_ddpg_training3.txt",
            save_dir="model_checkpoints",
            save_freq=50000,
            model_prefix="halfcheetah_ddpg3"
        )

        # Train the model
        print("Starting training for 1,000,000 timesteps...")
        training_model.learn(total_timesteps=1000000, callback=progress_logger)

        # Save the final model. The "3" suffix matches this run's log file and
        # checkpoint prefix; the previous "final2" name belonged to another run
        # and would have overwritten that run's final model.
        print("Saving final trained model...")
        training_model.save("halfcheetah_ddpg_final3")

        print("Training completed successfully!")
        print(f"Training logs saved to {progress_logger.log_filepath}")
        print(f"Model checkpoints saved in {progress_logger.save_dir}")

    except Exception as e:
        print(f"Training error occurred: {str(e)}")
        print("Training was interrupted, but intermediate models should be saved.")
    finally:
        # Release the environment whether training finished or failed;
        # the None guard covers a failure inside gym.make itself.
        if cheetah_environment is not None:
            cheetah_environment.close()

# Entry point: start the training run when this code executes as the main module.
if __name__ == "__main__":
    train_halfcheetah_with_ddpg()

Creating HalfCheetah environment...


  logger.deprecation(


Initializing DDPG model...
Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Setting up training logger...
Starting training for 1,000,000 timesteps...
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -200     |
| time/              |          |
|    episodes        | 4        |
|    fps             | 135      |
|    time_elapsed    | 29       |
|    total_timesteps | 4000     |
| train/             |          |
|    actor_loss      | -3.98    |
|    critic_loss     | 0.266    |
|    learning_rate   | 0.001    |
|    n_updates       | 3899     |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1e+03    |
|    ep_rew_mean     | -57.5    |
| time/              |          |
|    episodes        | 8        |
|    fps             | 120      |
|    time_elapsed    | 66       |
|    total_timesteps 