DDPG, TD3, SAC, A2C, PPO for continuous actions

# Import Packages

In [None]:
import gymnasium as gym
import matplotlib.pyplot as plt
from IPython import display
from stable_baselines3 import SAC, DDPG, TD3,PPO,A2C
from stable_baselines3.common.evaluation import evaluate_policy

# Create environment

In [None]:
# Create the environment that the single-agent cells below train on.
# Alternative kept for reference (discrete lander with an on-screen window):
# env = gym.make("LunarLander-v2", render_mode="human")
# NOTE(review): render_mode="" is an empty string rather than None; presumably
# it is meant to disable rendering — confirm against the gymnasium version in use.
env = gym.make("LunarLanderContinuous", render_mode="")
# The bare string below is a no-op expression kept as a reference list of
# environment names.
"""
{'Swimmer', 'Taxi', 'InvertedPendulum',
 'Walker2d', 'Ant', 'Blackjack',
  'Pendulum', 'LunarLanderContinuous',
   'Pusher', 'CliffWalking', 'Acrobot',
    'MountainCar', 'BipedalWalker',
     'GymV21Environment', 'LunarLander',
      'InvertedDoublePendulum', 'FrozenLake8x8',
       'HumanoidStandup', 'Hopper', 'FrozenLake',
        'CarRacing', 'HalfCheetah', 'BipedalWalkerHardcore',
        'CartPole', 'MountainCarContinuous', 'Humanoid',
         'GymV26Environment', 'Reacher'}
"""

# Specify Agent

In [None]:
# Instantiate the agent (SAC variant).
# Every agent cell assigns to the same `model` name, so the cell executed last
# decides which algorithm the training cell uses.
model = SAC(
    "MlpPolicy",
    env,
    verbose=1,
    # Optional hyperparameter overrides, currently disabled:
    #buffer_size=int(1e3),
    #learning_rate=1e-3,
    #gamma=0.95,
    #batch_size=256,
    tensorboard_log="./output/DRL/tensorboard/")

In [None]:
# TD3 variant of the agent; assigns to the shared `model` name.
model = TD3(
    "MlpPolicy",
    env,
    verbose=1,
    tensorboard_log="./output/DRL/tensorboard/",
)

In [None]:
# DDPG variant of the agent; assigns to the shared `model` name.
model = DDPG(
    "MlpPolicy",
    env,
    verbose=1,
    tensorboard_log="./output/DRL/tensorboard/",
)

In [None]:
# A2C variant of the agent; assigns to the shared `model` name.
model = A2C(
    "MlpPolicy",
    env,
    verbose=1,
    tensorboard_log="./output/DRL/tensorboard/",
)

In [None]:
# PPO variant of the agent; assigns to the shared `model` name.
# Logs to the same TensorBoard directory as the other agents so that all runs
# can be compared side by side (previously this one wrote to "./PPO").
model = PPO(
    "MlpPolicy",
    env,
    verbose=1,
    tensorboard_log="./output/DRL/tensorboard/",
)

# Train the agent

In [None]:

# Put the environment into its initial state before training starts.
env.reset()

# Train whichever agent is currently bound to `model`, with a progress bar.
# 2e6 steps for the full run; 2e5 was used for a shorter one.
model.learn(total_timesteps=2_000_000, log_interval=1, progress_bar=True)



# RUN ALL IN PARALLEL

In [None]:
from threading import Thread
import gymnasium as gym
import matplotlib.pyplot as plt
from IPython import display
from stable_baselines3 import SAC, DDPG, TD3,PPO,A2C

def get_env():
    """Create and return a new "LunarLanderContinuous" environment.

    Each call builds a separate environment object, so every agent can be
    given its own instance.
    """
    environment = gym.make("LunarLanderContinuous", render_mode="")
    return environment

def do(model):
    """Train ``model`` for one million time steps, then report completion.

    Args:
        model: Any object exposing a ``learn`` method that accepts the
            ``total_timesteps``, ``progress_bar`` and ``log_interval``
            keyword arguments.
    """
    learn_settings = {
        "total_timesteps": int(1e6),
        "progress_bar": False,
        "log_interval": 1,
    }
    model.learn(**learn_settings)
    print("done：", model)

# One agent per algorithm, keyed by the algorithm's class name. Every agent
# gets its own environment instance and the same logging settings.
agents = {
    algorithm.__name__: algorithm(
        "MlpPolicy",
        get_env(),
        verbose=0,
        tensorboard_log="./output/DRL/tensorboard/",
        stats_window_size=10,
    )
    for algorithm in (SAC, TD3, DDPG, A2C, PPO)
}

# Launch one training thread per agent.
threads = []
for name, agent in agents.items():
    print(name)
    t = Thread(target=do, args=(agent,))
    t.start()
    threads.append(t)

# Wait for every training thread to finish; without the joins the message
# below would be printed immediately after the threads were started.
for t in threads:
    t.join()

print("all done！")

# FIGURES

In [None]:
import numpy as np

def average_filter(data, window_size):
    """Smooth a 1-D series with a centred moving average.

    The window for sample ``i`` starts ``window_size // 2`` positions before
    ``i`` and spans ``window_size`` samples. Near both ends the window is
    truncated to the samples that actually exist, so the borders hold real
    averages instead of zeros.

    Args:
        data (np.ndarray): Input series, shape (n,).
        window_size (int): Number of samples per averaging window (>= 1).

    Returns:
        np.ndarray: Smoothed series as floats, shape (n,).

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    # Work in floating point: writing means into an integer array would
    # silently truncate them.
    samples = np.asarray(data, dtype=float)
    count = len(samples)
    half = window_size // 2

    result = np.empty(count, dtype=float)
    for i in range(count):
        start = max(0, i - half)
        stop = min(count, i - half + window_size)
        result[i] = samples[start:stop].mean()
    return result

def expo_filter(data, alpha):
    """Smooth a 1-D series with exponential (first-order recursive) filtering.

    The first output equals the first input; every later output is
    ``alpha * data[i] + (1 - alpha) * previous_output``.

    Args:
        data (np.ndarray): Input series, shape (n,).
        alpha (float): Smoothing factor, expected in (0, 1].

    Returns:
        np.ndarray: Smoothed series as floats, shape (n,). An empty input
        yields an empty result.
    """
    # Work in floating point: writing blended values into an integer array
    # would silently truncate them.
    samples = np.asarray(data, dtype=float)
    result = np.empty_like(samples)
    if samples.size == 0:
        return result

    result[0] = samples[0]
    for i in range(1, len(samples)):
        result[i] = alpha * samples[i] + (1 - alpha) * result[i - 1]
    return result

def find_nearest(array,v):
    """Return the index of the element of ``array`` closest to ``v``.

    Args:
        array (np.ndarray): Input values, shape (n,).
        v (float): Target value.

    Returns:
        int: Index of the nearest element (the first one on ties).
    """
    distances = np.abs(array - v)
    nearest_index = np.argmin(distances)
    return nearest_index

In [None]:
import json
import random
import os
import numpy as np
import matplotlib.pyplot as plt



def load(filepath):
    """Read a JSON file of records and return two of its columns as arrays.

    The file must contain a list of records. Element 1 of every record is
    collected as the time step and element 2 as the reward value.
    NOTE(review): this looks like the [wall_time, step, value] layout of a
    TensorBoard scalar export — confirm.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        tuple[np.ndarray, np.ndarray]: ``(timestamps, rewards)``.
    """
    with open(filepath, 'r') as fp:
        records = json.loads(fp.read())

    steps = []
    for record in records:
        steps.append(record[1])
    values = []
    for record in records:
        values.append(record[2])
    return np.array(steps), np.array(values)

# Walk a directory tree and plot every JSON series found in it.
def walkFile(file,filter=False):
    """Plot each ``.json`` file found under ``file`` as one labelled curve.

    Directories and files are visited in sorted order so that the colour and
    legend position given to each file is the same on every run (``os.walk``
    itself yields entries in arbitrary order).

    Args:
        file: Root directory to search recursively.
        filter (bool): When true, smooth the values with ``average_filter``
            (window of 5) before plotting. The name shadows the builtin but is
            kept because callers pass it by keyword.
    """
    for root, dirs, files in os.walk(file):
        # Sorting in place also fixes the order in which os.walk descends.
        dirs.sort()
        for f in sorted(files):
            if not f.endswith(".json"):
                continue
            filepath = os.path.join(root, f)
            print(filepath)
            timestamp, reward = load(filepath)
            if filter:
                # Moving-average smoothing.
                reward = average_filter(reward, 5)
                # Exponential smoothing (alternative, disabled):
                #reward = expo_filter(reward, 0.1)

            # The legend label is the file name up to its first dot.
            plot(timestamp, reward, label=f.split(".")[0])
                



def plot(timestamp, reward, label):
    """Draw one labelled curve and its shaded band on the current figure.

    The colour comes from the module-level ``colors`` iterator, so every call
    consumes one colour from it.

    Args:
        timestamp: X values of the curve.
        reward: Y values of the curve.
        label: Legend label for the curve.
    """
    line_color = next(colors)
    plt.plot(timestamp, reward, c=line_color, label=label)
    draw_shadow(timestamp, reward, line_color)


def draw_shadow(x, y,c,window_size=20):
    """Shade a variability band around a curve, one window at a time.

    The data is cut into consecutive, non-overlapping windows of
    ``window_size`` samples. Within each complete window the band spans
    ``y`` plus/minus 2.5 standard deviations of that window's values.
    Trailing samples that do not fill a whole window get no band.

    Args:
        x: X values of the curve.
        y: Y values of the curve (array supporting arithmetic with a scalar).
        c: Fill colour of the band.
        window_size (int): Number of samples per window.
    """
    for start in range(0, len(x) - window_size + 1, window_size):
        stop = start + window_size
        segment = y[start:stop]
        # Half-width of the band: (std * 5) / 2.
        half_width = np.std(segment) * 5 / 2
        lower = segment - half_width
        upper = segment + half_width
        plt.fill_between(x[start:stop], lower, upper, alpha=0.2, color=c)
        
# Figure 1: average reward per agent.
# Fresh colour iterator (five colours from Set1) consumed by plot().
colors = iter(plt.cm.Set1(np.linspace(0, 1, 5)))
plt.figure(figsize=(12, 4), dpi=300)
walkFile("./output/DRL/10/avg_reward")
plt.xlabel("Time Steps")
plt.ylabel("Average Reward")
plt.ticklabel_format(style='plain', axis='x')
plt.legend(loc="lower right")
plt.grid()
# Y-axis range
plt.ylim(-1000, 500)
# X-axis range
plt.xlim(0, 1e6)
# Adjust the layout BEFORE saving; otherwise the saved image would not
# include the adjustment that the displayed figure gets.
plt.tight_layout()
plt.savefig("./output/DRL/avg_reward.png")
plt.show()

# Figure 2: average survival episodes per agent (smoothed).
# Fresh colour iterator (five colours from Set1) consumed by plot().
colors = iter(plt.cm.Set1(np.linspace(0, 1, 5)))
plt.figure(figsize=(12, 4), dpi=300)
walkFile("./output/DRL/10/survival_episode",filter=True)
plt.xlabel("Time Steps")
plt.ylabel("Average Survival Episodes")
plt.ticklabel_format(style='plain', axis='x')
plt.legend(loc="lower right")
plt.grid()
# Y-axis range
plt.ylim(-400, 1200)
# X-axis range
plt.xlim(0, 1e6)
# Adjust the layout BEFORE saving; otherwise the saved image would not
# include the adjustment that the displayed figure gets.
plt.tight_layout()
plt.savefig("./output/DRL/survival_episode.png")
plt.show()