In [None]:
#SAC
import gymnasium as gym
import numpy as np
import math
from stable_baselines3 import SAC
from stable_baselines3.common.callbacks import CheckpointCallback
import pde_control_gym
from pde_control_gym.src import TunedReward1D

# Physical / discretisation parameters. Each value is forwarded to the
# environment through the env_params dict defined further down.
# Passed as "T"; T / dt is also the step count handed to TunedReward1D.
# Presumably the simulation time horizon — TODO confirm units.
T = 5
# Passed as "dt" — presumably the temporal step size.
dt = 1e-3
# Passed as "dx" — presumably the spatial step size.
dx = 0.01
# Passed as "X" — presumably the length of the spatial domain.
X = 1
# Passed as "lamArr"; presumably handed back by the environment as the second
# argument of reset_recirculation_func — verify against the environment.
lamArr = 5.5

def solveBetaFunction(x, gamma):
    """Evaluate ``5 * cos(gamma * arccos(x))`` element-wise.

    Args:
        x: Scalar or array-like (any shape) of values in ``[-1, 1]``.
        gamma: Multiplier applied to ``arccos(x)`` inside the cosine.

    Returns:
        ``numpy.ndarray`` of dtype ``float32`` with the same shape as ``x``.

    Raises:
        ValueError: If any element of ``x`` lies outside ``[-1, 1]``, where
            the arc cosine is undefined.
    """
    # Evaluate in double precision and cast to float32 once, at the end.
    points = np.asarray(x, dtype=np.float64)
    # np.arccos silently yields NaN outside its domain; raise the same
    # ValueError that math.acos does so bad input cannot go unnoticed.
    if np.any(np.abs(points) > 1.0):
        raise ValueError("math domain error")
    return np.asarray(5.0 * np.cos(gamma * np.arccos(points)), dtype=np.float32)

# PDE environment parameters; unpacked as keyword arguments into gym.make(...).
env_params = {
    "T": T,
    "dt": dt,
    "X": X,
    "dx": dx,
    "lamArr": lamArr,
    # Step count for the reward. round() before int() so that floating-point
    # error in T / dt (e.g. 4999.999...) cannot truncate to one step too few.
    "reward_class": TunedReward1D(int(round(T / dt)), -1e3, 3e2),
    "normalize": True,
    "sensing_loc": "full",
    # NOTE(review): "Dirchilet" is kept verbatim — presumably the spelling the
    # environment expects; confirm against pde_control_gym before "correcting" it.
    "control_type": "Dirchilet",
    # Identity function: returns the state it is given, unchanged.
    "sensing_noise_func": lambda state: state,
    "max_state_value": 1e10,
    "max_control_value": 20,
    # Constant profile of length nx whose amplitude is drawn uniformly from [1, 10)
    # on every call (unseeded, so runs are not reproducible as written).
    "reset_init_condition_func": lambda nx: np.ones(nx) * np.random.uniform(1, 10),
    # Evaluates solveBetaFunction on nx evenly spaced points in [0, 1]. The lambda's
    # own lamArr parameter shadows the module-level constant of the same name.
    "reset_recirculation_func": lambda nx, lamArr: solveBetaFunction(np.linspace(0, 1, nx), lamArr),
    "control_sample_rate": 0.05,
}

def main():
    """Train a SAC agent with the default MLP policy on the 1D transport PDE.

    Builds the ``PDEControlGym-TransportPDE1D`` environment from ``env_params``,
    trains for 100,000 timesteps with TensorBoard logs under ``./tb_SAC/``, and
    lets a ``CheckpointCallback`` write the model and its replay buffer to
    ``./logs_SAC/`` every 100,000 callback steps. The environment is closed
    even if model construction or training raises.
    """
    # Create the environment
    env = gym.make("PDEControlGym-TransportPDE1D", **env_params)

    try:
        # Model configuration: default policy network
        model = SAC(
            "MlpPolicy",
            env=env,
            learning_rate=1e-4,
            buffer_size=100000,
            batch_size=256,
            tau=0.005,
            gamma=0.99,
            verbose=1,
            tensorboard_log="./tb_SAC/",
        )

        # Checkpoint callback
        checkpoint_callback = CheckpointCallback(
            save_freq=100000,
            save_path="./logs_SAC/",
            name_prefix="sac_model",
            save_replay_buffer=True,
            # NOTE(review): presumably a no-op unless the env is wrapped in
            # VecNormalize, which is not done here — confirm.
            save_vecnormalize=True,
        )

        # Train. total_timesteps is documented as an int, so use an integer
        # literal rather than the float 1e5.
        model.learn(total_timesteps=100_000, callback=checkpoint_callback)
    finally:
        # Release the environment on success and on failure alike.
        env.close()
   
   
# Entry point: run the training when this code is executed directly
# (as a script, or as a notebook cell, where __name__ is "__main__").
if __name__ == "__main__":
    main()

In [None]:
#NOSAC
import gymnasium as gym
import numpy as np
import math
from stable_baselines3 import SAC
from stable_baselines3.common.callbacks import CheckpointCallback
import pde_control_gym
from pde_control_gym.src import TunedReward1D

# Physical / discretisation parameters. Each value is forwarded to the
# environment through the env_params dict defined further down.
# Passed as "T"; T / dt is also the step count handed to TunedReward1D.
# Presumably the simulation time horizon — TODO confirm units.
T = 5
# Passed as "dt" — presumably the temporal step size.
dt = 1e-3
# Passed as "dx" — presumably the spatial step size.
dx = 0.01
# Passed as "X" — presumably the length of the spatial domain.
X = 1
# Passed as "lamArr"; presumably handed back by the environment as the second
# argument of reset_recirculation_func — verify against the environment.
lamArr = 5.5

def solveBetaFunction(x, gamma):
    """Evaluate ``5 * cos(gamma * arccos(x))`` element-wise.

    Args:
        x: Scalar or array-like (any shape) of values in ``[-1, 1]``.
        gamma: Multiplier applied to ``arccos(x)`` inside the cosine.

    Returns:
        ``numpy.ndarray`` of dtype ``float32`` with the same shape as ``x``.

    Raises:
        ValueError: If any element of ``x`` lies outside ``[-1, 1]``, where
            the arc cosine is undefined.
    """
    # Evaluate in double precision and cast to float32 once, at the end.
    points = np.asarray(x, dtype=np.float64)
    # np.arccos silently yields NaN outside its domain; raise the same
    # ValueError that math.acos does so bad input cannot go unnoticed.
    if np.any(np.abs(points) > 1.0):
        raise ValueError("math domain error")
    return np.asarray(5.0 * np.cos(gamma * np.arccos(points)), dtype=np.float32)

# PDE environment parameters; unpacked as keyword arguments into gym.make(...).
env_params = {
    "T": T,
    "dt": dt,
    "X": X,
    "dx": dx,
    "lamArr": lamArr,
    # Step count for the reward. round() before int() so that floating-point
    # error in T / dt (e.g. 4999.999...) cannot truncate to one step too few.
    "reward_class": TunedReward1D(int(round(T / dt)), -1e3, 3e2),
    "normalize": True,
    "sensing_loc": "full",
    # NOTE(review): "Dirchilet" is kept verbatim — presumably the spelling the
    # environment expects; confirm against pde_control_gym before "correcting" it.
    "control_type": "Dirchilet",
    # Identity function: returns the state it is given, unchanged.
    "sensing_noise_func": lambda state: state,
    "max_state_value": 1e10,
    "max_control_value": 20,
    # Constant profile of length nx whose amplitude is drawn uniformly from [1, 10)
    # on every call (unseeded, so runs are not reproducible as written).
    "reset_init_condition_func": lambda nx: np.ones(nx) * np.random.uniform(1, 10),
    # Evaluates solveBetaFunction on nx evenly spaced points in [0, 1]. The lambda's
    # own lamArr parameter shadows the module-level constant of the same name.
    "reset_recirculation_func": lambda nx, lamArr: solveBetaFunction(np.linspace(0, 1, nx), lamArr),
    "control_sample_rate": 0.05,
}

def main():
    """Train a SAC agent whose policy uses the DeepONet feature extractor.

    Imports ``CustomFeatureExtractor`` from the ``DeepONeth`` module, builds the
    ``PDEControlGym-TransportPDE1D`` environment from ``env_params``, and trains
    for 100,000 timesteps with TensorBoard logs under ``./tb_NOSAC/`` and
    checkpoints under ``./logs_NOSAC/``. If the extractor cannot be imported, an
    error is printed and the function returns without creating an environment.
    The environment is closed even if model construction or training raises.
    """
    # Import the DeepONet feature extractor before anything else, so that a
    # missing module aborts before an environment exists that would need closing.
    try:
        from DeepONeth import CustomFeatureExtractor
    except ImportError as exc:
        # Name the module that is actually imported and include the underlying
        # reason (it may be a missing transitive dependency).
        print(f"错误: 无法导入DeepONet的CustomFeatureExtractor，请检查DeepONeth模块 ({exc})")
        return

    # Create the environment
    env = gym.make("PDEControlGym-TransportPDE1D", **env_params)

    try:
        # Policy configuration
        policy_kwargs = {
            "features_extractor_class": CustomFeatureExtractor,
            # NOTE(review): 101 presumably equals the number of spatial grid
            # points (X / dx + 1) — confirm against the extractor.
            "features_extractor_kwargs": {"features_dim": 101},
        }

        # Model configuration
        model = SAC(
            "MlpPolicy",
            env=env,
            policy_kwargs=policy_kwargs,
            learning_rate=1e-4,
            buffer_size=100000,
            batch_size=256,
            tau=0.005,
            gamma=0.99,
            verbose=1,
            tensorboard_log="./tb_NOSAC/",
        )

        # Checkpoint callback
        checkpoint_callback = CheckpointCallback(
            save_freq=100000,
            save_path="./logs_NOSAC/",
            name_prefix="sac_model_deeponet",
        )

        # Train. total_timesteps is documented as an int, so use an integer
        # literal rather than the float 1e5.
        model.learn(total_timesteps=100_000, callback=checkpoint_callback)
    finally:
        # Release the environment on success and on failure alike.
        env.close()
    

# Entry point: run the training when this code is executed directly
# (as a script, or as a notebook cell, where __name__ is "__main__").
if __name__ == "__main__":
    main()

In [None]:
#NOSAC_training
import gymnasium as gym
import numpy as np
import math
from stable_baselines3 import SAC
from stable_baselines3.common.callbacks import CheckpointCallback
import pde_control_gym
from pde_control_gym.src import TunedReward1D

# Physical / discretisation parameters. Each value is forwarded to the
# environment through the env_params dict defined further down.
# Passed as "T"; T / dt is also the step count handed to TunedReward1D.
# Presumably the simulation time horizon — TODO confirm units.
T = 5
# Passed as "dt" — presumably the temporal step size.
dt = 1e-3
# Passed as "dx" — presumably the spatial step size.
dx = 0.01
# Passed as "X" — presumably the length of the spatial domain.
X = 1
# Passed as "lamArr"; presumably handed back by the environment as the second
# argument of reset_recirculation_func — verify against the environment.
lamArr = 5.5

def solveBetaFunction(x, gamma):
    """Evaluate ``5 * cos(gamma * arccos(x))`` element-wise.

    Args:
        x: Scalar or array-like (any shape) of values in ``[-1, 1]``.
        gamma: Multiplier applied to ``arccos(x)`` inside the cosine.

    Returns:
        ``numpy.ndarray`` of dtype ``float32`` with the same shape as ``x``.

    Raises:
        ValueError: If any element of ``x`` lies outside ``[-1, 1]``, where
            the arc cosine is undefined.
    """
    # Evaluate in double precision and cast to float32 once, at the end.
    points = np.asarray(x, dtype=np.float64)
    # np.arccos silently yields NaN outside its domain; raise the same
    # ValueError that math.acos does so bad input cannot go unnoticed.
    if np.any(np.abs(points) > 1.0):
        raise ValueError("math domain error")
    return np.asarray(5.0 * np.cos(gamma * np.arccos(points)), dtype=np.float32)

# PDE environment parameters; unpacked as keyword arguments into gym.make(...).
env_params = {
    "T": T,
    "dt": dt,
    "X": X,
    "dx": dx,
    "lamArr": lamArr,
    # Step count for the reward. round() before int() so that floating-point
    # error in T / dt (e.g. 4999.999...) cannot truncate to one step too few.
    "reward_class": TunedReward1D(int(round(T / dt)), -1e3, 3e2),
    "normalize": True,
    "sensing_loc": "full",
    # NOTE(review): "Dirchilet" is kept verbatim — presumably the spelling the
    # environment expects; confirm against pde_control_gym before "correcting" it.
    "control_type": "Dirchilet",
    # Identity function: returns the state it is given, unchanged.
    "sensing_noise_func": lambda state: state,
    "max_state_value": 1e10,
    "max_control_value": 20,
    # Constant profile of length nx whose amplitude is drawn uniformly from [1, 10)
    # on every call (unseeded, so runs are not reproducible as written).
    "reset_init_condition_func": lambda nx: np.ones(nx) * np.random.uniform(1, 10),
    # Evaluates solveBetaFunction on nx evenly spaced points in [0, 1]. The lambda's
    # own lamArr parameter shadows the module-level constant of the same name.
    "reset_recirculation_func": lambda nx, lamArr: solveBetaFunction(np.linspace(0, 1, nx), lamArr),
    "control_sample_rate": 0.05,
}

def main():
    """Train a SAC agent whose policy uses the RONet feature extractor.

    Imports ``CustomFeatureExtractor`` from the ``RONeth`` module, builds the
    ``PDEControlGym-TransportPDE1D`` environment from ``env_params``, and trains
    for 100,000 timesteps with TensorBoard logs under ``./tb_NOSACtraining/`` and
    checkpoints under ``./logs_NOSACtraining/``. If the extractor cannot be
    imported, an error is printed and the function returns without creating an
    environment. The environment is closed even if model construction or
    training raises.
    """
    # Import the RONet feature extractor before anything else, so that a
    # missing module aborts before an environment exists that would need closing.
    try:
        from RONeth import CustomFeatureExtractor
    except ImportError as exc:
        # Name the module that is actually imported and include the underlying
        # reason (it may be a missing transitive dependency).
        print(f"错误: 无法导入RONet的CustomFeatureExtractor，请检查RONeth模块 ({exc})")
        return

    # Create the environment
    env = gym.make("PDEControlGym-TransportPDE1D", **env_params)

    try:
        # Policy configuration
        policy_kwargs = {
            "features_extractor_class": CustomFeatureExtractor,
            # NOTE(review): 101 presumably equals the number of spatial grid
            # points (X / dx + 1) — confirm against the extractor.
            "features_extractor_kwargs": {"features_dim": 101},
        }

        # Model configuration
        model = SAC(
            "MlpPolicy",
            env=env,
            policy_kwargs=policy_kwargs,
            learning_rate=1e-4,
            buffer_size=100000,
            batch_size=256,
            # NOTE(review): 0.002 here versus 0.005 in the other two training
            # cells — confirm the difference is intentional.
            tau=0.002,
            gamma=0.99,
            verbose=1,
            tensorboard_log="./tb_NOSACtraining/",
        )

        # Checkpoint callback
        checkpoint_callback = CheckpointCallback(
            save_freq=100000,
            save_path="./logs_NOSACtraining/",
            name_prefix="sac_model_ronet",
        )

        # Train. total_timesteps is documented as an int, so use an integer
        # literal rather than the float 1e5.
        model.learn(total_timesteps=100_000, callback=checkpoint_callback)
    finally:
        # Release the environment on success and on failure alike.
        env.close()
  

# Entry point: run the training when this code is executed directly
# (as a script, or as a notebook cell, where __name__ is "__main__").
if __name__ == "__main__":
    main()