Skip to content

Commit

Permalink
v1.0.0: modified reset() functionalities of PDE envs, fixed several min…
Browse files Browse the repository at this point in the history
…or issues
  • Loading branch information
xiangyuan-zhang committed Apr 2, 2024
1 parent 89c467c commit c8a7b12
Show file tree
Hide file tree
Showing 19 changed files with 861 additions and 689 deletions.
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# controlgym

## Description
`Controlgym` provides 36 safety-critical industrial control environments and 10 infinite-dimensional PDE-based control problems with continuous, unbounded action and observation spaces that are inspired by real-world applications. This project supports the Learning for Dynamics & Control (L4DC) community, focusing on vital issues: convergence of reinforcement learning (RL) algorithms in policy development, stability, safety, and robustness of learning-based controllers, and the scalability of RL algorithms to high and potentially infinite-dimensional systems. We provide a detailed description of `controlgym` in [this paper](https://arxiv.org/abs/2311.18736).
`Controlgym` provides 36 industrial control environments and 10 infinite-dimensional PDE-based control problems with continuous, unbounded action and observation spaces that are inspired by real-world applications. This project supports the Learning for Dynamics & Control (L4DC) community, focusing on vital issues: convergence of reinforcement learning (RL) algorithms in policy development, stability, and robustness of learning-based controllers, and the scalability of RL algorithms to high and potentially infinite-dimensional systems. We provide a detailed description of `controlgym` in [this paper](https://arxiv.org/abs/2311.18736).

<p align="center">
<img src="figures/gallery.jpeg" alt="" width="700px">
Expand Down Expand Up @@ -80,11 +80,11 @@ conda deactivate
Check out our code examples in this [Jupyter notebook file](./examples.ipynb).

## Reference
- Zhang, X., Mao, W., Mowlavi, S., Benosman, M., & Başar, T. (2023). [Controlgym: Large-Scale Safety-Critical Control Environments for Benchmarking Reinforcement Learning Algorithms.](https://arxiv.org/abs/2311.18736) arXiv preprint arXiv:2311.18736.
- Zhang, X., Mao, W., Mowlavi, S., Benosman, M., & Başar, T. (2023). [Controlgym: Large-Scale Control Environments for Benchmarking Reinforcement Learning Algorithms.](https://arxiv.org/abs/2311.18736) arXiv preprint arXiv:2311.18736.

```bibtex
@article{zhang2023controlgym,
title = {Controlgym: Large-Scale Safety-Critical Control Environments for Benchmarking Reinforcement Learning Algorithms},
title = {Controlgym: Large-Scale Control Environments for Benchmarking Reinforcement Learning Algorithms},
author = {Zhang, Xiangyuan and Mao, Weichao and Mowlavi, Saviz and Benosman, Mouhacine and Ba{\c{s}}ar, Tamer},
journal = {arXiv preprint arXiv:2311.18736},
year = {2023}
Expand Down
3 changes: 2 additions & 1 deletion controlgym/controllers/ppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,8 @@ def run(self, state: np.ndarray[float] = None, seed: int = None):
"""
# reset the environment
observation, info = self.env.reset(seed=seed, state=state)
torch.manual_seed(seed=seed)
if seed is not None:
torch.manual_seed(seed=seed)
# run the simulated trajectory and calculate the h2 cost
total_reward = 0
state_traj = np.zeros((self.env.n_state, self.env.n_steps + 1))
Expand Down
52 changes: 28 additions & 24 deletions controlgym/envs/allen_cahn.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,9 @@ class AllenCahnEnv(PDE):
[sample_time]: each discrete-time step represents (ts) seconds. Default is 0.01.
[process_noise_cov]: process noise covariance coefficient. Default is 0.0.
[sensor_noise_cov]: sensor noise covariance coefficient. Default is 0.25.
[random_init_state_cov]: random initial state covariance coefficient. Default is 0.0.
[target_state]: target state. Default is np.zeros(n_state).
[init_state]: initial state. Default is
(self.domain_coordinates - 0.5 * self.domain_length) ** 2
* np.cos((2 * np.pi * (self.domain_coordinates - 0.5 * self.domain_length)) / self.domain_length)
[init_offset_mean]: mean of initial offset. Default is 0.
[init_offset_width]: width of initial offset. Default is 0.2.
[diffusivity_constant]: diffusivity constant. Default is 1e-4.
[potential_constant]: potential constant. Default is 5.0.
[n_state]: dimension of state vector. Default is 256.
Expand All @@ -41,19 +39,19 @@ class AllenCahnEnv(PDE):
[action_limit]: limit of action. Default is None.
[observation_limit]: limit of observation. Default is None.
[reward_limit]: limit of reward. Default is 1e15.
[seed]: random seed. Default is 0.
[seed]: random seed. Default is None.
"""
def __init__(
self,
n_steps: int = 100,
domain_length: float = 2.0,
integration_time: float = 0.001,
integration_time: float = 0.001,
sample_time: float = 0.01,
process_noise_cov: float = 0.0,
sensor_noise_cov: float = 0.25,
random_init_state_cov: float = 0.0,
target_state: np.ndarray[float] = None,
init_state: np.ndarray[float] = None,
init_offset_mean: float = 0.0,
init_offset_width: float = 0.2,
diffusivity_constant: float = 1e-4,
potential_constant: float = 5.0,
n_state: int = 256,
Expand All @@ -65,7 +63,7 @@ def __init__(
action_limit: float = None,
observation_limit: float = None,
reward_limit: float = None,
seed: int = 0,
seed: int = None,
):
PDE.__init__(
self,
Expand All @@ -75,8 +73,7 @@ def __init__(
integration_time=integration_time,
sample_time=sample_time,
process_noise_cov=process_noise_cov,
sensor_noise_cov=sensor_noise_cov,
random_init_state_cov=random_init_state_cov,
sensor_noise_cov=sensor_noise_cov,
target_state=target_state,
n_state=n_state,
n_observation=n_observation,
Expand All @@ -90,22 +87,27 @@ def __init__(
seed=seed,
)

if init_state is not None:
self.init_state = init_state
else:
self.init_state = (
self.domain_coordinates - 0.5 * self.domain_length
) ** 2 * np.cos(
(2 * np.pi * (self.domain_coordinates - 0.5 * self.domain_length))
/ self.domain_length
)
self.state = self.init_state
# physical parameters
self.diffusivity_constant = diffusivity_constant
self.potential_constant = potential_constant

# compute control sup, observation matrix
self.control_sup = self._compute_control_sup()
self.C = self._compute_C()
# initial state parameters
self.init_offset_mean = init_offset_mean
self.init_offset_width = init_offset_width
self.reset()

def select_init_state(self, init_offset=None):
    """Build the initial state profile of the PDE.

    The profile is a fixed shape evaluated on self.domain_coordinates and
    shifted by a scalar offset.

    Args:
        init_offset: offset added to every entry of the profile. When None,
            it is set to self.init_offset_mean plus one draw from
            self.rng.uniform over
            [-0.5 * self.init_offset_width, 0.5 * self.init_offset_width].

    Returns:
        The initial state, computed from self.domain_coordinates.
    """
    if init_offset is None:
        half_width = 0.5 * self.init_offset_width
        # NOTE(review): self.rng is presumably a NumPy random Generator
        # created by the PDE base class -- confirm.
        init_offset = self.init_offset_mean + self.rng.uniform(-half_width, half_width)

    # Coordinates measured from the midpoint of the domain.
    centered = self.domain_coordinates - 0.5 * self.domain_length
    envelope = centered ** 2
    carrier = np.cos((2 * np.pi * centered) / self.domain_length)
    return init_offset + envelope * carrier

def _compute_fourier_linear_op(self):
"""Private function to compute the linear operator of the PDE in Fourier space.
Expand Down Expand Up @@ -157,5 +159,7 @@ def get_params_asdict(self):
extra_data = {
"diffusivity_constant": self.diffusivity_constant,
"potential_constant": self.potential_constant,
"init_offset_mean": self.init_offset_mean,
"init_offset_width": self.init_offset_width,
}
return {**pde_dict, **extra_data}
56 changes: 37 additions & 19 deletions controlgym/envs/burgers.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@ class BurgersEnv(PDE):
[sample_time]: each discrete-time step represents (ts) seconds. Default is 0.05.
[process_noise_cov]: process noise covariance coefficient. Default is 0.0.
[sensor_noise_cov]: sensor noise covariance coefficient. Default is 0.25.
[random_init_state_cov]: random initial state covariance coefficient. Default is 0.0.
[target_state]: target state. Default is np.zeros(n_state).
[init_state]: initial state. Default is
np.cosh(10 * (self.domain_coordinates - 1 * self.domain_length / 2)) ** (-1).
[init_amplitude_mean]: mean of initial amplitude. Default is 1.
[init_amplitude_width]: width of initial amplitude. Default is 0.2.
[init_spread_mean]: mean of initial spread. Default is 0.05.
[init_spread_width]: width of initial spread. Default is 0.02.
[diffusivity_constant]: diffusivity constant. Default is 0.001.
[n_state]: dimension of state vector. Default is 256.
[n_observation]: dimension of observation vector. Default is 10.
Expand All @@ -39,7 +40,7 @@ class BurgersEnv(PDE):
[action_limit]: limit of action. Default is None.
[observation_limit]: limit of observation. Default is None.
[reward_limit]: limit of reward. Default is None.
[seed]: random seed. Default is 0.
[seed]: random seed. Default is None.
"""

def __init__(
Expand All @@ -50,9 +51,11 @@ def __init__(
sample_time: float = 0.05,
process_noise_cov: float = 0.0,
sensor_noise_cov: float = 0.25,
random_init_state_cov: float = 0.0,
target_state: np.ndarray[float] = None,
init_state: np.ndarray[float] = None,
init_amplitude_mean: float = 1.0,
init_amplitude_width: float = 0.2,
init_spread_mean: float = 0.05,
init_spread_width: float = 0.02,
diffusivity_constant: float = 0.001,
n_state: int = 256,
n_observation: int = 10,
Expand All @@ -63,7 +66,7 @@ def __init__(
action_limit: float = None,
observation_limit: float = None,
reward_limit: float = None,
seed: int = 0,
seed: int = None,
):
PDE.__init__(
self,
Expand All @@ -74,7 +77,6 @@ def __init__(
sample_time=sample_time,
process_noise_cov=process_noise_cov,
sensor_noise_cov=sensor_noise_cov,
random_init_state_cov=random_init_state_cov,
target_state=target_state,
n_state=n_state,
n_observation=n_observation,
Expand All @@ -88,18 +90,28 @@ def __init__(
seed=seed,
)

if init_state is not None:
self.init_state = init_state
else:
self.init_state = self.init_state = np.cosh(
10 * (self.domain_coordinates - 1 * self.domain_length / 2)
) ** (-1)
self.state = self.init_state
# physical parameter
self.diffusivity_constant = diffusivity_constant

# compute control sup, observation matrix
self.control_sup = self._compute_control_sup()
self.C = self._compute_C()
# initial state parameters
self.init_amplitude_mean = init_amplitude_mean
self.init_amplitude_width = init_amplitude_width
self.init_spread_mean = init_spread_mean
self.init_spread_width = init_spread_width
self.reset()

def select_init_state(self, init_amplitude=None, init_spread=None):
    """Build the initial state profile of the PDE.

    The profile is init_amplitude / cosh(z), where z is the distance from
    the domain midpoint divided by init_spread.

    Args:
        init_amplitude: scale factor of the profile. When None, it is set to
            self.init_amplitude_mean plus one draw from self.rng.uniform over
            [-0.5 * self.init_amplitude_width, 0.5 * self.init_amplitude_width].
        init_spread: divisor applied to the centered coordinates. When None,
            it is set to self.init_spread_mean plus one draw from
            self.rng.uniform over
            [-0.5 * self.init_spread_width, 0.5 * self.init_spread_width].

    Returns:
        The initial state, computed from self.domain_coordinates.
    """
    # The amplitude is drawn before the spread; keeping this order keeps the
    # sequence of random draws reproducible for a given generator state.
    if init_amplitude is None:
        half_amplitude_width = 0.5 * self.init_amplitude_width
        init_amplitude = self.init_amplitude_mean + self.rng.uniform(
            -half_amplitude_width, half_amplitude_width
        )
    if init_spread is None:
        half_spread_width = 0.5 * self.init_spread_width
        init_spread = self.init_spread_mean + self.rng.uniform(
            -half_spread_width, half_spread_width
        )

    # Coordinates measured from the midpoint of the domain.
    centered = self.domain_coordinates - 0.5 * self.domain_length
    scaled = 1 / init_spread * centered
    return init_amplitude * np.cosh(scaled) ** (-1)

def _compute_fourier_linear_op(self):
"""Private function to compute the linear operator of the PDE in Fourier space.
Expand Down Expand Up @@ -150,5 +162,11 @@ def get_params_asdict(self):
a dictionary containing the parameters of the pde environment + extra parameters.
"""
pde_dict = super().get_params_asdict()
extra_data = {"diffusivity_constant": self.diffusivity_constant}
extra_data = {
"diffusivity_constant": self.diffusivity_constant,
"init_amplitude_mean": self.init_amplitude_mean,
"init_amplitude_width": self.init_amplitude_width,
"init_spread_mean": self.init_spread_mean,
"init_spread_width": self.init_spread_width,
}
return {**pde_dict, **extra_data}
48 changes: 27 additions & 21 deletions controlgym/envs/cahn_hilliard.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,9 @@ class CahnHilliardEnv(PDE):
[sample_time]: each discrete-time step represents (ts) seconds. Default is 0.01.
[process_noise_cov]: process noise covariance coefficient. Default is 0.0.
[sensor_noise_cov]: sensor noise covariance coefficient. Default is 0.25.
[random_init_state_cov]: random initial state covariance coefficient. Default is 0.0.
[target_state]: target state. Default is np.zeros(n_state).
[init_state]: initial state. Default is
np.cos(np.pi * (self.domain_coordinates - 0.5 * self.domain_length))
- np.exp(-4 * (np.pi * (self.domain_coordinates - 0.5 * self.domain_length)) ** 2)
[init_amplitude_mean]: mean of initial amplitude. Default is 1.0.
[init_amplitude_width]: width of initial amplitude. Default is 0.2.
[diffusivity_constant]: diffusivity constant. Default is 1.0.
[surface_tension_constant]: surface tension constant. Default is 0.02.
[n_state]: dimension of state vector. Default is 256.
Expand All @@ -42,7 +40,7 @@ class CahnHilliardEnv(PDE):
[action_limit]: limit of action. Default is None.
[observation_limit]: limit of observation. Default is None.
[reward_limit]: limit of reward. Default is None.
[seed]: random seed. Default is 0.
[seed]: random seed. Default is None.
"""

def __init__(
Expand All @@ -53,9 +51,9 @@ def __init__(
sample_time: float = 0.01,
process_noise_cov: float = 0.0,
sensor_noise_cov: float = 0.25,
random_init_state_cov: float = 0.0,
target_state: np.ndarray[float] = None,
init_state: np.ndarray[float] = None,
init_amplitude_mean: float = 1.0,
init_amplitude_width: float = 0.2,
diffusivity_constant: float = 1.0,
surface_tension_constant: float = 0.02,
n_state: int = 256,
Expand All @@ -67,7 +65,7 @@ def __init__(
action_limit: float = None,
observation_limit: float = None,
reward_limit: float = None,
seed: int = 0,
seed: int = None,
):
PDE.__init__(
self,
Expand All @@ -78,7 +76,6 @@ def __init__(
sample_time=sample_time,
process_noise_cov=process_noise_cov,
sensor_noise_cov=sensor_noise_cov,
random_init_state_cov=random_init_state_cov,
target_state=target_state,
n_state=n_state,
n_observation=n_observation,
Expand All @@ -92,21 +89,28 @@ def __init__(
seed=seed,
)

if init_state is not None:
self.init_state = init_state
else:
self.init_state = np.cos(
np.pi * (self.domain_coordinates - 0.5 * self.domain_length)
) - np.exp(
-4 * (np.pi * (self.domain_coordinates - 0.5 * self.domain_length)) ** 2
)
self.state = self.init_state
# physical parameters
self.diffusivity_constant = diffusivity_constant
self.surface_tension_constant = surface_tension_constant

# compute control sup, observation matrix
self.control_sup = self._compute_control_sup()
self.C = self._compute_C()
# initial state parameters
self.init_amplitude_mean = init_amplitude_mean
self.init_amplitude_width = init_amplitude_width
self.reset()

def select_init_state(self, init_amplitude=None):
    """Build the initial state profile of the PDE.

    The profile is a cosine scaled by init_amplitude, minus an exponential
    term that does not depend on the amplitude.

    Args:
        init_amplitude: scale factor of the cosine term. When None, it is
            set to self.init_amplitude_mean plus one draw from
            self.rng.uniform over
            [-0.5 * self.init_amplitude_width, 0.5 * self.init_amplitude_width].

    Returns:
        The initial state, computed from self.domain_coordinates.
    """
    if init_amplitude is None:
        half_width = 0.5 * self.init_amplitude_width
        init_amplitude = self.init_amplitude_mean + self.rng.uniform(-half_width, half_width)

    # Coordinates measured from the midpoint of the domain.
    centered = self.domain_coordinates - 0.5 * self.domain_length
    cosine_term = init_amplitude * np.cos(np.pi * centered)
    exponential_term = np.exp(-4 * (np.pi * centered) ** 2)
    return cosine_term - exponential_term

def _compute_fourier_linear_op(self):
"""Private function to compute the linear operator of the PDE in Fourier space.
Expand Down Expand Up @@ -163,5 +167,7 @@ def get_params_asdict(self):
extra_data = {
"diffusivity_constant": self.diffusivity_constant,
"surface_tension_constant": self.surface_tension_constant,
"init_amplitude_mean": self.init_amplitude_mean,
"init_amplitude_width": self.init_amplitude_width,
}
return {**pde_dict, **extra_data}
Loading

0 comments on commit c8a7b12

Please sign in to comment.