Hyperparameter Optimization Module #151

Merged · 125 commits · May 14, 2024

Commits
8d3be23
1. bug fixed. 2. kernel extension. 3. batch GP implementation.
middleyuan Jun 30, 2023
7f0e3ff
update dependencies
middleyuan Jul 10, 2023
cf3d4e8
explicitly import scipy.linalg
middleyuan Jul 10, 2023
359eecc
add cartpole configs for gpmpc
middleyuan Jul 10, 2023
89a29b8
add hyperparameter optimization module
middleyuan Jul 10, 2023
9e2a7ef
catch all exceptions in hpo for debugging purposes.
middleyuan Jul 10, 2023
27454da
put cartpole configs for gpmpc under the folder of gpmpc
middleyuan Jul 10, 2023
17e408b
add hpo scripts
middleyuan Jul 10, 2023
bb4e1b0
1. include pandas 2. change rel import in gpmpc_experiment.py 3. remo…
middleyuan Jul 10, 2023
14a6db1
rename config to match default algo name.
middleyuan Jul 10, 2023
e6a2e3d
remove old configs
middleyuan Jul 10, 2023
0484388
add tests
middleyuan Jul 10, 2023
84830df
edit bash file with correct arg name
middleyuan Jul 10, 2023
e69e048
add another host in gpmpc_hpo.sh
middleyuan Jul 10, 2023
097e1c2
change to new dir in gpmpc_hpo.sh
middleyuan Jul 10, 2023
405dcea
1. fix a small bug 2. add test_train_gpmpc_cartpole
middleyuan Jul 11, 2023
549ff3e
add a hpo parallelism test
middleyuan Jul 11, 2023
81b5602
saving before running hpo
middleyuan Jul 11, 2023
a5ad5f2
I think the bug is that it reaches the goal in the first step.
middleyuan Jul 11, 2023
ce4d75e
1. PPO configs. 2. Make cartpole init states harder. 3. First version…
middleyuan Jul 18, 2023
b40566c
Re-organize a bit (file name, remove __init__.py in test folders).
middleyuan Jul 18, 2023
23f571d
1. HPO strategies. 2. test on hpo for ppo. 3. another way to save che…
middleyuan Jul 22, 2023
802edb6
update gitignore
middleyuan Jul 24, 2023
02d1c33
change configs
middleyuan Jul 24, 2023
20d3a7f
update bash for hpo on gpmpc
middleyuan Jul 24, 2023
ad96f6f
add prior arg in gpmpc_sampler
middleyuan Jul 24, 2023
5318c25
1. HPO effort evaluations. 2. Bash file for hpo strategy evaluation.
middleyuan Jul 24, 2023
924d3b3
update dependencies
middleyuan Jul 25, 2023
14ae2aa
add the freedom to choose between random sampler and TPE sampler.
middleyuan Jul 26, 2023
c0b1b34
1. add strategy 5. 2. add unit test accordingly.
middleyuan Aug 3, 2023
f5c3a5a
1. prior configs. 2. update eval.py, sen.sh, and .gitignore.
middleyuan Aug 3, 2023
0e1248a
gpmpc hpo strategy study
middleyuan Aug 4, 2023
a0feec7
refactor the code
middleyuan Aug 7, 2023
bd39347
1. hpo on sac. 2. add activation arg in sac and fix a small bug.
middleyuan Aug 8, 2023
4342b2a
fix typos
middleyuan Aug 8, 2023
1e1f7cf
change to two jobs
middleyuan Aug 9, 2023
b087c87
change num of repetitions to make sure it at least has same num of sa…
middleyuan Aug 9, 2023
59b4220
Merge branch 'hpo-on-ppo' into hpo-on-sac
middleyuan Aug 9, 2023
4c22c86
reduce the budget
middleyuan Aug 9, 2023
fe02a65
toy example
middleyuan Aug 10, 2023
3d33487
consider 4 versions of noisy functions.
middleyuan Aug 11, 2023
714a76d
include var study
middleyuan Aug 13, 2023
2663cde
improve visualization in toy examples
middleyuan Aug 14, 2023
249a284
updated visualization improvement in toy examples.
middleyuan Aug 14, 2023
e03cb33
change naming
middleyuan Aug 14, 2023
ee29967
final experiment setup
middleyuan Aug 30, 2023
3a6448d
Merge branch 'hpo-on-ppo' into hpo-on-sac
middleyuan Aug 30, 2023
831e186
final experiment setup
middleyuan Aug 30, 2023
452937d
modify seeding
middleyuan Aug 30, 2023
7b4a844
Ignore runtime error for hpo
middleyuan Aug 31, 2023
bfd6f21
Merge branch 'hpo-on-ppo' into hpo-on-gpmpc
middleyuan Aug 31, 2023
e310829
Merge branch 'hpo-on-sac' into hpo-on-gpmpc
middleyuan Aug 31, 2023
c45e3e4
merge from sac
middleyuan Aug 31, 2023
8596fc8
fix a bug in hpo_sampler.py
middleyuan Aug 31, 2023
df01b80
final design to show possible lower compute time.
middleyuan Aug 31, 2023
9faae33
1. hpo on ddpg. 2. fix a small bug in ddpg_utils.
middleyuan Aug 31, 2023
de90501
relax the threshold
middleyuan Aug 31, 2023
4322f67
relax the threshold
middleyuan Sep 1, 2023
038f046
make rl_hpo_strategy_eval.sh automatic.
middleyuan Sep 4, 2023
4703d8b
Merge branch 'hpo-on-ppo' into hpo-on-sac
middleyuan Sep 4, 2023
807e358
Merge branch 'hpo-on-ppo' into hpo-on-ddpg
middleyuan Sep 4, 2023
6590134
fix a bug in rl_hpo_strategy_eval.sh
middleyuan Sep 4, 2023
8c594c3
Merge branch 'hpo-on-ppo' into hpo-on-sac
middleyuan Sep 4, 2023
87cbd1e
Merge branch 'hpo-on-ppo' into hpo-on-ddpg
middleyuan Sep 4, 2023
8211c52
add gpmpc_hpo_strategy_eval.sh
middleyuan Sep 4, 2023
26536e9
fix a small bug
middleyuan Sep 4, 2023
5bca27a
fix the budget (trial) bug in configs.
middleyuan Sep 5, 2023
f083b64
prepare comparing hpo strategy on gpmpc
middleyuan Sep 5, 2023
bed32ce
fix a bug in gpmpc_hpo_strategy.sh
middleyuan Sep 5, 2023
9503398
fix bugs in bash files
middleyuan Sep 5, 2023
6cd934f
fix the trial bug in config
middleyuan Sep 5, 2023
ecb27b1
fix a function bug in eval.py
middleyuan Sep 5, 2023
69d416b
Merge branch 'hpo-on-ppo' into hpo-on-ddpg
middleyuan Sep 5, 2023
a1b0756
1. add hpo resume functionality. 2. make eval function more general.
middleyuan Sep 6, 2023
c88e7e7
Merge branch 'hpo-on-ppo' into hpo-on-sac
middleyuan Sep 6, 2023
a9aa83b
update configs
middleyuan Sep 6, 2023
7f791aa
make main.sh general
middleyuan Sep 6, 2023
d8d047b
Merge branch 'hpo-on-ppo' into hpo-on-sac
middleyuan Sep 6, 2023
1e16f06
Merge branch 'hpo-on-ppo' into hpo-on-ddpg
middleyuan Sep 6, 2023
c4a8ded
resume previous config with trial increased.
middleyuan Sep 6, 2023
80e59ec
fix the sorting bug.
middleyuan Sep 11, 2023
1a3a98a
Merge branch 'hpo-on-ppo' into hpo-on-ddpg
middleyuan Sep 11, 2023
43387d5
Merge branch 'hpo-on-ppo' into hpo-on-sac
middleyuan Sep 11, 2023
952b267
fix sorting bug
middleyuan Sep 11, 2023
7f62195
a small bug fixed
middleyuan Sep 12, 2023
d5109f4
fix a bug on computing reward
middleyuan Sep 12, 2023
1618395
Merge branch 'hpo-on-ppo' into hpo-on-gpmpc
middleyuan Sep 14, 2023
778128b
add resume functionality
middleyuan Sep 14, 2023
61ea053
edit main bash file and fix some typos
middleyuan Sep 14, 2023
1caae68
simply assign zero if numerical issues happen during HPO
middleyuan Sep 14, 2023
8583978
Merge branch 'hpo-on-ppo' into hpo-on-gpmpc
middleyuan Sep 14, 2023
f9f701d
adjust eval
middleyuan Sep 16, 2023
5caf0c6
change to boxenplot
middleyuan Sep 19, 2023
eea138a
fix typo
middleyuan Sep 21, 2023
362ea75
add reliable_stats
middleyuan Sep 24, 2023
3649c44
Merge branch 'hpo-on-gpmpc' into hpo-on-ppo
middleyuan Sep 24, 2023
d6e0b08
Merge branch 'hpo-on-ddpg' into hpo-on-ppo
middleyuan Sep 24, 2023
55624cd
update outdated configs
middleyuan Sep 24, 2023
6e9bbc9
Merge remote-tracking branch 'origin/hpo-on-sac' into hpo-on-ppo
middleyuan Sep 24, 2023
02d75ad
update jupyter notebooks
middleyuan Oct 5, 2023
00f6608
update jupyter notebooks.
middleyuan Oct 23, 2023
b5749d0
final update for appendix
middleyuan Oct 24, 2023
e4d616d
update readme
middleyuan Oct 24, 2023
ef29146
fix typo
middleyuan Oct 25, 2023
47f1598
1. clean up code for ppo controller, hyperparameter module. 2. Test o…
middleyuan Apr 10, 2024
83ec989
test training with given optimized hp files.
middleyuan Apr 10, 2024
e890c04
1. test hpo with and without MySQL. 2. update README.
middleyuan Apr 10, 2024
e9499cd
remove discrepancy of readme.
middleyuan Apr 10, 2024
c45a975
update readme
middleyuan Apr 10, 2024
8350b63
1. remove 'pandas' and 'seaborn' in package dependencies. 2. move tes…
middleyuan Apr 15, 2024
ee9ec34
-
middleyuan Apr 15, 2024
a260695
update config_overrides in examples of rl
middleyuan Apr 15, 2024
13ef164
run pre-commit hooks to improve linting
middleyuan Apr 16, 2024
3bbd0ba
1. ignore W503 and W504 as they conflict in pre-commit-config. 2. run…
middleyuan Apr 16, 2024
56f2738
add activation config to the examples that use RL.
middleyuan Apr 16, 2024
ba837c5
1. standardize hpo template in the examples. 2. remove _learn(). 3. a…
middleyuan Apr 17, 2024
51c601e
run pre-commit hooks.
middleyuan Apr 17, 2024
fb572f2
add gpmpc hpo test without using mysql
middleyuan Apr 17, 2024
c5fbeed
1. update config of cartpole task. 2. add max_steps and exponentiated…
middleyuan Apr 19, 2024
f8e3d0c
1. add bash files to automate hpo pipeline for gpmpc. 2. update gpmpc…
middleyuan Apr 22, 2024
1f62d3b
Merge remote-tracking branch 'upstream/main' into hpo
middleyuan Apr 22, 2024
ffb29de
match .gitignore to upstream/main.
middleyuan Apr 22, 2024
1508634
update for review
middleyuan Apr 24, 2024
896ac9f
update based on the review comments.
middleyuan May 13, 2024
3b49f7b
fix typo in readme.
middleyuan May 14, 2024
1 change: 1 addition & 0 deletions examples/cbf/config_overrides/ppo_config.yaml
@@ -2,6 +2,7 @@ algo: ppo
algo_config:
  # model args
  hidden_dim: 64
  activation: "relu"
  norm_obs: False
  norm_reward: False
  clip_obs: 10.0
1 change: 1 addition & 0 deletions examples/cbf/config_overrides/sac_config.yaml
@@ -2,6 +2,7 @@ algo: sac
algo_config:
  # model args
  hidden_dim: 256
  activation: "relu"
  use_entropy_tuning: False

  # optim args
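The `activation` key added to the PPO and SAC configs above selects the network nonlinearity by name. As a rough sketch of how such a string-to-module lookup can work in PyTorch (the table below is illustrative; the PR's controllers define their own mapping):

import torch.nn as nn

# Hypothetical name-to-module table, for illustration only.
ACTIVATIONS = {'relu': nn.ReLU, 'leaky_relu': nn.LeakyReLU, 'tanh': nn.Tanh}

def make_activation(name):
    # Fail loudly on an unsupported name instead of silently defaulting.
    if name not in ACTIVATIONS:
        raise ValueError(f'Unsupported activation: {name}')
    return ACTIVATIONS[name]()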
67 changes: 67 additions & 0 deletions examples/hpo/gp_mpc/config_overrides/cartpole/cartpole_stab.yaml
@@ -0,0 +1,67 @@
task_config:
  constraints:
  - constraint_form: default_constraint
    constrained_variable: input
  - constraint_form: default_constraint
    constrained_variable: state
    upper_bounds:
    - 100
    - 100
    - 100
    - 100
    lower_bounds:
    - -100
    - -100
    - -100
    - -100
  cost: quadratic
  ctrl_freq: 15
  disturbances:
    observation:
    - disturbance_func: white_noise
      std: 0.0001
  done_on_violation: false
  episode_len_sec: 10
  gui: false
  inertial_prop:
    cart_mass: 1.0
    pole_length: 0.5
    pole_mass: 0.1
  inertial_prop_randomization_info: null
  info_in_reset: false
  init_state:
    init_x: 0.0
    init_x_dot: 0.0
    init_theta: 0.0
    init_theta_dot: 0.0
  init_state_randomization_info:
    init_x:
      distrib: 'uniform'
      low: -0.1
      high: 0.1
    init_x_dot:
      distrib: 'uniform'
      low: -0.1
      high: 0.1
    init_theta:
      distrib: 'uniform'
      low: -0.2
      high: 0.2
    init_theta_dot:
      distrib: 'uniform'
      low: -0.1
      high: 0.1
  normalized_rl_action_space: false
  prior_prop:
    cart_mass: 1.0
    pole_length: 0.5
    pole_mass: 0.1
  pyb_freq: 750
  randomized_inertial_prop: false
  randomized_init: true
  task: stabilization
  task_info:
    stabilization_goal: [0]
    stabilization_goal_tolerance: 0.005
  use_constraint_penalty: false
  verbose: false
@@ -0,0 +1,66 @@
algo: gp_mpc
algo_config:
  additional_constraints: null
  deque_size: 10
  eval_batch_size: 10
  gp_approx: mean_eq
  gp_model_path: null
  horizon: 20
  prior_info:
    prior_prop:
      cart_mass: 1.0
      pole_length: 0.5
      pole_mass: 0.1
  initial_rollout_std: 0.0
  input_mask: null
  learning_rate:
  - 0.01
  - 0.01
  - 0.01
  - 0.01
  normalize_training_data: false
  online_learning: false
  optimization_iterations:
  - 3000
  - 3000
  - 3000
  - 3000
  overwrite_saved_data: false
  prior_param_coeff: 1.5
  prob: 0.95
  q_mpc:
  - 1
  - 1
  - 1
  - 1
  r_mpc:
  - 0.1
  kernel: Matern
  sparse_gp: True
  n_ind_points: 40
  inducing_point_selection_method: 'kmeans'
  recalc_inducing_points_at_every_step: false
  soft_constraints:
    gp_soft_constraints: false
    gp_soft_constraints_coeff: 0
    prior_soft_constraints: true
    prior_soft_constraints_coeff: 10
  target_mask: null
  train_iterations: null
  test_data_ratio: 0.2
  use_prev_start: true
  warmstart: true
  num_epochs: 5
  num_samples: 75
  num_test_episodes_per_epoch: 2
  num_train_episodes_per_epoch: 2
  same_test_initial_state: true
  same_train_initial_state: false
  rand_data_selection: false
  terminate_train_on_done: True
  terminate_test_on_done: False
  parallel: True

device: cpu
restore: null
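The `kernel: Matern` entry above is one of the hyperparameters this PR exposes (the optimized file further down switches it to 'RBF'). A minimal sketch of mapping such a kernel name onto GPyTorch, assuming the stock `gpytorch.kernels` API; the PR's GP model performs its own construction, possibly with batch shapes for the batched GP mentioned in the commit history:

import torch
import gpytorch

def build_kernel(name, batch_shape=torch.Size([])):
    # Illustrative lookup between config strings and GPyTorch kernels.
    if name == 'Matern':
        base = gpytorch.kernels.MaternKernel(nu=2.5, batch_shape=batch_shape)
    elif name == 'RBF':
        base = gpytorch.kernels.RBFKernel(batch_shape=batch_shape)
    else:
        raise ValueError(f'Unknown kernel: {name}')
    # ScaleKernel adds a learnable outputscale on top of the base kernel.
    return gpytorch.kernels.ScaleKernel(base, batch_shape=batch_shape)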
@@ -0,0 +1,36 @@
hpo_config:

  hpo: True  # do hyperparameter optimization
  load_if_exists: True  # this should be set to True if hpo is run in parallel
  use_database: False  # this is set to True if MySQL is used
  objective: [exponentiated_avg_return]  # [other metrics defined in base_experiment.py]
  direction: [maximize]  # [maximize, maximize]
  dynamical_runs: False  # if True, dynamically increase runs
  warm_trials: 20  # number of trials to run before dynamical runs
  approximation_threshold: 5  # this is only used when dynamical_runs is True
  repetitions: 5  # number of samples of performance for each objective query
  alpha: 1  # significance level for CVaR
  use_gpu: True
  dashboard: False
  seed: 24
  save_n_best_hps: 3
  # budget
  trials: 40

  # hyperparameters
  hps_config:
    horizon: 20
    learning_rate:
    - 0.01
    - 0.01
    - 0.01
    - 0.01
    optimization_iterations:
    - 3000
    - 3000
    - 3000
    - 3000
    kernel: Matern
    n_ind_points: 35
    num_epochs: 5
    num_samples: 75
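This `hpo_config` maps fairly directly onto an Optuna study: the sampler is chosen on the command line (TPE or random, per the commit history), `use_database` toggles MySQL storage for parallel workers, and `load_if_exists` lets those workers join one study. A minimal sketch of that wiring, with a made-up study name and DSN (the real logic lives in safe_control_gym/hyperparameters/hpo.py):

import optuna
from optuna.samplers import TPESampler

use_database = False  # mirrors use_database in hpo_config
# Hypothetical MySQL DSN; None falls back to Optuna's in-memory storage.
storage = 'mysql://user:password@localhost/hpo' if use_database else None

study = optuna.create_study(
    study_name='gp_mpc_hpo',      # made-up name, for illustration
    sampler=TPESampler(seed=24),  # seed: 24 from hpo_config
    direction='maximize',         # direction: [maximize]
    storage=storage,
    load_if_exists=True,          # load_if_exists: True for parallel HPO
)
# study.optimize(objective, n_trials=40)  # trials: 40 is the budget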
@@ -0,0 +1,7 @@
horizon: 35
kernel: 'RBF'
n_ind_points: 40
num_epochs: 5
num_samples: 75
optimization_iterations: [2800, 2800, 2800, 2800]
learning_rate: [0.023172075157730145, 0.023172075157730145, 0.023172075157730145, 0.023172075157730145]
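A file like the seven-line one above is both an output of HPO (via `save_n_best_hps`) and the input to retraining: passing it back with `--func train --opt_hps path/to/hyperparameters.yaml` (a placeholder path) lets hpo_experiment.py, shown next, override the matching `algo_config` keys before training starts.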
119 changes: 119 additions & 0 deletions examples/hpo/hpo_experiment.py
@@ -0,0 +1,119 @@
"""Template hyperparameter optimization/hyperparameter evaluation script.

"""
import os
from functools import partial

import yaml

import matplotlib.pyplot as plt
import numpy as np

from safe_control_gym.envs.benchmark_env import Environment, Task

from safe_control_gym.hyperparameters.hpo import HPO
from safe_control_gym.experiments.base_experiment import BaseExperiment
from safe_control_gym.utils.configuration import ConfigFactory
from safe_control_gym.utils.registration import make
from safe_control_gym.utils.utils import set_device_from_config, set_dir_from_config, set_seed_from_config


def hpo(config):
"""Hyperparameter optimization.

Usage:
* to start HPO, use with `--func hpo`.

"""

# Experiment setup.
if config.hpo_config.hpo:
set_dir_from_config(config)
set_seed_from_config(config)
set_device_from_config(config)

# HPO
hpo = HPO(config.algo,
config.task,
config.sampler,
config.load_study,
config.output_dir,
config.task_config,
config.hpo_config,
**config.algo_config)

if config.hpo_config.hpo:
hpo.hyperparameter_optimization()
print('Hyperparameter optimization done.')


def train(config):
"""Training for a given set of hyperparameters.

Usage:
* to start training, use with `--func train`.

"""
# Override algo_config with given yaml file
if config.opt_hps == '':
# if no opt_hps file is given
pass
else:
# if opt_hps file is given
with open(config.opt_hps, 'r') as f:
opt_hps = yaml.load(f, Loader=yaml.FullLoader)
for hp in opt_hps:
if isinstance(config.algo_config[hp], list) and not isinstance(opt_hps[hp], list):
config.algo_config[hp] = [opt_hps[hp]] * len(config.algo_config[hp])
else:
config.algo_config[hp] = opt_hps[hp]
# Experiment setup.
set_dir_from_config(config)
set_seed_from_config(config)
set_device_from_config(config)

# Define function to create task/env.
env_func = partial(make, config.task, output_dir=config.output_dir, **config.task_config)
# Create the controller/control_agent.
# Note:
# eval_env will take config.seed * 111 as its seed
# env will take config.seed as its seed
control_agent = make(config.algo,
env_func,
training=True,
checkpoint_path=os.path.join(config.output_dir, 'model_latest.pt'),
output_dir=config.output_dir,
use_gpu=config.use_gpu,
seed=config.seed,
**config.algo_config)
control_agent.reset()

eval_env = env_func(seed=config.seed * 111)
# Run experiment
experiment = BaseExperiment(eval_env, control_agent)
experiment.launch_training()
results, metrics = experiment.run_evaluation(n_episodes=1, n_steps=None, done_on_max_steps=True)
control_agent.close()

return eval_env.X_GOAL, results, metrics


MAIN_FUNCS = {'hpo': hpo, 'train': train}


if __name__ == '__main__':

# Make config.
fac = ConfigFactory()
fac.add_argument('--func', type=str, default='train', help='main function to run.')
fac.add_argument('--opt_hps', type=str, default='', help='yaml file as a result of HPO.')
fac.add_argument('--load_study', type=bool, default=False, help='whether to load study from a previous HPO.')
fac.add_argument('--sampler', type=str, default='TPESampler', help='which sampler to use in HPO.')
# merge config
config = fac.merge()

# Execute.
func = MAIN_FUNCS.get(config.func, None)
if func is None:
raise Exception('Main function {} not supported.'.format(config.func))
func(config)
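One subtlety in `train()` above: an optimized file may store a scalar where `algo_config` expects one value per GP output dimension (e.g. `learning_rate`). The override loop broadcasts scalars across the list; a standalone illustration of that rule with made-up values:

# Self-contained rerun of the broadcast rule from train().
algo_config = {'learning_rate': [0.01, 0.01, 0.01, 0.01], 'horizon': 20}
opt_hps = {'learning_rate': 0.0232, 'horizon': 35}  # made-up optimized values

for hp in opt_hps:
    if isinstance(algo_config[hp], list) and not isinstance(opt_hps[hp], list):
        # A scalar replaces every entry of a list-valued hyperparameter.
        algo_config[hp] = [opt_hps[hp]] * len(algo_config[hp])
    else:
        algo_config[hp] = opt_hps[hp]

print(algo_config)
# {'learning_rate': [0.0232, 0.0232, 0.0232, 0.0232], 'horizon': 35}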
61 changes: 61 additions & 0 deletions examples/hpo/rl/config_overrides/cartpole/cartpole_stab.yaml
@@ -0,0 +1,61 @@
task_config:
  info_in_reset: True
  ctrl_freq: 15
  pyb_freq: 750
  physics: pyb

  # state initialization
  init_state:
    init_x: 0.0
    init_x_dot: 0.0
    init_theta: 0.0
    init_theta_dot: 0.0
  randomized_init: True
  randomized_inertial_prop: False
  normalized_rl_action_space: True

  init_state_randomization_info:
    init_x:
      distrib: 'uniform'
      low: -0.1
      high: 0.1
    init_x_dot:
      distrib: 'uniform'
      low: -0.1
      high: 0.1
    init_theta:
      distrib: 'uniform'
      low: -0.2
      high: 0.2
    init_theta_dot:
      distrib: 'uniform'
      low: -0.1
      high: 0.1

  task: stabilization
  task_info:
    stabilization_goal: [0]
    stabilization_goal_tolerance: 0.005

  inertial_prop:
    pole_length: 0.5
    cart_mass: 1
    pole_mass: 0.1

  episode_len_sec: 10
  cost: rl_reward
  obs_goal_horizon: 1

  # RL Reward
  rew_state_weight: [1, 1, 1, 1]
  rew_act_weight: 0.1
  rew_exponential: True

  # constraints
  constraints:
  - constraint_form: default_constraint
    constrained_variable: state
  - constraint_form: default_constraint
    constrained_variable: input
  done_on_out_of_bound: True
  done_on_violation: False
@@ -0,0 +1,13 @@
activation: leaky_relu
actor_lr: 0.0007948148615930024
clip_param: 0.1
critic_lr: 0.007497368468753617
entropy_coef: 0.00010753631441212628
gae_lambda: 0.8
gamma: 0.98
hidden_dim: 32
max_env_steps: 72000
mini_batch_size: 128
opt_epochs: 5
rollout_steps: 150
target_kl: 1.587713889686473e-07