Merge pull request #139 from Federico-PizarroBejarano/minor_updates
Many minor updates
adamhall committed Oct 12, 2023
2 parents 0b118b4 + 560b1a3 commit 83fae93
Showing 71 changed files with 639 additions and 362 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -15,6 +15,7 @@ repos:
- id: check-yaml
- id: check-toml
- id: check-added-large-files
args: ['--maxkb=10000']
- id: check-docstring-first
- id: check-executables-have-shebangs
- id: check-shebang-scripts-are-executable
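For context, a minimal sketch of how the large-file hook reads with the new argument. Only the hook ids appear in this hunk; the repo URL and rev below are assumptions (these ids come from the standard pre-commit-hooks collection):

repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks  # assumed source of these hook ids
    rev: v4.4.0  # assumed revision; the actual rev is outside this hunk
    hooks:
      - id: check-added-large-files
        args: ['--maxkb=10000']  # reject files larger than ~10 MB instead of the 500 KB default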
4 changes: 2 additions & 2 deletions examples/mpsc/train_rl_model.sh
@@ -13,7 +13,7 @@ ALGO='ppo'
rm -r -f ./unsafe_rl_temp_data/

# Train the unsafe controller/agent.
python3 ../../safe_control_gym/experiments/execute_rl_controller.py \
python3 ../../safe_control_gym/experiments/train_rl_controller.py \
--algo ${ALGO} \
--task ${SYS} \
--overrides \
@@ -24,7 +24,7 @@ python3 ../../safe_control_gym/experiments/execute_rl_controller.py \
--seed 2

# Move the newly trained unsafe model.
mv ./unsafe_rl_temp_data/seed2_*/model_latest.pt ./models/${ALGO}_model_${SYS}_${TASK}.pt
mv ./unsafe_rl_temp_data/seed2_*/model_best.pt ./models/${ALGO}_model_${SYS}_${TASK}.pt

# Remove the temporary data used to train the new unsafe model.
rm -r -f ./unsafe_rl_temp_data/
19 changes: 12 additions & 7 deletions examples/rl/config_overrides/cartpole/cartpole_stab.yaml
@@ -7,26 +7,31 @@ task_config:
normalized_rl_action_space: True

# state initialization
init_state:
init_x: 0.1
init_x_dot: -1.5
init_theta: -0.155
init_theta_dot: 0.75
randomized_init: True
randomized_inertial_prop: False

init_state_randomization_info:
init_x:
distrib: 'uniform'
low: -1
high: 1
low: -2
high: 2
init_x_dot:
distrib: 'uniform'
low: -0.1
high: 0.1
low: -2
high: 2
init_theta:
distrib: 'uniform'
low: -0.16
high: 0.16
init_theta_dot:
distrib: 'uniform'
low: -0.1
high: 0.1
low: -1
high: 1

task: stabilization
task_info:
@@ -40,7 +45,7 @@ task_config:

episode_len_sec: 10
cost: rl_reward
obs_goal_horizon: 1
obs_goal_horizon: 0

# RL Reward
rew_state_weight: [1, 1, 1, 1]
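Collected for readability, the initial-state section of cartpole_stab.yaml as it reads after this commit (new values only; the nesting and indentation are assumed from the surrounding task_config block):

task_config:
  # state initialization
  init_state:
    init_x: 0.1
    init_x_dot: -1.5
    init_theta: -0.155
    init_theta_dot: 0.75
  randomized_init: True
  randomized_inertial_prop: False

  init_state_randomization_info:
    init_x:
      distrib: 'uniform'
      low: -2
      high: 2
    init_x_dot:
      distrib: 'uniform'
      low: -2
      high: 2
    init_theta:
      distrib: 'uniform'
      low: -0.16
      high: 0.16
    init_theta_dot:
      distrib: 'uniform'
      low: -1
      high: 1

The same widened uniform ranges appear in cartpole_track.yaml below.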
17 changes: 11 additions & 6 deletions examples/rl/config_overrides/cartpole/cartpole_track.yaml
@@ -7,26 +7,31 @@ task_config:
normalized_rl_action_space: True

# state initialization
init_state:
init_x: 0
init_x_dot: 0
init_theta: 0
init_theta_dot: 0
randomized_init: True
randomized_inertial_prop: False

init_state_randomization_info:
init_x:
distrib: 'uniform'
low: -1
high: 1
low: -2
high: 2
init_x_dot:
distrib: 'uniform'
low: -0.1
high: 0.1
low: -2
high: 2
init_theta:
distrib: 'uniform'
low: -0.16
high: 0.16
init_theta_dot:
distrib: 'uniform'
low: -0.1
high: 0.1
low: -1
high: 1

task: traj_tracking
task_info:
33 changes: 17 additions & 16 deletions examples/rl/config_overrides/cartpole/ppo_cartpole.yaml
@@ -1,40 +1,41 @@
algo: ppo
algo_config:
# model args
hidden_dim: 64
hidden_dim: 32
activation: 'leaky_relu'
norm_obs: False
norm_reward: False
clip_obs: 10.0
clip_reward: 10.0

# loss args
gamma: 0.99
use_gae: True
gae_lambda: 0.95
gamma: 0.98
use_gae: False
gae_lambda: 0.8
use_clipped_value: False
clip_param: 0.2
target_kl: 0.01
entropy_coef: 0.01
clip_param: 0.1
target_kl: 1.587713889686473e-07
entropy_coef: 0.00010753631441212628

# optim args
opt_epochs: 10
mini_batch_size: 64
actor_lr: 0.0003
critic_lr: 0.001
opt_epochs: 5
mini_batch_size: 128
actor_lr: 0.0007948148615930024
critic_lr: 0.007497368468753617
max_grad_norm: 0.5

# runner args
max_env_steps: 100000
max_env_steps: 300000
num_workers: 1
rollout_batch_size: 4
rollout_steps: 100
rollout_steps: 150
deque_size: 10
eval_batch_size: 10

# misc
log_interval: 1000
save_interval: 1000
log_interval: 6000
save_interval: 0
num_checkpoints: 0
eval_interval: 1000
eval_interval: 6000
eval_save_best: True
tensorboard: False
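Summarizing the pairs above, the PPO hyperparameters for cartpole after this commit, collecting the new values only (indentation assumed):

algo_config:
  hidden_dim: 32
  gamma: 0.98
  use_gae: False
  gae_lambda: 0.8
  clip_param: 0.1
  target_kl: 1.587713889686473e-07
  entropy_coef: 0.00010753631441212628
  opt_epochs: 5
  mini_batch_size: 128
  actor_lr: 0.0007948148615930024
  critic_lr: 0.007497368468753617
  max_env_steps: 300000
  rollout_steps: 150
  log_interval: 6000
  save_interval: 0
  num_checkpoints: 0
  eval_interval: 6000

These loss and optimizer settings match the ones used in the new safe_explorer_ppo configs further down.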
29 changes: 18 additions & 11 deletions examples/rl/config_overrides/cartpole/sac_cartpole.yaml
@@ -1,29 +1,36 @@
algo: sac
algo_config:
# model args
hidden_dim: 64
hidden_dim: 256
activation: 'relu'

# loss args
gamma: 0.98
tau: 0.12145208815621376
init_temperature: 0.2
use_entropy_tuning: False
target_entropy: null

# optim args
train_interval: 100
train_batch_size: 64
actor_lr: 0.001
critic_lr: 0.001
train_batch_size: 512
actor_lr: 0.00045196308120485273
critic_lr: 0.022547326782152065
entropy_lr: 0.001

# runner args
max_env_steps: 100000
warm_up_steps: 1000
max_env_steps: 150000
warm_up_steps: 100
rollout_batch_size: 4
num_workers: 1
max_buffer_size: 1000000
deque_size: 10
eval_batch_size: 10

# misc
log_interval: 1000
save_interval: 1000
num_checkpoints: 100
eval_interval: 1000
log_interval: 3000
save_interval: 0
num_checkpoints: 0
eval_interval: 3000
eval_save_best: True
tensorboard: True
tensorboard: False
47 changes: 47 additions & 0 deletions (new file)
@@ -0,0 +1,47 @@
algo: safe_explorer_ppo
algo_config:
# model args
hidden_dim: 32
activation: 'leaky_relu'
norm_obs: False
norm_reward: False
clip_obs: 10.0
clip_reward: 10.0

# Safety layer args
pretraining: False
pretrained: null
constraint_slack: [0.01, 0.01, 0.01, 0.05, 0.01, 0.01, 0.01, 0.05]
constraint_hidden_dim: 100

# loss args
gamma: 0.98
use_gae: False
gae_lambda: 0.8
use_clipped_value: False
clip_param: 0.1
target_kl: 1.587713889686473e-07
entropy_coef: 0.00010753631441212628

# optim args
opt_epochs: 5
mini_batch_size: 128
actor_lr: 0.0007948148615930024
critic_lr: 0.007497368468753617
max_grad_norm: 0.5

# runner args
max_env_steps: 300000
num_workers: 1
rollout_batch_size: 4
rollout_steps: 150
deque_size: 10
eval_batch_size: 10

# misc
log_interval: 6000
save_interval: 0
num_checkpoints: 0
eval_interval: 6000
eval_save_best: True
tensorboard: False
54 changes: 54 additions & 0 deletions (new file)
@@ -0,0 +1,54 @@
algo: safe_explorer_ppo
algo_config:
# model args
hidden_dim: 32
activation: 'leaky_relu'
norm_obs: False
norm_reward: False
clip_obs: 10.0
clip_reward: 10.0

# Safety layer args
pretraining: True
pretrained: null
constraint_hidden_dim: 100
constraint_lr: 0.0001
constraint_batch_size: 256
constraint_steps_per_epoch: 3000
constraint_epochs: 200
constraint_eval_steps: 1500
constraint_eval_interval: 5
constraint_buffer_size: 1000000
constraint_slack: [0.01, 0.01, 0.01, 0.05, 0.01, 0.01, 0.01, 0.05]

# loss args
gamma: 0.98
use_gae: False
gae_lambda: 0.8
use_clipped_value: False
clip_param: 0.1
target_kl: 1.587713889686473e-07
entropy_coef: 0.00010753631441212628

# optim args
opt_epochs: 5
mini_batch_size: 128
actor_lr: 0.0007948148615930024
critic_lr: 0.007497368468753617
max_grad_norm: 0.5

# runner args
max_env_steps: 100000
num_workers: 1
rollout_batch_size: 4
rollout_steps: 150
deque_size: 10
eval_batch_size: 10

# misc
log_interval: 100
save_interval: 0
num_checkpoints: 0
eval_interval: 100
eval_save_best: True
tensorboard: False
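The two new safe_explorer_ppo configs differ mainly in the safety-layer settings: one has pretraining: True and carries the constraint-model training arguments, the other has pretraining: False with pretrained: null. Presumably the non-pretraining run consumes the safety layer produced by the pretraining run. A hypothetical sketch of how the pretrained field could then be filled in (the path is illustrative, not from this commit):

algo_config:
  pretraining: False
  pretrained: ./pretrained_safety_layer/  # hypothetical path to the saved constraint models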
10 changes: 5 additions & 5 deletions examples/rl/config_overrides/quadrotor_2D/ppo_quadrotor_2D.yaml
@@ -19,9 +19,9 @@ algo_config:
rollout_steps: 1000

# misc
log_interval: 1000
save_interval: 1000
num_checkpoints: 10
eval_interval: 1000
log_interval: 10000
save_interval: 0
num_checkpoints: 0
eval_interval: 10000
eval_save_best: True
tensorboard: True
tensorboard: False
29 changes: 18 additions & 11 deletions examples/rl/config_overrides/quadrotor_2D/quadrotor_2D_stab.yaml
@@ -7,34 +7,41 @@ task_config:
quad_type: 2
normalized_rl_action_space: True

init_state:
init_x: 0.5
init_x_dot: 0
init_z: 1.5
init_z_dot: 0
init_theta: 0
init_theta_dot: 0
randomized_init: True
randomized_inertial_prop: False

init_state_randomization_info:
init_x:
distrib: 'uniform'
low: -1
high: 1
low: -2
high: 2
init_x_dot:
distrib: 'uniform'
low: -0.1
high: 0.1
low: -1
high: 1
init_z:
distrib: 'uniform'
low: 1
low: 0.3 # Just so it doesn't crash into the ground
high: 2
init_z_dot:
distrib: 'uniform'
low: -0.1
high: 0.1
low: -1
high: 1
init_theta:
distrib: 'uniform'
low: -0.2
high: 0.2
init_theta_dot:
distrib: 'uniform'
low: -0.1
high: 0.1
low: -1.5
high: 1.5

task: stabilization
task_info:
@@ -45,9 +52,9 @@ task_config:
M: 0.027
Iyy: 1.4e-05

episode_len_sec: 6
episode_len_sec: 5
cost: rl_reward
obs_goal_horizon: 1
obs_goal_horizon: 0

# RL Reward
rew_state_weight: [1, 1, 1, 1, 1, 1]
(Remaining changed files not shown.)
