initial commit for qlearning_mpc

utiasDSL · Nov 9, 2023 · cbb1159
1 parent 8841562
commit cbb1159
Show file tree

Hide file tree

Showing 3 changed files with 17 additions and 17 deletions.
diff --git a/examples/rlmpc/config_overrides/cartpole/cartpole_stab.yaml b/examples/rlmpc/config_overrides/cartpole/cartpole_stab.yaml
@@ -8,10 +8,10 @@ task_config:
   # state initialization
   init_state:
     init_x: 0.1
-    init_x_dot: -1.5
-    init_theta: -0.175
-    init_theta_dot: 0.5
-  randomized_init: True
+    init_x_dot: -0.5
+    init_theta: -0.1
+    init_theta_dot: 0.0
+  randomized_init: False
   randomized_inertial_prop: False
   normalized_rl_action_space: True
 
@@ -56,16 +56,16 @@ task_config:
   constraints:
     - constraint_form: default_constraint
       constrained_variable: state
-      upper_bounds:
-        - 2
-        - 2
-        - 0.18
-        - 2
-      lower_bounds:
-        - -2
-        - -2
-        - -0.18
-        - -2
+      # upper_bounds:
+      #   - 20
+      #   - 20
+      #   - 0.58
+      #   - 20
+      # lower_bounds:
+      #   - -20
+      #   - -20
+      #   - -0.58
+      #   - -20
     - constraint_form: default_constraint
       constrained_variable: input
   done_on_out_of_bound: True

diff --git a/safe_control_gym/controllers/mpc/qlearning_mpc.py b/safe_control_gym/controllers/mpc/qlearning_mpc.py
@@ -238,7 +238,7 @@ def setup_optimizer(self):
         opti.minimize(cost)
         # Create solver (IPOPT solver in this version)
         # opts = {'ipopt.print_level': 0, 'ipopt.sb': 'yes', 'print_time': 0}
-        opts = {'expand': True}
+        opts = {'expand': True, 'ipopt.print_level': 0, 'print_time': 0}
         opti.solver('ipopt', opts)
         self.opti_dict = {
             'opti': opti,
@@ -298,7 +298,7 @@ def select_action(self,
         self.results_dict['horizon_states'].append(deepcopy(self.x_prev))
         self.results_dict['horizon_inputs'].append(deepcopy(self.u_prev))
         self.results_dict['goal_states'].append(deepcopy(goal_states))
-        self.results_dict['t_wall'].append(opti.stats()['t_wall_total'])
+        # self.results_dict['t_wall'].append(opti.stats()['t_wall_total'])
         # Take the first action from the solved action sequence.
         if u_val.ndim > 1:
             action = u_val[:, 0]

diff --git a/safe_control_gym/controllers/mpc/qlearning_mpc.yaml b/safe_control_gym/controllers/mpc/qlearning_mpc.yaml
@@ -4,7 +4,7 @@ r_mpc:
   - 0.1
 q_mpc:
   - 1.
-warmstart: True
+warmstart: False
 soft_constraints: False
 constraint_tol: 1.0e-6