From cbb1159f32fe5b30d56760e9f2de6b92415c9a42 Mon Sep 17 00:00:00 2001
From: Shambhuraj Sawant
Date: Thu, 9 Nov 2023 14:29:33 +0100
Subject: [PATCH] initial commit for qlearning_mpc

---
 .../cartpole/cartpole_stab.yaml        | 28 +++++++++----------
 .../controllers/mpc/qlearning_mpc.py   |  4 +--
 .../controllers/mpc/qlearning_mpc.yaml |  2 +-
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/examples/rlmpc/config_overrides/cartpole/cartpole_stab.yaml b/examples/rlmpc/config_overrides/cartpole/cartpole_stab.yaml
index bc1daacaf..3cce88187 100644
--- a/examples/rlmpc/config_overrides/cartpole/cartpole_stab.yaml
+++ b/examples/rlmpc/config_overrides/cartpole/cartpole_stab.yaml
@@ -8,10 +8,10 @@ task_config:
   # state initialization
   init_state:
     init_x: 0.1
-    init_x_dot: -1.5
-    init_theta: -0.175
-    init_theta_dot: 0.5
-  randomized_init: True
+    init_x_dot: -0.5
+    init_theta: -0.1
+    init_theta_dot: 0.0
+  randomized_init: False
   randomized_inertial_prop: False
 
   normalized_rl_action_space: True
@@ -56,16 +56,16 @@ task_config:
   constraints:
   - constraint_form: default_constraint
     constrained_variable: state
-    upper_bounds:
-    - 2
-    - 2
-    - 0.18
-    - 2
-    lower_bounds:
-    - -2
-    - -2
-    - -0.18
-    - -2
+    # upper_bounds:
+    # - 20
+    # - 20
+    # - 0.58
+    # - 20
+    # lower_bounds:
+    # - -20
+    # - -20
+    # - -0.58
+    # - -20
   - constraint_form: default_constraint
     constrained_variable: input
   done_on_out_of_bound: True
diff --git a/safe_control_gym/controllers/mpc/qlearning_mpc.py b/safe_control_gym/controllers/mpc/qlearning_mpc.py
index da8809f3b..5fb10e95f 100644
--- a/safe_control_gym/controllers/mpc/qlearning_mpc.py
+++ b/safe_control_gym/controllers/mpc/qlearning_mpc.py
@@ -238,7 +238,7 @@ def setup_optimizer(self):
         opti.minimize(cost)
         # Create solver (IPOPT solver in this version)
         # opts = {'ipopt.print_level': 0, 'ipopt.sb': 'yes', 'print_time': 0}
-        opts = {'expand': True}
+        opts = {'expand': True, 'ipopt.print_level': 0, 'print_time': 0}
         opti.solver('ipopt', opts)
         self.opti_dict = {
             'opti': opti,
@@ -298,7 +298,7 @@ def select_action(self,
         self.results_dict['horizon_states'].append(deepcopy(self.x_prev))
         self.results_dict['horizon_inputs'].append(deepcopy(self.u_prev))
         self.results_dict['goal_states'].append(deepcopy(goal_states))
-        self.results_dict['t_wall'].append(opti.stats()['t_wall_total'])
+        # self.results_dict['t_wall'].append(opti.stats()['t_wall_total'])
         # Take the first action from the solved action sequence.
         if u_val.ndim > 1:
             action = u_val[:, 0]
diff --git a/safe_control_gym/controllers/mpc/qlearning_mpc.yaml b/safe_control_gym/controllers/mpc/qlearning_mpc.yaml
index 407961e6b..24cf0c060 100644
--- a/safe_control_gym/controllers/mpc/qlearning_mpc.yaml
+++ b/safe_control_gym/controllers/mpc/qlearning_mpc.yaml
@@ -4,7 +4,7 @@ r_mpc:
 - 0.1
 q_mpc:
 - 1.
-warmstart: True
+warmstart: False
 soft_constraints: False
 constraint_tol: 1.0e-6
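
Editor's note (not part of the patch): a minimal standalone sketch of how the solver options installed by the qlearning_mpc.py hunk behave in CasADi's Opti interface. Only the opts dictionary mirrors the patch; the toy objective, constraint, and variable below are illustrative assumptions and do not come from the repository.

    # Sketch: quieter IPOPT setup through CasADi's Opti, using the same opts
    # dict as the patch. The optimization problem itself is a made-up example.
    import casadi as cs

    opti = cs.Opti()
    x = opti.variable(2)                           # toy decision variables
    opti.minimize((x[0] - 1)**2 + (x[1] + 2)**2)   # illustrative objective
    opti.subject_to(x[0] + x[1] >= 0)              # illustrative constraint

    # Same options the patch installs in setup_optimizer():
    # 'expand' converts the MX graph to SX before it reaches IPOPT,
    # 'ipopt.print_level' 0 drops the per-iteration log,
    # 'print_time' 0 drops CasADi's timing summary.
    opts = {'expand': True, 'ipopt.print_level': 0, 'print_time': 0}
    opti.solver('ipopt', opts)

    sol = opti.solve()
    print(sol.value(x))                            # iteration log stays quiet

Expanding to SX typically speeds up repeated function evaluations for small dense problems like this horizon-length MPC, and suppressing the solver log matters because select_action() triggers one IPOPT solve per control step.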