From cbb1159f32fe5b30d56760e9f2de6b92415c9a42 Mon Sep 17 00:00:00 2001
From: Shambhuraj Sawant
Date: Thu, 9 Nov 2023 14:29:33 +0100
Subject: [PATCH] initial commit for qlearning_mpc

---
 .../cartpole/cartpole_stab.yaml        | 28 +++++++++----------
 .../controllers/mpc/qlearning_mpc.py   |  4 +--
 .../controllers/mpc/qlearning_mpc.yaml |  2 +-
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/examples/rlmpc/config_overrides/cartpole/cartpole_stab.yaml b/examples/rlmpc/config_overrides/cartpole/cartpole_stab.yaml
index bc1daacaf..3cce88187 100644
--- a/examples/rlmpc/config_overrides/cartpole/cartpole_stab.yaml
+++ b/examples/rlmpc/config_overrides/cartpole/cartpole_stab.yaml
@@ -8,10 +8,10 @@ task_config:
   # state initialization
   init_state:
     init_x: 0.1
-    init_x_dot: -1.5
-    init_theta: -0.175
-    init_theta_dot: 0.5
-  randomized_init: True
+    init_x_dot: -0.5
+    init_theta: -0.1
+    init_theta_dot: 0.0
+  randomized_init: False
   randomized_inertial_prop: False
 
   normalized_rl_action_space: True
@@ -56,16 +56,16 @@ task_config:
   constraints:
   - constraint_form: default_constraint
     constrained_variable: state
-    upper_bounds:
-    - 2
-    - 2
-    - 0.18
-    - 2
-    lower_bounds:
-    - -2
-    - -2
-    - -0.18
-    - -2
+    # upper_bounds:
+    # - 20
+    # - 20
+    # - 0.58
+    # - 20
+    # lower_bounds:
+    # - -20
+    # - -20
+    # - -0.58
+    # - -20
   - constraint_form: default_constraint
     constrained_variable: input
   done_on_out_of_bound: True
diff --git a/safe_control_gym/controllers/mpc/qlearning_mpc.py b/safe_control_gym/controllers/mpc/qlearning_mpc.py
index da8809f3b..5fb10e95f 100644
--- a/safe_control_gym/controllers/mpc/qlearning_mpc.py
+++ b/safe_control_gym/controllers/mpc/qlearning_mpc.py
@@ -238,7 +238,7 @@ def setup_optimizer(self):
         opti.minimize(cost)
         # Create solver (IPOPT solver in this version)
         # opts = {'ipopt.print_level': 0, 'ipopt.sb': 'yes', 'print_time': 0}
-        opts = {'expand': True}
+        opts = {'expand': True, 'ipopt.print_level': 0, 'print_time': 0}
         opti.solver('ipopt', opts)
         self.opti_dict = {
             'opti': opti,
@@ -298,7 +298,7 @@ def select_action(self,
         self.results_dict['horizon_states'].append(deepcopy(self.x_prev))
         self.results_dict['horizon_inputs'].append(deepcopy(self.u_prev))
         self.results_dict['goal_states'].append(deepcopy(goal_states))
-        self.results_dict['t_wall'].append(opti.stats()['t_wall_total'])
+        # self.results_dict['t_wall'].append(opti.stats()['t_wall_total'])
         # Take the first action from the solved action sequence.
         if u_val.ndim > 1:
             action = u_val[:, 0]
diff --git a/safe_control_gym/controllers/mpc/qlearning_mpc.yaml b/safe_control_gym/controllers/mpc/qlearning_mpc.yaml
index 407961e6b..24cf0c060 100644
--- a/safe_control_gym/controllers/mpc/qlearning_mpc.yaml
+++ b/safe_control_gym/controllers/mpc/qlearning_mpc.yaml
@@ -4,7 +4,7 @@ r_mpc:
 - 0.1
 q_mpc:
 - 1.
-warmstart: True
+warmstart: False
 soft_constraints: False
 constraint_tol: 1.0e-6
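
Editor's note (not part of the patch): a minimal standalone sketch of how the solver options installed by the qlearning_mpc.py hunk behave in CasADi's Opti interface. Only the opts dictionary mirrors the patch; the toy objective, constraint, and variable below are illustrative assumptions and do not come from the repository.

    # Sketch: quieter IPOPT setup through CasADi's Opti, using the same opts
    # dict as the patch. The optimization problem itself is a made-up example.
    import casadi as cs

    opti = cs.Opti()
    x = opti.variable(2)                           # toy decision variables
    opti.minimize((x[0] - 1)**2 + (x[1] + 2)**2)   # illustrative objective
    opti.subject_to(x[0] + x[1] >= 0)              # illustrative constraint

    # Same options the patch installs in setup_optimizer():
    # 'expand' converts the MX graph to SX before it reaches IPOPT,
    # 'ipopt.print_level' 0 drops the per-iteration log,
    # 'print_time' 0 drops CasADi's timing summary.
    opts = {'expand': True, 'ipopt.print_level': 0, 'print_time': 0}
    opti.solver('ipopt', opts)

    sol = opti.solve()
    print(sol.value(x))                            # iteration log stays quiet

Expanding to SX typically speeds up repeated function evaluations for small dense problems like this horizon-length MPC, and suppressing the solver log matters because select_action() triggers one IPOPT solve per control step.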