# Cartpole Problem

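**Note**: the code cells below load `inverted_double_pendulum.xml`, whose state
has six components (`dynamics.state_size` prints `6`); the four-component
vectors shown here describe the single-pole cartpole.

The state and control vectors $\textbf{x}$ and $\textbf{u}$ are defined as follows: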

$$
\begin{equation*}
\textbf{x} = \begin{bmatrix}
    x & \theta & \dot{x} & \dot{\theta}
    \end{bmatrix}^T
\end{equation*}
$$

$$
\begin{equation*}
\textbf{u} = \begin{bmatrix}
    F_{x}
    \end{bmatrix}^T
\end{equation*}
$$

The goal is to swing the pendulum upright:

$$
\begin{equation*}
\textbf{x}_{goal} = \begin{bmatrix}
    0 & 0 & 0 & 0
    \end{bmatrix}^T
\end{equation*}
$$

**Note**: The force is constrained between $-1$ and $1$. This is achieved by
optimizing over unconstrained actions and passing them through a squashing
function $\tanh(\textbf{u})$. The squashing is embedded directly in the dynamics
model so that it can be auto-differentiated. This also means that we need to
apply the same transformation manually to the output of our iLQR at the end.

In [1]:
%matplotlib inline

In [2]:
from __future__ import print_function

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import time

In [4]:
from ilqr.cost import QRCost, FiniteDiffCost
from ilqr.mujoco_dynamics import MujocoDynamics
from ilqr.mujoco_controller import iLQR, RecedingHorizonController
from ilqr.examples.cartpole import CartpoleDynamics
from ilqr.dynamics import constrain

from scipy.optimize import approx_fprime

import mujoco_py
from mujoco_py import MjViewer
import os

In [5]:
def on_iteration(iteration_count, xs, us, J_opt, accepted, converged):
    """Record and print the progress of one solver iteration.

    Appends ``J_opt`` to the module-level ``J_hist`` list (which must exist
    before this callback is invoked) and prints the iteration number, a
    status word, ``J_opt`` and the last entry of ``xs``.

    Args:
        iteration_count: Value printed as the iteration number.
        xs: Sequence whose last element is printed as the final state.
        us: Unused by this callback.
        J_opt: Cost value to record and print.
        accepted: Truthy if the iteration was accepted.
        converged: Truthy if the solver converged; takes priority over
            ``accepted`` when choosing the status word.
    """
    J_hist.append(J_opt)

    # "converged" wins over "accepted"; anything else is reported as "failed".
    if converged:
        info = "converged"
    elif accepted:
        info = "accepted"
    else:
        info = "failed"

    print("iteration", iteration_count, info, J_opt, xs[-1])

In [6]:
# Path to the MuJoCo model file, relative to the notebook's working directory.
xml_path = os.path.join(
    os.pardir, 'ilqr', 'xmls', 'inverted_double_pendulum.xml'
)

# Build the dynamics wrapper around that model and show its time step.
dynamics = MujocoDynamics(xml_path, frame_skip=4)
print(dynamics.dt)

0.04


In [7]:
print(dynamics.state_size)

# Target state: all six components at zero.
x_goal = np.zeros(6)

# Instantaneous state cost: start from the identity and overwrite the first
# six diagonal entries with hand-picked weights.
Q = np.eye(dynamics.state_size)
diag_weights = [5.0, 50.0, 50.0, 20.0, 700.0, 700.0]
for k, weight in enumerate(diag_weights):
    Q[k, k] = weight

# Terminal state cost.
Q_terminal = 10 * Q

# Instantaneous control cost.
R = np.eye(1)

cost1 = QRCost(Q, R, Q_terminal=Q_terminal, x_goal=x_goal)

6


In [8]:
def l(x, u, i):
    """Scalar cost of state ``x`` and control ``u``.

    The cost is the sum of:
      * ``x[0] ** 2``,
      * ``10 * (cos(x[k]) - 1) ** 2`` for ``k`` in 1, 2,
      * ``x[k] ** 2`` for ``k`` in 3, 4, 5,
      * ``u[0] ** 2``.

    Args:
        x: Indexable state with at least six entries.
        u: Indexable control with at least one entry.
        i: Unused by this function.

    Returns:
        The summed cost.
    """
    total = x[0] ** 2
    # Terms are added one at a time, left to right, so the floating-point
    # result does not depend on regrouping.
    for k in (1, 2):
        total = total + 10 * (np.cos(x[k]) - 1) ** 2
    for k in (3, 4, 5):
        total = total + x[k] ** 2
    return total + u[0] ** 2

# Wrap `l` in a FiniteDiffCost. The second callable takes only (x, i) and
# evaluates `l` with a zero control.
# NOTE(review): the second callable is presumably the terminal cost, and 6 / 1
# presumably the state and action sizes — confirm against FiniteDiffCost.
cost2 = FiniteDiffCost(l, lambda x, i: l(x, [0.0], i), 6, 1, use_multiprocessing = True)

Finished loading process 52498
Finished loading process 52499
Finished loading process 52500
Finished loading process 52501
Finished loading process 52502
Finished loading process 52503
Finished loading process 52504
Finished loading process 52505
Finished loading process 52506
Finished loading process 52507
Finished loading process 52508
Finished loading process 52509
Finished loading process 52510
Finished loading process 52511
Finished loading process 52512
Finished loading process 52513


In [9]:
# N is the number of rows in the initial control sequence and the third
# argument passed to iLQR.
N = 100

# Start from the all-zero state with an all-zero initial control sequence.
# Randomised alternatives that were tried:
#   x0 = np.array([0.0, np.random.uniform(-np.pi, np.pi), np.random.uniform(-np.pi, np.pi), 0.0, 0.0, 0.0])
#   us_init = np.random.uniform(-1, 1, (N, dynamics.action_size))
# NOTE(review): the recorded output of the replay cell shows a non-zero start
# state, so the saved outputs may come from the randomised x0 — confirm.
x0 = np.zeros(6)
us_init = np.zeros((N, dynamics.action_size))

# Solver using the finite-difference cost, wrapped in a receding-horizon
# controller that starts at x0.
ilqr = iLQR(dynamics, cost2, N)
mpc = RecedingHorizonController(x0, ilqr)

In [None]:
t0 = time.time()
J_hist = []  # filled by on_iteration with the cost reported at each iteration

controls = mpc.control(
    us_init,
    step_size=3,
    initial_n_iterations=500,
    subsequent_n_iterations=100,
    on_iteration=on_iteration,
)

us = []
last_step = 29
for i in range(last_step + 1):
    print('ITERATION', i, '\n')
    step = next(controls)
    # NOTE(review): element 1 is presumably the controls for the executed
    # step and element 2 the full remaining plan — confirm against
    # RecedingHorizonController.control. Element 2 is taken on the final
    # pass only.
    us.append(step[2] if i == last_step else step[1])

print('time', time.time() - t0)
us = np.concatenate(us)

ITERATION 0 

iteration 0 accepted 5262.628141128234 [ 0.44659346  4.69282842 -6.38518629 -0.03589576 -4.10998383 20.19246774]
iteration 1 accepted 2871.4460015993486 [-1.44295028  2.19702893 -5.36844781 -6.22165067 -2.06626185  6.39085672]
iteration 2 accepted 2295.885455529641 [ 1.31541204  3.0176254  -6.60189973  1.60515752  1.06864873  8.41832172]
iteration 3 accepted 2222.433161722189 [ 1.15957076  2.23805302 -6.69022121  2.90055996  5.10148562  2.41042719]
iteration 4 accepted 2101.1075321317358 [ 0.56446054  2.18716537 -6.68665762  1.47708935  5.17460236  2.67197205]
iteration 5 accepted 1905.0822479860815 [ 0.04262411  2.22935743 -6.76900157 -0.06763085  4.88229725  3.37223685]
iteration 6 accepted 1758.4860883915976 [ 0.69439958  1.575534   -6.8075173  -0.85060873  5.32084773 -0.3315661 ]
iteration 7 accepted 1730.4624794036765 [ 0.72887021  1.44287688 -7.21890822  0.75719422  4.40016201 -0.81258934]
iteration 8 accepted 1724.160452455622 [ 0.16834346  1.68310433 -7.17321727  

iteration 72 accepted 451.7493540130286 [-0.39448219  1.22469126  1.88045678  1.129637    6.32688486  1.49948163]
iteration 73 accepted 214.42567855913194 [ 0.87705962  0.57010494 -1.14595854  2.68893348  1.35102774 -6.23838147]
iteration 74 accepted 59.92836749856011 [ 0.0653795   0.22854487  0.0176417   0.53002182  1.29350935 -1.90855165]
iteration 75 accepted 57.437122794805205 [-0.03870893  0.41676217 -0.35861002  2.11398571 -1.15661173 -0.61197066]
iteration 76 accepted 38.470696369235085 [-0.04442957  0.49125217 -0.3025765   1.53884806  0.00337623 -1.09254691]
iteration 77 accepted 26.250818380848497 [-0.02587593  0.28948608 -0.20628116 -0.07307571  1.52224674 -2.14874938]
iteration 78 accepted 6.012417215203309 [ 0.01445211 -0.02339388  0.05441809 -0.37233063  0.73021332 -0.75739252]
iteration 79 accepted 0.8662083368433351 [ 0.00364503 -0.00900497  0.01065073  0.25548455 -0.53701931  0.68837231]
iteration 80 accepted 0.00030242603356719764 [-9.79289668e-05  2.20182022e-04 -2.84



ITERATION 2 

iteration 0 accepted 37.39991597244512 [-0.15851766 -0.67998522  0.23329168 -2.22715015 -0.61307786  1.20984764]
iteration 1 accepted 25.484456206226447 [-0.05409988 -0.36371077  0.19558418  0.34558139 -2.65007032  3.27583439]
iteration 2 accepted 1.9453492056496906 [-0.00275516  0.00714359 -0.06000585 -0.27156922  0.64424895 -1.12838122]
iteration 3 accepted 8.614837576040822e-05 [-1.32531854e-05 -3.56323573e-04  4.39481214e-04  9.25972908e-04
 -4.38733727e-03  7.30330209e-03]
iteration 4 accepted 1.4088314982625874e-09 [-2.67006704e-07 -7.55566594e-07 -8.31071900e-08  2.04281508e-06
 -2.46676864e-06  2.61135516e-06]
iteration 5 accepted 1.2618647748735184e-09 [ 6.92366639e-07 -8.87144067e-07  4.90937683e-09  3.41761761e-07
  1.34957562e-07  9.36806104e-09]
iteration 6 accepted 1.2616643359462824e-09 [ 6.86762642e-07 -8.90577606e-07  4.69492644e-09  3.38287726e-07
  1.30143339e-07  7.71505179e-09]
iteration 7 accepted 1.2602675132987726e-09 [ 6.50847163e-07 -9.13156004e-

iteration 19 accepted 7.77642412769899e-10 [ 3.15940990e-07 -1.17772710e-06 -4.45001745e-08 -4.69363667e-07
 -4.08362122e-07 -7.92845220e-08]
iteration 20 failed 7.776424127698992e-10 [ 3.15940990e-07 -1.17772710e-06 -4.45001745e-08 -4.69363667e-07
 -4.08362122e-07 -7.92845220e-08]
iteration 21 failed 7.776424127698992e-10 [ 3.15940990e-07 -1.17772710e-06 -4.45001745e-08 -4.69363667e-07
 -4.08362122e-07 -7.92845220e-08]
iteration 22 failed 7.776424127698992e-10 [ 3.15940990e-07 -1.17772710e-06 -4.45001745e-08 -4.69363667e-07
 -4.08362122e-07 -7.92845220e-08]
iteration 23 failed 7.776424127698992e-10 [ 3.15940990e-07 -1.17772710e-06 -4.45001745e-08 -4.69363667e-07
 -4.08362122e-07 -7.92845220e-08]
iteration 24 failed 7.776424127698992e-10 [ 3.15940990e-07 -1.17772710e-06 -4.45001745e-08 -4.69363667e-07
 -4.08362122e-07 -7.92845220e-08]
iteration 25 failed 7.776424127698992e-10 [ 3.15940990e-07 -1.17772710e-06 -4.45001745e-08 -4.69363667e-07
 -4.08362122e-07 -7.92845220e-08]
iteration 26

iteration 2 accepted 1.0152289063088837 [-0.00566805  0.01268901 -0.01648781 -0.2776994   0.58455388 -0.75773784]
iteration 3 accepted 4.239843066284753e-05 [ 3.30427209e-05 -9.72780347e-05  1.28873350e-04  1.68751783e-03
 -3.72540835e-03  4.97053725e-03]
iteration 4 accepted 5.308348431380878e-10 [ 1.00228220e-07 -1.00806222e-06  3.85600047e-08  6.67069310e-07
 -1.85256814e-06  2.42270511e-06]
iteration 5 accepted 5.274595694923484e-10 [ 1.24261756e-07 -1.02333881e-06 -2.24468805e-08  7.69157023e-07
 -1.86419857e-06  1.92399168e-06]
iteration 6 accepted 5.218576627283526e-10 [ 2.19957393e-07 -1.08641287e-06 -3.52705463e-08  5.70509316e-07
 -1.61414788e-06  1.43638161e-06]
iteration 7 accepted 5.217494000282903e-10 [ 2.19526115e-07 -1.08619838e-06 -3.53441090e-08  5.64778220e-07
 -1.60166215e-06  1.42154774e-06]
iteration 8 accepted 5.216434296403125e-10 [ 2.19069216e-07 -1.08599056e-06 -3.54182997e-08  5.59076606e-07
 -1.58930986e-06  1.40684995e-06]
iteration 9 accepted 5.21114060986

iteration 11 accepted 4.6543231732141124e-10 [-2.21986509e-07 -1.42479307e-06 -8.95083002e-08 -1.22935415e-06
 -8.90033055e-07 -3.20156469e-07]
iteration 12 accepted 4.6185024032495873e-10 [-2.35364361e-07 -1.41162376e-06 -8.76990880e-08 -1.19101002e-06
 -8.69793194e-07 -3.13816618e-07]
iteration 13 accepted 4.609522683506468e-10 [-2.66677182e-07 -1.41247826e-06 -8.95183082e-08 -1.19852024e-06
 -8.85123273e-07 -3.21235227e-07]
iteration 14 accepted 4.6091548159259035e-10 [-2.66540817e-07 -1.41230105e-06 -8.94835002e-08 -1.19797984e-06
 -8.84770210e-07 -3.21099692e-07]
iteration 15 failed 4.6091548159258993e-10 [-2.66540817e-07 -1.41230105e-06 -8.94835002e-08 -1.19797984e-06
 -8.84770210e-07 -3.21099692e-07]
iteration 16 failed 4.6091548159258993e-10 [-2.66540817e-07 -1.41230105e-06 -8.94835002e-08 -1.19797984e-06
 -8.84770210e-07 -3.21099692e-07]
iteration 17 failed 4.6091548159258993e-10 [-2.66540817e-07 -1.41230105e-06 -8.94835002e-08 -1.19797984e-06
 -8.84770210e-07 -3.21099692e-07]

In [11]:
# Replay the collected controls in a viewer: reset the simulation to x0, print
# the state it reports, then step and render once per control row.
viewer = MjViewer(dynamics.sim)
dynamics.set_state(x0)
print(dynamics.get_state())
for u in us:
    dynamics.step(u)
    viewer.render()

Creating window glfw
[0.         2.37780563 2.23655316 0.         0.         0.        ]


In [12]:
# Shape of the concatenated control array gathered from the MPC loop.
print(us.shape)

(187, 1)
