# Cartpole Problem

The state and control vectors $\textbf{x}$ and $\textbf{u}$ are defined as follows:

$$
\begin{equation*}
\textbf{x} = \begin{bmatrix}
    x & \theta & \dot{x} & \dot{\theta}
    \end{bmatrix}^T
\end{equation*}
$$

$$
\begin{equation*}
\textbf{u} = \begin{bmatrix}
    F_{x}
    \end{bmatrix}^T
\end{equation*}
$$

The goal is to swing the pendulum upright:

$$
\begin{equation*}
\textbf{x}_{goal} = \begin{bmatrix}
    0 & 0 & 0 & 0
    \end{bmatrix}^T
\end{equation*}
$$

**Note**: The force is constrained between $-1$ and $1$. This is achieved by
optimizing over unconstrained actions and passing them through a squashing
function, $\tanh(\textbf{u})$. The squashing is embedded directly in the
dynamics model so that it can be auto-differentiated. This also means that the
same transformation must be applied manually to the controls returned by iLQR
at the end.

In [1]:
%matplotlib inline

In [2]:
from __future__ import print_function

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import time

In [4]:
from ilqr.cost import QRCost, FiniteDiffCost
from ilqr.mujoco_dynamics import MujocoDynamics
from ilqr.mujoco_controller import iLQR, RecedingHorizonController
from ilqr.examples.cartpole import CartpoleDynamics
from ilqr.dynamics import constrain

from scipy.optimize import approx_fprime

import mujoco_py
from mujoco_py import MjViewer
import os

In [5]:
def on_iteration(iteration_count, xs, us, J_opt, accepted, converged):
    """Record and report the outcome of one solver iteration.

    Appends ``J_opt`` to the module-level ``J_hist`` list (which must exist
    before this callback fires) and prints a one-line summary.

    Args:
        iteration_count: Index of the iteration being reported.
        xs: Sequence of states; only the last entry is printed.
        us: Sequence of controls (unused here).
        J_opt: Cost value reported for this iteration.
        accepted: Truthy if the iteration's step was accepted.
        converged: Truthy if the solver reported convergence; takes
            precedence over ``accepted`` in the printed status.
    """
    J_hist.append(J_opt)

    # "converged" wins over "accepted"; anything else is reported as "failed".
    if converged:
        status = "converged"
    elif accepted:
        status = "accepted"
    else:
        status = "failed"

    print("iteration", iteration_count, status, J_opt, xs[-1])

In [6]:
# Model file for the inverted pendulum, addressed relative to this notebook.
xml_path = os.path.join(
    '..', 'ilqr', 'xmls', 'inverted_pendulum.xml'
)

# Build the MuJoCo-backed dynamics model with multiprocessing enabled.
dynamics = MujocoDynamics(
    xml_path,
    frame_skip=2,
    use_multiprocessing=True
)

# Show the time step exposed by the dynamics object.
print(dynamics.dt)


Finished loading process 52767
Finished loading process 52768
Finished loading process 52769
Finished loading process 52770
Finished loading process 52771
Finished loading process 52772
Finished loading process 52773
Finished loading process 52774
Finished loading process 52775
Finished loading process 52776
Finished loading process 52777
0.04
Finished loading process 52778
Finished loading process 52779
Finished loading process 52780
Finished loading process 52781
Finished loading process 52782


In [7]:
# Goal state: all four state components at zero.
x_goal = np.zeros(4)

# Instantaneous state cost: diagonal weights, heavier on the first two
# state components (2 and 10) than on the last two (1 each).
Q = np.diag([2.0, 10.0, 1.0, 1.0])

# Terminal state cost: ten times the instantaneous state cost.
Q_terminal = 10 * Q

# Instantaneous control cost (single control input, unit weight).
R = np.eye(1)

# Quadratic cost around x_goal. Note that the controller built further down
# in this notebook is given cost2, not cost1.
cost1 = QRCost(Q, R, Q_terminal=Q_terminal, x_goal=x_goal)

In [8]:
# Same weights as Q and R above (2, 10, 1, 1 on the state; 1 on the control),
# written out as plain callables and handed to FiniteDiffCost.
# The first lambda takes (x, u, i) and the second takes (x, i); presumably these
# are the running and terminal costs, and 4 and 1 the state and action sizes
# -- TODO confirm against FiniteDiffCost's signature.
# NOTE(review): unlike cost1 (Q_terminal = 10 * Q), the second lambda is not
# scaled by 10 -- confirm this difference is intended.
cost2 = FiniteDiffCost(lambda x, u, i: 2 * (x[0] ** 2) + 10 * (x[1] ** 2) + x[2] ** 2 + x[3] ** 2 + u[0] ** 2,
                      lambda x, i: (2 * (x[0] ** 2) + 10 * (x[1] ** 2) + x[2] ** 2 + x[3] ** 2),
                      4, 1, use_multiprocessing = True)

Finished loading process 52783
Finished loading process 52784
Finished loading process 52785
Finished loading process 52786
Finished loading process 52787
Finished loading process 52788
Finished loading process 52789
Finished loading process 52790
Finished loading process 52791
Finished loading process 52792
Finished loading process 52793
Finished loading process 52794
Finished loading process 52795
Finished loading process 52796
Finished loading process 52797
Finished loading process 52798


In [12]:
# Number of time steps; used below both as the length of the initial control
# sequence and as the third argument to iLQR.
N = 100

# Initial state: everything at zero except the second component (theta),
# which is drawn uniformly from [-pi, pi).
x0 = np.zeros(4)
x0[1] = np.random.uniform(-np.pi, np.pi)
"""us_init = np.array([[-4.76751939e-01],
 [ 3.34490970e-01],
 [-3.99608551e-01],
 [ 8.41882163e-01],
 [-8.93302461e-01],
 [-3.57273055e-01],
 [-3.32158856e-01],
 [-4.82030121e-01],
 [-6.84388675e-01],
 [-4.26475287e-01],
 [-4.90913171e-01],
 [ 1.14754770e-01],
 [ 3.90275383e-01],
 [-4.36421243e-01],
 [ 5.57806778e-01],
 [ 7.83813923e-01],
 [-3.27778717e-01],
 [ 8.00582346e-01],
 [-8.49640982e-01],
 [-5.69222128e-01],
 [ 2.58447724e-01],
 [ 6.02857039e-01],
 [-6.11855326e-01],
 [ 7.00853348e-01],
 [-9.31090157e-01],
 [ 4.97665652e-01],
 [ 2.45721323e-01],
 [-1.92025996e-01],
 [ 2.72219728e-02],
 [ 7.95701514e-01],
 [-8.92320606e-01],
 [ 3.22802941e-02],
 [ 2.69562194e-01],
 [-1.46125346e-01],
 [-3.15934186e-02],
 [ 6.61809200e-01],
 [ 4.76622656e-01],
 [-9.78007260e-01],
 [ 5.73481914e-01],
 [-1.28208542e-02],
 [ 1.48147746e-01],
 [ 1.39421731e-04],
 [ 1.08812740e-01],
 [ 6.16007441e-01],
 [ 2.66982969e-01],
 [-2.09250070e-02],
 [ 6.04343953e-02],
 [ 4.14836049e-01],
 [-7.01346473e-01],
 [ 2.94563133e-01],
 [-3.07180590e-01],
 [ 6.53429823e-01],
 [ 3.87696411e-01],
 [-1.60361255e-01],
 [-7.91982930e-01],
 [ 3.04331662e-01],
 [-3.33057338e-01],
 [-1.45487867e-01],
 [-4.48293362e-01],
 [-4.56753222e-01],
 [-5.63113978e-02],
 [ 9.17106858e-01],
 [-7.79117478e-01],
 [-7.74944928e-01],
 [ 1.26081663e-01],
 [ 8.11397037e-02],
 [-6.58667412e-01],
 [ 9.01877119e-01],
 [-7.59017615e-01],
 [-6.54909707e-01],
 [-7.19152458e-01],
 [-8.23250291e-01],
 [-1.96576912e-01],
 [ 3.31076346e-01],
 [-9.59322994e-01],
 [ 6.61615691e-01],
 [-4.48940253e-01],
 [-4.10547311e-01],
 [-8.26340358e-01],
 [ 7.48939731e-01],
 [-8.83894866e-01],
 [ 4.12684469e-01],
 [-4.61578622e-01],
 [-8.29689676e-01],
 [-9.02561735e-01],
 [-2.44970624e-01],
 [ 2.86652487e-01],
 [-8.59512109e-01],
 [-5.89043961e-01],
 [ 6.21286175e-01],
 [-4.02464523e-01],
 [-7.80221770e-01],
 [-7.58513349e-01],
 [ 5.35469863e-01],
 [ 7.43535637e-01],
 [ 9.40814704e-01],
 [-9.31071558e-01],
 [-4.20465454e-01],
 [-1.28056017e-01],
 [-2.09487816e-01]])"""
# Random initial guess for the controls: N rows, one column per action
# dimension, each entry drawn uniformly from [-1, 1).
us_init = np.random.uniform(low=-1, high=1, size=(N, dynamics.action_size))

# Solver built from the MuJoCo dynamics and the finite-difference cost
# (N is presumably the planning horizon -- TODO confirm against iLQR).
ilqr = iLQR(dynamics, cost2, N)

# Receding-horizon wrapper that starts from x0 and drives the solver above.
mpc = RecedingHorizonController(x0, ilqr)

In [13]:
# Wall-clock start for the whole run.
t0 = time.time()

# Cost history; on_iteration appends to this list, so it must exist before
# the controller is advanced.
J_hist = []

# mpc.control returns an iterator that is advanced once per step below.
controls = mpc.control(
    us_init,
    initial_n_iterations=500,
    subsequent_n_iterations=100,
    on_iteration=on_iteration
)

# Collect element [1] of each yielded item (presumably the control part of
# the yielded pair -- TODO confirm against RecedingHorizonController.control).
us = []
for i in range(100):
    print('ITERATION', i, '\n')
    us.append(next(controls)[1])

print('time', time.time() - t0)

ITERATION 0 

iteration 0 accepted 9879.53472466532 [ 2.63860591 -2.53055171  2.74791843 -2.15852085]
iteration 1 accepted 9043.97061093862 [ 2.99916883 -2.60493817  3.36395644 -2.14119813]
iteration 2 accepted 8467.724464114683 [ 2.41734164 -2.55828318  4.49272557 -2.20674193]
iteration 3 accepted 8332.981288910449 [ 2.30222017 -2.56632028  4.49789737 -2.15852398]
iteration 4 accepted 8236.510824712748 [ 1.72843899 -2.53096855  4.70552905 -2.09390465]
iteration 5 accepted 8047.801706741236 [ 1.59837309 -2.55543942  4.69831851 -2.08992666]
iteration 6 accepted 7982.7015312643825 [ 1.37117024 -2.53626143  4.8090768  -2.15515071]
iteration 7 accepted 7903.87364910616 [ 1.29373758 -2.54242204  4.63733481 -2.08578861]
iteration 8 accepted 7900.140850442189 [ 1.16133351 -2.53773587  4.71035281 -2.12344483]
iteration 9 accepted 7848.962471075285 [ 1.22233838 -2.52405438  4.70979443 -2.14047549]
iteration 10 accepted 7733.119795163235 [ 1.13616255 -2.50657483  4.71032782 -2.16374085]
iteratio

iteration 91 accepted 3579.9227074149385 [ 0.11348378  0.00837527 -0.09806306 -0.04006118]
iteration 92 accepted 3539.3515811841426 [-0.00321121  0.00357318 -0.01564329 -0.00684208]
iteration 93 accepted 3532.079920585829 [-0.08089385  0.00030936  0.03947239  0.01538398]
iteration 94 accepted 3511.191460735846 [-0.04369756  0.00214335  0.01187409  0.00420017]
iteration 95 accepted 3507.8331062389907 [ 0.09844946  0.00820349 -0.08850543 -0.03627636]
iteration 96 accepted 3452.7416979642726 [ 0.05520825  0.00617778 -0.0567203  -0.02341232]
iteration 97 accepted 3439.162203970884 [ 0.00523687  0.0036275  -0.01912559 -0.00815824]
iteration 98 accepted 3417.639092329417 [ 0.04982311  0.00560764 -0.05128015 -0.02114767]
iteration 99 accepted 3371.361713697683 [ 0.05278621  0.00536238 -0.05152472 -0.02116745]
iteration 100 accepted 3358.556231330439 [ 0.10450916  0.00768439 -0.0889858  -0.03630689]
iteration 101 accepted 3304.7273971668315 [-0.03615563  0.00097616  0.01589736  0.00618646]
ite

iteration 180 converged 1958.9918439570515 [-0.01926452 -0.00121984  0.01967553  0.00835836]
ITERATION 1 

iteration 0 accepted 1923.8005184428391 [-0.01426859 -0.01139903  0.23781718 -0.49844441]
iteration 1 accepted 1923.7955026972393 [-0.01639497 -0.00804953  0.16183788 -0.32048036]
iteration 2 accepted 1923.1667799379195 [-0.01715257 -0.00668084  0.13270972 -0.25244733]
iteration 3 accepted 1922.8158379968777 [-0.0178413  -0.00554028  0.1085516  -0.19587786]
iteration 4 accepted 1922.7362681366208 [-0.01884438 -0.003782    0.07135527 -0.10891027]
iteration 5 accepted 1922.659579112789 [-0.01920159 -0.00326879  0.06044003 -0.08324115]
iteration 6 accepted 1922.642462839068 [-0.01976202 -0.00248797  0.04381265 -0.04409776]
iteration 7 accepted 1922.6148843906542 [-0.01995825 -0.00225983  0.03892891 -0.0325252 ]
iteration 8 accepted 1922.6122287081137 [-0.02026929 -0.00191305  0.03149396 -0.01487683]
iteration 9 accepted 1922.6004593213106 [-0.02037662 -0.00181182  0.02931077 -0.00965

iteration 2 accepted 572.7827103619437 [-0.00859613 -0.00053199  0.00027756  0.03291312]
iteration 3 accepted 572.7380573446935 [-0.00856171 -0.00064115  0.0029441   0.02699976]
iteration 4 converged 572.7377307438326 [-0.00846926 -0.00080615  0.00700738  0.01789259]
ITERATION 15 

iteration 0 accepted 462.5966087283031 [-0.00618174 -0.00474281  0.07752741 -0.14679595]
iteration 1 accepted 462.5718089089734 [-0.00746639 -0.00161127  0.02404911 -0.02217707]
iteration 2 accepted 462.56848930085806 [-0.00756546 -0.00136454  0.01921332 -0.01113753]
iteration 3 accepted 462.5670447724733 [-0.00764773 -0.00121094  0.01619843 -0.00420485]
iteration 4 accepted 462.56634039129636 [-0.00768243 -0.00112034  0.01443003 -0.00017066]
iteration 5 converged 462.56597224821326 [-0.00771237 -0.00106893  0.01341985  0.00215806]
ITERATION 16 

iteration 0 accepted 368.64011989785376 [-0.00732184  0.00046772  0.01101457  0.01234622]
iteration 1 accepted 368.63814446719954 [-0.00726856 -0.00016784  0.010984

iteration 1 accepted 29.17555981434113 [-0.00302126  0.00010798 -0.00734873  0.03794894]
iteration 2 accepted 29.17398157503368 [-0.0026803  -0.00062443  0.00687376  0.00534027]
iteration 3 converged 29.173977169102017 [-0.0026613  -0.00066605  0.00765878  0.00353564]
ITERATION 29 

iteration 0 accepted 22.99718866484528 [ 1.06659543e-04 -6.47417597e-03  1.11470903e-01 -2.36713688e-01]
iteration 1 accepted 22.92276632189754 [-0.00196913 -0.00161265  0.02623667 -0.03961723]
iteration 2 accepted 22.920297426889984 [-0.00239757 -0.00069993  0.00843521  0.00116299]
iteration 3 converged 22.920290547531586 [-0.00242134 -0.00064788  0.00745433  0.00341798]
ITERATION 30 

iteration 0 accepted 18.12285983922644 [-0.00137492 -0.0018037   0.04274641 -0.07460656]
iteration 1 accepted 18.114841007019212 [-0.00204031 -0.00100181  0.01340564 -0.01098929]
iteration 2 accepted 18.114568309990545 [-0.00218929 -0.0006462   0.00757395  0.00259434]
iteration 3 converged 18.11456754747051 [-0.00219713 -0.0

iteration 4 converged 1.348338369335988 [ 1.29133034e-05 -4.34515937e-04  5.19224253e-03  2.50121792e-03]
ITERATION 48 

iteration 0 accepted 1.2675288489882004 [ 0.00240234 -0.00771498  0.08219069 -0.18535637]
iteration 1 accepted 1.2217016685191728 [ 0.00042391 -0.00099792  0.02017136 -0.03110139]
iteration 2 accepted 1.2201825621953122 [ 9.96944877e-05 -4.72106834e-04  5.89768955e-03  6.92219732e-04]
iteration 3 accepted 1.2201783326773863 [ 8.07294157e-05 -4.28198539e-04  5.13364484e-03  2.46235882e-03]
iteration 4 converged 1.220178332474587 [ 8.06004365e-05 -4.27922895e-04  5.12827087e-03  2.47467404e-03]
ITERATION 49 

iteration 0 accepted 1.1804052554979017 [-0.00225671  0.00515919 -0.09499346  0.23372382]
iteration 1 accepted 1.1114275325454828 [-0.00029108  0.00051006 -0.01303523  0.04394614]
iteration 2 accepted 1.1091340806388779 [ 0.00012085 -0.00037121  0.00411606  0.00463909]
iteration 3 accepted 1.1091276859473733 [ 0.00014375 -0.00042137  0.00506178  0.00246499]
iterat

iteration 1 accepted 0.32868357913732815 [ 0.00126663 -0.00124615  0.02706935 -0.04827131]
iteration 2 accepted 0.32525295860201786 [ 0.00077399 -0.00042653  0.00565319 -0.00046097]
iteration 3 accepted 0.32524343794415633 [ 0.00074557 -0.00036112  0.00450602  0.00219451]
iteration 4 converged 0.3252434374877202 [ 0.00074537 -0.00036071  0.00449796  0.00221299]
ITERATION 66 

iteration 0 accepted 0.3090561793730882 [ 0.00155098 -0.00220136  0.03745166 -0.07418421]
iteration 1 accepted 0.3015466219302917 [ 0.0009106  -0.00067031  0.01053455 -0.01168168]
iteration 2 accepted 0.3012898268541194 [ 0.00077341 -0.00037551  0.00479789  0.00147115]
iteration 3 accepted 0.3012891083674991 [ 0.00076574 -0.0003587   0.0044809   0.00219991]
iteration 4 converged 0.3012891083330228 [ 0.00076568 -0.00035858  0.00447869  0.00220499]
ITERATION 67 

iteration 0 accepted 0.33172162526589716 [-0.00126798  0.00444195 -0.0817439   0.20162955]
iteration 1 accepted 0.2804673673816819 [ 0.00040893  0.00045053

iteration 4 converged 0.06756852877004287 [ 0.00096511 -0.00033743  0.00428992  0.00212654]
ITERATION 84 

iteration 0 accepted 0.07782378748048623 [ 0.00215177 -0.00247296  0.05360908 -0.10842353]
iteration 1 accepted 0.06187394836254731 [ 0.00118893 -0.00082903  0.01303054 -0.01808191]
iteration 2 accepted 0.06133144812903674 [ 0.00098238 -0.00036074  0.00474907  0.00105839]
iteration 3 accepted 0.06132993126420024 [ 0.00097128 -0.00033695  0.00428739  0.00211676]
iteration 4 converged 0.06132993119142954 [ 0.0009712  -0.00033678  0.00428418  0.00212414]
ITERATION 85 

iteration 0 accepted 0.06248630639981309 [ 0.00187221 -0.00380879  0.03183869 -0.06887987]
iteration 1 accepted 0.05585724659828714 [ 0.00110055 -0.00050024  0.01003032 -0.01049842]
iteration 2 accepted 0.05564040823910701 [ 0.00098418 -0.00035369  0.00456782  0.00144854]
iteration 3 accepted 0.055639805465751646 [ 0.00097695 -0.00033627  0.00428084  0.00211726]
iteration 4 converged 0.055639805436850306 [ 0.0009769  -

In [14]:
# Replay the collected controls in a MuJoCo viewer attached to the dynamics'
# simulator.
viewer = MjViewer(dynamics.sim)
# Reset the simulator to the initial state and print it as a sanity check.
dynamics.set_state(x0)
print(dynamics.get_state())
# Each entry of `us` is what the controller yielded for one step; only its
# first element is applied (presumably the first control of that step's plan
# -- TODO confirm). The index `i` is not used inside the loop.
for i, u in enumerate(us):
    dynamics.step(u[0])
    viewer.render()

Creating window glfw
[ 0.         -1.82006901  0.          0.        ]
