# Cartpole Problem

The state and control vectors $\textbf{x}$ and $\textbf{u}$ are defined as follows:

$$
\begin{equation*}
\textbf{x} = \begin{bmatrix}
    x & \theta & \dot{x} & \dot{\theta}
    \end{bmatrix}^T
\end{equation*}
$$

$$
\begin{equation*}
\textbf{u} = \begin{bmatrix}
    F_{x}
    \end{bmatrix}^T
\end{equation*}
$$

The goal is to swing the pendulum upright:

$$
\begin{equation*}
\textbf{x}_{goal} = \begin{bmatrix}
    0 & 0 & 0 & 0
    \end{bmatrix}^T
\end{equation*}
$$

**Note**: The force is constrained between $-1$ and $1$. This is achieved by
instead fitting for unconstrained actions and then applying it to a squashing
function $\tanh(\textbf{u})$. This is directly embedded into the dynamics model
in order to be auto-differentiated. This also means that we need to apply this
transformation manually to the output of our iLQR at the end.

In [1]:
%matplotlib inline

In [2]:
from __future__ import print_function

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import time

In [4]:
from ilqr.cost import QRCost, FiniteDiffCost
from ilqr.mujoco_dynamics import MujocoDynamics
from ilqr.mujoco_controller import iLQR, RecedingHorizonController
from ilqr.examples.cartpole import CartpoleDynamics
from ilqr.dynamics import constrain

from scipy.optimize import approx_fprime

import mujoco_py
from mujoco_py import MjViewer
import os

In [5]:
def on_iteration(iteration_count, xs, us, J_opt, accepted, converged):
    J_hist.append(J_opt)
    info = "converged" if converged else ("accepted" if accepted else "failed")
    final_state = xs[-1]
    print("iteration", iteration_count, info, J_opt, final_state)

In [6]:
xml_path = os.path.join('..', 'ilqr', 'xmls', 'inverted_pendulum.xml')
dynamics = MujocoDynamics(xml_path, frame_skip = 2, use_multiprocessing = True)
print(dynamics.dt)


Finished loading process 50528
Finished loading process 50529
Finished loading process 50530
Finished loading process 50531
Finished loading process 50532
Finished loading process 50533
Finished loading process 50534
Finished loading process 50535
Finished loading process 50536
Finished loading process 50537
Finished loading process 50538
Finished loading process 50539
0.04
Finished loading process 50540
Finished loading process 50541
Finished loading process 50542
Finished loading process 50543


In [7]:
x_goal = np.array([0.0, 0.0, 0.0, 0.0])

# Instantenous state cost.
Q = np.eye(4)
Q[0, 0] = 2.0
Q[1, 1] = 10.0


# Terminal state cost.
Q_terminal = 10 * Q

# Instantaneous control cost.
R = np.eye(1)

cost1 = QRCost(Q, R, Q_terminal=Q_terminal, x_goal=x_goal)

In [8]:
cost2 = FiniteDiffCost(lambda x, u, i: 2 * (x[0] ** 2) + 10 * (x[1] ** 2) + x[2] ** 2 + x[3] ** 2 + u[0] ** 2,
                      lambda x, i: 10 * (2 * (x[0] ** 2) + 10 * (x[1] ** 2) + x[2] ** 2 + x[3] ** 2),
                      4, 1, use_multiprocessing = True)

Finished loading process 50544
Finished loading process 50545
Finished loading process 50546
Finished loading process 50547
Finished loading process 50548
Finished loading process 50549
Finished loading process 50550
Finished loading process 50551
Finished loading process 50552
Finished loading process 50553
Finished loading process 50554
Finished loading process 50555
Finished loading process 50556
Finished loading process 50557
Finished loading process 50558
Finished loading process 50559


In [9]:
N = 100
x0 = np.array([0.0, np.random.uniform(-np.pi, np.pi), 0.0, 0.0])
"""us_init = np.array([[-4.76751939e-01],
 [ 3.34490970e-01],
 [-3.99608551e-01],
 [ 8.41882163e-01],
 [-8.93302461e-01],
 [-3.57273055e-01],
 [-3.32158856e-01],
 [-4.82030121e-01],
 [-6.84388675e-01],
 [-4.26475287e-01],
 [-4.90913171e-01],
 [ 1.14754770e-01],
 [ 3.90275383e-01],
 [-4.36421243e-01],
 [ 5.57806778e-01],
 [ 7.83813923e-01],
 [-3.27778717e-01],
 [ 8.00582346e-01],
 [-8.49640982e-01],
 [-5.69222128e-01],
 [ 2.58447724e-01],
 [ 6.02857039e-01],
 [-6.11855326e-01],
 [ 7.00853348e-01],
 [-9.31090157e-01],
 [ 4.97665652e-01],
 [ 2.45721323e-01],
 [-1.92025996e-01],
 [ 2.72219728e-02],
 [ 7.95701514e-01],
 [-8.92320606e-01],
 [ 3.22802941e-02],
 [ 2.69562194e-01],
 [-1.46125346e-01],
 [-3.15934186e-02],
 [ 6.61809200e-01],
 [ 4.76622656e-01],
 [-9.78007260e-01],
 [ 5.73481914e-01],
 [-1.28208542e-02],
 [ 1.48147746e-01],
 [ 1.39421731e-04],
 [ 1.08812740e-01],
 [ 6.16007441e-01],
 [ 2.66982969e-01],
 [-2.09250070e-02],
 [ 6.04343953e-02],
 [ 4.14836049e-01],
 [-7.01346473e-01],
 [ 2.94563133e-01],
 [-3.07180590e-01],
 [ 6.53429823e-01],
 [ 3.87696411e-01],
 [-1.60361255e-01],
 [-7.91982930e-01],
 [ 3.04331662e-01],
 [-3.33057338e-01],
 [-1.45487867e-01],
 [-4.48293362e-01],
 [-4.56753222e-01],
 [-5.63113978e-02],
 [ 9.17106858e-01],
 [-7.79117478e-01],
 [-7.74944928e-01],
 [ 1.26081663e-01],
 [ 8.11397037e-02],
 [-6.58667412e-01],
 [ 9.01877119e-01],
 [-7.59017615e-01],
 [-6.54909707e-01],
 [-7.19152458e-01],
 [-8.23250291e-01],
 [-1.96576912e-01],
 [ 3.31076346e-01],
 [-9.59322994e-01],
 [ 6.61615691e-01],
 [-4.48940253e-01],
 [-4.10547311e-01],
 [-8.26340358e-01],
 [ 7.48939731e-01],
 [-8.83894866e-01],
 [ 4.12684469e-01],
 [-4.61578622e-01],
 [-8.29689676e-01],
 [-9.02561735e-01],
 [-2.44970624e-01],
 [ 2.86652487e-01],
 [-8.59512109e-01],
 [-5.89043961e-01],
 [ 6.21286175e-01],
 [-4.02464523e-01],
 [-7.80221770e-01],
 [-7.58513349e-01],
 [ 5.35469863e-01],
 [ 7.43535637e-01],
 [ 9.40814704e-01],
 [-9.31071558e-01],
 [-4.20465454e-01],
 [-1.28056017e-01],
 [-2.09487816e-01]])"""
us_init = np.random.uniform(-1, 1, (N, dynamics.action_size))
ilqr = iLQR(dynamics, cost2, N)
mpc = RecedingHorizonController(x0, ilqr)

In [10]:
t0 = time.time()
J_hist = []
controls = mpc.control(us_init, initial_n_iterations = 500, subsequent_n_iterations = 100, on_iteration = on_iteration)
us = []
for i in range(100):
    print('ITERATION', i, '\n')
    us.append(next(controls)[1])
    
print('time', time.time() - t0)

ITERATION 0 

iteration 0 accepted 12313.55825221075 [-1.63959048  2.41939722 -0.24067539 -3.93843166]
iteration 1 accepted 10947.139636138925 [-1.14241954  2.21842285 -0.80585711 -2.24172159]
iteration 2 accepted 9865.186716478182 [-1.44578773  2.17468455 -4.17344602  0.52197362]
iteration 3 accepted 9622.194405948872 [-1.23952297  2.2210536  -3.60933683  0.85115956]
iteration 4 accepted 9188.120786479485 [-1.07450579  2.20384843 -3.28654285  1.08251663]
iteration 5 accepted 9015.420948582838 [-0.98063179  2.16371664 -3.35027301  1.11619979]
iteration 6 accepted 8933.866855618293 [-0.91707207  2.10607782 -3.41191439  1.06464647]
iteration 7 accepted 8804.710980698046 [-0.87873558  2.04688733 -3.21913933  1.14358811]
iteration 8 accepted 8795.496214860292 [-0.8058331   1.9082668  -3.23002229  0.90347354]
iteration 9 accepted 8776.243518923975 [-0.8416162   1.81910206 -3.16246605  0.69527682]
iteration 10 accepted 8676.05352519567 [-0.81521251  1.80346513 -2.8670211   0.90389693]
iterat

iteration 99 accepted 2542.5132126801045 [ 0.041932    0.05953883  0.05407552 -0.04632112]
iteration 100 accepted 2476.549461540054 [ 0.03166374  0.04612657  0.04472507 -0.03506539]
iteration 101 accepted 2402.2958239661707 [ 0.02624574  0.01592197  0.00315783 -0.01562391]
iteration 102 accepted 2369.3716583444057 [-0.01283583 -0.00304114  0.01767483  0.00808041]
iteration 103 accepted 2305.902315949841 [ 0.00392162 -0.00085015  0.0021864   0.00125422]
iteration 104 accepted 2303.9732650149413 [-0.0086357  -0.00251583  0.01391144  0.00641459]
iteration 105 accepted 2283.060273024981 [-0.00626295 -0.0021971   0.01169111  0.00543468]
iteration 106 accepted 2268.975801125725 [ 0.00399274 -0.00079041  0.00198659  0.00114853]
iteration 107 accepted 2243.0490164017688 [-0.01477242 -0.00330254  0.01952961  0.00888327]
iteration 108 accepted 2229.7679100020155 [-0.03690268 -0.00631317  0.04036702  0.01808053]
iteration 109 accepted 2191.5551681983807 [-0.00113542 -0.001446    0.00659243  0.003

iteration 7 converged 1156.4340993499268 [ 0.00234005 -0.00017085 -0.00132359  0.00679368]
ITERATION 8 

iteration 0 accepted 1046.169466355628 [-0.00096446  0.007752   -0.13042301  0.3069677 ]
iteration 1 accepted 1045.247090817018 [ 0.00227233 -0.00033062 -0.00128081  0.00613108]
iteration 2 accepted 1045.0548784452308 [ 0.0022814  -0.00033245 -0.00072999  0.00495598]
iteration 3 accepted 1045.0330866795866 [ 0.00228138 -0.00033522 -0.00028922  0.0040331 ]
iteration 4 accepted 1045.0303246045776 [ 2.28390056e-03 -3.37036301e-04  5.34345333e-05  3.30928885e-03]
iteration 5 converged 1045.0299136359642 [ 0.00228515 -0.00033857  0.00032224  0.00274328]
ITERATION 9 

iteration 0 accepted 923.7093748040844 [ 0.00221811 -0.00038342  0.00640297 -0.01027717]
iteration 1 accepted 923.708244983941 [ 0.00221087 -0.00037579  0.00532338 -0.00794156]
iteration 2 converged 923.7081155563201 [ 0.00218836 -0.00035227  0.00261414 -0.00205901]
ITERATION 10 

iteration 0 accepted 801.8344819808495 [-0.0

iteration 2 accepted 42.750031040949885 [ 0.00091852 -0.0003758   0.00203334  0.0013386 ]
iteration 3 converged 42.75003010022303 [ 0.00091925 -0.00037557  0.00216282  0.00106369]
ITERATION 28 

iteration 0 accepted 36.370339453350915 [ 0.00134048 -0.00157109  0.01633384 -0.03283507]
iteration 1 accepted 36.3565347596623 [ 0.00088302 -0.0003761   0.00257918  0.00028036]
iteration 2 converged 36.35652678213764 [ 0.00088124 -0.00037942  0.00219848  0.00107295]
ITERATION 29 

iteration 0 accepted 33.188381859712294 [-0.00362024  0.01068675 -0.17900916  0.42176551]
iteration 1 accepted 31.05285167722438 [ 0.00083388 -0.00041538 -0.00031265  0.0062853 ]
iteration 2 accepted 31.052509381035428 [ 0.00084476 -0.00038114  0.00221518  0.00112447]
iteration 3 converged 31.052509368064946 [ 0.00084537 -0.00038274  0.00222839  0.00109179]
ITERATION 30 

iteration 0 accepted 26.662523559458887 [ 0.00154557 -0.00224707  0.02763993 -0.05895381]
iteration 1 accepted 26.619173362016358 [ 0.00081717 -0.0

iteration 1 accepted 0.733503310361105 [ 0.00039555 -0.00026357  0.0037178  -0.00029756]
iteration 2 accepted 0.7334563823021756 [ 0.00043017 -0.00043704  0.00263295  0.001257  ]
iteration 3 converged 0.7334563803556747 [ 0.00042955 -0.00043526  0.00263012  0.00126804]
ITERATION 53 

iteration 0 accepted 10.327707338936694 [ 0.01004208 -0.02446586  0.39024051 -0.89521724]
iteration 1 accepted 0.6139648986100175 [ 9.16267532e-05  4.89838334e-04 -7.48380543e-03  2.62184533e-02]
iteration 2 accepted 0.6065292842446496 [ 0.00042198 -0.00043745  0.00256687  0.00141964]
iteration 3 converged 0.6065290089165224 [ 0.00042208 -0.00043641  0.00263778  0.00127159]
ITERATION 54 

iteration 0 accepted 0.5151182121503516 [ 0.0008754  -0.00166612  0.01717575 -0.03366755]
iteration 1 accepted 0.5004737969980712 [ 0.00041689 -0.00043428  0.00303892  0.00045326]
iteration 2 accepted 0.500465337695028 [ 0.00041519 -0.00043774  0.00264713  0.00126954]
iteration 3 converged 0.5004653373744885 [ 0.0004151  

iteration 3 converged 0.012276819359982054 [ 0.00034256 -0.00044928  0.00272058  0.0013084 ]
ITERATION 74 

iteration 0 accepted 0.0131688256326353 [ 0.00059556 -0.00115455  0.00868195 -0.01361927]
iteration 1 accepted 0.010496114346495302 [ 0.00033992 -0.00044273  0.00289806  0.00096182]
iteration 2 accepted 0.010494551516720267 [ 0.00034079 -0.00044973  0.00272332  0.00130702]
iteration 3 converged 0.01049455145666087 [ 0.00034074 -0.00044959  0.00272248  0.00130924]
ITERATION 75 

iteration 0 accepted 1.756234750207111 [-0.00366572  0.00959327 -0.16103204  0.38185779]
iteration 1 accepted 0.009439318113694715 [ 3.20533495e-04 -4.62668799e-04  8.30596200e-05  6.79472001e-03]
iteration 2 accepted 0.009061084642565387 [ 0.00033844 -0.00044837  0.00271014  0.00134441]
iteration 3 accepted 0.009061070340534995 [ 0.00033903 -0.00044988  0.00272425  0.00131006]
iteration 4 converged 0.009061070340526587 [ 0.00033903 -0.00044988  0.00272426  0.00131003]
ITERATION 76 

iteration 0 accepted 0

iteration 3 converged 0.0029112661047969403 [ 0.00032185 -0.00045279  0.00274213  0.00131804]
ITERATION 94 

iteration 0 accepted 15.270833959089655 [ 0.01257122 -0.03103131  0.48900514 -1.12143679]
iteration 1 accepted 0.05166303936515176 [-0.0004028   0.00151373 -0.02394185  0.06511881]
iteration 2 accepted 0.0028656131676925317 [ 0.00032018 -0.00045259  0.00257093  0.00168412]
iteration 3 accepted 0.0028639525621010765 [ 0.00032137 -0.00045286  0.0027425   0.00131854]
iteration 4 converged 0.0028639525611564593 [ 0.00032137 -0.00045287  0.00274262  0.00131825]
ITERATION 95 

iteration 0 accepted 1.717284987644141 [ 0.00427705 -0.01043931  0.16484961 -0.37568588]
iteration 1 accepted 0.003192878443830693 [ 0.0003386  -0.00043892  0.00535669 -0.0041054 ]
iteration 2 accepted 0.002822823337347519 [ 0.00032151 -0.00045445  0.00275702  0.00128447]
iteration 3 accepted 0.0028228093476966275 [ 0.00032093 -0.00045294  0.00274309  0.00131843]
iteration 4 converged 0.0028228093476881234 [ 0.0

In [11]:
viewer = MjViewer(dynamics.sim)
dynamics.set_state(x0)
print(dynamics.get_state())
for i, u in enumerate(us):
    print('')
    dynamics.step(u[0])
    viewer.render()

Creating window glfw
[0.         2.33919972 0.         0.        ]




































































































