# Cartpole Problem

The state and control vectors $\textbf{x}$ and $\textbf{u}$ are defined as follows:

$$
\begin{equation*}
\textbf{x} = \begin{bmatrix}
    x & \theta & \dot{x} & \dot{\theta}
    \end{bmatrix}^T
\end{equation*}
$$

$$
\begin{equation*}
\textbf{u} = \begin{bmatrix}
    F_{x}
    \end{bmatrix}^T
\end{equation*}
$$

The goal is to swing the pendulum upright:

$$
\begin{equation*}
\textbf{x}_{goal} = \begin{bmatrix}
    0 & 0 & 0 & 0
    \end{bmatrix}^T
\end{equation*}
$$

**Note**: The force is constrained between $-1$ and $1$. This is achieved by
optimizing over unconstrained actions and then passing them through the
squashing function $\tanh(\textbf{u})$. The squashing is embedded directly in
the dynamics model so that it can be auto-differentiated. This also means that
we need to apply the same transformation manually to the output of our iLQR at
the end.

In [7]:
%matplotlib inline

In [8]:
from __future__ import print_function

In [9]:
import numpy as np
import matplotlib.pyplot as plt
import time

In [10]:
from ilqr.cost import QRCost, FiniteDiffCost
from ilqr.mujoco_dynamics import MujocoDynamics
from ilqr.mujoco_controller import iLQR, RecedingHorizonController
from ilqr.examples.cartpole import CartpoleDynamics
from ilqr.dynamics import constrain

from scipy.optimize import approx_fprime

import mujoco_py
from mujoco_py import MjViewer
import os

In [11]:
def on_iteration(iteration_count, xs, us, J_opt, accepted, converged):
    """Per-iteration callback: record the cost and print a one-line summary.

    Appends ``J_opt`` to the module-level list ``J_hist`` (which must exist
    before the callback is invoked) and prints the iteration number, a status
    word, the cost, and the last entry of ``xs``.

    Args:
        iteration_count: Index of the iteration being reported.
        xs: Sequence of states; only ``xs[-1]`` is read.
        us: Sequence of controls; not used by this callback.
        J_opt: Cost value to record and print.
        accepted: Truthy if the iteration's step was accepted.
        converged: Truthy if the solver reports convergence; takes priority
            over ``accepted`` when choosing the status word.
    """
    J_hist.append(J_opt)

    # "converged" wins over "accepted"; neither flag set means "failed".
    if converged:
        status = "converged"
    elif accepted:
        status = "accepted"
    else:
        status = "failed"

    print("iteration", iteration_count, status, J_opt, xs[-1])

In [12]:
# Locate the MuJoCo model file relative to this notebook's directory.
xml_dir = os.path.join('..', 'ilqr', 'xmls')
xml_path = os.path.join(xml_dir, 'inverted_pendulum.xml')

# Build the MuJoCo-backed dynamics model used by the controller below.
# NOTE(review): frame_skip presumably sets how many simulator steps make up
# one control step -- confirm against MujocoDynamics.
dynamics = MujocoDynamics(xml_path, frame_skip=2, use_multiprocessing=True)

# Show the resulting time step of the dynamics model.
print(dynamics.dt)


Finished loading process 51103
Finished loading process 51104
Finished loading process 51105
Finished loading process 51106
Finished loading process 51107
Finished loading process 51108
Finished loading process 51109
Finished loading process 51110
Finished loading process 51111
Finished loading process 51112
Finished loading process 51114
Finished loading process 51113
0.04
Finished loading process 51115
Finished loading process 51116
Finished loading process 51117
Finished loading process 51118


In [13]:
# Target state: every component of [x, theta, x_dot, theta_dot] at zero.
x_goal = np.zeros(4)

# Instantaneous state cost: weight 2 on the first state component, 10 on the
# second, and 1 on each of the remaining two.
Q = np.diag([2.0, 10.0, 1.0, 1.0])

# Terminal state cost: the running state weights scaled by 10.
Q_terminal = 10 * Q

# Instantaneous control cost: unit weight on the single control input.
R = np.eye(1)

# Quadratic cost built from the matrices above.
# NOTE(review): the iLQR constructed later in this notebook is given `cost2`,
# not `cost1`.
cost1 = QRCost(Q, R, Q_terminal=Q_terminal, x_goal=x_goal)

In [14]:
# Cost defined through plain callables. The weights (2, 10, 1, 1 on the state,
# 1 on the control, terminal term scaled by 10) match Q, R and Q_terminal used
# for `cost1`, with the goal state at the origin.
# The callables are kept as lambdas exactly as in the original call.
cost2 = FiniteDiffCost(
    # Running cost, called with (x, u, i).
    lambda x, u, i: 2 * x[0] ** 2 + 10 * x[1] ** 2 + x[2] ** 2 + x[3] ** 2 + u[0] ** 2,
    # Terminal cost, called with (x, i).
    lambda x, i: 10 * (2 * x[0] ** 2 + 10 * x[1] ** 2 + x[2] ** 2 + x[3] ** 2),
    4,  # presumably the state dimension -- verify against FiniteDiffCost
    1,  # presumably the action dimension -- verify against FiniteDiffCost
    use_multiprocessing=True,
)

Finished loading process 51119
Finished loading process 51120
Finished loading process 51121
Finished loading process 51122
Finished loading process 51123
Finished loading process 51124
Finished loading process 51125
Finished loading process 51126
Finished loading process 51127
Finished loading process 51128
Finished loading process 51129
Finished loading process 51130
Finished loading process 51131
Finished loading process 51132
Finished loading process 51133
Finished loading process 51134


In [18]:
# Number of rows in the initial control sequence; also passed as the third
# argument to iLQR (presumably the planning horizon -- verify against iLQR).
N = 100

# Dedicated, seeded generator so that "Restart Kernel -> Run All" reproduces the
# same initial pole angle and the same initial control guess. This replaces
# both the unseeded global-RNG draws and the large commented-out (string
# literal) control sequence that used to sit here as a way to pin one run.
# Change SEED to sample a different scenario.
SEED = 0
rng = np.random.RandomState(SEED)

# Initial state [x, theta, x_dot, theta_dot]: everything zero except theta,
# which is drawn uniformly from [-pi, pi).
x0 = np.array([0.0, rng.uniform(-np.pi, np.pi), 0.0, 0.0])

# Random initial guess for the control sequence, one row per step, each entry
# drawn uniformly from [-1, 1).
us_init = rng.uniform(-1, 1, (N, dynamics.action_size))

# Solver over the MuJoCo dynamics with the callable-based cost, wrapped in a
# receding-horizon controller that starts from x0.
ilqr = iLQR(dynamics, cost2, N)
mpc = RecedingHorizonController(x0, ilqr)

In [19]:
# Wall-clock start; the elapsed time is printed after the rollout.
t0 = time.time()

# Cost history, appended to by the `on_iteration` callback.
J_hist = []

# `controls` is consumed with next() below, once per control step.
controls = mpc.control(
    us_init,
    initial_n_iterations=500,
    subsequent_n_iterations=100,
    on_iteration=on_iteration,
)

# Collect the control part (index 1) of each item produced by the controller.
us = []
for i in range(100):
    print('ITERATION', i, '\n')
    step_result = next(controls)
    us.append(step_result[1])

print('time', time.time() - t0)

ITERATION 0 

iteration 0 accepted 7500.881955292431 [ 0.70632104 -2.76017572  1.94147661 -0.39949183]
iteration 1 accepted 6615.558607273923 [ 1.05853522 -1.94852962  4.67713748  0.15783129]
iteration 2 accepted 6250.259391772116 [ 0.99655653 -1.999958    3.29358176 -0.80542234]
iteration 3 accepted 6085.61132242731 [ 0.97109137 -1.74907324  3.47291871 -0.22247246]
iteration 4 accepted 5926.559913386045 [ 0.97960155 -1.74393179  3.32283735 -0.41065567]
iteration 5 accepted 5741.684683776171 [ 0.94403433 -1.69839188  3.10216127 -0.70443111]
iteration 6 accepted 5629.786095618172 [ 1.22698343 -1.6038812   3.82414556 -0.4019485 ]
iteration 7 accepted 5583.061174476095 [ 1.02669007 -1.96084236  3.38960056 -1.60146464]
iteration 8 accepted 5372.287881858136 [ 0.97589383 -1.66408119  3.8939778  -0.48537742]
iteration 9 accepted 5224.542649505464 [ 1.02822369 -1.62522062  2.98084549 -0.45610531]
iteration 10 accepted 4977.98752159494 [ 1.12613325 -1.64327451  3.35442279 -0.76281238]
iteratio

iteration 0 accepted 0.021703603160587483 [-0.00016852  0.00087795 -0.01366824  0.04060538]
iteration 1 accepted 0.0031842994131480997 [ 0.0003291  -0.00045331  0.00229161  0.00223841]
iteration 2 accepted 0.0031736121523882853 [ 0.00033145 -0.00045099  0.00272992  0.00131943]
iteration 3 converged 0.0031736117496958785 [ 0.00033155 -0.00045125  0.00273228  0.00131365]
ITERATION 12 

iteration 0 accepted 0.004948762641221733 [ 0.00026218 -0.0003245  -0.00275289  0.01350746]
iteration 1 accepted 0.0031361164096616557 [ 0.00032714 -0.00044333  0.00260709  0.00160929]
iteration 2 accepted 0.003135066604446144 [ 0.00033044 -0.00045141  0.00273257  0.00131593]
iteration 3 converged 0.003135066565051358 [ 0.00033045 -0.00045142  0.0027334   0.00131414]
ITERATION 13 

iteration 0 accepted 0.003563856149502831 [ 0.00031172 -0.00033071  0.005727   -0.00471098]
iteration 1 accepted 0.00309771737655406 [ 0.00033276 -0.00046124  0.00279024  0.00116394]
iteration 2 accepted 0.0030974432636426616 [ 

iteration 2 accepted 0.0026904995208535685 [ 0.00031773 -0.00045258  0.00253503  0.00176669]
iteration 3 accepted 0.002688016926733473 [ 0.0003193  -0.00045321  0.00274462  0.00131957]
iteration 4 converged 0.0026880169253083214 [ 0.0003193  -0.00045323  0.00274476  0.00131922]
ITERATION 31 

iteration 0 accepted 1.474104432590571 [-0.00340889  0.00875751 -0.14757653  0.35053724]
iteration 1 accepted 0.0030630555346181044 [ 2.97248399e-04 -4.52894723e-04  9.92183437e-05  6.86849691e-03]
iteration 2 accepted 0.002677914838881287 [ 0.00031843 -0.00045186  0.0027307   0.00135404]
iteration 3 accepted 0.002677900292561809 [ 0.00031899 -0.00045328  0.00274506  0.00131938]
iteration 4 converged 0.0026779002925535426 [ 0.00031899 -0.00045328  0.00274508  0.00131936]
ITERATION 32 

iteration 0 accepted 0.13168948873705688 [ 0.00148641 -0.00342246  0.04681888 -0.1022172 ]
iteration 1 accepted 0.0027393186322344784 [ 0.00032963 -0.00046295  0.00385885 -0.00106425]
iteration 2 accepted 0.00266896

iteration 4 converged 0.0026302448287194653 [ 0.00031586 -0.0004538   0.00274828  0.00132079]
ITERATION 50 

iteration 0 accepted 0.5896117899233584 [-0.00203395  0.00548381 -0.09183664  0.22202394]
iteration 1 accepted 0.002888021109242536 [ 0.00029383 -0.00043341  0.00062177  0.00587217]
iteration 2 accepted 0.002631478269013804 [ 0.00031537 -0.00045281  0.00273645  0.00134913]
iteration 3 accepted 0.0026314685999083293 [ 0.00031578 -0.00045381  0.00274836  0.00132085]
iteration 4 converged 0.0026314685999033333 [ 0.00031578 -0.00045381  0.00274837  0.00132083]
ITERATION 51 

iteration 0 accepted 4.262733350088129 [ 0.00662504 -0.01612815  0.25900328 -0.59256152]
iteration 1 accepted 0.002651627421916879 [ 0.00026697 -0.00024662  0.00339173  0.00083166]
iteration 2 accepted 0.0026328317430506128 [ 0.00031629 -0.00045562  0.00274825  0.00131629]
iteration 3 converged 0.0026328308460768094 [ 0.00031569 -0.00045383  0.00274846  0.00132087]
ITERATION 52 

iteration 0 accepted 0.014644969

iteration 2 accepted 0.0026621462811463725 [ 0.00031548 -0.00045555  0.00276245  0.00128871]
iteration 3 accepted 0.002662133435095168 [ 0.00031487 -0.00045396  0.00274931  0.00132122]
iteration 4 converged 0.0026621334350877333 [ 0.00031487 -0.00045397  0.0027493   0.00132124]
ITERATION 70 

iteration 0 accepted 6.044956922946441 [-0.00719691  0.01829591 -0.30256036  0.70845127]
iteration 1 accepted 0.003068292835988401 [ 0.00042831 -0.00085272  0.00472153 -0.00446484]
iteration 2 accepted 0.002663494281774087 [ 0.00031438 -0.00045226  0.00276714  0.00128803]
iteration 3 accepted 0.0026634789156785264 [ 0.00031485 -0.00045397  0.00274933  0.00132123]
iteration 4 converged 0.0026634789156604944 [ 0.00031485 -0.00045397  0.00274932  0.00132125]
ITERATION 71 

iteration 0 accepted 0.09669594354721082 [ 0.00133243 -0.00303619  0.04032301 -0.08707525]
iteration 1 accepted 0.002716929396266933 [ 0.00032393 -0.00046113  0.0037095  -0.0007304 ]
iteration 2 accepted 0.0026647743748375178 [ 0.0

iteration 2 accepted 0.0026800622618283467 [ 0.00031521 -0.00045554  0.00276323  0.00128788]
iteration 3 accepted 0.002680048671398192 [ 0.00031461 -0.00045401  0.00274957  0.00132134]
iteration 4 converged 0.002680048671390062 [ 0.00031461 -0.00045401  0.00274956  0.00132136]
ITERATION 90 

iteration 0 accepted 15.61796558854973 [-0.01204595  0.03049737 -0.48897347  1.13677807]
iteration 1 accepted 0.05456189454624333 [ 0.00105732 -0.00247131  0.03028258 -0.06445984]
iteration 2 accepted 0.002682268164239459 [ 0.00031582 -0.00045429  0.00292484  0.00094792]
iteration 3 accepted 0.0026805378655855847 [ 0.00031461 -0.00045402  0.00274969  0.00132107]
iteration 4 converged 0.002680537864511404 [ 0.00031461 -0.00045401  0.00274957  0.00132137]
ITERATION 91 

iteration 0 accepted 0.30760432425530415 [ 0.00205488 -0.00483508  0.07077902 -0.15778435]
iteration 1 accepted 0.002833491279786401 [ 0.00033161 -0.00047091  0.00438647 -0.00218937]
iteration 2 accepted 0.0026810048965115172 [ 0.0003

In [20]:
# Replay the collected controls in an on-screen MuJoCo viewer.
viewer = MjViewer(dynamics.sim)

# Reset the simulation to the initial state and print what the model reports.
# NOTE(review): the saved output shows [0. 0. 0. 0.] although x0[1] is drawn
# at random -- confirm that set_state/get_state round-trip as expected.
dynamics.set_state(x0)
print(dynamics.get_state())

# Apply each stored control in order, rendering after every step.
# NOTE(review): the notebook's introduction says controls must be squashed
# manually after iLQR; here u[0] is passed to step() as-is -- confirm whether
# that still applies to MujocoDynamics.
for u in us:
    dynamics.step(u[0])
    viewer.render()

Creating window glfw
[0. 0. 0. 0.]
