In [1]:
%matplotlib inline

In [2]:
from __future__ import print_function

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import time

In [4]:
from ilqr.cost import QRCost, FiniteDiffCost
from ilqr.mujoco_dynamics import MujocoDynamics
from ilqr.mujoco_controller import iLQR, RecedingHorizonController
from ilqr.examples.cartpole import CartpoleDynamics
from ilqr.dynamics import constrain

from scipy.optimize import approx_fprime

import mujoco_py
from mujoco_py import MjViewer
import os

%load_ext autoreload
%autoreload 2

In [5]:
def on_iteration(iteration_count, xs, us, J_opt, accepted, converged):
    """Per-iteration callback: record the cost and print a progress line.

    Appends ``J_opt`` to the notebook-level ``J_hist`` list, then prints the
    iteration number, a status word, the cost and the last entry of ``xs``.

    The status word is "converged" when ``converged`` is truthy, otherwise
    "accepted" when ``accepted`` is truthy, otherwise "failed". ``us`` is part
    of the callback signature but is not used here.
    """
    J_hist.append(J_opt)

    if converged:
        status = "converged"
    elif accepted:
        status = "accepted"
    else:
        status = "failed"

    print("iteration", iteration_count, status, J_opt, xs[-1])

In [6]:
# Locate the inverted double pendulum model file; the path is relative to the
# current working directory (one level up, inside ilqr/xmls).
xml_path = os.path.join('..', 'ilqr', 'xmls', 'inverted_double_pendulum.xml')
# NOTE(review): frame_skip presumably sets how many simulator sub-steps make up
# one control step, and use_multiprocessing presumably spawns the worker
# processes reported as "Finished loading process ..." — confirm in MujocoDynamics.
dynamics = MujocoDynamics(xml_path, frame_skip = 4, use_multiprocessing = True)
# Show the dynamics object's dt attribute (0.04 in the recorded run).
print(dynamics.dt)

Finished loading process 69534
Finished loading process 69535
Finished loading process 69536
Finished loading process 69537
Finished loading process 69538
Finished loading process 69539
Finished loading process 69540
Finished loading process 69541
Finished loading process 69542
Finished loading process 69543
Finished loading process 69544
Finished loading process 69545
0.04
Finished loading process 69546
Finished loading process 69547
Finished loading process 69548
Finished loading process 69549


In [7]:
print(dynamics.state_size)

# Goal state: all six components at zero.
x_goal = np.zeros(6)

# Instantaneous state cost: start from an identity matrix of the state size,
# then overwrite the first six diagonal entries with per-component weights.
Q = np.eye(dynamics.state_size)
Q[np.diag_indices(6)] = [5.0, 50.0, 50.0, 20.0, 700.0, 700.0]

# Terminal state cost: ten times the instantaneous state weights.
Q_terminal = 10 * Q

# Instantaneous control cost (1x1 identity).
R = np.eye(1)

cost1 = QRCost(Q, R, Q_terminal=Q_terminal, x_goal=x_goal)

6


In [8]:
def l(x, u, i):
    """Instantaneous cost of state ``x`` and control ``u``.

    The cost is the sum of:
      * the squares of x[0], x[3], x[4] and x[5],
      * 10 * (cos(x[k]) - 1) ** 2 for k = 1 and k = 2,
      * half the square of u[0].

    The index ``i`` is accepted for interface compatibility but does not
    influence the result.
    """
    # Terms are accumulated in the fixed order x[0], x[1], ..., x[5], u[0].
    total = x[0] ** 2
    total = total + 10 * ((np.cos(x[1]) - 1) ** 2)
    total = total + 10 * ((np.cos(x[2]) - 1) ** 2)
    total = total + x[3] ** 2
    total = total + x[4] ** 2
    total = total + x[5] ** 2
    total = total + 0.5 * u[0] ** 2
    return total

# Cost object built from l. The terminal cost reuses l with a zero control, so
# the control term contributes nothing at the final step.
# NOTE(review): the positional 6 and 1 are presumably the state and action
# sizes — confirm against FiniteDiffCost's signature.
cost2 = FiniteDiffCost(l, lambda x, i: l(x, [0.0], i), 6, 1, use_multiprocessing = True)

Finished loading process 69550
Finished loading process 69551
Finished loading process 69552
Finished loading process 69553
Finished loading process 69554
Finished loading process 69555
Finished loading process 69556
Finished loading process 69557
Finished loading process 69558
Finished loading process 69559
Finished loading process 69560
Finished loading process 69561
Finished loading process 69562
Finished loading process 69563
Finished loading process 69564
Finished loading process 69565


In [9]:
# N is the number of rows in the initial control sequence and the third
# argument to iLQR (presumably the planning horizon — confirm).
N = 100
# Random initial state: x0[1] and x0[2] are drawn uniformly from [-pi, pi);
# every other component is zero.
# NOTE(review): no random seed is set, so each run starts from a different
# state and the recorded outputs cannot be reproduced exactly.
x0 = np.array([0.0, np.random.uniform(-np.pi, np.pi), np.random.uniform(-np.pi, np.pi), 0.0, 0.0, 0.0])
#x0 = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
# Random initial controls drawn uniformly from [-1, 1), one row per step.
us_init = np.random.uniform(-1, 1, (N, dynamics.action_size))
#us_init = np.zeros((N, dynamics.action_size))
# The controller is built with cost2; cost1 is not used here.
ilqr = iLQR(dynamics, cost2, N)
mpc = RecedingHorizonController(x0, ilqr)

In [None]:
t0 = time.time()
J_hist = []  # on_iteration appends the cost of every iLQR iteration here
controls = mpc.control(us_init, step_size = 3, initial_n_iterations = 500, subsequent_n_iterations = 100, on_iteration = on_iteration)

# Number of receding-horizon steps to pull from the controller. Defined once so
# the loop length and the "last step" check below cannot drift apart.
n_mpc_steps = 30

us = []
for i in range(n_mpc_steps):
    print('ITERATION', i, '\n')
    yielded = next(controls)
    # Every step keeps element 1 of the yielded tuple, except the final step,
    # which keeps element 2.
    # NOTE(review): element 2 is presumably the full remaining control plan; the
    # recorded us.shape of (187, 1) is consistent with 29 * 3 + 100 rows —
    # confirm against RecedingHorizonController.control.
    if i == n_mpc_steps - 1:
        us.append(yielded[2])
    else:
        us.append(yielded[1])

print('time', time.time() - t0)
# Stack the per-step control chunks into a single array.
us = np.concatenate(us)

ITERATION 0 

iteration 0 accepted 24524.71511667896 [-2.99018691e+00  1.66834928e+01  1.38567277e+01  1.12313791e-02
 -6.03490694e+00  1.32450482e+01]
iteration 1 accepted 23864.530051100683 [-2.99020930e+00  1.66762288e+01  1.38079037e+01  4.92573841e-03
 -5.93254660e+00  1.33373396e+01]
iteration 2 accepted 22417.615624606613 [-2.99035921 16.64736662 13.63347197  0.02000666 -5.44511727 13.08145136]
iteration 3 accepted 21079.95456622708 [-2.99180097 16.60510296 13.45342012  0.06416332 -4.67970007 12.3414488 ]
iteration 4 accepted 20115.07321689 [-2.99377065 16.58919695 13.26595934  0.12906241 -3.99953621 11.22744272]
iteration 5 accepted 19252.615213899815 [-2.99570597 16.57215564 13.0763072   0.18950065 -3.19036674  9.6690208 ]
iteration 6 accepted 18496.536181221374 [-2.99798533 16.5364477  12.92939433  0.25550193 -2.24604649  7.98135427]
iteration 7 accepted 18280.661653681684 [-3.00037066 16.53245767 12.88284278  0.33229797 -1.9522856   7.46272842]
iteration 8 accepted 18151.060



ITERATION 1 

iteration 0 failed 15952.490575766038 [-2.91109457 16.61770859 12.63836161  0.64892289 -0.06642203  2.46682988]
iteration 1 failed 15952.490575766038 [-2.91109457 16.61770859 12.63836161  0.64892289 -0.06642203  2.46682988]
iteration 2 accepted 15952.348812889117 [-2.91119324 16.61758895 12.63820019  0.64741872 -0.06731942  2.46526485]
iteration 3 failed 15952.348812889124 [-2.91119324 16.61758895 12.63820019  0.64741872 -0.06731942  2.46526485]
iteration 4 accepted 15952.24409285175 [-2.9113182  16.61747017 12.63799111  0.64573121 -0.06826585  2.46356111]
iteration 5 failed 15952.24409285174 [-2.9113182  16.61747017 12.63799111  0.64573121 -0.06826585  2.46356111]
iteration 6 accepted 15949.459966415847 [-2.90874633 16.63102429 12.62863662  0.67201669 -0.11959036  2.42866512]
iteration 7 failed 15949.459966415845 [-2.90874633 16.63102429 12.62863662  0.67201669 -0.11959036  2.42866512]
iteration 8 accepted 15946.404307432384 [-2.90864893 16.63091003 12.62852622  0.671592

iteration 16 accepted 15511.074665868286 [-2.60657772 16.40460391 13.20274669  1.04419597 -2.39621522  2.50157662]
iteration 17 failed 15511.074665868291 [-2.60657772 16.40460391 13.20274669  1.04419597 -2.39621522  2.50157662]
iteration 18 accepted 15510.548173387226 [-2.62658881 16.39380637 13.19047022  0.97450992 -2.40759501  2.47221601]
iteration 19 accepted 15510.385017349225 [-2.62670885 16.39357963 13.1903125   0.97394146 -2.40844988  2.47151489]
iteration 20 accepted 15510.166665938841 [-2.62687496 16.39355842 13.19017805  0.97350202 -2.40807731  2.47141689]
iteration 21 failed 15510.166665938832 [-2.62687496 16.39355842 13.19017805  0.97350202 -2.40807731  2.47141689]
iteration 22 failed 15510.166665938832 [-2.62687496 16.39355842 13.19017805  0.97350202 -2.40807731  2.47141689]
iteration 23 accepted 15509.927285257472 [-2.62704255 16.39353408 13.19004562  0.97305493 -2.40771062  2.4713133 ]
iteration 24 accepted 15509.057030857211 [-2.56887562 16.3781771  13.19908825  1.11494

iteration 47 accepted 15347.869640026995 [-2.26091029 16.17626965 13.20005667  1.62576969 -2.41945445 -0.90403123]
iteration 48 failed 15347.869640026995 [-2.26091029 16.17626965 13.20005667  1.62576969 -2.41945445 -0.90403123]
iteration 49 failed 15347.869640026995 [-2.26091029 16.17626965 13.20005667  1.62576969 -2.41945445 -0.90403123]
iteration 50 accepted 15347.756860403884 [-2.26067227 16.17632157 13.20013202  1.62637017 -2.41960164 -0.90393651]
iteration 51 accepted 15347.479792749185 [-2.26298936 16.17454316 13.19712327  1.61769701 -2.42186556 -0.9081089 ]
iteration 52 failed 15347.479792749178 [-2.26298936 16.17454316 13.19712327  1.61769701 -2.42186556 -0.9081089 ]
iteration 53 accepted 15347.164974700225 [-2.26357544 16.17458113 13.19728217  1.61660087 -2.42116771 -0.9080376 ]
iteration 54 failed 15347.164974700223 [-2.26357544 16.17458113 13.19728217  1.61660087 -2.42116771 -0.9080376 ]
iteration 55 failed 15347.164974700223 [-2.26357544 16.17458113 13.19728217  1.61660087 

In [11]:
# Reset the simulator to the initial state and print it back as a sanity check.
dynamics.set_state(x0)
print(dynamics.get_state())
# Replay the collected controls one row at a time, storing the result of
# dynamics.sim.render(512, 512) after each step.
video = []
for i in range(us.shape[0]):
    dynamics.step(us[i])
    video.append(dynamics.sim.render(512, 512))

Creating window glfw
[0.         2.91843606 0.70713401 0.         0.         0.        ]


In [12]:
# Shape of the concatenated control array that was replayed.
print(us.shape)

(187, 1)


In [None]:
# NOTE(review): make_video_fn is not defined or imported anywhere in the visible
# notebook, so this cell raises NameError on a fresh kernel — define or import
# it before this point. It is called with the list of rendered frames and its
# return value is then called with no arguments.
make_video_fn(video)()