In [1]:
from __future__ import print_function

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import time

In [3]:
from ilqr.cost import QRCost, FiniteDiffCost
from ilqr.mujoco_dynamics import MujocoDynamics
from ilqr.mujoco_controller import iLQR, RecedingHorizonController
from ilqr.examples.cartpole import CartpoleDynamics
from ilqr.dynamics import constrain

from scipy.optimize import approx_fprime

import mujoco_py
from mujoco_py import MjViewer
import os

In [4]:
def on_iteration(iteration_count, xs, us, J_opt, accepted, converged):
    J_hist.append(J_opt)
    info = "converged" if converged else ("accepted" if accepted else "failed")
    final_state = xs[-1]
    print("iteration", iteration_count, info, J_opt, final_state)

In [5]:
xml_path = os.path.join('..', 'ilqr', 'xmls', 'reacher.xml')

#STATE: [theta1, theta2, goal_x, goal_y, theta1_dot, theta2_dot, goal_x_dot, goal_y_dot]

dynamics = MujocoDynamics(xml_path, frame_skip = 2, use_multiprocessing = True)
print('dt:', dynamics.dt)

Finished loading process 69465
Finished loading process 69466
Finished loading process 69467
Finished loading process 69468
Finished loading process 69469
Finished loading process 69470
Finished loading process 69472
Finished loading process 69474
Finished loading process 69471
Finished loading process 69475
Finished loading process 69473
Finished loading process 69476
dt: 0.02
Finished loading process 69477
Finished loading process 69478
Finished loading process 69479
Finished loading process 69480


In [6]:
def l(x, u, i):
    jointposx = 0.1 * np.cos(x[0])
    jointposy = 0.1 * np.sin(x[0])
    fingerposx = jointposx + 0.1 * np.cos(x[0] + x[1])
    fingerposy = jointposy + 0.1 * np.sin(x[0] + x[1])
    
    dist = np.sqrt((fingerposx - x[2]) ** 2 + (fingerposy - x[3]) ** 2)
    return 100 * (dist) + x[4] ** 2 + x[5] ** 2 + u[0] ** 2 + u[1] ** 2

cost2 = FiniteDiffCost(l, lambda x, i: l(x, [0.0, 0.0], i), 8, 2, use_multiprocessing = True)

Finished loading process 69481
Finished loading process 69482
Finished loading process 69483
Finished loading process 69484
Finished loading process 69485
Finished loading process 69486
Finished loading process 69487
Finished loading process 69488
Finished loading process 69489
Finished loading process 69490
Finished loading process 69491
Finished loading process 69492
Finished loading process 69493
Finished loading process 69494
Finished loading process 69495
Finished loading process 69496


In [7]:
N = 100
x0 = np.array([0.0, 0.0, np.random.uniform(-0.14, 0.14), np.random.uniform(-0.14, 0.14), 0.0, 0.0, 0.0, 0.0])
#us_init = np.random.uniform(-1, 1, (N, dynamics.action_size))
us_init = np.zeros((N, dynamics.action_size))
ilqr = iLQR(dynamics, cost2, N)
mpc = RecedingHorizonController(x0, ilqr)

In [8]:
t0 = time.time()
J_hist = []
controls = mpc.control(us_init, step_size = 1, initial_n_iterations = 500, subsequent_n_iterations = 100, on_iteration = on_iteration)
us = []
for i in range(30):
    print('ITERATION', i, '\n')
    if i == 29:
        us.append(next(controls)[2])
    else:
        us.append(next(controls)[1])
    
print('time', time.time() - t0)
us = np.concatenate(us)

ITERATION 0 

iteration 0 failed 2595.8140542313818 [ 0.          0.         -0.05148809 -0.05299569  0.          0.
  0.          0.        ]
iteration 1 failed 2595.8140542313818 [ 0.          0.         -0.05148809 -0.05299569  0.          0.
  0.          0.        ]
iteration 2 failed 2595.8140542313818 [ 0.          0.         -0.05148809 -0.05299569  0.          0.
  0.          0.        ]
iteration 3 accepted 2207.4613707824305 [-0.72680002 -0.57103418 -0.05148809 -0.05299569  0.00708862  0.00556346
  0.          0.        ]
iteration 4 accepted 1551.90184415982 [-2.46912759 -2.99987227 -0.05148809 -0.05299569  0.02399378  0.02899678
  0.          0.        ]
iteration 5 accepted 1469.9294945795323 [-2.1479523  -2.99973501 -0.05148809 -0.05299569  0.02076612  0.02919234
  0.          0.        ]
iteration 6 accepted 1451.25608413385 [-2.07913592 -2.99934995 -0.05148809 -0.05299569  0.02012607  0.02987773
  0.          0.        ]
iteration 7 accepted 1342.5666774547224 [-1.621

iteration 2 failed 950.6459054515132 [-1.14942467 -2.38471092 -0.05148809 -0.05299569 -0.0039896   0.00344003
  0.          0.        ]
iteration 3 failed 950.6459054515132 [-1.14942467 -2.38471092 -0.05148809 -0.05299569 -0.0039896   0.00344003
  0.          0.        ]
iteration 4 accepted 950.6442665686973 [-1.14943708 -2.38475595 -0.05148809 -0.05299569 -0.00398825  0.00344026
  0.          0.        ]
iteration 5 converged 950.6438065584878 [-1.14942763 -2.38472698 -0.05148809 -0.05299569 -0.00398537  0.00343612
  0.          0.        ]
ITERATION 2 

iteration 0 failed 908.2689841494661 [-1.14953178 -2.38463251 -0.05148809 -0.05299569 -0.00642152  0.00600317
  0.          0.        ]
iteration 1 failed 908.2689841494661 [-1.14953178 -2.38463251 -0.05148809 -0.05299569 -0.00642152  0.00600317
  0.          0.        ]
iteration 2 failed 908.2689841494661 [-1.14953178 -2.38463251 -0.05148809 -0.05299569 -0.00642152  0.00600317
  0.          0.        ]
iteration 3 converged 908.268

iteration 3 accepted 503.7210638887303 [-1.14897663e+00 -2.38481963e+00 -5.14880931e-02 -5.29956931e-02
  8.01859774e-02  2.09634922e-04  0.00000000e+00  0.00000000e+00]
iteration 4 accepted 503.7128342578108 [-1.14897875e+00 -2.38482075e+00 -5.14880931e-02 -5.29956931e-02
  7.82468352e-02  1.16637487e-04  0.00000000e+00  0.00000000e+00]
iteration 5 failed 503.7128342578109 [-1.14897875e+00 -2.38482075e+00 -5.14880931e-02 -5.29956931e-02
  7.82468352e-02  1.16637487e-04  0.00000000e+00  0.00000000e+00]
iteration 6 failed 503.7128342578109 [-1.14897875e+00 -2.38482075e+00 -5.14880931e-02 -5.29956931e-02
  7.82468352e-02  1.16637487e-04  0.00000000e+00  0.00000000e+00]
iteration 7 failed 503.7128342578109 [-1.14897875e+00 -2.38482075e+00 -5.14880931e-02 -5.29956931e-02
  7.82468352e-02  1.16637487e-04  0.00000000e+00  0.00000000e+00]
iteration 8 accepted 503.7065898218036 [-1.14897567e+00 -2.38482068e+00 -5.14880931e-02 -5.29956931e-02
  7.81800589e-02  1.07100439e-04  0.00000000e+00  0.

iteration 16 failed 364.79803947213213 [-1.14715483 -2.38434418 -0.05148809 -0.05299569  0.19147777  0.05357383
  0.          0.        ]
iteration 17 converged 364.79794356547393 [-1.14715675 -2.38434426 -0.05148809 -0.05299569  0.19144615  0.05356225
  0.          0.        ]
ITERATION 16 

iteration 0 failed 334.55349987286115 [-1.14168333 -2.38269168 -0.05148809 -0.05299569  0.35534952  0.11150294
  0.          0.        ]
iteration 1 failed 334.55349987286115 [-1.14168333 -2.38269168 -0.05148809 -0.05299569  0.35534952  0.11150294
  0.          0.        ]
iteration 2 failed 334.55349987286115 [-1.14168333 -2.38269168 -0.05148809 -0.05299569  0.35534952  0.11150294
  0.          0.        ]
iteration 3 accepted 334.52201848585753 [-1.14176928 -2.38271857 -0.05148809 -0.05299569  0.35520549  0.11145036
  0.          0.        ]
iteration 4 accepted 334.5186033552545 [-1.142387   -2.38292791 -0.05148809 -0.05299569  0.35387726  0.11096728
  0.          0.        ]
iteration 5 accept

iteration 7 accepted 186.07963554208752 [-1.14973182 -2.3849804  -0.05148809 -0.05299569 -0.0443262  -0.03265756
  0.          0.        ]
iteration 8 accepted 186.0792368554574 [-1.14973833 -2.38498457 -0.05148809 -0.05299569 -0.04428318 -0.03260584
  0.          0.        ]
iteration 9 accepted 186.07902727615752 [-1.14976935 -2.38500433 -0.05148809 -0.05299569 -0.04401597 -0.03216854
  0.          0.        ]
iteration 10 failed 186.07902727615752 [-1.14976935 -2.38500433 -0.05148809 -0.05299569 -0.04401597 -0.03216854
  0.          0.        ]
iteration 11 failed 186.07902727615752 [-1.14976935 -2.38500433 -0.05148809 -0.05299569 -0.04401597 -0.03216854
  0.          0.        ]
iteration 12 failed 186.07902727615752 [-1.14976935 -2.38500433 -0.05148809 -0.05299569 -0.04401597 -0.03216854
  0.          0.        ]
iteration 13 failed 186.07902727615752 [-1.14976935 -2.38500433 -0.05148809 -0.05299569 -0.04401597 -0.03216854
  0.          0.        ]
iteration 14 failed 186.07902727

iteration 12 failed 132.49345257279842 [-1.14946526 -2.38468056 -0.05148809 -0.05299569 -0.09645947 -0.07601168
  0.          0.        ]
iteration 13 accepted 132.4776918734596 [-1.14944844 -2.38478154 -0.05148809 -0.05299569 -0.09618857 -0.07585286
  0.          0.        ]
iteration 14 accepted 132.47132745625842 [-1.14945641 -2.3848278  -0.05148809 -0.05299569 -0.09291589 -0.07283301
  0.          0.        ]
iteration 15 accepted 132.46090248023557 [-1.14945678 -2.38482776 -0.05148809 -0.05299569 -0.09277289 -0.07271454
  0.          0.        ]
iteration 16 accepted 132.45635204989435 [-1.1494561  -2.38482827 -0.05148809 -0.05299569 -0.09082782 -0.07073097
  0.          0.        ]
iteration 17 accepted 132.45615573543364 [-1.14945569 -2.38482837 -0.05148809 -0.05299569 -0.08985999 -0.06970648
  0.          0.        ]
iteration 18 failed 132.45615573543375 [-1.14945569 -2.38482837 -0.05148809 -0.05299569 -0.08985999 -0.06970648
  0.          0.        ]
iteration 19 failed 132.4

iteration 3 accepted 103.20375565862187 [-1.14890007 -2.38452318 -0.05148809 -0.05299569  0.01965609  0.01815591
  0.          0.        ]
iteration 4 failed 103.20375565862192 [-1.14890007 -2.38452318 -0.05148809 -0.05299569  0.01965609  0.01815591
  0.          0.        ]
iteration 5 failed 103.20375565862192 [-1.14890007 -2.38452318 -0.05148809 -0.05299569  0.01965609  0.01815591
  0.          0.        ]
iteration 6 failed 103.20375565862192 [-1.14890007 -2.38452318 -0.05148809 -0.05299569  0.01965609  0.01815591
  0.          0.        ]
iteration 7 failed 103.20375565862192 [-1.14890007 -2.38452318 -0.05148809 -0.05299569  0.01965609  0.01815591
  0.          0.        ]
iteration 8 failed 103.20375565862192 [-1.14890007 -2.38452318 -0.05148809 -0.05299569  0.01965609  0.01815591
  0.          0.        ]
iteration 9 accepted 103.20309950308278 [-1.14890076 -2.38452361 -0.05148809 -0.05299569  0.01965442  0.01815439
  0.          0.        ]
iteration 10 converged 103.2030794976

In [10]:
viewer = MjViewer(dynamics.sim)
dynamics.set_state(x0)
print(dynamics.get_state())
for i in range(25):
    viewer.render()
for i in range(us.shape[0]):
    dynamics.step(us[i])
    viewer.render()

Creating window glfw
[ 0.          0.         -0.05148809 -0.05299569  0.          0.
  0.          0.        ]


In [10]:
print(l(dynamics.get_state(), np.array([0.0, 0.0]), 0))

0.009459733372342061
