In [1]:
from __future__ import print_function

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import time

In [3]:
from ilqr.cost import QRCost, FiniteDiffCost
from ilqr.mujoco_dynamics import MujocoDynamics
from ilqr.mujoco_controller import iLQR, RecedingHorizonController
from ilqr.examples.cartpole import CartpoleDynamics
from ilqr.dynamics import constrain

from scipy.optimize import approx_fprime

import mujoco_py
from mujoco_py import MjViewer
import os

In [4]:
def on_iteration(iteration_count, xs, us, J_opt, accepted, converged):
    J_hist.append(J_opt)
    info = "converged" if converged else ("accepted" if accepted else "failed")
    final_state = xs[-1]
    print("iteration", iteration_count, info, J_opt, final_state)

In [5]:
xml_path = os.path.join('..', 'ilqr', 'xmls', 'reacher.xml')

#STATE: [theta1, theta2, goal_x, goal_y, theta1_dot, theta2_dot, goal_x_dot, goal_y_dot]

dynamics = MujocoDynamics(xml_path, frame_skip = 2, use_multiprocessing = True)
print('dt:', dynamics.dt)

Finished loading process 63583
Finished loading process 63584
Finished loading process 63585
Finished loading process 63586
Finished loading process 63587
Finished loading process 63588
Finished loading process 63589
Finished loading process 63590
Finished loading process 63591
Finished loading process 63592
Finished loading process 63593
Finished loading process 63594
dt: 0.02
Finished loading process 63595
Finished loading process 63596
Finished loading process 63597
Finished loading process 63598


In [6]:
def l(x, u, i):
    jointposx = 0.1 * np.cos(x[0])
    jointposy = 0.1 * np.sin(x[0])
    fingerposx = jointposx + 0.1 * np.cos(x[0] + x[1])
    fingerposy = jointposy + 0.1 * np.sin(x[0] + x[1])
    
    dist = np.sqrt((fingerposx - x[2]) ** 2 + (fingerposy - x[3]) ** 2)
    return 100 * (dist) + x[4] ** 2 + x[5] ** 2 + u[0] ** 2 + u[1] ** 2

cost2 = FiniteDiffCost(l, lambda x, i: l(x, [0.0, 0.0], i), 8, 2, use_multiprocessing = True)

Finished loading process 63599
Finished loading process 63600
Finished loading process 63601
Finished loading process 63602
Finished loading process 63603
Finished loading process 63604
Finished loading process 63605
Finished loading process 63606
Finished loading process 63607
Finished loading process 63608
Finished loading process 63609
Finished loading process 63610
Finished loading process 63611
Finished loading process 63612
Finished loading process 63613
Finished loading process 63614


In [7]:
N = 100
x0 = np.array([0.0, 0.0, np.random.uniform(-0.14, 0.14), np.random.uniform(-0.14, 0.14), 0.0, 0.0, 0.0, 0.0])
#us_init = np.random.uniform(-1, 1, (N, dynamics.action_size))
us_init = np.zeros((N, dynamics.action_size))
ilqr = iLQR(dynamics, cost2, N)
mpc = RecedingHorizonController(x0, ilqr)

In [8]:
t0 = time.time()
J_hist = []
controls = mpc.control(us_init, step_size = 1, initial_n_iterations = 500, subsequent_n_iterations = 100, on_iteration = on_iteration)
us = []
for i in range(30):
    print('ITERATION', i, '\n')
    if i == 29:
        us.append(next(controls)[2])
    else:
        us.append(next(controls)[1])
    
print('time', time.time() - t0)
us = np.concatenate(us)

ITERATION 0 

iteration 0 accepted 867.0468121153953 [-1.06381385e+00  3.11391341e-01  9.07615252e-02 -9.81575993e-02
 -6.02309206e-04  1.37790312e-03  0.00000000e+00  0.00000000e+00]
iteration 1 accepted 809.6649547314524 [-0.96715647  0.320839    0.09076153 -0.0981576  -0.00269427  0.00641625
  0.          0.        ]
iteration 2 accepted 801.6180944781063 [-0.97010413  0.31997377  0.09076153 -0.0981576  -0.00330138  0.00720174
  0.          0.        ]
iteration 3 accepted 801.6103375214775 [-0.96928111  0.31816361  0.09076153 -0.0981576  -0.00331863  0.00720761
  0.          0.        ]
iteration 4 converged 801.6103374809684 [-0.96928095  0.31816316  0.09076153 -0.0981576  -0.00331871  0.00720754
  0.          0.        ]
ITERATION 1 

iteration 0 failed 792.7666076843612 [-0.96929228  0.31818777  0.09076153 -0.0981576   0.00216711 -0.00470712
  0.          0.        ]
iteration 1 failed 792.7666076843612 [-0.96929228  0.31818777  0.09076153 -0.0981576   0.00216711 -0.00470712
  0

iteration 53 accepted 418.36033677033487 [ 1.41516538e-02 -1.67737208e+00  9.07615252e-02 -9.81575993e-02
  7.97170498e-04  9.81911243e-03  0.00000000e+00  0.00000000e+00]
iteration 54 accepted 418.28792478334776 [ 0.01418436 -1.67738259  0.09076153 -0.0981576   0.00190296  0.01037263
  0.          0.        ]
iteration 55 accepted 418.2569935243728 [ 0.01417893 -1.67738211  0.09076153 -0.0981576   0.0019322   0.01069284
  0.          0.        ]
iteration 56 accepted 418.1051857584924 [ 0.01368461 -1.67730019  0.09076153 -0.0981576   0.01422332  0.04404534
  0.          0.        ]
iteration 57 accepted 417.9942775131289 [ 0.01424678 -1.67745349  0.09076153 -0.0981576   0.02120344  0.04607486
  0.          0.        ]
iteration 58 accepted 417.9879271408014 [ 0.01417243 -1.6774136   0.09076153 -0.0981576   0.01920073  0.04307717
  0.          0.        ]
iteration 59 accepted 417.9630029093507 [ 0.01415253 -1.67738223  0.09076153 -0.0981576   0.01471969  0.0309257
  0.          0.    

iteration 8 failed 241.09101411585465 [ 0.01382882 -1.6768942   0.09076153 -0.0981576  -0.03810489  0.04251028
  0.          0.        ]
iteration 9 failed 241.09101411585465 [ 0.01382882 -1.6768942   0.09076153 -0.0981576  -0.03810489  0.04251028
  0.          0.        ]
iteration 10 failed 241.09101411585465 [ 0.01382882 -1.6768942   0.09076153 -0.0981576  -0.03810489  0.04251028
  0.          0.        ]
iteration 11 converged 241.09099644315492 [ 0.01382884 -1.67689424  0.09076153 -0.0981576  -0.03810468  0.04251004
  0.          0.        ]
ITERATION 10 

iteration 0 converged 224.03637294139003 [ 0.01268611 -1.67574308  0.09076153 -0.0981576  -0.07612371  0.07261492
  0.          0.        ]
ITERATION 11 

iteration 0 accepted 208.10121374719222 [ 0.01085615 -1.67405837  0.09076153 -0.0981576  -0.11245212  0.10141561
  0.          0.        ]
iteration 1 failed 208.1012137471922 [ 0.01085615 -1.67405837  0.09076153 -0.0981576  -0.11245212  0.10141561
  0.          0.        ]
it

iteration 14 failed 179.0020385749978 [ 1.41332598e-02 -1.67735950e+00  9.07615252e-02 -9.81575993e-02
 -3.54478319e-04 -8.78133376e-03  0.00000000e+00  0.00000000e+00]
iteration 15 failed 179.0020385749978 [ 1.41332598e-02 -1.67735950e+00  9.07615252e-02 -9.81575993e-02
 -3.54478319e-04 -8.78133376e-03  0.00000000e+00  0.00000000e+00]
iteration 16 failed 179.0020385749978 [ 1.41332598e-02 -1.67735950e+00  9.07615252e-02 -9.81575993e-02
 -3.54478319e-04 -8.78133376e-03  0.00000000e+00  0.00000000e+00]
iteration 17 failed 179.0020385749978 [ 1.41332598e-02 -1.67735950e+00  9.07615252e-02 -9.81575993e-02
 -3.54478319e-04 -8.78133376e-03  0.00000000e+00  0.00000000e+00]
iteration 18 accepted 179.00129001904665 [ 1.41478601e-02 -1.67735667e+00  9.07615252e-02 -9.81575993e-02
 -3.54065425e-04 -8.78108321e-03  0.00000000e+00  0.00000000e+00]
iteration 19 accepted 178.99928093583895 [ 1.41771249e-02 -1.67738653e+00  9.07615252e-02 -9.81575993e-02
 -3.53000329e-04 -8.78135913e-03  0.00000000e+

iteration 0 accepted 120.61242636053667 [ 0.01469439 -1.67673737  0.09076153 -0.0981576   0.0389984   0.0461982
  0.          0.        ]
iteration 1 accepted 120.61125498021912 [ 0.01457587 -1.67683781  0.09076153 -0.0981576   0.03175539  0.0392427
  0.          0.        ]
iteration 2 converged 120.61117774417662 [ 0.01456495 -1.67684889  0.09076153 -0.0981576   0.0309593   0.03848113
  0.          0.        ]
ITERATION 19 

iteration 0 accepted 110.97459266699197 [ 0.01522171 -1.67600062  0.09076153 -0.0981576   0.04848546  0.05972271
  0.          0.        ]
iteration 1 failed 110.97459266699198 [ 0.01522171 -1.67600062  0.09076153 -0.0981576   0.04848546  0.05972271
  0.          0.        ]
iteration 2 failed 110.97459266699198 [ 0.01522171 -1.67600062  0.09076153 -0.0981576   0.04848546  0.05972271
  0.          0.        ]
iteration 3 failed 110.97459266699198 [ 0.01522171 -1.67600062  0.09076153 -0.0981576   0.04848546  0.05972271
  0.          0.        ]
iteration 4 accepte

iteration 6 failed 93.27241944648428 [ 1.41563830e-02 -1.67739236e+00  9.07615252e-02 -9.81575993e-02
 -6.60931618e-05  8.26009354e-03  0.00000000e+00  0.00000000e+00]
iteration 7 accepted 93.27157879499259 [ 1.41564006e-02 -1.67739207e+00  9.07615252e-02 -9.81575993e-02
 -8.03794171e-05  8.24254951e-03  0.00000000e+00  0.00000000e+00]
iteration 8 accepted 93.27148020382236 [ 1.41563697e-02 -1.67738820e+00  9.07615252e-02 -9.81575993e-02
 -3.41337357e-04  7.89114310e-03  0.00000000e+00  0.00000000e+00]
iteration 9 accepted 93.26735197843044 [ 1.41585803e-02 -1.67737693e+00  9.07615252e-02 -9.81575993e-02
 -4.48609275e-04  7.09823184e-03  0.00000000e+00  0.00000000e+00]
iteration 10 accepted 93.26716400309745 [ 1.41585719e-02 -1.67737690e+00  9.07615252e-02 -9.81575993e-02
 -4.48427201e-04  7.09574464e-03  0.00000000e+00  0.00000000e+00]
iteration 11 accepted 93.26685815590564 [ 1.41585297e-02 -1.67737673e+00  9.07615252e-02 -9.81575993e-02
 -4.46645745e-04  7.08294912e-03  0.00000000e+

iteration 36 accepted 57.68485940674172 [ 0.0141574  -1.67738588  0.09076153 -0.0981576   0.00793363  0.02159179
  0.          0.        ]
iteration 37 failed 57.68485940674173 [ 0.0141574  -1.67738588  0.09076153 -0.0981576   0.00793363  0.02159179
  0.          0.        ]
iteration 38 failed 57.68485940674173 [ 0.0141574  -1.67738588  0.09076153 -0.0981576   0.00793363  0.02159179
  0.          0.        ]
iteration 39 failed 57.68485940674173 [ 0.0141574  -1.67738588  0.09076153 -0.0981576   0.00793363  0.02159179
  0.          0.        ]
iteration 40 failed 57.68485940674173 [ 0.0141574  -1.67738588  0.09076153 -0.0981576   0.00793363  0.02159179
  0.          0.        ]
iteration 41 failed 57.68485940674173 [ 0.0141574  -1.67738588  0.09076153 -0.0981576   0.00793363  0.02159179
  0.          0.        ]
iteration 42 failed 57.68485940674173 [ 0.0141574  -1.67738588  0.09076153 -0.0981576   0.00793363  0.02159179
  0.          0.        ]
iteration 43 failed 57.68485940674173 [

In [9]:
viewer = MjViewer(dynamics.sim)
dynamics.set_state(x0)
print(dynamics.get_state())
for i in range(50):
    viewer.render()
for i in range(us.shape[0]):
    dynamics.step(us[i])
    viewer.render()

Creating window glfw
[ 0.          0.          0.09076153 -0.0981576   0.          0.
  0.          0.        ]


In [10]:
print(l(dynamics.get_state(), np.array([0.0, 0.0]), 0))

0.009459733372342061
