In [1]:
import sys

# Make the parent directory importable (presumably so the local `pilco`
# package resolves -- verify against the repository layout). The guard keeps
# the cell idempotent: re-running it no longer appends duplicate entries.
if '../' not in sys.path:
    sys.path.append('../')

In [2]:
import numpy as np
import gym
import pybullet_envs
from pilco.models import PILCO
from pilco.controllers import RbfController, LinearController
from pilco.rewards import ExponentialReward
import tensorflow as tf
from tensorflow import logging
# Seed NumPy's global random number generator so NumPy-based randomness is
# repeatable across runs.
# NOTE(review): this does not obviously seed the Gym environment or its action
# space (used by `random_policy`) -- confirm whether env-side seeding is needed.
np.random.seed(0)

def rollout(policy, timesteps):
    """Run one episode in the global `env` and collect dynamics training data.

    Args:
        policy: callable mapping the current observation to an action.
        timesteps: maximum number of environment steps to take.

    Returns:
        Tuple ``(X, Y)`` of stacked arrays with one row per recorded
        transition. Each row of ``X`` is the observation concatenated with the
        action applied to it; the matching row of ``Y`` is the resulting change
        in observation (``x_new - x``).

    Raises:
        ValueError: if no transition was recorded (``timesteps`` < 1).
    """
    inputs, targets = [], []
    # reset() already returns the initial observation, so there is no need to
    # push a dummy action through the simulator (which also hard-coded a
    # one-dimensional action space).
    x = np.asarray(env.reset())
    for _ in range(timesteps):
        # env.render()
        u = policy(x)
        x_new, _, done, _ = env.step(u)
        x_new = np.asarray(x_new)
        # Record before checking `done`: the transition into the terminal
        # state is still a valid sample of the dynamics.
        inputs.append(np.hstack((x, u)))
        targets.append(x_new - x)
        if done:
            break
        x = x_new
    if not inputs:
        # np.stack([]) would fail with an opaque message; be explicit instead.
        raise ValueError(
            "rollout collected no transitions; timesteps must be >= 1 "
            "(got {!r})".format(timesteps))
    return np.stack(inputs), np.stack(targets)

def random_policy(x):
    """Return a sample from the global `env`'s action space; `x` is ignored."""
    action_space = env.action_space
    return action_space.sample()

def pilco_policy(x):
    """Return the action chosen by the global `pilco` object for observation `x`.

    The observation is given a leading axis before being handed to
    `pilco.compute_action`, and the first row of the result is returned.
    """
    batched_observation = x[None, :]
    batched_action = pilco.compute_action(batched_observation)
    return batched_action[0, :]

import time

# Experiment settings.
ENV_ID = 'InvertedPendulumSwingupBulletEnv-v0'
ROLLOUT_TIMESTEPS = 100   # maximum environment steps per rollout
N_RANDOM_ROLLOUTS = 3     # random rollouts used to build the initial dataset
N_PILCO_ITERATIONS = 3    # model-fit / policy-optimisation / rollout rounds
PLANNING_HORIZON = 40     # `horizon` argument passed to PILCO
N_BASIS_FUNCTIONS = 5     # `num_basis_functions` of the RBF controller
head = '############################################################## '


def _log_elapsed(label, started):
    """Print the wall-clock time `label` took since `started`; return a new start time."""
    print(head + label + ' takes ' + str(time.time() - started) + 'sec')
    return time.time()


with tf.Session(graph=tf.Graph()) as sess:
    env = gym.make(ENV_ID)
    try:
        # Initial random rollouts to generate a dataset
        initial_data = [rollout(policy=random_policy, timesteps=ROLLOUT_TIMESTEPS)
                        for _ in range(N_RANDOM_ROLLOUTS)]
        X = np.vstack([inputs for inputs, _ in initial_data])
        Y = np.vstack([targets for _, targets in initial_data])

        # Y holds state differences, X holds [state, action] rows, so the
        # action width is whatever is left after removing the state columns.
        state_dim = Y.shape[1]
        control_dim = X.shape[1] - state_dim
        controller = RbfController(state_dim=state_dim, control_dim=control_dim,
                                   num_basis_functions=N_BASIS_FUNCTIONS)
        #controller = LinearController(state_dim=state_dim, control_dim=control_dim)

        pilco = PILCO(X, Y, controller=controller, horizon=PLANNING_HORIZON)
        # Example of user provided reward function, setting a custom target state
        # R = ExponentialReward(state_dim=state_dim, t=np.array([0.1,0,0,0]))
        # pilco = PILCO(X, Y, controller=controller, horizon=40, reward=R)

        # Example of fixing a parameter, optional, for a linear controller only
        #pilco.controller.b = np.array([[0.0]])
        #pilco.controller.b.trainable = False

        for iteration in range(N_PILCO_ITERATIONS):
            st = time.time()
            # NOTE(review): in the recorded run the third call to
            # optimize_models() aborted with InvalidArgumentError ("Cholesky
            # decomposition was not successful") after the learned GP noise
            # had settled at 1e-06 for every model. Presumably the kernel
            # matrix became ill-conditioned; consider constraining or fixing
            # the noise variance -- TODO confirm against the pilco/GPflow API.
            pilco.optimize_models()
            st = _log_elapsed('pilco.optimize_models()', st)

            pilco.optimize_policy()
            st = _log_elapsed('pilco.optimize_policy()', st)

            X_new, Y_new = rollout(policy=pilco_policy, timesteps=ROLLOUT_TIMESTEPS)
            print("No of ops:", len(tf.get_default_graph().get_operations()))
            # Update dataset
            X = np.vstack((X, X_new))
            Y = np.vstack((Y, Y_new))
            pilco.mgpr.set_XY(X, Y)
            # This interval covers the policy rollout as well as the dataset
            # update, so the label names both.
            st = _log_elapsed('rollout + pilco.mgpr.set_XY()', st)
    finally:
        # Release the simulator even if optimisation fails part-way through.
        env.close()



INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -1777.299186
  Number of iterations: 64
  Number of functions evaluations: 78


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -1777.299186
  Number of iterations: 64
  Number of functions evaluations: 78


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -1731.243470
  Number of iterations: 51
  Number of functions evaluations: 60


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -1731.243470
  Number of iterations: 51
  Number of functions evaluations: 60


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -1759.502566
  Number of iterations: 62
  Number of functions evaluations: 76


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -1759.502566
  Number of iterations: 62
  Number of functions evaluations: 76


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -1764.031791
  Number of iterations: 94
  Number of functions evaluations: 109


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -1764.031791
  Number of iterations: 94
  Number of functions evaluations: 109


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -1708.909801
  Number of iterations: 57
  Number of functions evaluations: 75


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -1708.909801
  Number of iterations: 57
  Number of functions evaluations: 75


-----Learned models------
---Lengthscales---
         GP0       GP1        GP2         GP3        GP4
0  92250.883  1039.957  39880.244  127180.050  15041.078
1     12.452  1418.530  41459.459  121465.491  16879.116
2  61156.543     1.313   5437.399       0.901   6430.086
3  72522.356    12.567      0.866      28.569      1.167
4  94211.878    64.971      7.232      17.934     27.663
5     65.102    16.635     18.914      48.447     19.120
---Variances---
     GP0    GP1    GP2    GP3    GP4
0  0.015  0.972  0.001  0.029  4.107
---Noises---
         GP0        GP1        GP2        GP3        GP4
0  1.000e-06  1.000e-06  1.000e-06  1.000e-06  1.000e-06
############################################################## pilco.optimize_models() takes 10.529339075088501sec
INFO:tensorflow:Optimization terminated with:
  Message: b'STOP: TOTAL NO. of ITERATIONS REACHED LIMIT'
  Objective function value: -38.836318
  Number of iterations: 50
  Number of functions evaluations: 68


INFO:tensorflow:Optimization terminated with:
  Message: b'STOP: TOTAL NO. of ITERATIONS REACHED LIMIT'
  Objective function value: -38.836318
  Number of iterations: 50
  Number of functions evaluations: 68


Controller's optimization: done in 326.7 seconds with reward=38.836.
############################################################## pilco.optimize_policy() takes 334.2228043079376sec
No of ops: 8177
############################################################## pilco.mgpr takes 3.4672091007232666sec
INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -2236.901356
  Number of iterations: 54
  Number of functions evaluations: 67


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -2236.901356
  Number of iterations: 54
  Number of functions evaluations: 67


INFO:tensorflow:Optimization terminated with:
  Message: b'ABNORMAL_TERMINATION_IN_LNSRCH'
  Objective function value: -1367.831685
  Number of iterations: 88
  Number of functions evaluations: 175


INFO:tensorflow:Optimization terminated with:
  Message: b'ABNORMAL_TERMINATION_IN_LNSRCH'
  Objective function value: -1367.831685
  Number of iterations: 88
  Number of functions evaluations: 175


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -2329.677858
  Number of iterations: 66
  Number of functions evaluations: 73


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -2329.677858
  Number of iterations: 66
  Number of functions evaluations: 73


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -2112.691805
  Number of iterations: 77
  Number of functions evaluations: 90


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -2112.691805
  Number of iterations: 77
  Number of functions evaluations: 90


INFO:tensorflow:Optimization terminated with:
  Message: b'ABNORMAL_TERMINATION_IN_LNSRCH'
  Objective function value: -1168.092091
  Number of iterations: 82
  Number of functions evaluations: 156


INFO:tensorflow:Optimization terminated with:
  Message: b'ABNORMAL_TERMINATION_IN_LNSRCH'
  Objective function value: -1168.092091
  Number of iterations: 82
  Number of functions evaluations: 156


-----Learned models------
---Lengthscales---
       GP0        GP1       GP2      GP3        GP4
0    0.325      0.080  9258.296    0.161      0.069
1   11.749  19924.458  9540.981  956.426   2448.983
2   76.404      0.431  1700.958    3.755      0.347
3  196.984   7892.773     0.943  457.829   1655.298
4  396.887  18484.629     5.486  103.668  25153.663
5   85.901   1264.578    16.220  422.370    766.143
---Variances---
     GP0       GP1    GP2    GP3        GP4
0  0.015  49078.37  0.001  1.488  58264.057
---Noises---
         GP0        GP1        GP2        GP3        GP4
0  1.000e-06  1.000e-06  1.000e-06  1.000e-06  1.000e-06
############################################################## pilco.optimize_models() takes 21.94959020614624sec
Randomising controller
INFO:tensorflow:Optimization terminated with:
  Message: b'ABNORMAL_TERMINATION_IN_LNSRCH'
  Objective function value: -0.705038
  Number of iterations: 10
  Number of functions evaluations: 80


INFO:tensorflow:Optimization terminated with:
  Message: b'ABNORMAL_TERMINATION_IN_LNSRCH'
  Objective function value: -0.705038
  Number of iterations: 10
  Number of functions evaluations: 80


Controller's optimization: done in 663.8 seconds with reward=0.705.
############################################################## pilco.optimize_policy() takes 678.7721111774445sec
No of ops: 8252
############################################################## pilco.mgpr takes 2.94171142578125sec
INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -2835.786372
  Number of iterations: 71
  Number of functions evaluations: 85


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -2835.786372
  Number of iterations: 71
  Number of functions evaluations: 85


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -1950.770218
  Number of iterations: 67
  Number of functions evaluations: 117


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -1950.770218
  Number of iterations: 67
  Number of functions evaluations: 117


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -2928.235174
  Number of iterations: 58
  Number of functions evaluations: 70


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -2928.235174
  Number of iterations: 58
  Number of functions evaluations: 70


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -2709.968603
  Number of iterations: 75
  Number of functions evaluations: 87


INFO:tensorflow:Optimization terminated with:
  Message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
  Objective function value: -2709.968603
  Number of iterations: 75
  Number of functions evaluations: 87


InvalidArgumentError: Cholesky decomposition was not successful. The input might not be valid.
	 [[Node: GPR-f1b6ae6b-106/likelihood_1/Cholesky = Cholesky[T=DT_DOUBLE, _device="/job:localhost/replica:0/task:0/device:CPU:0"](GPR-f1b6ae6b-106/likelihood_1/add_2)]]

Caused by op 'GPR-f1b6ae6b-106/likelihood_1/Cholesky', defined at:
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/lib/python3.6/asyncio/base_events.py", line 1432, in _run_once
    handle._run()
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/tornado/ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/tornado/gen.py", line 781, in inner
    self.run()
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/tornado/gen.py", line 742, in run
    yielded = self.gen.send(value)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2843, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2869, in _run_cell
    return runner(coro)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3044, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3209, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3291, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-2-60cee50bd4c5>", line 46, in <module>
    pilco = PILCO(X, Y, controller=controller, horizon=40)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/gpflow-1.3.0-py3.6.egg/gpflow/core/compilable.py", line 86, in __init__
    origin_init(self, *args, **kwargs)
  File "/home/data/mukai/m1_autumn/fuji/PILCO-1/pilco/models/pilco.py", line 20, in __init__
    self.mgpr = MGPR(X, Y)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/gpflow-1.3.0-py3.6.egg/gpflow/core/compilable.py", line 86, in __init__
    origin_init(self, *args, **kwargs)
  File "/home/data/mukai/m1_autumn/fuji/PILCO-1/pilco/models/mgpr.py", line 24, in __init__
    self.create_models(X, Y)
  File "/home/data/mukai/m1_autumn/fuji/PILCO-1/pilco/models/mgpr.py", line 33, in create_models
    self.models[i].clear(); self.models[i].compile()
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/gpflow-1.3.0-py3.6.egg/gpflow/core/node.py", line 58, in compile
    self.build()
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/gpflow-1.3.0-py3.6.egg/gpflow/core/node.py", line 156, in build
    self._build()
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/gpflow-1.3.0-py3.6.egg/gpflow/models/model.py", line 79, in _build
    likelihood = self._build_likelihood()
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/gpflow-1.3.0-py3.6.egg/gpflow/decors.py", line 49, in name_scope_wrapper
    return method(*args, **kwargs)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/gpflow-1.3.0-py3.6.egg/gpflow/decors.py", line 67, in tensor_mode_wrapper
    result = method(obj, *args, **kwargs)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/gpflow-1.3.0-py3.6.egg/gpflow/models/gpr.py", line 64, in _build_likelihood
    L = tf.cholesky(K)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/tensorflow/python/ops/gen_linalg_ops.py", line 412, in cholesky
    "Cholesky", input=input, name=name)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3160, in create_op
    op_def=op_def)
  File "/home/lab/shigeharu/.pyenv/versions/3.6.5/envs/365nog/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1625, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): Cholesky decomposition was not successful. The input might not be valid.
	 [[Node: GPR-f1b6ae6b-106/likelihood_1/Cholesky = Cholesky[T=DT_DOUBLE, _device="/job:localhost/replica:0/task:0/device:CPU:0"](GPR-f1b6ae6b-106/likelihood_1/add_2)]]
