In [1]:
%load_ext autoreload
%autoreload 2

# %env CUDA_DEVICE_ORDER=PCI_BUS_ID
# %env CUDA_VISIBLE_DEVICES=1
# %env CUDA_LAUNCH_BLOCKING=1

In [2]:
!module load intel ffmpeg

# Set up the gym and wrap a monitor around it that will periodically record movies as it learns

In [3]:
import gym
from surface_seg.envs.mcs_env import MCSEnv
import gym.wrappers
import numpy as np
import tensorforce 
import copy


In [4]:
def setup_env():
    
    Gs = {}
    Gs["G2_etas"] = np.logspace(np.log10(0.05), np.log10(5.0), num=4)
    Gs["G2_rs_s"] = [0] * 4
    Gs["G4_etas"] = [0.005]
    Gs["G4_zetas"] = [1.0]
    Gs["G4_gammas"] = [+1.0, -1]
    Gs["cutoff"] = 6.5

    G = copy.deepcopy(Gs)

    # order descriptors for simple_nn
    cutoff = G["cutoff"]
    G["G2_etas"] = [a / cutoff**2 for a in G["G2_etas"]]
    G["G4_etas"] = [a / cutoff**2 for a in G["G4_etas"]]
    descriptors = (
        G["G2_etas"],
        G["G2_rs_s"],
        G["G4_etas"],
        G["cutoff"],
        G["G4_zetas"],
        G["G4_gammas"],
    )
    
    # Set up gym
    MCS_gym = MCSEnv(fingerprints=True, descriptors=descriptors)
    
    # Wrap the gym to provide video rendering every 50 steps
    recording_MCS_gym = gym.wrappers.Monitor(MCS_gym, 
                                         "./vid", 
                                         force=True,
                                        video_callable = lambda episode_id: (episode_id+1)%50==0)
    
    #Convert gym to tensorfce environment
    recording_env = tensorforce.environments.OpenAIGym(recording_MCS_gym,
                                         max_episode_timesteps=400,
                                         visualize=False)
    
    return recording_env

# Set up the gym and agent in tensorforce

In [6]:
from tensorforce.agents import Agent

agent = Agent.create(
    agent='trpo', 
    environment=setup_env(), 
    batch_size=10, 
    learning_rate=1e-2,
    memory = 40000,
    max_episode_timesteps = 400,
    exploration=dict(
        type='decaying', unit='timesteps', decay='exponential',
        initial_value=0.3, decay_steps=1000, decay_rate=0.5
    ))

agent_spec = agent.spec

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


# Run the DRL method on a single instance

In [8]:
from tensorforce.execution import Runner

runner = Runner(
    agent=agent_spec,
    environment=setup_env(),
    max_episode_timesteps=400,
)

runner.run(num_episodes=1000)
runner.run(num_episodes=100, evaluation=True)
runner.close()

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


Episodes:   0%|          | 0/1000 [00:00, reward=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=2.16 w energy 0.20
found a new local minima! distance=2.26 w energy -0.03
found a new local minima! distance=3.27 w energy 1.28
found a new local minima! distance=2.34 w energy 1.51


Episodes:   0%|          | 1/1000 [00:32, reward=3991.12, ts/ep=400, sec/ep=32.05, ms/ts=80.1, agent=33.7%]

found a new local minima! distance=0.18 w energy -0.01
found a new local minima! distance=1.45 w energy 0.11


Episodes:   0%|          | 2/1000 [00:46, reward=1963.62, ts/ep=400, sec/ep=14.75, ms/ts=36.9, agent=10.1%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.59 w energy 0.27


Episodes:   0%|          | 3/1000 [01:03, reward=1320.30, ts/ep=400, sec/ep=16.71, ms/ts=41.8, agent=8.7%] 

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=2.00 w energy 0.19


Episodes:   0%|          | 4/1000 [01:14, reward=1953.38, ts/ep=400, sec/ep=10.68, ms/ts=26.7, agent=14.2%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.91 w energy 0.19


Episodes:   0%|          | 5/1000 [01:25, reward=1943.49, ts/ep=400, sec/ep=11.79, ms/ts=29.5, agent=12.2%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   1%|          | 6/1000 [01:34, reward=3042.52, ts/ep=400, sec/ep=8.63, ms/ts=21.6, agent=16.7%] 

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   1%|          | 7/1000 [01:43, reward=3035.34, ts/ep=400, sec/ep=8.38, ms/ts=20.9, agent=16.8%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.43 w energy 0.11


Episodes:   1%|          | 8/1000 [01:50, reward=2046.25, ts/ep=400, sec/ep=7.77, ms/ts=19.4, agent=18.4%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   1%|          | 9/1000 [01:58, reward=3027.28, ts/ep=400, sec/ep=7.99, ms/ts=20.0, agent=18.1%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   1%|          | 10/1000 [02:07, reward=3030.47, ts/ep=400, sec/ep=8.84, ms/ts=22.1, agent=16.2%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=2.07 w energy 0.20


Episodes:   1%|          | 11/1000 [02:20, reward=1976.87, ts/ep=400, sec/ep=13.14, ms/ts=32.9, agent=47.3%]

found a new local minima! distance=0.18 w energy -0.01
found a new local minima! distance=1.45 w energy 0.11


Episodes:   1%|          | 12/1000 [02:29, reward=2038.95, ts/ep=400, sec/ep=8.67, ms/ts=21.7, agent=16.1%] 

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=2.15 w energy 0.20


Episodes:   1%|▏         | 13/1000 [02:37, reward=1980.04, ts/ep=400, sec/ep=7.80, ms/ts=19.5, agent=19.2%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   1%|▏         | 14/1000 [02:45, reward=3068.53, ts/ep=400, sec/ep=8.00, ms/ts=20.0, agent=17.3%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.32 w energy 0.20


Episodes:   2%|▏         | 15/1000 [02:53, reward=3048.39, ts/ep=400, sec/ep=7.94, ms/ts=19.8, agent=19.3%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   2%|▏         | 16/1000 [03:00, reward=3047.02, ts/ep=400, sec/ep=7.79, ms/ts=19.5, agent=18.9%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   2%|▏         | 17/1000 [03:08, reward=3049.43, ts/ep=400, sec/ep=8.01, ms/ts=20.0, agent=19.0%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   2%|▏         | 18/1000 [03:16, reward=3039.55, ts/ep=400, sec/ep=7.44, ms/ts=18.6, agent=19.6%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   2%|▏         | 19/1000 [03:23, reward=3037.79, ts/ep=400, sec/ep=7.55, ms/ts=18.9, agent=18.5%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   2%|▏         | 20/1000 [03:31, reward=3051.18, ts/ep=400, sec/ep=7.84, ms/ts=19.6, agent=18.3%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   2%|▏         | 21/1000 [03:45, reward=3051.59, ts/ep=400, sec/ep=13.34, ms/ts=33.3, agent=46.6%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=2.17 w energy 0.20


Episodes:   2%|▏         | 22/1000 [03:52, reward=1989.74, ts/ep=400, sec/ep=7.49, ms/ts=18.7, agent=18.3%] 

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   2%|▏         | 23/1000 [04:00, reward=3050.46, ts/ep=400, sec/ep=7.53, ms/ts=18.8, agent=18.4%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   2%|▏         | 24/1000 [04:08, reward=3045.59, ts/ep=400, sec/ep=8.17, ms/ts=20.4, agent=17.3%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   2%|▎         | 25/1000 [04:15, reward=3044.22, ts/ep=400, sec/ep=7.53, ms/ts=18.8, agent=19.3%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=2.04 w energy 0.20


Episodes:   3%|▎         | 26/1000 [04:23, reward=1981.36, ts/ep=400, sec/ep=7.52, ms/ts=18.8, agent=18.4%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   3%|▎         | 27/1000 [04:31, reward=3056.07, ts/ep=400, sec/ep=7.96, ms/ts=19.9, agent=18.5%]

found a new local minima! distance=0.12 w energy -0.01


Episodes:   3%|▎         | 28/1000 [04:37, reward=1092.89, ts/ep=400, sec/ep=6.09, ms/ts=15.2, agent=23.5%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   3%|▎         | 29/1000 [04:45, reward=3041.51, ts/ep=400, sec/ep=7.65, ms/ts=19.1, agent=19.1%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   3%|▎         | 30/1000 [04:52, reward=3037.25, ts/ep=400, sec/ep=7.59, ms/ts=19.0, agent=18.3%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   3%|▎         | 31/1000 [05:04, reward=3046.79, ts/ep=400, sec/ep=12.25, ms/ts=30.6, agent=49.6%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   3%|▎         | 32/1000 [05:13, reward=3053.10, ts/ep=400, sec/ep=8.28, ms/ts=20.7, agent=16.4%] 

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   3%|▎         | 33/1000 [05:20, reward=3040.07, ts/ep=400, sec/ep=7.65, ms/ts=19.1, agent=19.1%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   3%|▎         | 34/1000 [05:28, reward=3047.82, ts/ep=400, sec/ep=7.81, ms/ts=19.5, agent=18.6%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   4%|▎         | 35/1000 [05:36, reward=3056.56, ts/ep=400, sec/ep=8.18, ms/ts=20.4, agent=18.0%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   4%|▎         | 36/1000 [05:44, reward=3043.82, ts/ep=400, sec/ep=7.79, ms/ts=19.5, agent=18.8%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=2.17 w energy 0.20


Episodes:   4%|▎         | 37/1000 [05:52, reward=1987.05, ts/ep=400, sec/ep=7.68, ms/ts=19.2, agent=18.1%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   4%|▍         | 38/1000 [05:59, reward=3043.59, ts/ep=400, sec/ep=7.62, ms/ts=19.1, agent=18.8%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   4%|▍         | 39/1000 [06:07, reward=3039.07, ts/ep=400, sec/ep=7.45, ms/ts=18.6, agent=18.7%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   4%|▍         | 40/1000 [06:15, reward=3043.94, ts/ep=400, sec/ep=8.07, ms/ts=20.2, agent=18.1%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   4%|▍         | 41/1000 [06:27, reward=3042.50, ts/ep=400, sec/ep=12.50, ms/ts=31.2, agent=50.3%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   4%|▍         | 42/1000 [06:35, reward=3037.79, ts/ep=400, sec/ep=7.58, ms/ts=19.0, agent=19.0%] 

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   4%|▍         | 43/1000 [06:43, reward=3043.55, ts/ep=400, sec/ep=7.48, ms/ts=18.7, agent=18.0%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   4%|▍         | 44/1000 [06:51, reward=3051.77, ts/ep=400, sec/ep=8.32, ms/ts=20.8, agent=16.4%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=2.17 w energy 0.20


Episodes:   4%|▍         | 45/1000 [06:59, reward=1984.23, ts/ep=400, sec/ep=7.87, ms/ts=19.7, agent=18.1%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   5%|▍         | 46/1000 [07:06, reward=3038.03, ts/ep=400, sec/ep=7.46, ms/ts=18.6, agent=18.9%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=2.17 w energy 0.20


Episodes:   5%|▍         | 47/1000 [07:15, reward=1994.70, ts/ep=400, sec/ep=8.44, ms/ts=21.1, agent=18.3%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   5%|▍         | 48/1000 [07:22, reward=3039.66, ts/ep=400, sec/ep=7.59, ms/ts=19.0, agent=19.4%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.81 w energy 0.19
found a new local minima! distance=0.36 w energy 0.20


Episodes:   5%|▍         | 49/1000 [07:30, reward=3052.35, ts/ep=400, sec/ep=8.02, ms/ts=20.1, agent=18.0%]

DependencyNotInstalled: Found neither the ffmpeg nor avconv executables. On OS X, you can install ffmpeg via `brew install ffmpeg`. On most Ubuntu variants, `sudo apt-get install ffmpeg` should do it. On Ubuntu 14.04, however, you'll need to install avconv with `sudo apt-get install libav-tools`.

# Run the DRL method in parallel (multiple environments)

In [None]:
# from tensorforce.execution import Runner

# runner = Runner(
#     agent=agent_spec,
#     environments=[setup_env(), setup_env()],
#     num_parallel=2,
#     max_episode_timesteps=400,
# )

# runner.run(num_episodes=1000)
# runner.run(num_episodes=100, evaluation=True)
# runner.close()

# Example of how to render the environment in a jupyter cell

In [None]:
env = MCS_gym
from matplotlib import pyplot as plt
%matplotlib inline

from IPython import display

img = plt.imshow(env.render(mode='rgb_array')) # only call this once
plt.axis('off')
for _ in range(100):
    img.set_data(env.render(mode='rgb_array')) # just update the data
    display.display(plt.gcf())
    display.clear_output(wait=True)
    action = env.action_space.sample()
    env.step(action)