In [1]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules (e.g. the local surface_seg package) are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

# Set up the gym and wrap a monitor around it that will periodically record movies as it learns

In [2]:
import gym
from surface_seg.envs.mcs_env import MCSEnv
import gym.wrappers
import numpy as np
import tensorforce 

In [3]:
def setup_env(video_dir="./vid", video_interval=50, max_episode_timesteps=400):
    """Build the MCS gym, add video recording, and wrap it for Tensorforce.

    Parameters
    ----------
    video_dir : str
        Directory handed to ``gym.wrappers.Monitor``. The monitor is created
        with ``force=True`` (per gym's documentation this clears monitor
        files already present in that directory), so pass distinct
        directories if recordings from several environments must be kept.
    video_interval : int
        A video is recorded whenever ``(episode_id + 1) % video_interval == 0``,
        i.e. once every ``video_interval`` episodes (not steps).
    max_episode_timesteps : int
        Episode length limit passed to the Tensorforce ``OpenAIGym`` wrapper.

    Returns
    -------
    tensorforce.environments.OpenAIGym
        Tensorforce environment wrapping the monitored ``MCSEnv``.

    Raises
    ------
    ValueError
        If ``video_interval`` is smaller than 1 (the modulo test in the
        recording callback would otherwise divide by zero or never fire
        as intended).
    """
    if video_interval < 1:
        raise ValueError("video_interval must be >= 1, got %r" % (video_interval,))

    # Define symmetry functions
    Gs = {}
    Gs["G2_etas"] = np.logspace(np.log10(0.05), np.log10(5.0), num=4)
    # One G2 shift per G2 eta, all zero.
    Gs["G2_rs_s"] = [0] * len(Gs["G2_etas"])
    Gs["G4_etas"] = [0.005]
    Gs["G4_zetas"] = [1.0, 4.0]
    Gs["G4_gammas"] = [+1.0, -1]
    Gs["cutoff"] = 5.876798323827276  # EMT asap_cutoff: False

    # Set up gym
    MCS_gym = MCSEnv(fingerprints=True, Gs=Gs)

    # Wrap the gym so that every `video_interval`-th episode is recorded
    recording_MCS_gym = gym.wrappers.Monitor(
        MCS_gym,
        video_dir,
        force=True,
        video_callable=lambda episode_id: (episode_id + 1) % video_interval == 0,
    )

    # Convert the gym to a Tensorforce environment
    recording_env = tensorforce.environments.OpenAIGym(
        recording_MCS_gym,
        max_episode_timesteps=max_episode_timesteps,
        visualize=False,
    )

    return recording_env

# Check that the action and state spaces are correctly converted from the OpenAI Gym environment to the Tensorforce environment

In [4]:
# Build a fresh wrapped environment and start an episode.
recording_env = setup_env()
recording_env.reset()
# `.environment` is the object wrapped by the Tensorforce environment; the
# action is drawn from that object's own action space.
action = recording_env.environment.action_space.sample() # Random sampling
# Take one step with the sampled action. As the last expression of the cell,
# the return value is displayed.
recording_env.execute(action) # Displays (dict[state], Termination[Bool], observed reward[float]) -- element types as noted by the original author; TODO confirm

found a new local minima! distance=0.06 w energy -0.00




({'fingerprints': array([[2.05705119e+00, 3.44222291e+00, 3.98816006e+00, 1.93448392e+00,
          3.23891277e+00, 3.75741214e+00, 1.47768566e+00, 2.46896991e+00,
          2.88080825e+00, 5.18276089e-01, 8.30073498e-01, 9.87234759e-01,
          3.07025441e-01, 2.25622950e+00, 2.70624029e+00, 1.42951254e+00,
          4.51260141e+00, 1.88539979e+00, 1.21244101e-01, 8.10631261e-01,
          1.06943616e+00, 4.68847328e-01, 1.67638284e+00, 6.58763458e-01,
          1.45506226e-01, 1.16778279e+00, 1.28295355e+00, 7.33537408e-01,
          2.26734038e+00, 9.58503786e-01, 1.68633029e-02, 9.98445958e-02,
          1.12775787e-01, 4.44538547e-02, 1.88936022e-01, 8.23138680e-02],
         [3.22101893e+00, 2.23756979e+00, 4.01492378e+00, 3.04396314e+00,
          2.10878245e+00, 3.76768092e+00, 2.37012277e+00, 1.62817295e+00,
          2.83345829e+00, 8.65362677e-01, 6.09086569e-01, 8.94571498e-01,
          1.20780662e+00, 2.27653735e+00, 4.34088822e+00, 4.57655621e-01,
          2.97205129e

# Set up the gym and agent in tensorforce

In [5]:
from tensorforce.agents import Agent

# Create a TRPO agent against a freshly built environment. Exploration uses
# a decaying schedule configured by the dictionary below.
agent = Agent.create(
    agent="trpo",
    environment=setup_env(),
    max_episode_timesteps=400,
    memory=40000,
    batch_size=10,
    learning_rate=1e-2,
    exploration={
        "type": "decaying",
        "unit": "timesteps",
        "decay": "exponential",
        "initial_value": 0.3,
        "decay_steps": 1000,
        "decay_rate": 0.5,
    },
)

# Keep the agent's specification; the Runner cells below are given this spec
# rather than the agent object itself.
agent_spec = agent.spec

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


# Run the DRL method on a single instance

In [None]:
from tensorforce.execution import Runner

# Single-environment run: the Runner receives the agent specification and a
# freshly built recording environment.
runner = Runner(
    agent=agent_spec,
    environment=setup_env(),
    max_episode_timesteps=400,
)

# Training is slow (the recorded output shows roughly a minute per episode),
# so the cell is likely to be interrupted. The try/finally guarantees the
# runner is closed even if a run raises or the kernel is interrupted.
try:
    runner.run(num_episodes=1000)  # training episodes
    runner.run(num_episodes=100, evaluation=True)  # evaluation episodes
finally:
    runner.close()

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


Episodes:   0%|          | 0/1000 [00:00, reward=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

found a new local minima! distance=0.09 w energy -0.01
found a new local minima! distance=1.64 w energy 0.19
found a new local minima! distance=1.37 w energy 0.82


Episodes:   0%|          | 1/1000 [00:58, reward=677.66, ts/ep=400, sec/ep=58.98, ms/ts=147.5, agent=19.5%]

found a new local minima! distance=0.05 w energy -0.00
found a new local minima! distance=4.76 w energy -0.05
found a new local minima! distance=4.29 w energy -0.06
found a new local minima! distance=2.14 w energy 0.14


# Run the DRL method in parallel (multiple environments)

In [None]:
from tensorforce.execution import Runner

# Parallel run: two independently built recording environments share one
# agent specification.
runner = Runner(
    agent=agent_spec,
    environments=[setup_env(), setup_env()],
    num_parallel=2,
    max_episode_timesteps=400,
)

# The try/finally guarantees the runner is closed even if a run raises or
# the kernel is interrupted part-way through a long training session.
try:
    runner.run(num_episodes=1000)  # training episodes
    # NOTE(review): some Tensorforce releases document `evaluation=True` as
    # valid only for a single environment -- confirm this call is supported
    # with two parallel environments on the installed version.
    runner.run(num_episodes=100, evaluation=True)
finally:
    runner.close()

# Example of how to render the environment in a jupyter cell

In [None]:
env = MCS_gym
from matplotlib import pyplot as plt
%matplotlib inline

from IPython import display

img = plt.imshow(env.render(mode='rgb_array')) # only call this once
plt.axis('off')
for _ in range(100):
    img.set_data(env.render(mode='rgb_array')) # just update the data
    display.display(plt.gcf())
    display.clear_output(wait=True)
    action = env.action_space.sample()
    env.step(action)