In [1]:
%load_ext autoreload
%autoreload 2

%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=1
%env CUDA_LAUNCH_BLOCKING=1

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=1
env: CUDA_LAUNCH_BLOCKING=1


# Set up the gym and wrap a monitor around it that will periodically record movies as it learns

In [2]:
import gym
from surface_seg.envs.mcs_env import MCSEnv
import gym.wrappers
import numpy as np
import tensorforce 
import copy


In [3]:
def setup_env():
    
    Gs = {}
    Gs["G2_etas"] = np.logspace(np.log10(0.05), np.log10(5.0), num=4)
    Gs["G2_rs_s"] = [0] * 4
    Gs["G4_etas"] = [0.005]
    Gs["G4_zetas"] = [1.0]
    Gs["G4_gammas"] = [+1.0, -1]
    Gs["cutoff"] = 6.5

    G = copy.deepcopy(Gs)

    # order descriptors for simple_nn
    cutoff = G["cutoff"]
    G["G2_etas"] = [a / cutoff**2 for a in G["G2_etas"]]
    G["G4_etas"] = [a / cutoff**2 for a in G["G4_etas"]]
    descriptors = (
        G["G2_etas"],
        G["G2_rs_s"],
        G["G4_etas"],
        G["cutoff"],
        G["G4_zetas"],
        G["G4_gammas"],
    )
    
    # Set up gym
    MCS_gym = MCSEnv(fingerprints=True, descriptors=descriptors)
    
    # Wrap the gym to provide video rendering every 50 steps
    recording_MCS_gym = gym.wrappers.Monitor(MCS_gym, 
                                         "./vid", 
                                         force=True,
                                        video_callable = lambda episode_id: (episode_id+1)%50==0)
    
    #Convert gym to tensorfce environment
    recording_env = tensorforce.environments.OpenAIGym(recording_MCS_gym,
                                         max_episode_timesteps=400,
                                         visualize=False)
    
    return recording_env

# Check action and state spaces are correctly converted from OpenAIGym to Tensorforce env

In [4]:
recording_env = setup_env()
recording_env.reset()
action = recording_env.environment.action_space.sample() # Random sampling
recording_env.execute(action) # Prints (dict[state], Termination[Bool], observed reward[float]



({'fingerprints': array([[ 2.70877432,  3.2243721 ,  4.89211215,  2.5398308 ,  3.04458271,
           4.6387243 ,  1.92021011,  2.36985324,  3.65753778,  0.71798044,
           0.94719281,  1.4132759 ,  1.14141666,  4.07455698,  5.92265411,
           1.19175689,  6.76886376,  3.53520537,  0.33768746,  1.30319815,
           1.94409299,  0.52249506,  2.46357114,  1.37521526],
         [ 4.92404457,  3.60053502,  5.83458838,  4.66567603,  3.3662376 ,
           5.52105369,  3.68618549,  2.50910547,  4.3055561 ,  1.53988233,
           0.87035513,  1.55656067,  3.32924521,  6.01261925, 11.31608438,
           1.34494062,  7.47509361,  4.65369723,  1.52347078,  2.40358391,
           4.38681586,  0.5065031 ,  2.48246827,  1.80997586],
         [ 5.00674197,  4.89960533,  4.49921587,  4.73663686,  4.63287471,
           4.22632276,  3.70255547,  3.60957854,  3.21713353,  1.40949533,
           1.34553311,  1.19931568,  3.4449048 ,  9.35815552,  8.21782517,
           3.17922571,  8.0370018

# Set up the gym and agent in tensorforce

In [5]:
from tensorforce.agents import Agent

agent = Agent.create(
    agent='trpo', 
    environment=setup_env(), 
    batch_size=10, 
    learning_rate=1e-2,
    memory = 40000,
    max_episode_timesteps = 400,
    exploration=dict(
        type='decaying', unit='timesteps', decay='exponential',
        initial_value=0.3, decay_steps=1000, decay_rate=0.5
    ))

agent_spec = agent.spec

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


# Run the DRL method on a single instance

In [6]:
from tensorforce.execution import Runner

runner = Runner(
    agent=agent_spec,
    environment=setup_env(),
    max_episode_timesteps=400,
)

runner.run(num_episodes=1000)
runner.run(num_episodes=100, evaluation=True)
runner.close()

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


Episodes:   0%|          | 0/1000 [00:00, reward=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

found a new local minima! distance=0.08 w energy -0.01
[4.72357355] [4.58425969] [1.03038961] 0
[2.38237146] [2.53531483] [0.9396748] 0
[1.98189442] [0.95992314] [2.06463866] 0
[1.12320974] [0.477076] [2.35436227] 0
[0.36765047] [0.31032849] [1.18471388] 0
[0.31536312] [0.29157146] [1.08159807] 0
[0.28990493] [0.26958163] [1.07538829] 0
[0.12794494] [0.25725677] [0.49734335] 0
[0.07965157] [0.25326945] [0.31449342] 0
[0.10915604] [0.25807036] [0.42297006] 0
[0.01124812] [0.25700551] [0.04376606] 0
[0.01790987] [0.26201283] [0.06835495] 0
[0.01432735] [0.2619508] [0.0546948] 0
[0.02004678] [0.26687634] [0.07511636] 0
[0.00589113] [0.2668163] [0.02207934] 0
[0.01703152] [0.27179094] [0.06266405] 0
[0.00495737] [0.27173974] [0.01824309] 0
[0.03207119] [0.27656797] [0.11596133] 0
[0.01081088] [0.27643476] [0.03910826] 0
[0.02184814] [0.28056403] [0.07787222] 0
[0.00836897] [0.28043091] [0.02984325] 0
[0.02209896] [0.28329345] [0.0780073] 0
[0.01231899] [0.28310548] [0.04351377] 0
[0.024704

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-6-2d67cd25cc22>", line 9, in <module>
    runner.run(num_episodes=1000)
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/site-packages/tensorforce/execution/runner.py", line 469, in run
    observation = self.environments[n].receive_execute()
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/site-packages/tensorforce/environments/environment.py", line 325, in receive_execute
    states, terminal, reward = self.execute(actions=self._actions)
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/site-packages/tensorforce/environments/environment.py", line 366, in execute
    states, terminal, reward = self.environment.execute(actions=actions)
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/site-packages/tensorforce

KeyboardInterrupt: 

# Run the DRL method in parallel (multiple environments)

In [None]:
from tensorforce.execution import Runner

runner = Runner(
    agent=agent_spec,
    environments=[setup_env(), setup_env()],
    num_parallel=2,
    max_episode_timesteps=400,
)

runner.run(num_episodes=1000)
runner.run(num_episodes=100, evaluation=True)
runner.close()

# Example of how to render the environment in a jupyter cell

In [None]:
env = MCS_gym
from matplotlib import pyplot as plt
%matplotlib inline

from IPython import display

img = plt.imshow(env.render(mode='rgb_array')) # only call this once
plt.axis('off')
for _ in range(100):
    img.set_data(env.render(mode='rgb_array')) # just update the data
    display.display(plt.gcf())
    display.clear_output(wait=True)
    action = env.action_space.sample()
    env.step(action)