In [2]:
# Reload imported modules automatically before each cell runs, so edits to the
# project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# GPU selection. These must be set before any CUDA-using library initialises.
# PCI_BUS_ID ordering makes the device index below follow the PCI bus order;
# CUDA_VISIBLE_DEVICES=1 exposes only the device at index 1 to this kernel.
# CUDA_LAUNCH_BLOCKING=1 forces synchronous kernel launches (a debugging aid;
# NOTE(review): it slows GPU work down -- confirm it is still wanted).
%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=1
%env CUDA_LAUNCH_BLOCKING=1

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=1
env: CUDA_LAUNCH_BLOCKING=1


In [16]:
# NOTE(review): the recorded output of this cell is "/bin/sh: 1: module: not found",
# i.e. the `module` command is unavailable in the shell IPython spawns here.
# `!cmd` also runs in a throw-away subshell, so even a successful `module load`
# would not change the kernel's environment -- load intel/ffmpeg before starting
# Jupyter instead (TODO confirm the intended setup).
!module load intel ffmpeg

/bin/sh: 1: module: not found


# Set up the gym and wrap a monitor around it that will periodically record movies as it learns

In [4]:
import gym
from surface_seg.envs.mcs_env import MCSEnv
from surface_seg.utils.callback import Callback
import gym.wrappers
import numpy as np
import tensorforce 
import copy


In [5]:
def setup_env(video_dir="./vid", video_every=10, max_episode_timesteps=400, visualize=True):
    """Build the MCS gym, wrap it in a video-recording Monitor, and adapt it to tensorforce.

    Args:
        video_dir: Directory the gym Monitor writes into. It is opened with
            ``force=True``, so existing monitor files there are overwritten.
        video_every: Record a video on every ``video_every``-th episode
            (episodes are counted from 1, so the default records episodes
            10, 20, 30, ...).
        max_episode_timesteps: Episode length limit passed to the tensorforce
            environment wrapper.
        visualize: Forwarded to ``tensorforce.environments.OpenAIGym``.

    Returns:
        A ``tensorforce.environments.OpenAIGym`` wrapping the monitored ``MCSEnv``.

    Raises:
        ValueError: If ``video_every`` is smaller than 1.
    """
    if video_every < 1:
        # Guard against a ZeroDivisionError inside the video_callable lambda.
        raise ValueError("video_every must be a positive integer")

    # Fingerprint hyper-parameters; the G2/G4 eta values are divided by cutoff**2.
    cutoff = 6.5
    g2_etas = [eta / cutoff**2
               for eta in np.logspace(np.log10(0.05), np.log10(5.0), num=4)]
    g2_rs_s = [0] * len(g2_etas)
    g4_etas = [eta / cutoff**2 for eta in [0.005]]
    g4_zetas = [1.0]
    g4_gammas = [+1.0, -1]

    # order descriptors for simple_nn
    descriptors = (
        g2_etas,
        g2_rs_s,
        g4_etas,
        cutoff,
        g4_zetas,
        g4_gammas,
    )

    # Set up gym
    MCS_gym = MCSEnv(fingerprints=True, descriptors=descriptors)

    # Wrap the gym so a video is rendered every `video_every` episodes
    recording_MCS_gym = gym.wrappers.Monitor(
        MCS_gym,
        video_dir,
        force=True,
        video_callable=lambda episode_id: (episode_id + 1) % video_every == 0,
    )

    # Convert gym to tensorforce environment
    recording_env = tensorforce.environments.OpenAIGym(
        recording_MCS_gym,
        max_episode_timesteps=max_episode_timesteps,
        visualize=visualize,
    )

    return recording_env

# Set up the gym and agent in tensorforce

In [6]:
from tensorforce.agents import Agent

# Exploration schedule: starts at 0.3 and decays exponentially, per timestep.
exploration_cfg = {
    'type': 'decaying',
    'unit': 'timesteps',
    'decay': 'exponential',
    'initial_value': 0.3,
    'decay_steps': 1000,
    'decay_rate': 0.5,
}

# required for recording states and actions
recorder_cfg = {
    'directory': './recorder',
    'frequency': 'every episode',
}

# TRPO agent; the environment is passed so tensorforce can read its state/action specs.
agent = Agent.create(
    agent='trpo',
    environment=setup_env(),
    batch_size=10,
    learning_rate=1e-2,
    memory=40000,
    max_episode_timesteps=400,
    exploration=exploration_cfg,
    recorder=recorder_cfg,
)

# Keep the agent's specification so a Runner can be built from it later.
agent_spec = agent.spec

Instructions for updating:
If using Keras pass *_constraint arguments to layers.




Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


# Run the DRL method on a single instance

In [7]:
from tensorforce.execution import Runner
from surface_seg.utils.callback import Callback

# Per-episode hook from the project's Callback helper, constructed with './result'.
callback = Callback('./result').episode_finish

runner = Runner(
    agent=agent_spec,
    environment=setup_env(),
    max_episode_timesteps=400,
)

# Always close the runner, even when the (long) run is interrupted from the
# notebook -- otherwise the resources it created stay alive in the kernel.
try:
    runner.run(num_episodes=1000, callback=callback, callback_episode_frequency=1)
    # runner.run(num_episodes=100, evaluation=True)
finally:
    runner.close()

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


Episodes:   0%|          | 0/1000 [00:00, reward=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=2.13 w energy 0.20
found a new local minima! distance=2.98 w energy 1.55


Episodes:   0%|          | 1/1000 [00:35, reward=1782.42, ts/ep=400, sec/ep=35.26, ms/ts=88.1, agent=36.4%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.96 w energy 0.19


Episodes:   0%|          | 2/1000 [00:53, reward=1937.48, ts/ep=400, sec/ep=18.28, ms/ts=45.7, agent=21.9%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=2.08 w energy 0.20
found a new local minima! distance=1.49 w energy 0.25


Episodes:   0%|          | 3/1000 [01:13, reward=2890.09, ts/ep=400, sec/ep=19.99, ms/ts=50.0, agent=19.6%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=2.16 w energy 0.20


Episodes:   0%|          | 4/1000 [01:31, reward=1894.10, ts/ep=400, sec/ep=17.28, ms/ts=43.2, agent=22.1%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.87 w energy 0.19


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-a6bca61ab9e9>", line 11, in <module>
    runner.run(num_episodes=1000, callback=callback, callback_episode_frequency=1)
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/site-packages/tensorforce/execution/runner.py", line 502, in run
    self.handle_act(parallel=n)
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/site-packages/tensorforce/execution/runner.py", line 530, in handle_act
    actions = self.agent.act(states=self.states[parallel], parallel=parallel)
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/site-packages/tensorforce/agents/agent.py", line 504, in act
    states=states, auxiliaries=auxiliaries, parallel=parallel, **kwargs
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/site-packages/tenso

KeyboardInterrupt: 

# Run the DRL method in parallel (multiple environments)

In [None]:
# Disabled alternative to the single-environment run: uncomment to train with
# two environments built by setup_env() driven by one Runner (num_parallel=2).
# from tensorforce.execution import Runner

# runner = Runner(
#     agent=agent_spec,
#     environments=[setup_env(), setup_env()],
#     num_parallel=2,
#     max_episode_timesteps=400,
# )

# runner.run(num_episodes=1000)
# runner.run(num_episodes=100, evaluation=True)
# runner.close()

# Example of how to render the environment in a jupyter cell

In [None]:
env = MCS_gym
from matplotlib import pyplot as plt
%matplotlib inline

from IPython import display

img = plt.imshow(env.render(mode='rgb_array')) # only call this once
plt.axis('off')
for _ in range(100):
    img.set_data(env.render(mode='rgb_array')) # just update the data
    display.display(plt.gcf())
    display.clear_output(wait=True)
    action = env.action_space.sample()
    env.step(action)