In [2]:
# Reload imported modules automatically before each cell runs, so edits to the
# local surface_seg package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Optional GPU selection / CUDA debugging switches (currently disabled).
# %env CUDA_DEVICE_ORDER=PCI_BUS_ID
# %env CUDA_VISIBLE_DEVICES=1
# %env CUDA_LAUNCH_BLOCKING=1
# Pin the numeric libraries to one thread each.
# NOTE(review): presumably to avoid oversubscribing cores when many environments
# run in parallel; these must be set before the libraries are imported — confirm.
%env MKL_NUM_THREADS=1
%env OMP_NUM_THREADS=1
%env NUMEXPR_NUM_THREADS=1
# NOTE(review): looks like the MKL code-path override commonly used on AMD CPUs — confirm it is still needed.
%env MKL_DEBUG_CPU_TYPE=5

env: MKL_NUM_THREADS=1
env: OMP_NUM_THREADS=1
env: NUMEXPR_NUM_THREADS=1
env: MKL_DEBUG_CPU_TYPE=5


In [3]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Ask TensorFlow to grow its GPU memory allocation on demand instead of
# reserving the whole device up front.
config = ConfigProto()
config.gpu_options.allow_growth = True
# Create the session with that configuration; `session` is kept as a notebook-level name.
session = InteractiveSession(config=config)

# Set up the gym and wrap a monitor around it that will periodically record movies as it learns

In [4]:
import gym
# from surface_seg.envs.mcs_env_multi_frame import MCSEnv
from surface_seg.envs.surface_seg import MCSEnv
from surface_seg.utils.callback_simple import Callback
from tensorforce.execution import Runner
import gym.wrappers
import numpy as np
import tensorforce
import copy
import tensorflow as tf
import os

In [5]:
# Notebook-wide settings, read as globals by setup_env() and the agent/runner cells.
timesteps = 500  # passed to MCSEnv and used as max_episode_timesteps for env, agent and runners
save_dir = './result_multi_env/surface_seg/'  # passed to MCSEnv and Callback; videos go to <save_dir>/vid
num_parallel = 32  # number of environments for the parallel Runner and the agent's parallel_interactions
thermal_threshold = 3  # passed to MCSEnv; also used to print thermal_threshold * env.thermal_energy

In [6]:
def setup_env(recording=True, structure=None, structure_idx=None, video_every=30):
    """Build an MCSEnv gym and wrap it as a Tensorforce ``OpenAIGym`` environment.

    Reads the notebook-level settings ``save_dir``, ``timesteps`` and
    ``thermal_threshold``.

    Args:
        recording: If True, wrap the gym in ``gym.wrappers.Monitor`` so that
            videos are written under ``<save_dir>/vid``. The monitor is created
            with ``force=True``.
        structure: Unused; accepted only for backward compatibility.
        structure_idx: Unused; accepted only for backward compatibility.
        video_every: Record a video on every ``video_every``-th episode
            (with the default of 30: the 30th, 60th, ... episode). Only used
            when ``recording`` is True.

    Returns:
        A ``tensorforce.environments.OpenAIGym`` wrapping the gym (monitored
        when ``recording`` is True).

    Raises:
        ValueError: If ``recording`` is True and ``video_every`` is less than 1.
    """
    # Validate before building anything: a zero interval would otherwise only
    # fail later, inside the monitor's video callback.
    if recording and video_every < 1:
        raise ValueError('video_every must be >= 1, got %r' % (video_every,))

    # Set up gym
    MCS_gym = MCSEnv(observation_fingerprints=True,
                     observation_forces=True,
                     permute_seed=42,
                     save_dir=save_dir,
                     timesteps=timesteps,
                     thermal_threshold=thermal_threshold,
                     save_every_min=1,
                     save_every=30,
                     step_size=0.1,
                     )

    if recording:
        # Wrap the gym so a video is rendered on every `video_every`-th episode.
        # episode_id is 0-based, hence the +1.
        MCS_gym = gym.wrappers.Monitor(
            MCS_gym,
            os.path.join(save_dir, 'vid'),
            force=True,
            video_callable=lambda episode_id: (episode_id + 1) % video_every == 0,
        )

    # Convert gym to tensorforce environment
    env = tensorforce.environments.OpenAIGym(MCS_gym,
                                             max_episode_timesteps=timesteps,
                                             visualize=False)

    return env
    
# Build one environment only to read its energy scales.
# NOTE(review): setup_env() defaults to recording=True, so this also creates a
# Monitor on <save_dir>/vid; the `.environment.env` chain presumably unwraps
# OpenAIGym -> Monitor -> MCSEnv and would need changing with recording=False — confirm.
env = setup_env().environment.env
print('initial energy', env.initial_energy)
print('thermal energy', env.thermal_energy)
n =thermal_threshold
# Energy equal to `thermal_threshold` multiples of the environment's thermal energy.
print('%dKT' %n, n * env.thermal_energy)

initial energy 4.094021275192408
thermal energy 0.827232
3KT 2.481696




# Set up the gym and agent in tensorforce

In [9]:
from tensorforce.agents import Agent

# Agent type specification.
# Alternative kept for reference:
#     dict(type='trpo', critic_network='auto', critic_optimizer=1.0)
trpo_spec = dict(type='trpo')

# Exploration schedule: starts at 0.2 and decays exponentially, measured in timesteps.
# Other decay_steps values tried: 10000, 1000000
exploration_spec = dict(
    type='decaying',
    unit='timesteps',
    decay='exponential',
    initial_value=0.2,
    decay_steps=50000,
    decay_rate=0.5,
)

# Create the agent against a freshly built environment (setup_env() with its
# default arguments). Keyword arguments are kept in their original order.
# Alternative memory spec kept for reference: dict(type='replay', capacity=10000)
agent = Agent.create(
    agent=trpo_spec,
    environment=setup_env(),
    batch_size=1,
    learning_rate=1e-3,
    memory=50000,
    max_episode_timesteps=timesteps,
    exploration=exploration_spec,
    parallel_interactions=num_parallel,
)
    

Instructions for updating:
If using Keras pass *_constraint arguments to layers.




Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


# Run the DRL method in parallel (multiple environments)

In [None]:
# Per-episode hook provided by the project's Callback, writing under save_dir.
callback = Callback(save_dir).episode_finish

# One non-recording environment per parallel worker, each in its own process.
runner = Runner(
    agent=agent,
    environments=[setup_env(recording=False) for _ in range(num_parallel)],
    num_parallel=num_parallel,
    remote='multiprocessing',
    max_episode_timesteps=timesteps,
)

try:
    runner.run(num_episodes=num_parallel*1000, callback=callback, callback_episode_frequency=1)
    # runner.run(num_episodes=100, evaluation=True)
finally:
    # Always close the runner, even if the run raises or is interrupted, so a
    # failed multi-hour run does not leave the runner's resources dangling.
    runner.close()

Episodes:  20%|█▉        | 6375/32000 [4:38:24, reward=0.65, ts/ep=500, sec/ep=104.53, ms/ts=209.1, agent=1.7%, comm=63.8%] Exception in thread Thread-2732426:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/opt/conda/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/opt/conda/lib/python3.7/site-packages/tensorforce/environments/environment.py", line 569, in finish_execute
    self.observation = self.execute(actions=actions)
  File "/opt/conda/lib/python3.7/site-packages/tensorforce/environments/environment.py", line 539, in execute
    states, terminal, reward, seconds = self.receive(function='execute')
  File "/opt/conda/lib/python3.7/site-packages/tensorforce/environments/environment.py", line 500, in receive
    raise TensorforceError(message='{}: {}'.format(etype, value)).with_traceback(traceback)
TypeError: __traceback__ must be a traceback o

# Run a single Env

In [None]:
# plot_frequency --> how often energies and trajectories are plotted
# NOTE(review): no plot_frequency argument is passed in this cell; presumably it
# refers to a Callback option — confirm or drop this remark.
# This cell rebinds `callback` and reuses the `agent` created in the agent cell,
# so it depends on that cell having been run first.
callback = Callback(save_dir).episode_finish

# Single, non-recording environment (no remote workers).
runner2 = Runner(
    agent=agent,
    environment=setup_env(recording=False),
    max_episode_timesteps=timesteps,
)

# Profiling variant (disabled):
# %prun runner.run(num_episodes=2, callback=callback, callback_episode_frequency=1)

# callback_episode_frequency --> how often (in episodes) the callback saves results and trajectories
runner2.run(num_episodes=20, callback=callback, callback_episode_frequency=1)
# Evaluation run and cleanup are currently disabled, so runner2 is left open after this cell.
# runner2.run(num_episodes=100, evaluation=True)
# runner2.close()

Episodes:  75%|███████▌  | 15/20 [04:52, reward=0.66, ts/ep=500, sec/ep=30.07, ms/ts=60.1, agent=3.6%] 