In [1]:
# Kernel setup: module auto-reloading plus environment variables for the numeric libraries.
# (Presumably this cell has to run before numpy/tensorflow are imported so the thread
# limits are picked up -- TODO confirm.)

# Reload edited project modules automatically before each cell executes.
%load_ext autoreload
%autoreload 2
# Optional GPU pinning / synchronous CUDA launches for debugging; currently disabled.
# %env CUDA_DEVICE_ORDER=PCI_BUS_ID
# %env CUDA_VISIBLE_DEVICES=1
# %env CUDA_LAUNCH_BLOCKING=1
# Cap MKL, OpenMP and NumExpr at one thread each
# (presumably to avoid oversubscribing the CPU when many environments run in parallel -- TODO confirm).
%env MKL_NUM_THREADS=1
%env OMP_NUM_THREADS=1
%env NUMEXPR_NUM_THREADS=1
# NOTE(review): MKL_DEBUG_CPU_TYPE is not an officially documented MKL switch; the value 5 is
# commonly used to force the AVX2 code path on AMD CPUs -- confirm it is still needed/honoured.
%env MKL_DEBUG_CPU_TYPE=5

env: MKL_NUM_THREADS=1
env: OMP_NUM_THREADS=1
env: NUMEXPR_NUM_THREADS=1
env: MKL_DEBUG_CPU_TYPE=5


In [2]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Create the TF1-compat interactive session used for the rest of the notebook.
# allow_growth=True asks TensorFlow to grow its GPU memory allocation on demand
# (per the TensorFlow GPU options documentation) rather than reserving it up front.
config = ConfigProto()
config.gpu_options.allow_growth = True
# NOTE(review): the session is never closed in the visible cells.
session = InteractiveSession(config=config)

# Set up the gym and wrap a monitor around it that will periodically record movies as it learns

In [3]:
import gym
from surface_seg.envs.env_for_paper import MCSEnv
from surface_seg.utils.callback_simple import Callback
from tensorforce.execution import Runner
import gym.wrappers
import numpy as np
import tensorforce
import copy
import tensorflow as tf
import os

In [4]:
# Run configuration shared by the cells below.

# Episode length limit: passed to MCSEnv, the tensorforce environment wrapper, the agent
# and the runner as (max_episode_)timesteps.
# NOTE(review): the saved outputs further down report max_episode_timesteps=700 and
# ts/ep=700, so they appear to come from a run with a different value -- re-run to refresh.
timesteps = 500
# Output root: handed to MCSEnv and Callback; Monitor videos go to <save_dir>/vid.
save_dir = './result_multi_env/env_for_paper/'
# Number of environments run side by side (also the agent's parallel_interactions).
num_parallel = 32
# Passed to MCSEnv as thermal_threshold; printed below as a multiple of env.thermal_energy ("3KT").
thermal_threshold = 3

In [5]:
def setup_env(recording=True, structure=None, structure_idx=None, record_every=30):
    """Build one MCSEnv gym and wrap it as a tensorforce environment.

    Reads the notebook-level settings ``save_dir``, ``timesteps`` and
    ``thermal_threshold``.

    Args:
        recording: If True, wrap the gym in ``gym.wrappers.Monitor`` so that
            videos are written to ``<save_dir>/vid``.
        structure: Currently unused; kept so existing callers keep working.
        structure_idx: Currently unused; kept so existing callers keep working.
        record_every: Record a video for every ``record_every``-th episode
            (the 30th, 60th, ... episode with the default of 30). Only used
            when ``recording`` is True.

    Returns:
        A ``tensorforce.environments.OpenAIGym`` wrapping the (optionally
        monitored) gym, limited to ``timesteps`` steps per episode.

    Raises:
        ValueError: If ``recording`` is True and ``record_every`` is below 1.
    """
    if recording and record_every < 1:
        raise ValueError('record_every must be >= 1, got %r' % (record_every,))

    # Set up gym
    MCS_gym = MCSEnv(
        observation_fingerprints=True,
        observation_forces=True,
        permute_seed=42,
        save_dir=save_dir,
        timesteps=timesteps,
        thermal_threshold=thermal_threshold,
        save_every_min=1,
        save_every=30,
        step_size=0.1,
    )

    if recording:
        # Wrap the gym to render a video on every `record_every`-th episode.
        # Monitor passes a zero-based episode id, hence the +1: with the default
        # of 30 the episodes with ids 29, 59, 89, ... are recorded.
        # NOTE(review): force=True lets Monitor reuse a directory that already
        # holds monitor files (per the gym docs it clears them) -- confirm that
        # repeated calls overwriting <save_dir>/vid is intended.
        MCS_gym = gym.wrappers.Monitor(
            MCS_gym,
            os.path.join(save_dir, 'vid'),
            force=True,
            video_callable=lambda episode_id: (episode_id + 1) % record_every == 0,
        )

    # Convert gym to tensorforce environment
    env = tensorforce.environments.OpenAIGym(
        MCS_gym,
        max_episode_timesteps=timesteps,
        visualize=False,
    )

    return env
    
# Build one environment purely to read its reference energies.
# setup_env() hands back the tensorforce wrapper; .environment.env reaches through it
# to the underlying gym object (presumably the MCSEnv inside the Monitor -- confirm).
env = setup_env().environment.env
n = thermal_threshold
for label, energy in (
    ('initial energy', env.initial_energy),
    ('thermal energy', env.thermal_energy),
    ('%dKT' % n, n * env.thermal_energy),
):
    print(label, energy)

initial energy 4.094021275192408
thermal energy 0.827232
3KT 2.481696




# Set up the gym and agent in tensorforce

In [9]:
from tensorforce.agents import Agent

# TRPO agent. The environment passed here is built with setup_env()'s default
# recording=True; episode length and parallelism come from the config cell.
agent = Agent.create(
    agent={'type': 'trpo'},
    environment=setup_env(),
    batch_size=1,
    learning_rate=1e-3,
    memory=50000,
    max_episode_timesteps=timesteps,
    # Exploration starts at 0.2 and decays exponentially, measured in timesteps.
    # Values noted by the author: 10000, 50000, 1000000 (presumably for decay_steps).
    exploration={
        'type': 'decaying',
        'unit': 'timesteps',
        'decay': 'exponential',
        'initial_value': 0.2,
        'decay_steps': 50000,
        'decay_rate': 0.5,
    },
    parallel_interactions=num_parallel,
)
    

Instructions for updating:
If using Keras pass *_constraint arguments to layers.




Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [10]:
# Display the agent's resolved configuration.
# NOTE(review): the saved output below lists max_episode_timesteps=700, whereas the config
# cell sets timesteps = 500 -- the stored output appears to come from an earlier run with
# different settings; re-execute the notebook top to bottom to refresh it.
agent.spec

OrderedDict([('agent', 'trpo'),
             ('states',
              {'element_type': {'type': 'float', 'shape': (24,)},
               'energy': {'type': 'float', 'shape': (1,)},
               'positions': {'type': 'float', 'shape': (24,)}}),
             ('actions',
              {'atom_selection': {'type': 'int', 'shape': (), 'num_values': 4},
               'layer_selection': {'type': 'int',
                'shape': (),
                'num_values': 2},
               'movement_bottom': {'type': 'int',
                'shape': (),
                'num_values': 6},
               'movement_top': {'type': 'int', 'shape': (), 'num_values': 2}}),
             ('max_episode_timesteps', 700),
             ('batch_size', 1),
             ('network', 'auto'),
             ('use_beta_distribution', True),
             ('memory', 50000),
             ('update_frequency', None),
             ('learning_rate', 0.001),
             ('discount', 0.99),
             ('estimate_terminal', False)

# Run the DRL method in parallel (multiple environments)

In [None]:
# Hook invoked by the runner after episodes finish (callback_episode_frequency=1 below);
# provided by the project's Callback helper, constructed with the output directory.
callback = Callback(save_dir).episode_finish

# One non-recording environment per parallel slot, each run via tensorforce's
# 'multiprocessing' remote mode.
runner = Runner(
    agent=agent,
    environments=[setup_env(recording=False) for _ in range(num_parallel)],
    num_parallel=num_parallel,
    remote='multiprocessing',
    max_episode_timesteps=timesteps,
)

# Always release the runner, even if the long training run fails or is interrupted
# from the notebook; previously runner.close() was commented out and never called.
# The agent is left open because it was created outside the runner.
try:
    # num_parallel * 2000 episodes in total.
    runner.run(num_episodes=num_parallel*2000, callback=callback, callback_episode_frequency=1)
    # Optional evaluation pass (must run before the runner is closed):
    # runner.run(num_episodes=100, evaluation=True)
finally:
    runner.close()

Episodes:  12%|█▏        | 7633/64000 [4:22:49, reward=-2.83, ts/ep=510, sec/ep=82.42, ms/ts=161.6, agent=2.0%, comm=84.7%] IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

Episodes:  52%|█████▏    | 33344/64000 [30:09:12, reward=0.34, ts/ep=700, sec/ep=183.43, ms/ts=262.0, agent=1.2%, comm=90.9%] 