In [1]:
# Reload edited modules automatically before each cell is executed, so that
# changes to the local surface_seg package are picked up without a restart.
%load_ext autoreload
%autoreload 2
# Optional GPU selection / CUDA debugging switches (currently disabled).
# %env CUDA_DEVICE_ORDER=PCI_BUS_ID
# %env CUDA_VISIBLE_DEVICES=1
# %env CUDA_LAUNCH_BLOCKING=1
# Set the MKL / OpenMP / numexpr thread-count variables to 1. This cell runs
# before the import cell, so the variables are in place when those libraries load.
%env MKL_NUM_THREADS=1
%env OMP_NUM_THREADS=1
%env NUMEXPR_NUM_THREADS=1
# NOTE(review): presumably the MKL CPU-dispatch override used on AMD CPUs --
# confirm it is still needed/honoured by the installed MKL version.
%env MKL_DEBUG_CPU_TYPE=5

env: MKL_NUM_THREADS=1
env: OMP_NUM_THREADS=1
env: NUMEXPR_NUM_THREADS=1
env: MKL_DEBUG_CPU_TYPE=5


# Set up the gym and wrap a monitor around it that will periodically record movies as it learns

In [2]:
import gym
from surface_seg.envs.mcs_env import MCSEnv
from surface_seg.utils.callback_simple import Callback
from tensorforce.execution import Runner
import gym.wrappers
import numpy as np
import tensorforce
import copy
import tensorflow as tf
import os

In [3]:
# Run configuration shared by the cells below.
multi_env = False                       # forwarded to MCSEnv(multi_env=...)
save_dir = './result_single_env/test/'  # root folder for videos, recorder, tensorboard and trajectories
timesteps = 500                         # per-episode step limit given to the gym, the agent and the runner

In [4]:
def setup_env(recording=True, video_every=2):
    """Build the MCS gym and wrap it as a tensorforce environment.

    Reads the notebook-level settings ``save_dir``, ``timesteps`` and
    ``multi_env``.

    Args:
        recording: If True, wrap the gym in ``gym.wrappers.Monitor`` writing
            to ``<save_dir>/vid``. The monitor is created with ``force=True``.
        video_every: A video is recorded for every episode where
            ``(episode_id + 1) % video_every == 0``. The default of 2 keeps
            the previously hard-coded behaviour (episode ids 1, 3, 5, ...).
            Only used when ``recording`` is True.

    Returns:
        A ``tensorforce.environments.OpenAIGym`` wrapping the (optionally
        monitored) gym, limited to ``timesteps`` steps per episode.

    Raises:
        ValueError: If ``recording`` is True and ``video_every`` is < 1.
    """
    if recording and video_every < 1:
        raise ValueError('video_every must be >= 1, got %r' % (video_every,))

    # Set up gym
    MCS_gym = MCSEnv(observation_fingerprints=False,
                     observation_forces=False,
                     permute_seed=42,
                     save_dir=save_dir,
                     timesteps=timesteps,
                     save_every=1,
                     plot_every=1,
                     multi_env=multi_env,
                     )

    if recording:
        # Wrap the gym to record a video every `video_every` episodes
        # (with the default of 2: episode ids 1, 3, 5, ...).
        MCS_gym = gym.wrappers.Monitor(
            MCS_gym,
            os.path.join(save_dir, 'vid'),
            force=True,
            video_callable=lambda episode_id: (episode_id + 1) % video_every == 0,
        )

    # Convert gym to tensorforce environment
    env = tensorforce.environments.OpenAIGym(MCS_gym,
                                             max_episode_timesteps=timesteps,
                                             visualize=False)

    return env
    
# Build one environment purely to inspect its energy attributes.
# NOTE(review): with the default recording=True, `.environment` is presumably
# the gym Monitor held by the tensorforce wrapper and `.env` the gym object
# inside it -- confirm against the tensorforce/gym versions in use.
env = setup_env().environment.env
print('initial energy', env.initial_energy)
print('thermal energy', env.thermal_energy)
# Report a multiple (n) of the environment's thermal energy for comparison.
n = 3
print('%dKT' %n, n * env.thermal_energy)

initial energy 4.100605630583811
thermal energy 0.827232
3KT 2.481696




# Set up the gym and agent in tensorforce

In [5]:
from tensorforce.agents import Agent

# The environment passed here only provides the state/action specification
# for the agent; rollouts use the environment given to the Runner. It is
# therefore built without the video Monitor, which would otherwise open a
# second, never-closed recorder on <save_dir>/vid.
agent = Agent.create(
    agent='trpo',
    environment=setup_env(recording=False),
    batch_size=1,
    learning_rate=1e-3,
    memory=40000,
    max_episode_timesteps=timesteps,
    # Exploration starts at 0.2 and decays exponentially
    # (rate 0.5 over 300000 timesteps).
    exploration=dict(
        type='decaying', unit='timesteps', decay='exponential',
        initial_value=0.2, decay_steps=300000, decay_rate=0.5
    ),
    # Required for recording states and actions
    recorder=dict(
        directory=os.path.join(save_dir, 'recorder'), frequency=1),
    # Tensorboard summarizer
    summarizer=dict(
        directory=os.path.join(save_dir, 'tb'), labels='all', frequency=1,
    )
)

# Keep the agent specification available at notebook level.
agent_spec = agent.spec

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


# Run the DRL method using a single environment

In [6]:
# Bound method handed to the runner as its episode callback; the Callback
# object is constructed with the run's output directory.
callback = Callback(save_dir).episode_finish

runner2 = Runner(
    agent=agent,
    environment=setup_env(recording=True),
    max_episode_timesteps=timesteps,
)

# Profiling variant (disabled):
# %prun runner2.run(num_episodes=2, callback=callback, callback_episode_frequency=1)

try:
    # callback_episode_frequency --> how often (in episodes) the callback runs,
    # i.e. the saving frequency for results and trajectories
    runner2.run(num_episodes=2, callback=callback, callback_episode_frequency=1)
    # runner2.run(num_episodes=100, evaluation=True)
finally:
    # Always release the runner, even if the run fails or is interrupted.
    runner2.close()

Episodes:   0%|          | 0/2 [00:00, reward=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

min True 0.028352301050368567 -0.007047875558818362
min True 0.028352301050403206 -0.007047875558839678
min True 0.4395465996163317 -0.007311387464422836
min True 0.4395465996163317 -0.007311387464422836
min True 1.1375624156738269 -0.006913680929223176
min True 1.45660321014301 -0.007125772474327441
min True 1.45660321014301 -0.0071257724743203354
min True 1.884962231281488 -0.007174834732653679
min True 1.884962231281488 -0.007174834732653679
min True 2.880750883903646 -0.007008543029082048
min True 2.880750883903646 -0.007008543029082048
min True 2.880750883903646 -0.007008543029082048
min True 2.880750883903646 -0.007008543029082048
min True 2.880750883903646 -0.007008543029082048
min True 2.880750883903646 -0.007008543029082048
min True 2.880750883903646 -0.007008543029082048
min True 2.880750883903646 -0.007008543029082048
min True 2.880750883903646 -0.007008543029082048
min True 2.880750883903646 -0.007008543029082048
min True 2.880750883903646 -0.007008543029082048
min True 2.8

min True 10.209155913874776 -0.007282478164757578
min True 10.209155913874776 -0.007282478164757578
min True 10.209155913874776 -0.007282478164757578
min True 9.56845064247045 -0.007149144011318498
min True 10.48717318050591 -0.0072490578673631845
min True 10.371557228730904 -0.007324904818018396
min True 10.371557228730907 -0.007324904818034383
min True 10.122594079048511 -0.007063592683643094
min True 9.724243308078897 -0.00712459048737113
min True 9.724243308078897 -0.00712459048737113
min True 9.724243308078897 -0.00712459048737113
min True 9.724243308078897 -0.00712459048737113
min True 9.994089908129443 -0.0070286371579939555


Episodes:  50%|█████     | 1/2 [00:35, reward=-3912.55, ts/ep=500, sec/ep=35.03, ms/ts=70.1, agent=26.9%]

min True 0.0 -0.00669472427538409
min True 0.04487665837263499 -0.007332755061909779
min True 0.8429621021139724 -0.00717526294284454
min True 0.8429621021139724 -0.00717526294284454
min True 0.9790576953408356 -0.007084753905050256
min True 1.1370490843464385 -0.007292222350930189
min True 1.3416283218092317 -0.007155218875385394
min True 1.3416283218092317 -0.007155218875385394
min True 1.6480942678769281 -0.0068876551497840666
min True 1.6480942678769281 -0.0068876551497840666
min True 1.6480942678769281 -0.0068876551497840666
min True 1.6480942678769281 -0.0068876551497840666
min True 1.8532287842080342 -0.006578478378343355
min True 1.8532287842080342 -0.006578478378343355
min True 1.3617336069199695 -0.006991955746508793
min True 1.7542054380483698 -0.00707941136199608
min True 1.7542054380483698 -0.00707941136199608
min True 1.7542054380483698 -0.00707941136199608
min True 1.7542054380483698 -0.00707941136199608
min True 1.7542054380483698 -0.00707941136199608
min True 1.7542054

min True 11.65779780314799 -0.007111924012824566
min True 11.95443697044151 -0.006457809593635844
min True 11.95443697044151 -0.0064578095936642654
min True 11.95443697044151 -0.0064578095936642654
min True 11.549831784726202 -0.00695136666363716
min True 11.549831784726202 -0.00695136666363716
min True 11.806273321368986 -0.006902294848807777
min True 12.165803386704301 -0.006884545344491677
min True 12.459712867618098 -0.006777646707109675
min True 12.459712867618105 -0.006777646707051943
min True 12.459712867618105 -0.006777646707051943
min True 13.006668015615183 -0.006799521517416807
min True 13.006668015615183 -0.006799521517416807
min True 12.603963213096012 -0.006331491623448926
min True 12.603963213096012 -0.006331491623448926
min True 13.480148042716895 -0.007234801647063804
min True 13.480148042716895 -0.007234801647023836
min True 13.480148042716895 -0.007234801647023836
min True 13.506064732477444 -0.00722017420725507
min True 13.50606473247743 -0.007220174206175045
min Tr

Episodes: 100%|██████████| 2/2 [01:57, reward=-3369.47, ts/ep=500, sec/ep=82.53, ms/ts=165.1, agent=3.2%]


In [9]:
from ase.io import write, read

# Load every frame of the minima trajectory stored under save_dir and
# report how many were found.
minima_path = os.path.join(save_dir, 'minima.traj')
minima = read(minima_path, index=':')
print(len(minima))

1
