# This is a notebook to evaluate a trained TRPO agent in the CatGym environment

In [1]:
# Pick up edits to imported project modules without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Limit MKL, OpenMP and NumExpr to one thread each. These are set here, before
# the numerical libraries are imported in the cells below.
%env MKL_NUM_THREADS=1
%env OMP_NUM_THREADS=1
%env NUMEXPR_NUM_THREADS=1
# NOTE(review): presumably an MKL code-path override for the host CPU --
# confirm it is still needed on the machine running this notebook.
%env MKL_DEBUG_CPU_TYPE=5

env: MKL_NUM_THREADS=1
env: OMP_NUM_THREADS=1
env: NUMEXPR_NUM_THREADS=1
env: MKL_DEBUG_CPU_TYPE=5


In [2]:
import tensorflow as tf
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
# NOTE(review): `seed` is assigned here but is not referenced by any other
# visible cell, so no random generator is actually seeded with it -- confirm
# whether it should be passed to TensorFlow / NumPy / the environment.
seed = 30
# Session configuration with the GPU `allow_growth` option switched on.
config = ConfigProto()
config.gpu_options.allow_growth = True
# Open the TF1-compat interactive session using that configuration.
session = InteractiveSession(config=config)

# Set up the environment

In [3]:
import gym
from surface_seg.envs.catgym_env import MCSEnv
from surface_seg.utils.callback_new import Callback
from tensorforce.execution import Runner
import gym.wrappers
import numpy as np
import tensorforce
import copy
import os

In [9]:
# Evaluation settings shared by the cells below.

# Step budget per episode: passed to MCSEnv as `timesteps` and to the
# tensorforce environment / runner as `max_episode_timesteps`.
timesteps = 500

# Threshold expressed as a multiple of the environment's thermal energy
# (the sanity-check cell prints `thermal_threshold * env.thermal_energy`).
thermal_threshold = 3

# Directory where evaluation artefacts are written. It is defined in this
# configuration cell because setup_env() reads `save_dir` as a global and is
# first called before the run cell assigns it; without this, a fresh
# "Restart Kernel -> Run All" fails with NameError.
# Substitute your own path to save files during evaluation.
save_dir = './result_multi_env/test_catgym_eval'

In [5]:
def setup_env(recording=True, structure=None, structure_idx=None):
    """Build the CatGym environment and wrap it for tensorforce.

    Reads the notebook-level globals ``save_dir``, ``timesteps`` and
    ``thermal_threshold``; they must be defined before this is called.

    Args:
        recording: When true, the gym is wrapped in ``gym.wrappers.Monitor``
            writing to ``<save_dir>/vid`` (with ``force=True``).
        structure: Accepted for interface compatibility; not used here.
        structure_idx: Accepted for interface compatibility; not used here.

    Returns:
        A ``tensorforce.environments.OpenAIGym`` instance around the gym,
        with ``max_episode_timesteps=timesteps`` and visualisation off.
    """

    def _is_recorded_episode(episode_id):
        # True whenever (episode_id + 1) is a multiple of 50.
        return (episode_id + 1) % 50 == 0

    # Underlying CatGym environment.
    gym_env = MCSEnv(
        observation_fingerprints=True,
        observation_forces=True,
        permute_seed=42,
        save_dir=save_dir,
        timesteps=timesteps,
        thermal_threshold=thermal_threshold,
        save_every_min=1,
        save_every=50,
        step_size=0.1,
    )

    # Optional video recording of the episodes selected above.
    if recording:
        video_dir = os.path.join(save_dir, 'vid')
        gym_env = gym.wrappers.Monitor(
            gym_env,
            video_dir,
            force=True,
            video_callable=_is_recorded_episode,
        )

    # Hand the (possibly wrapped) gym over to tensorforce.
    return tensorforce.environments.OpenAIGym(
        gym_env,
        max_episode_timesteps=timesteps,
        visualize=False,
    )


"""
Create a environment for checking the intial energy and thermal energy
"""
# setup_env() is called with its defaults, so recording=True and the Monitor
# wrapper is created as well.
# NOTE(review): presumably `.environment` is the gym object held by the
# tensorforce wrapper and `.env` the MCSEnv inside the Monitor -- confirm.
env = setup_env().environment.env
print('initial energy', env.initial_energy)
print('thermal energy', env.thermal_energy)
# Express the threshold in energy units: n multiples of the thermal energy.
n =thermal_threshold
print('%dKT' %n, n * env.thermal_energy)

initial energy 4.094021275192408
thermal energy 0.827232
3KT 2.481696




# Set up the gym and agent in tensorforce

In [6]:
from tensorforce.agents import Agent
# Substitute your own path to the saved, trained agent (checkpoint directory).
# NOTE(review): the recorded output below restores parameters from
# './result_multi_env/experiments/test_catgym/checkpoints', which differs from
# this path -- the output looks stale; confirm which directory is correct.
saved_agent_dir = './result_multi_env/test_catgym/checkpoints'
# Restore the agent from that directory.
agent = Agent.load(saved_agent_dir)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))
INFO:tensorflow:Create CheckpointSaverHook.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./result_multi_env/experiments/test_catgym/checkpoints/agent-0
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 0...
INFO:tensorflow:Saving checkpoints for 0 into ./result_multi_env/experiments/test_catgym/checkpoints/agent.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 0

In [7]:
# Check the specification of the loaded agent (agent type, state and action
# layout, hyper-parameters) to confirm the expected agent was restored.
print(agent.spec)

OrderedDict([('agent', 'trpo'), ('states', {'TS': {'type': 'float', 'shape': [1]}, 'energy': {'type': 'float', 'shape': [1]}, 'fingerprints': {'type': 'float', 'shape': [192]}, 'forces': {'type': 'float', 'shape': [24]}, 'positions': {'type': 'float', 'shape': [24]}}), ('actions', {'action_type': {'type': 'int', 'shape': [], 'num_values': 4}, 'atom_selection': {'type': 'int', 'shape': [], 'num_values': 8}, 'movement': {'type': 'float', 'shape': [1, 3], 'min_value': -0.10000000149011612, 'max_value': 0.10000000149011612}}), ('max_episode_timesteps', 500), ('batch_size', 1), ('network', 'auto'), ('use_beta_distribution', True), ('memory', 50000), ('update_frequency', None), ('learning_rate', 0.001), ('discount', 0.99), ('estimate_terminal', False), ('critic_network', None), ('critic_optimizer', None), ('preprocessing', None), ('exploration', {'type': 'decaying', 'unit': 'timesteps', 'decay': 'exponential', 'initial_value': 0.2, 'decay_steps': 50000, 'decay_rate': 0.5}), ('variable_noise'

# Run the trained DRL agent in evaluation mode (single environment)

In [8]:
# Number of evaluation episodes to run.
num_episode = 300
# Substitute your own path to save file during evaluation
save_dir = './result_multi_env/test_catgym_eval'

# NOTE(review): this callback is created but never handed to runner.run()
# below, so presumably it is never invoked during evaluation -- confirm
# whether it should be passed (and with which frequency argument).
callback = Callback(num_episode, save_dir).episode_finish

# Single, non-recording environment driven by the loaded agent.
runner = Runner(
    agent=agent,
    environment=setup_env(recording=False),
    max_episode_timesteps=timesteps,
)

# Always release the runner, even if the run raises or is interrupted
# from the notebook.
try:
    runner.run(num_episodes=num_episode, evaluation=True)
finally:
    runner.close()

Episodes: 100%|██████████| 20/20 [07:54, reward=0.00, ts/ep=500, sec/ep=19.80, ms/ts=39.6, agent=3.6%]
