In [11]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Set up the gym and wrap a monitor around it that will periodically record movies as it learns

In [8]:
import gym
from surface_seg.envs.mcs_env import MCSEnv
import gym.wrappers
import numpy as np

In [9]:
# Symmetry-function settings, collected in a single dictionary.
Gs = {
    # Four G2 eta values, log-spaced between 0.05 and 5.0 (inclusive)
    "G2_etas": np.logspace(np.log10(0.05), np.log10(5.0), num=4),
    # One zero entry per G2 eta above
    "G2_rs_s": [0] * 4,
    "G4_etas": [0.005],
    "G4_zetas": [1.0, 4.0],
    "G4_gammas": [+1.0, -1],
    "cutoff": 5.876798323827276,  # EMT asap_cutoff: False
}

In [30]:
# Environment with fingerprints enabled, configured by the symmetry-function settings in Gs
MCS_gym = MCSEnv(Gs=Gs, fingerprints=True)

# Wrap the environment in a gym Monitor that writes to ./vid.
# video_callable evaluates to True for episode ids 49, 99, 149, ... (every 50th episode).
recording_MCS_gym = gym.wrappers.Monitor(
    MCS_gym,
    "./vid",
    video_callable=lambda episode_id: (episode_id + 1) % 50 == 0,
    force=True,
)

# Report the potential energy of the environment's atoms right after construction
print('Starting energy = %1.2f eV' % MCS_gym.atoms.get_potential_energy())

Starting energy = 2.49 eV


# Set up the gym and agent in tensorforce

In [31]:
import tensorforce
from tensorforce.agents import Agent

# Expose the monitored gym environment through Tensorforce's OpenAIGym adapter
recording_env = tensorforce.environments.OpenAIGym(
    recording_MCS_gym,
    max_episode_timesteps=400,
    visualize=False,
)

# TRPO agent built against that environment; the exploration value is specified as an
# exponentially decaying schedule counted in timesteps.
agent = Agent.create(
    agent='trpo',
    environment=recording_env,
    max_episode_timesteps=400,
    memory=40000,
    batch_size=10,
    learning_rate=1e-2,
    exploration={
        'type': 'decaying',
        'unit': 'timesteps',
        'decay': 'exponential',
        'initial_value': 0.3,
        'decay_steps': 1000,
        'decay_rate': 0.5,
    },
)

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [32]:
# Display the agent's resolved specification (state/action layouts and hyperparameters)
agent.spec

OrderedDict([('agent', 'trpo'),
             ('states',
              {'energy': {'type': 'float', 'shape': (1,)},
               'fingerprints': {'type': 'float', 'shape': (8, 36)},
               'forces': {'type': 'float', 'shape': (8, 3)}}),
             ('actions',
              {'action_type': {'type': 'int', 'shape': (), 'num_values': 3},
               'atom_selection': {'type': 'int', 'shape': (), 'num_values': 8},
               'movement': {'type': 'float',
                'shape': (1, 3),
                'min_value': -0.4000000059604645,
                'max_value': 0.4000000059604645}}),
             ('max_episode_timesteps', 400),
             ('network', 'auto'),
             ('batch_size', 10),
             ('update_frequency', None),
             ('learning_rate', 0.01),
             ('likelihood_ratio_clipping', 0.2),
             ('discount', 0.99),
             ('estimate_terminal', False),
             ('critic_network', None),
             ('critic_optimizer', Non

# Check that the action and state spaces are correctly converted from the OpenAI Gym env to the Tensorforce env

In [35]:
# Reset the Tensorforce-wrapped environment before taking a step
recording_env.reset()
# Draw the action from the underlying gym environment's own action space, then pass it
# through the Tensorforce wrapper to confirm the two representations are compatible
action = MCS_gym.action_space.sample() # Random sampling
recording_env.execute(action) # Prints (dict[state], Termination[Bool], observed reward[float])

({'fingerprints': array([[2.05705191e+00, 3.44220790e+00, 3.98815613e+00, 1.93448449e+00,
          3.23889858e+00, 3.75740814e+00, 1.47768565e+00, 2.46895895e+00,
          2.88080418e+00, 5.18275581e-01, 8.30069802e-01, 9.87232035e-01,
          3.07024645e-01, 2.25621902e+00, 2.70623699e+00, 1.42950002e+00,
          4.51256679e+00, 1.88539539e+00, 1.21243102e-01, 8.10626237e-01,
          1.06943474e+00, 4.68843071e-01, 1.67637138e+00, 6.58759592e-01,
          1.45507917e-01, 1.16777703e+00, 1.28295061e+00, 7.33530497e-01,
          2.26732235e+00, 9.58502482e-01, 1.68639584e-02, 9.98432646e-02,
          1.12774790e-01, 4.44534459e-02, 1.88934984e-01, 8.23127745e-02],
         [3.22101974e+00, 2.23756024e+00, 4.01492022e+00, 3.04396388e+00,
          2.10877329e+00, 3.76767722e+00, 2.37012322e+00, 1.62816524e+00,
          2.83345411e+00, 8.65362714e-01, 6.09082071e-01, 8.94567435e-01,
          1.20780520e+00, 2.27652497e+00, 4.34088181e+00, 4.57653343e-01,
          2.97202999e

# Run the DRL method for a while

In [None]:
from tensorforce.execution import Runner

# Runner couples the agent with the monitored environment, 400 timesteps max per episode
runner = Runner(agent=agent, environment=recording_env, max_episode_timesteps=400)

# Run 1000 episodes
runner.run(num_episodes=1000)


In [None]:
# Leftover post-mortem debugging hook, disabled so the notebook can be run top to bottom
# without invoking the interactive debugger. Uncomment and run it right after a cell
# raises to inspect the failing frame.
# %debug

In [None]:
# from tensorforce.execution import Runner

# runner = Runner(
#     agent=agent,
#     environment=env,
#     max_episode_timesteps=200,
# )

# runner.run(num_episodes=100)

# runner = Runner(
#     agent=agent,
#     environment=recording_env,
#     max_episode_timesteps=200,
# )

# runner.run(num_episodes=1000)

# # runner.run(num_episodes=10, evaluation=True)

# runner.close()

# Example of how to render the environment in a jupyter cell

In [None]:
env = MCS_gym
from matplotlib import pyplot as plt
%matplotlib inline

from IPython import display

img = plt.imshow(env.render(mode='rgb_array')) # only call this once
plt.axis('off')
for _ in range(100):
    img.set_data(env.render(mode='rgb_array')) # just update the data
    display.display(plt.gcf())
    display.clear_output(wait=True)
    action = env.action_space.sample()
    env.step(action)