In [2]:
import os
import sys
sys.path.insert(0,'/home/junwoony/Desktop/ARPAE/')

import argparse
import importlib
import json
import logging
import time
import numpy as np

%env CUDA_DEVICE_ORDER=PCI_BUS_ID
%env CUDA_VISIBLE_DEVICES=0
%env CUDA_LAUNCH_BLOCKING=1

import ase.io

from env.surface_env import *
# from utils.terminate import 

from matplotlib import pyplot as plt
from tensorforce import TensorForceError
from tensorforce.agents import DQNAgent
from tensorforce.execution import Runner
import tensorflow as tf


env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=0
env: CUDA_LAUNCH_BLOCKING=1


In [3]:
# Seed NumPy and TensorFlow before anything else is constructed, so that any
# random draws made while the environment is being built are covered by the
# seed too.
# NOTE(review): whether SurfaceEnv draws random numbers in its constructor is
# not visible from this notebook -- seeding first is harmless either way.
seed = 42
np.random.seed(seed)
tf.random.set_random_seed(seed)

# `horizon` is used as max_episode_timesteps and `episodes` as num_episodes
# when the runner is started.
horizon = 200
episodes = 10000

env = SurfaceEnv(horizon)
print('Initial energy:', env.get_energy())

Initial energy: 2.17972257998324


In [5]:
# Layer specification passed to the agent as its network: two dense ReLU
# layers of 64 and 32 units, in that order.
network_spec = [
    dict(type="dense", size=layer_width, activation="relu")
    for layer_width in (64, 32)
]

# DQN agent configuration. Every keyword is forwarded to tensorforce's
# DQNAgent exactly as written; options set to None are passed explicitly.
agent = DQNAgent(
    # State/action layout comes from the environment, the network from above.
    states=env.states,
    actions=env.actions,
    network=network_spec,

    # Observation batching and execution mode.
    batched_observe=True,
    batching_capacity=8000,
    execution={
        'type': 'single',
        'session_config': None,
        'distributed_spec': None,
    },

    # No preprocessing of states or rewards.
    states_preprocessing=None,
    reward_preprocessing=None,

    # Update schedule and replay memory.
    update_mode={
        'unit': 'timesteps',
        'batch_size': 10,
        'frequency': 10,
    },
    memory={
        'type': 'replay',
        'include_next_states': True,
        'capacity': 40000,
    },

    # Adam (lr 1e-3) wrapped in a clipped-step optimizer (clipping value 0.1).
    optimizer={
        'type': 'clipped_step',
        'clipping_value': 0.1,
        'optimizer': {
            'type': 'adam',
            'learning_rate': 1e-3,
        },
    },

    # Epsilon annealing from 0.5 to 0.05 over 1,000,000 timesteps.
    actions_exploration={
        'type': 'epsilon_anneal',
        'initial_epsilon': 0.5,
        'final_epsilon': 0.05,
        'timesteps': 1000000,
    },

    # Remaining agent options.
    discount=1,
    distributions=None,
    entropy_regularization=0.01,
    target_sync_frequency=1000,
    target_update_weight=1.0,
    double_q_model=False,
    huber_loss=None,

    # Summaries: no output directory, 'graph' and 'total-loss' labels.
    summarizer={
        'directory': None,
        'labels': ['graph', 'total-loss'],
    },
)

# Runner pairing the agent with the environment; repeat_actions is left at 1.
runner = Runner(agent=agent, environment=env, repeat_actions=1)

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [6]:
def plot_energy(energy, xlabel, ylabel, save_path):
    """Plot a 1-D series as a line chart and save it to ``save_path``.

    The figure is closed once it has been written (also when saving fails),
    so repeated calls -- this is invoked from the per-episode callback -- do
    not accumulate open matplotlib figures in the kernel.

    Args:
        energy: sequence of values to plot against their index.
        xlabel: label for the x axis; also the first half of the title.
        ylabel: label for the y axis; also the second half of the title.
        save_path: file path handed to ``Figure.savefig``.

    Returns:
        None
    """
    fig, ax = plt.subplots()
    try:
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        ax.set_title(xlabel + ' vs. ' + ylabel)
        ax.plot(energy)
        fig.savefig(save_path)
    finally:
        # Release the figure explicitly; pyplot keeps every figure alive
        # until it is closed.
        plt.close(fig)
    print('figure saved as {}'.format(save_path))

def _seed_dir(root, *parts):
    """Create ``<root>/seed_<seed>/<parts...>`` if missing and return the path."""
    path = os.path.join(root, 'seed_' + str(seed), *parts)
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(path, exist_ok=True)
    return path


def episode_finished(r):
    """Callback passed to ``runner.run``; called with the runner after an episode.

    Prints a one-line episode summary, then writes artefacts under
    ``seed_<seed>`` sub-directories of the current working directory:

    * every episode: the trajectory (``env.save_traj``) and a plot of
      ``env.energies``;
    * every 50th episode: the agent model plus plots of the episode rewards,
      ``env.final_energy`` and ``env.ts_energy``.

    Reads the notebook-level globals ``env`` and ``seed`` and calls
    ``plot_energy``.

    Args:
        r: the runner; ``r.episode``, ``r.episode_timestep``,
            ``r.episode_rewards`` and ``r.agent`` are read.

    Returns:
        bool: always True.
        NOTE(review): presumably this tells the runner to keep training --
        confirm against the tensorforce Runner documentation.
    """
    # How often (in episodes) each group of artefacts is written.
    traj_interval = 1
    checkpoint_interval = 50

    print("Finished episode {ep} after {ts} timesteps (reward: {reward})".
        format(ep=r.episode, ts=r.episode_timestep,reward=r.episode_rewards[-1]))

    if r.episode % traj_interval == 0:
        env.save_traj(_seed_dir('traj_files', str(r.episode)))

        # The file name carries the last ts_energy value; the '%f' formatting
        # is kept as-is because the element type of env.ts_energy is not
        # visible here.
        E_fn = 'E_' + str(r.episode) + '_%f' % env.ts_energy[-1] + '.png'
        E_fn = os.path.join(_seed_dir('E_figs'), E_fn)
        plot_energy(env.energies, 'actions', 'energy', E_fn)
        # Per-episode atom figures (env.save_fig into
        # atom_figs/seed_<seed>/<episode>) are currently disabled.

    if r.episode % checkpoint_interval == 0:
        model_fn = os.path.join(_seed_dir('models'), str(r.episode))
        r.agent.save_model(model_fn)
        print("Model saved to {}".format(model_fn))

        rew_dir = _seed_dir('rew_figs')
        # (file-name prefix, series, y-axis label) for each summary plot.
        summary_plots = (
            ('rew_', r.episode_rewards, 'accumulated reward'),
            ('final_energy_', env.final_energy, 'final energy'),
            ('ts_energy_', env.ts_energy, 'ts energy'),
        )
        for prefix, series, ylabel in summary_plots:
            fig_fn = os.path.join(rew_dir, prefix + str(r.episode) + '.png')
            plot_energy(series, 'training episode', ylabel, fig_fn)

    return True

In [7]:
# Train the agent. The runner is closed in a `finally` so it is released
# even when the run is interrupted from the notebook (KeyboardInterrupt) or
# an environment step raises.
try:
    runner.run(
        num_episodes=episodes,
        max_episode_timesteps=horizon,
        deterministic=False,
        episode_finished=episode_finished
    )
finally:
    runner.close()

     Step     Time          Energy         fmax         cmax       rtrust          rho
Sella   0 21:37:52       83.726758     186.5631       0.0000       0.0312       1.0000
[465.06684816] [995.45486511] [0.46719029] 0
[118.32988397] [248.51172331] [0.47615413] 0
[103.01815089] [-278.11516025] [-0.37041545] 0
Sella   1 21:37:52       78.613428     169.6588       0.0000       0.0359       1.0006
Sella   2 21:37:52       73.134475     151.9610       0.0000       0.0413       1.0013
Sella   3 21:37:52       67.586318     134.5981       0.0000       0.0475       1.0013
Sella   4 21:37:52       61.996642     117.9361       0.0000       0.0546       1.0008
Sella   5 21:37:52       56.379379     102.2686       0.0000       0.0628       0.9999
Sella   6 21:37:52       50.744389      87.7521       0.0000       0.0722       0.9985
Sella   7 21:37:52       45.103967      74.3643       0.0000       0.0830       0.9966
Sella   8 21:37:53       39.480398      61.9361       0.0000       0.0954       

Sella  60 21:37:57        3.954688       0.1517       0.0000       0.1855       1.3638
Sella  61 21:37:57        3.942788       0.2119       0.0000       0.1855       0.9429
Sella  62 21:37:57        3.936964       0.2724       0.0000       0.1855       1.4451
Sella  63 21:37:57        3.926452       0.1887       0.0000       0.1855       1.4164
Sella  64 21:37:57        3.907364       0.1509       0.0000       0.1855       0.9140
Sella  65 21:37:57        3.895143       0.2039       0.0000       0.1855       1.0838
Sella  66 21:37:57        3.874162       0.2629       0.0000       0.1855       1.0672
Sella  67 21:37:57        3.846410       0.4467       0.0000       0.1855       1.2370
Sella  68 21:37:57        3.880249       1.0554       0.0000       0.1205      -4.0820
Sella  69 21:37:57        3.841780       0.5309       0.0000       0.1386       0.9963
Sella  70 21:37:58        3.831359       0.3227       0.0000       0.1386       1.3140
Sella  71 21:37:58        3.814323       0.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/junwoony/miniconda3/envs/schnet2/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3326, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-7-f08f6c73ef60>", line 5, in <module>
    episode_finished=episode_finished
  File "/home/junwoony/miniconda3/envs/schnet2/lib/python3.7/site-packages/tensorforce/execution/runner.py", line 104, in run
    state, terminal, step_reward = self.environment.execute(action=action)
  File "/home/junwoony/Desktop/ARPAE/env/surface_env.py", line 258, in execute
    after_energy = self.do_action(action)
  File "/home/junwoony/Desktop/ARPAE/env/surface_env.py", line 199, in do_action
    dyn.run(1e-2, steps = 100)
  File "/home/junwoony/miniconda3/envs/schnet2/lib/python3.7/site-packages/ase/optimize/optimize.py", line 272, in run
    return Dynamics.run(self)
  File "/home/junwoony/miniconda3/envs/schnet2/lib/python3.7/site-packages/ase/optimize/optimize

KeyboardInterrupt: 