In [1]:
import argparse
import importlib
import json
import logging
import os
import time
import sys
import numpy as np

from matplotlib import pyplot as plt
from tensorforce import TensorForceError
from tensorforce.agents import DQNAgent
from tensorforce.execution import Runner
from surface_env import *
import ase.io


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
W0813 20:09:04.237091 139962960201536 deprecation_wrapper.py:119] From /home/zulissi/miniconda3/envs/tensorforce/lib/python3.7/site-packages/tensorforce/core/optimizers/tf_optimizer.py:31: The name tf.train.AdadeltaOptimizer is deprecated. Please use tf.compat.v1.train.AdadeltaOptimizer instead.

W0813 20:09:04.237678 139962960201536 deprecation_wrapper.py:119] From /home/zulissi

In [2]:
def plot_energy(energy, ylabel, save_path):
    """Plot a per-episode series against its index and save the figure.

    Args:
        energy: sequence of values to plot, one entry per training episode.
        ylabel: label for the y axis; also appended to the figure title.
        save_path: destination handed to ``Figure.savefig``.

    Returns:
        None.
    """
    fig, ax = plt.subplots()
    try:
        ax.set_xlabel('training episode')
        ax.set_ylabel(ylabel)
        ax.set_title('episode vs. ' + ylabel)
        ax.plot(energy)
        fig.savefig(save_path)
    finally:
        # pyplot keeps every figure alive until it is explicitly closed.
        # This function is called repeatedly during training, so leaving
        # figures open would grow memory without bound.
        plt.close(fig)
    print('figure saved as {}'.format(save_path))

In [3]:
# Run configuration consumed by the training cell below.
episodes = 100_000      # passed to runner.run as num_episodes
horizon = 100           # passed to SurfaceEnv and to runner.run as max_episode_timesteps
deterministic = False   # passed to runner.run as deterministic

In [None]:

# Build the environment and report the energy it starts from.
env = SurfaceEnv(horizon)
print('Initial energy:', env.get_energy())

# Two dense ReLU layers (64 units, then 32 units) used as the agent's network.
network_spec = [
    dict(type="dense", size=64, activation="relu"),
    dict(type="dense", size=32, activation="relu"),
]

# DQN agent; the state and action specs are taken from the environment.
agent = DQNAgent(
    states=env.states,
    actions=env.actions,
    network=network_spec,
    batched_observe=True,
    batching_capacity=8000,
    execution={
        'type': 'single',
        'session_config': None,
        'distributed_spec': None,
    },
    # No preprocessing of states or rewards.
    states_preprocessing=None,
    reward_preprocessing=None,
    # Update every 10 timesteps with a batch of 10.
    update_mode={
        'unit': 'timesteps',
        'batch_size': 10,
        'frequency': 10,
    },
    memory={
        'type': 'replay',
        'include_next_states': True,
        'capacity': 40000,
    },
    # Adam wrapped in a step-clipping optimizer.
    optimizer={
        'type': 'clipped_step',
        'clipping_value': 0.1,
        'optimizer': {
            'type': 'adam',
            'learning_rate': 1e-3,
        },
    },
    # Epsilon annealed from 0.5 to 0.05 over 1e6 timesteps.
    actions_exploration={
        'type': 'epsilon_anneal',
        'initial_epsilon': 0.5,
        'final_epsilon': 0.05,
        'timesteps': 1000000,
    },
    discount=1,
    distributions=None,
    entropy_regularization=0.01,
    target_sync_frequency=1000,
    target_update_weight=1.0,
    double_q_model=False,
    huber_loss=None,
    summarizer={
        'directory': None,
        'labels': ['graph', 'total-loss'],
    },
)

# Runner that drives the agent/environment interaction loop.
runner = Runner(agent=agent, environment=env, repeat_actions=1)

# Threshold read and updated by the episode_finished callback via `global`.
cur_min = -1e6

def episode_finished(r):
    """Per-episode runner callback: write checkpoints, plots and a log line.

    A trajectory of the episode is written every 100 episodes and whenever
    the episode reward beats the best reward seen so far. Every 50 episodes
    the positions, the agent model and the reward / final-energy plots are
    saved as well.

    Args:
        r: the runner invoking the callback. Reads ``r.episode``,
            ``r.episode_rewards``, ``r.episode_timestep`` and ``r.agent``.

    Returns:
        Always True (presumably signals the runner to keep training --
        confirm against the tensorforce Runner documentation).
    """
    # Despite its name, `cur_min` holds the highest episode reward seen so far.
    global cur_min

    last_reward = r.episode_rewards[-1]

    if r.episode % 100 == 0 or last_reward > cur_min:
        final_energy = env.get_energy()
        positions = env.get_positions()

        os.makedirs('gifs', exist_ok=True)
        traj_path = 'gifs/%d_%1.2f_%1.2f.traj' % (r.episode, final_energy, last_reward)
        with ase.io.trajectory.TrajectoryWriter(traj_path) as traj:
            for pos in positions:
                # Replay each recorded frame on the lattice so its energy can
                # be evaluated and stored alongside the atoms.
                env._lattice.set_free_atoms(pos)
                frame = env._lattice.atoms.copy()
                traj.write(frame, energy=env.get_energy())

        # Only raise the threshold when the reward actually improved. The
        # periodic (every 100 episodes) dump must not overwrite the best
        # reward with a worse one.
        if last_reward > cur_min:
            cur_min = last_reward

    if r.episode % 50 == 0:
        for out_dir in ('new_pos', 'new_agents', 'new_plots'):
            os.makedirs(out_dir, exist_ok=True)

        pos_path = os.path.join('new_pos', 'pos_3_{}'.format(r.episode))
        np.save(pos_path, env.get_positions())

        agent_dir = os.path.join('new_agents', 'agent_3_{}'.format(r.episode))
        r.agent.save_model(agent_dir)
        print("Saving agent to {}".format(agent_dir))

        rew_path = os.path.join('new_plots', 'reward_3_{}.png'.format(r.episode))
        plot_energy(r.episode_rewards, 'accumulated reward', rew_path)
        energy_path = os.path.join('new_plots', 'final_energy_3_{}.png'.format(r.episode))
        plot_energy(env.final_energy, 'final energy', energy_path)

    print("Finished episode {ep} after {ts} timesteps (reward: {reward})".
        format(ep=r.episode, ts=r.episode_timestep, reward=last_reward))
    return True

# Train for `episodes` episodes, calling `episode_finished` after each one.
# The try/finally guarantees the runner is closed even when training is
# interrupted (e.g. KeyboardInterrupt during a long run) or raises.
try:
    runner.run(
        num_episodes=episodes,
        max_episode_timesteps=horizon,
        deterministic=deterministic,
        episode_finished=episode_finished
    )
finally:
    runner.close()

W0813 20:09:04.796082 139962960201536 deprecation_wrapper.py:119] From /home/zulissi/miniconda3/envs/tensorforce/lib/python3.7/site-packages/tensorforce/models/model.py:252: The name tf.variable_scope is deprecated. Please use tf.compat.v1.variable_scope instead.

W0813 20:09:04.796555 139962960201536 deprecation_wrapper.py:119] From /home/zulissi/miniconda3/envs/tensorforce/lib/python3.7/site-packages/tensorforce/models/model.py:457: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0813 20:09:04.797903 139962960201536 deprecation_wrapper.py:119] From /home/zulissi/miniconda3/envs/tensorforce/lib/python3.7/site-packages/tensorforce/core/explorations/exploration.py:42: The name tf.make_template is deprecated. Please use tf.compat.v1.make_template instead.

W0813 20:09:04.818248 139962960201536 deprecation_wrapper.py:119] From /home/zulissi/miniconda3/envs/tensorforce/lib/python3.7/site-packages/tensorforce/models/model.py:846: The name tf.get_variabl

Initial energy: 2.1797225799832667


  0%|          | 0/100000 [00:00<?, ?it/s]

Episode ends and final energy: 2.203453130743803


  0%|          | 1/100000 [00:04<130:43:19,  4.71s/it]

Finished episode 1 after 99 timesteps (reward: -1218.7341642328806)


  0%|          | 2/100000 [00:12<154:17:09,  5.55s/it]

Episode ends and final energy: 5.6811491844662
Finished episode 2 after 99 timesteps (reward: -4443.203246955412)


  0%|          | 3/100000 [00:16<140:46:35,  5.07s/it]

Episode ends and final energy: 2.1875079314645465
Finished episode 3 after 99 timesteps (reward: -82.04087422985182)


  0%|          | 4/100000 [00:23<160:59:54,  5.80s/it]

Episode ends and final energy: 3.2864978392534514
Finished episode 4 after 99 timesteps (reward: -1151.1087289120285)


  0%|          | 5/100000 [00:28<154:40:12,  5.57s/it]

Episode ends and final energy: 3.277389172563579
Finished episode 5 after 99 timesteps (reward: -3625.377881347485)


  0%|          | 6/100000 [00:33<144:08:21,  5.19s/it]

Episode ends and final energy: 12.943111183990183
Finished episode 6 after 99 timesteps (reward: -11622.287061279838)


  0%|          | 7/100000 [00:36<131:03:05,  4.72s/it]

Episode ends and final energy: 3.8125977761393246
Finished episode 7 after 99 timesteps (reward: -3431.2214739502715)


  0%|          | 8/100000 [00:40<122:03:50,  4.39s/it]

Episode ends and final energy: 3.493426395182972
Finished episode 8 after 99 timesteps (reward: -1381.1016530299858)


  0%|          | 9/100000 [00:44<122:43:33,  4.42s/it]

Episode ends and final energy: 3.3485182783211656
Finished episode 9 after 99 timesteps (reward: -2331.3357348431655)


  0%|          | 10/100000 [00:49<126:23:06,  4.55s/it]

Episode ends and final energy: 13.713877313056651
Finished episode 10 after 99 timesteps (reward: -12031.52542138605)


  0%|          | 11/100000 [00:54<130:27:52,  4.70s/it]

Episode ends and final energy: 3.2678328461600934
Finished episode 11 after 99 timesteps (reward: -1343.8391707526216)


  0%|          | 12/100000 [00:59<129:11:55,  4.65s/it]

Episode ends and final energy: 2.3228914572984003
Finished episode 12 after 99 timesteps (reward: -1440.3424458980617)


  0%|          | 13/100000 [01:04<131:33:36,  4.74s/it]

Episode ends and final energy: 2.1788282454484627
Finished episode 13 after 99 timesteps (reward: -48.45631671561708)


  0%|          | 14/100000 [01:11<151:02:44,  5.44s/it]

Episode ends and final energy: 2.179334259036505
Finished episode 14 after 99 timesteps (reward: -657.7728868360778)


  0%|          | 15/100000 [01:15<141:45:04,  5.10s/it]

Episode ends and final energy: 3.286494863028242
Finished episode 15 after 99 timesteps (reward: -2489.232419969305)


  0%|          | 16/100000 [01:19<134:36:30,  4.85s/it]

Episode ends and final energy: 3.188327376721487
Finished episode 16 after 99 timesteps (reward: -1092.6066142686075)


  0%|          | 17/100000 [01:24<135:30:28,  4.88s/it]

Episode ends and final energy: 2.178928564906739
Finished episode 17 after 99 timesteps (reward: -188.32214594555785)


  0%|          | 18/100000 [01:28<126:14:47,  4.55s/it]

Episode ends and final energy: 3.2848599403151066
Finished episode 18 after 99 timesteps (reward: -1155.1663520386605)


  0%|          | 19/100000 [01:32<118:25:20,  4.26s/it]

Episode ends and final energy: 5.738544457058266
Finished episode 19 after 99 timesteps (reward: -4748.076814792938)


  0%|          | 20/100000 [01:36<123:15:56,  4.44s/it]

Episode ends and final energy: 3.7080918315362137
Finished episode 20 after 99 timesteps (reward: -2084.081013066807)


  0%|          | 21/100000 [01:40<116:05:20,  4.18s/it]

Episode ends and final energy: 2.180148664429808
Finished episode 21 after 99 timesteps (reward: -1377.785500670012)


  0%|          | 22/100000 [01:44<118:05:28,  4.25s/it]

Episode ends and final energy: 3.7897952887762383
Finished episode 22 after 99 timesteps (reward: -1925.0191984815312)


  0%|          | 23/100000 [01:49<119:51:53,  4.32s/it]

Episode ends and final energy: 2.1983369524573906
Finished episode 23 after 99 timesteps (reward: -545.6723120853201)


  0%|          | 24/100000 [01:53<119:40:24,  4.31s/it]

Episode ends and final energy: 7.033445506642959
Finished episode 24 after 99 timesteps (reward: -5495.852851761852)


  0%|          | 25/100000 [01:58<122:30:20,  4.41s/it]

Episode ends and final energy: 2.2998468308594244
Finished episode 25 after 99 timesteps (reward: -1360.9672844150841)


  0%|          | 26/100000 [02:03<130:24:08,  4.70s/it]

Episode ends and final energy: 2.1779076284937915
Finished episode 26 after 99 timesteps (reward: -344.1297858174627)


  0%|          | 27/100000 [02:07<126:56:37,  4.57s/it]

Episode ends and final energy: 24.65352567065112
Finished episode 27 after 99 timesteps (reward: -24289.75695094935)


  0%|          | 28/100000 [02:11<118:09:21,  4.25s/it]

Episode ends and final energy: 2.1844567786233444
Finished episode 28 after 99 timesteps (reward: -1949.8013158375272)


  0%|          | 29/100000 [02:16<127:00:06,  4.57s/it]

Episode ends and final energy: 3.830743338730894
Finished episode 29 after 99 timesteps (reward: -3021.998532325596)


  0%|          | 30/100000 [02:21<128:57:57,  4.64s/it]

Episode ends and final energy: 2.294244128971629
Finished episode 30 after 99 timesteps (reward: -197.571074241127)


  0%|          | 31/100000 [02:26<127:16:29,  4.58s/it]

Episode ends and final energy: 2.179850612785142
Finished episode 31 after 99 timesteps (reward: -255.7460370371204)


  0%|          | 32/100000 [02:29<121:12:20,  4.36s/it]

Episode ends and final energy: 27.198226120152768
Finished episode 32 after 99 timesteps (reward: -25924.82605760337)


  0%|          | 33/100000 [02:34<127:06:01,  4.58s/it]

Episode ends and final energy: 3.1764694786187384
Finished episode 33 after 99 timesteps (reward: -2164.8222164504914)


  0%|          | 34/100000 [02:38<118:16:26,  4.26s/it]

Episode ends and final energy: 6.598390937522028
Finished episode 34 after 99 timesteps (reward: -4839.348605099076)


  0%|          | 35/100000 [02:43<124:30:38,  4.48s/it]

Episode ends and final energy: 2.1788612666917455
Finished episode 35 after 99 timesteps (reward: -952.7894360523804)


  0%|          | 36/100000 [02:48<131:22:41,  4.73s/it]

Episode ends and final energy: 2.2989634341093588
Finished episode 36 after 99 timesteps (reward: -845.0714680664379)


  0%|          | 37/100000 [02:53<128:04:46,  4.61s/it]

Episode ends and final energy: 6.166770354158556
Finished episode 37 after 99 timesteps (reward: -5400.104495453827)


  0%|          | 38/100000 [02:56<118:36:35,  4.27s/it]

Episode ends and final energy: 2.1845057420905665
Finished episode 38 after 99 timesteps (reward: -641.1916469988714)


  0%|          | 39/100000 [03:01<123:24:52,  4.44s/it]

Episode ends and final energy: 2.198581466845271
Finished episode 39 after 99 timesteps (reward: -858.4048844602773)


  0%|          | 40/100000 [03:06<128:19:23,  4.62s/it]

Episode ends and final energy: 2.197123121333812
Finished episode 40 after 99 timesteps (reward: -400.4160726082769)


  0%|          | 41/100000 [03:12<137:29:46,  4.95s/it]

Episode ends and final energy: 22.42091259171094
Finished episode 41 after 99 timesteps (reward: -20710.49396342371)


  0%|          | 42/100000 [03:16<129:42:08,  4.67s/it]

Episode ends and final energy: 14.592684542847987
Finished episode 42 after 99 timesteps (reward: -12547.153672831373)


  0%|          | 43/100000 [03:21<134:38:26,  4.85s/it]

Episode ends and final energy: 3.9310592562964874
Finished episode 43 after 99 timesteps (reward: -1950.532636394462)


  0%|          | 44/100000 [03:25<129:53:31,  4.68s/it]

Episode ends and final energy: 2.182536380702908
Finished episode 44 after 99 timesteps (reward: -176.90678013080816)


  0%|          | 45/100000 [03:30<129:01:27,  4.65s/it]

Episode ends and final energy: 5.734890622595401
Finished episode 45 after 99 timesteps (reward: -5426.771972529091)
