In [1]:
# Reload edited project modules automatically before each cell executes.
%load_ext autoreload
%autoreload 2
# Optional GPU selection / CUDA debugging switches (currently disabled).
# %env CUDA_DEVICE_ORDER=PCI_BUS_ID
# %env CUDA_VISIBLE_DEVICES=1
# %env CUDA_LAUNCH_BLOCKING=1
# Restrict MKL, OpenMP and numexpr to one thread each.
# NOTE(review): presumably to avoid CPU oversubscription when many environments
# run in parallel -- confirm.
%env MKL_NUM_THREADS=1
%env OMP_NUM_THREADS=1
%env NUMEXPR_NUM_THREADS=1
# NOTE(review): MKL_DEBUG_CPU_TYPE=5 looks like the MKL code-path override that is
# commonly set on AMD CPUs -- confirm it is still needed on the target machine.
%env MKL_DEBUG_CPU_TYPE=5

env: MKL_NUM_THREADS=1
env: OMP_NUM_THREADS=1
env: NUMEXPR_NUM_THREADS=1
env: MKL_DEBUG_CPU_TYPE=5


# Set up the gym and wrap a monitor around it that will periodically record movies as it learns

In [2]:
import gym
# from gym_recording.wrappers import TraceRecordingWrapper
from surface_seg.envs.mcs_env_new_save import MCSEnv
# from surface_seg.utils.callback import Callback
from surface_seg.utils.callback_multiple_envs import Callback
from tensorforce.execution import Runner
import gym.wrappers
import numpy as np
import tensorforce
import copy
import tensorflow as tf

In [3]:
# Steps per episode; passed to MCSEnv and used as max_episode_timesteps for the
# tensorforce environment, the agent and the runners.
timesteps = 500

In [4]:
def setup_env(recording=True, plot_every=20, video_every=30):
    """Build the MCS gym and expose it as a tensorforce environment.

    Parameters
    ----------
    recording : bool
        When True, wrap the gym in ``gym.wrappers.Monitor`` so that videos are
        written to ``./vid_trpo/pos4``.
    plot_every : int
        Forwarded to ``MCSEnv(plot_every=...)``.
        NOTE(review): the value was missing in the original cell
        (``plot_every = ,`` is a SyntaxError). The default of 20 mirrors the
        ``plot_frequency=20`` given to ``Callback`` in this notebook -- confirm
        the intended value.
    video_every : int
        A video is requested whenever ``(episode_id + 1) % video_every == 0``,
        i.e. for episode ids 29, 59, ... with the default of 30.

    Returns
    -------
    tensorforce.environments.OpenAIGym
        The wrapped environment, limited to ``timesteps`` steps per episode.
    """
    # Underlying gym; results are saved below ./result_trpo/pos4/results.
    mcs_gym = MCSEnv(
        observation_fingerprints=False,
        observation_forces=False,
        permute_seed=42,
        save_dir='./result_trpo/pos4/results',
        timesteps=timesteps,
        save_every=1,
        plot_every=plot_every,
    )

    if recording:
        # Wrap the gym so that every `video_every`-th episode is rendered to video.
        mcs_gym = gym.wrappers.Monitor(
            mcs_gym,
            "./vid_trpo/pos4",
            force=True,
            video_callable=lambda episode_id: (episode_id + 1) % video_every == 0,
        )

    # Convert the (possibly wrapped) gym into a tensorforce environment.
    return tensorforce.environments.OpenAIGym(
        mcs_gym,
        max_episode_timesteps=timesteps,
        visualize=False,
    )
    
# Build one environment and reach through the tensorforce wrapper
# (`.environment.env`) to print the energies stored on the inner object.
n = 3
env = setup_env().environment.env
print('initial energy', env.initial_energy)
print('thermal energy', env.thermal_energy)
# Same thermal energy scaled by n, labelled "<n>KT".
print('%dKT' % n, n * env.thermal_energy)

initial energy 4.100605630583811
thermal energy 0.827232
3KT 2.481696




# Set up the gym and agent in tensorforce

In [5]:
from tensorforce.agents import Agent

# TRPO agent built against a fresh environment from setup_env().
agent = Agent.create(
    agent='trpo',
    environment=setup_env(),
    batch_size=1,
    learning_rate=1e-3,
    memory=40000,
    max_episode_timesteps=timesteps,
    # Exploration: exponential decay specified per timestep
    # (initial_value 0.3, decay_rate 0.5, decay_steps 3,000,000).
    exploration={
        'type': 'decaying',
        'unit': 'timesteps',
        'decay': 'exponential',
        'initial_value': 0.3,
        'decay_steps': 3000000,
        'decay_rate': 0.5,
    },
    # Recorder: required for recording states and actions.
    recorder={'directory': './recorder/pos4', 'frequency': 1},
    # Tensorboard summarizer.
    summarizer={'directory': 'tb/pos4', 'labels': 'all', 'frequency': 1},
)

# Keep the agent specification; the parallel runner is given this spec
# instead of the agent object.
agent_spec = agent.spec

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


# Run the DRL method in parallel (multiple environments)

In [6]:
# Per-episode callback; results go to ./result_trpo/pos4, plot_frequency=20.
callback = Callback('./result_trpo/pos4', plot_frequency=20).episode_finish

# One environment per parallel worker; the runner receives the agent spec
# rather than the agent object.
num_parallel = 32
runner = Runner(
    agent=agent_spec,
    environments=[setup_env(recording=True) for _ in range(num_parallel)],
    num_parallel=num_parallel,
    remote='multiprocessing',
    max_episode_timesteps=timesteps,
)

# Always close the runner, even when training is interrupted (e.g. with a
# KeyboardInterrupt), so the multiprocessing environments are not left running.
try:
    runner.run(num_episodes=10000, callback=callback, callback_episode_frequency=1)
    # runner.run(num_episodes=100, evaluation=True)
finally:
    runner.close()

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


Episodes:   9%|▉         | 893/10000 [20:34, reward=86.12, ts/ep=500, sec/ep=40.54, ms/ts=81.1, agent=2.2%, comm=36.8%]  Exception in thread Thread-453396:
Traceback (most recent call last):
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/threading.py", line 926, in _bootstrap_inner
    self.run()
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/threading.py", line 870, in run
    self._target(*self._args, **self._kwargs)
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/site-packages/tensorforce/environments/environment.py", line 570, in finish_execute
    self.observation = self.execute(actions=actions)
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/site-packages/tensorforce/environments/environment.py", line 540, in execute
    states, terminal, reward, seconds = self.receive(function='execute')
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/site-packages/tensorforce/environments/environment.py", line 501, in receive
    raise Te

KeyboardInterrupt: 

# Run the DRL method using a single environment

In [6]:
# plot_frequency --> how often energies and trajectories are plotted
callback = Callback('./result_trpo/test', plot_frequency=20).episode_finish

# Single-environment runner that drives the already-created `agent` object.
runner2 = Runner(
    agent=agent,
    environment=setup_env(recording=True),
    max_episode_timesteps=timesteps,
)

# %prun runner.run(num_episodes=2, callback=callback, callback_episode_frequency=1)

# Always close the runner, even when training is interrupted (e.g. with a
# KeyboardInterrupt), so the environment is released.
try:
    # callback_episode_frequency --> how often results and trajectories are saved
    runner2.run(num_episodes=2000, callback=callback, callback_episode_frequency=1)
    # runner2.run(num_episodes=100, evaluation=True)
finally:
    runner2.close()

Episodes:   0%|          | 0/2000 [00:00, reward=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

0


Episodes:   0%|          | 2/2000 [01:28, reward=-329.65, ts/ep=500, sec/ep=38.63, ms/ts=77.3, agent=7.2%] 

1


Episodes:   0%|          | 3/2000 [02:08, reward=-272.34, ts/ep=500, sec/ep=39.40, ms/ts=78.8, agent=7.2%]

2


Episodes:   0%|          | 4/2000 [02:46, reward=-363.86, ts/ep=500, sec/ep=37.49, ms/ts=75.0, agent=7.4%]

3


Episodes:   0%|          | 5/2000 [03:24, reward=-279.18, ts/ep=500, sec/ep=38.41, ms/ts=76.8, agent=7.0%]

4


Episodes:   0%|          | 6/2000 [03:59, reward=-327.86, ts/ep=500, sec/ep=34.76, ms/ts=69.5, agent=7.8%]

5
6


Episodes:   0%|          | 8/2000 [05:21, reward=-373.63, ts/ep=500, sec/ep=43.11, ms/ts=86.2, agent=6.3%]

7


Episodes:   0%|          | 9/2000 [05:56, reward=-333.93, ts/ep=500, sec/ep=35.16, ms/ts=70.3, agent=7.8%]

8


Episodes:   0%|          | 10/2000 [06:34, reward=-317.46, ts/ep=500, sec/ep=37.69, ms/ts=75.4, agent=7.3%]

9


Episodes:   1%|          | 11/2000 [07:12, reward=-433.67, ts/ep=500, sec/ep=37.04, ms/ts=74.1, agent=7.4%]

10


Episodes:   1%|          | 12/2000 [07:49, reward=-305.30, ts/ep=500, sec/ep=37.53, ms/ts=75.1, agent=7.3%]

11
12


Episodes:   1%|          | 14/2000 [09:11, reward=-356.09, ts/ep=500, sec/ep=41.01, ms/ts=82.0, agent=6.6%]

13


Episodes:   1%|          | 15/2000 [09:45, reward=-386.89, ts/ep=500, sec/ep=33.75, ms/ts=67.5, agent=8.2%]

14


Episodes:   1%|          | 16/2000 [10:28, reward=-347.66, ts/ep=500, sec/ep=42.29, ms/ts=84.6, agent=6.5%]

15


Episodes:   1%|          | 17/2000 [11:06, reward=-245.02, ts/ep=500, sec/ep=37.57, ms/ts=75.1, agent=7.4%]

16


Episodes:   1%|          | 18/2000 [11:42, reward=-281.50, ts/ep=500, sec/ep=36.34, ms/ts=72.7, agent=7.7%]

17
18


Episodes:   1%|          | 19/2000 [12:21, reward=-416.66, ts/ep=500, sec/ep=38.38, ms/ts=76.8, agent=7.2%]ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-6-c0ae9133dc6f>", line 13, in <module>
    runner2.run(num_episodes=2000, callback=callback, callback_episode_frequency=1)
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/site-packages/tensorforce/execution/runner.py", line 516, in run
    observation = self.environments[n].receive_execute()
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/site-packages/tensorforce/environments/environment.py", line 330, in receive_execute
    states, terminal, reward = self.execute(actions=self._actions)
  File "/home/junwoony/miniconda3/envs/arpae/lib/python3.7/site-packages/tensorforce/environments/environment.py", line 377, in execute
    states, terminal, reward = self.environment.execute(actions=actions)
  File "/home/junwoony/miniconda3

KeyboardInterrupt: 