In [None]:
# Reload edited modules automatically before each cell is executed, so changes
# to imported project source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Optional CUDA environment settings (disabled): uncomment to fix the device
# ordering, restrict the visible devices, or make kernel launches blocking.
# %env CUDA_DEVICE_ORDER=PCI_BUS_ID
# %env CUDA_VISIBLE_DEVICES=1
# %env CUDA_LAUNCH_BLOCKING=1

# Set up the gym and wrap a monitor around it that will periodically record movies as it learns

In [2]:
import gym
from surface_seg.envs.mcs_env import MCSEnv
from surface_seg.utils.callback import Callback
import gym.wrappers
import numpy as np
import tensorforce 
import copy
import tensorflow as tf

In [3]:
# Maximum number of timesteps per episode; reused below as max_episode_timesteps
# for the environment wrapper, the agent and the single-environment runner.
timesteps = 400

In [4]:
def setup_env(recording=True, video_dir="./vid_trpo/fps", video_every=50):
    """Create the MCS gym and expose it as a tensorforce environment.

    Args:
        recording: If True, wrap the gym in ``gym.wrappers.Monitor`` so that
            videos of selected episodes are recorded.
        video_dir: Directory handed to the monitor. The monitor is created
            with ``force=True``.
        video_every: Record an episode whenever ``episode_id + 1`` is a
            multiple of this value. With the default of 50 and episode ids
            counted from 0 (assumed -- confirm against the gym version in
            use) that is the 50th, 100th, ... episode. Must be >= 1.

    Returns:
        A ``tensorforce.environments.OpenAIGym`` wrapper around the gym,
        created with ``max_episode_timesteps=timesteps`` (the notebook-level
        constant) and ``visualize=False``.

    Raises:
        ValueError: If ``recording`` is True and ``video_every`` is below 1.
    """
    # Set up gym
    mcs_gym = MCSEnv(observation_fingerprints=False,
                     observation_forces=False,
                     permute_seed=42)

    if recording:
        # Fail early instead of hitting a modulo-by-zero inside the monitor.
        if video_every < 1:
            raise ValueError("video_every must be a positive integer")

        # Wrap the gym so that a video is rendered once every `video_every`
        # episodes (episodes, not environment steps).
        mcs_gym = gym.wrappers.Monitor(
            mcs_gym,
            video_dir,
            force=True,
            video_callable=lambda episode_id: (episode_id + 1) % video_every == 0,
        )

    # Convert gym to tensorforce environment
    return tensorforce.environments.OpenAIGym(
        mcs_gym,
        max_episode_timesteps=timesteps,
        visualize=False,
    )

# Set up the gym and agent in tensorforce

In [5]:
from tensorforce.agents import Agent

# Build a TRPO agent for an environment produced by setup_env().
agent = Agent.create(
    agent='trpo',
    environment=setup_env(),
    batch_size=1,
    learning_rate=1e-3,
    memory=40000,
    max_episode_timesteps=timesteps,
    # Exponentially decaying exploration, measured in timesteps.
    exploration={
        'type': 'decaying',
        'unit': 'timesteps',
        'decay': 'exponential',
        'initial_value': 0.1,
        'decay_steps': 80000,
        'decay_rate': 0.5,
    },
    # Required for recording states and actions.
    recorder={'directory': './recorder/fps', 'frequency': 1},
    # Tensorboard summarizer.
    summarizer={'directory': 'tb/fps', 'labels': 'all', 'frequency': 1},
)

# Keep the agent specification; it is passed as `agent` to the parallel
# Runner further down in the notebook.
agent_spec = agent.spec

Instructions for updating:
If using Keras pass *_constraint arguments to layers.




Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [6]:
from tensorforce.execution import Runner
from surface_seg.utils.callback import Callback

# plot_frequency --> how often energies and trajectories are plotted
# (presumably counted in episodes -- confirm against Callback's implementation).
# Only the bound episode_finish method is kept; it is what the runner calls.
callback = Callback('./result_trpo/fps', plot_frequency=50).episode_finish

# Single-environment runner: drives the agent created above on one freshly
# built, video-recording environment.
runner = Runner(
    agent=agent,
    environment=setup_env(recording=True),
    max_episode_timesteps=timesteps,
)

# Profile a short 2-episode run with IPython's %prun; the callback is invoked
# after every episode (callback_episode_frequency=1).
%prun runner.run(num_episodes=2, callback=callback, callback_episode_frequency=1)

# callback_episode_frequency --> how often results and trajectories are saved
# Disabled alternatives: a full 2000-episode training run, a 100-episode
# evaluation run, and closing the runner afterwards.
# runner.run(num_episodes=2000, callback=callback, callback_episode_frequency=1)
# runner.run(num_episodes=100, evaluation=True)
# runner.close()

Episodes: 100%|██████████| 2/2 [00:24, reward=-2331.67, ts/ep=400, sec/ep=8.65, ms/ts=21.6, agent=23.1%] 

 

         17655734 function calls (16658425 primitive calls) in 24.981 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     1601   10.558    0.007   10.558    0.007 {built-in method tensorflow.python._pywrap_tf_session.TF_SessionRun_wrapper}
   540757    0.970    0.000    0.970    0.000 {method 'reduce' of 'numpy.ufunc' objects}
   153814    0.934    0.000    2.289    0.000 numeric.py:2244(within_tol)
     8472    0.874    0.000    0.880    0.000 {method 'get_forces' of '_asap.Potential' objects}
1324152/551354    0.784    0.000    6.854    0.000 {built-in method numpy.core._multiarray_umath.implement_array_function}
   725207    0.697    0.000    0.734    0.000 {built-in method numpy.array}
     8472    0.667    0.000    0.672    0.000 {method 'get_potential_energy' of '_asap.Potential' objects}
   153814    0.607    0.000    4.848    0.000 numeric.py:2167(isclose)
   478903    0.462    0.000    1.414    0.000 fromnumeric.p

# Run the DRL method in parallel (multiple environments)

In [7]:
from tensorforce.execution import Runner

# Number of environment copies the runner steps in parallel; each copy is
# built by setup_env() and run with remote='multiprocessing'.
num_parallel = 32
runner = Runner(
    agent=agent_spec,
    environments=[setup_env() for _ in range(num_parallel)],
    num_parallel=num_parallel,
    remote='multiprocessing',
    # Use the shared notebook constant instead of repeating the literal 400,
    # so this runner stays consistent with the environments and the agent.
    max_episode_timesteps=timesteps,
)

# Always close the runner, even when the long run is interrupted (e.g. with a
# KeyboardInterrupt), so it is not left open after the cell stops.
try:
    runner.run(num_episodes=100000)
    # runner.run(num_episodes=100, evaluation=True)
finally:
    runner.close()

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.



Episodes:   0%|          | 0/100000 [00:00, reward=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%, comm=0.0%][A
Episodes:   0%|          | 1/100000 [00:31, reward=-2105.69, ts/ep=400, sec/ep=31.27, ms/ts=78.2, agent=25.8%, comm=58.8%][A
Episodes:   0%|          | 3/100000 [00:31, reward=-2176.92, ts/ep=400, sec/ep=31.44, ms/ts=78.6, agent=2.4%, comm=83.2%] [A
Episodes:   0%|          | 5/100000 [00:31, reward=-2120.93, ts/ep=400, sec/ep=31.60, ms/ts=79.0, agent=2.3%, comm=83.1%][A
Episodes:   0%|          | 7/100000 [00:31, reward=-1991.64, ts/ep=400, sec/ep=31.75, ms/ts=79.4, agent=2.3%, comm=82.4%][A
Episodes:   0%|          | 9/100000 [00:31, reward=-2220.68, ts/ep=400, sec/ep=31.92, ms/ts=79.8, agent=2.3%, comm=82.6%][A
Episodes:   0%|          | 11/100000 [00:32, reward=-2322.89, ts/ep=400, sec/ep=32.07, ms/ts=80.2, agent=2.3%, comm=82.4%][A
Episodes:   0%|          | 13/100000 [00:32, reward=-2243.12, ts/ep=400, sec/ep=32.22, ms/ts=80.6, agent=2.3%, comm=82.4%][A
Epis

KeyboardInterrupt: 

In [None]:
# Opens IPython's post-mortem debugger on the most recent exception (here the
# KeyboardInterrupt raised when the parallel run above was stopped).
# NOTE(review): interactive leftover -- consider removing before sharing.
%debug