In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the surface_seg package) are picked up without restarting
# the kernel.
%load_ext autoreload
%autoreload 2

# Set up the gym environment and wrap it in a monitor that periodically records videos while the agent learns

In [2]:
import gym
from surface_seg.envs.mcs_env import MCSEnv
from surface_seg.utils.callback import Callback
import gym.wrappers
import numpy as np
import tensorforce 
import copy

In [3]:
def setup_env(recording=False, max_episode_timesteps=400, video_every=50,
              video_dir="./vid"):
    """Create the MCS gym and wrap it as a Tensorforce environment.

    Args:
        recording: When True, the gym is wrapped in ``gym.wrappers.Monitor``
            so that videos of selected episodes are written to ``video_dir``.
        max_episode_timesteps: Episode length limit passed to the Tensorforce
            ``OpenAIGym`` wrapper.
        video_every: Recording interval in episodes. Episodes whose id is a
            multiple of this value (0, video_every, 2 * video_every, ...) are
            recorded. Only used when ``recording`` is True.
        video_dir: Directory handed to the Monitor wrapper. The wrapper is
            created with ``force=True``, so existing monitor output in that
            directory is not preserved.

    Returns:
        The ``tensorforce.environments.OpenAIGym`` environment wrapping the
        (optionally monitored) MCS gym.

    Raises:
        ValueError: If ``recording`` is True and ``video_every`` is smaller
            than 1.
    """
    # Set up the gym with a fixed permutation seed
    MCS_gym = MCSEnv(
        observation_fingerprints=False,
        observation_forces=False,
        permute_seed=42,
    )

    if recording:
        if video_every < 1:
            # Fail here rather than with a ZeroDivisionError inside the
            # Monitor's callback in the middle of a training run.
            raise ValueError("video_every must be a positive integer")

        # Wrap the gym so that every `video_every`-th episode is rendered to
        # video, starting with the very first episode (episode id 0).
        MCS_gym = gym.wrappers.Monitor(
            MCS_gym,
            video_dir,
            force=True,
            video_callable=lambda episode_id: episode_id % video_every == 0,
        )

    # Convert the gym to a Tensorforce environment
    env = tensorforce.environments.OpenAIGym(
        MCS_gym,
        max_episode_timesteps=max_episode_timesteps,
        visualize=False,
    )

    return env

# Set up the gym and agent in tensorforce

In [4]:
from tensorforce.agents import Agent

# Create a TRPO agent against a non-recording environment. The episode cap
# (400) is the same value used when the environment itself is built.
agent = Agent.create(
    agent='trpo',
    environment=setup_env(),
    batch_size=1,
    learning_rate=1e-3,
    memory=40000,
    max_episode_timesteps=400,
    # Exploration starts at 1.0 and follows an exponential decay schedule
    # expressed in timesteps (decay_steps=10000, decay_rate=0.5).
    exploration={
        'type': 'decaying',
        'unit': 'timesteps',
        'decay': 'exponential',
        'initial_value': 1.0,
        'decay_steps': 10000,
        'decay_rate': 0.5,
    },
    # required for recording states and actions
    recorder={
        'directory': './recorder',
        'frequency': 'every episode',
    },
)

# Keep the agent's specification; the training cell passes this spec to the
# Runner instead of the agent object itself.
agent_spec = agent.spec

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


# Run the DRL method on a single instance

In [None]:
from tensorforce.execution import Runner
from surface_seg.utils.callback import Callback

# Per-episode hook: the bound `episode_finish` method of a Callback that was
# constructed with './result'.
callback = Callback('./result').episode_finish

# The runner is given the agent specification (not the `agent` object) and a
# freshly built environment with video recording enabled.
runner = Runner(
    agent=agent_spec,
    environment=setup_env(recording=True),
    max_episode_timesteps=400,
)

# Train for 1000 episodes, invoking `callback` with an episode frequency of 1.
runner.run(num_episodes=1000, callback=callback, callback_episode_frequency=1)
# NOTE(review): the evaluation run and the cleanup call below are disabled, so
# nothing in this cell closes the runner -- confirm that this is intended.
# runner.run(num_episodes=100, evaluation=True)
# runner.close()



INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


Episodes:   0%|          | 0/1000 [00:00, reward=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

found a new local minima! distance=0.16 w energy -0.01
found a new local minima! distance=6.05 w energy 0.19
found a new local minima! distance=0.25 w energy 0.19
found a new local minima! distance=6.08 w energy -0.01
found a new local minima! distance=2.46 w energy 0.11
found a new local minima! distance=2.64 w energy -0.00
found a new local minima! distance=0.29 w energy -0.00
found a new local minima! distance=5.15 w energy 0.20
found a new local minima! distance=0.32 w energy 0.19
found a new local minima! distance=0.37 w energy 0.20
found a new local minima! distance=0.41 w energy 0.19
found a new local minima! distance=5.16 w energy -0.00
found a new local minima! distance=2.44 w energy 0.11
found a new local minima! distance=5.38 w energy -0.01
found a new local minima! distance=0.21 w energy -0.01
found a new local minima! distance=0.22 w energy -0.01
found a new local minima! distance=2.69 w energy 0.12
found a new local minima! distance=0.39 w energy 0.12
found a new local mi

Episodes:   0%|          | 1/1000 [03:39, reward=24140.61, ts/ep=400, sec/ep=219.73, ms/ts=549.3, agent=5.1%]

found a new local minima! distance=0.14 w energy -0.01
found a new local minima! distance=0.22 w energy -0.01
found a new local minima! distance=0.28 w energy -0.01
found a new local minima! distance=0.21 w energy -0.01
found a new local minima! distance=2.45 w energy 0.11
found a new local minima! distance=0.26 w energy 0.11
found a new local minima! distance=2.51 w energy -0.00
found a new local minima! distance=0.21 w energy 0.00
found a new local minima! distance=0.24 w energy 0.00
found a new local minima! distance=4.65 w energy 0.20
found a new local minima! distance=0.47 w energy 0.19
found a new local minima! distance=5.98 w energy -0.00
found a new local minima! distance=5.14 w energy -0.01
found a new local minima! distance=2.39 w energy 0.11
found a new local minima! distance=0.39 w energy 0.11
found a new local minima! distance=0.31 w energy -0.01
found a new local minima! distance=4.71 w energy 0.19
found a new local minima! distance=0.42 w energy 0.19
found a new local mi

Episodes:   0%|          | 2/1000 [04:25, reward=23320.37, ts/ep=400, sec/ep=45.04, ms/ts=112.6, agent=3.4%] 

found a new local minima! distance=0.18 w energy -0.01
found a new local minima! distance=6.36 w energy 0.19
found a new local minima! distance=5.81 w energy -0.01
found a new local minima! distance=5.30 w energy 0.20
found a new local minima! distance=0.20 w energy 0.20
found a new local minima! distance=5.34 w energy 0.00
found a new local minima! distance=6.49 w energy 0.21
found a new local minima! distance=6.03 w energy -0.01
found a new local minima! distance=5.13 w energy 0.19
found a new local minima! distance=0.46 w energy 0.19
found a new local minima! distance=4.84 w energy -0.01
found a new local minima! distance=0.21 w energy -0.01
found a new local minima! distance=2.84 w energy 0.25
found a new local minima! distance=0.39 w energy -0.00
found a new local minima! distance=2.45 w energy 0.11
found a new local minima! distance=2.48 w energy -0.00
found a new local minima! distance=0.28 w energy 0.00
found a new local minima! distance=0.22 w energy -0.00
found a new local mi

Episodes:   0%|          | 3/1000 [05:09, reward=25638.91, ts/ep=400, sec/ep=43.06, ms/ts=107.6, agent=3.3%]

found a new local minima! distance=0.21 w energy -0.01
found a new local minima! distance=4.67 w energy -0.03
found a new local minima! distance=5.39 w energy 0.18
found a new local minima! distance=0.20 w energy 0.18
found a new local minima! distance=5.26 w energy -0.03
found a new local minima! distance=0.21 w energy -0.03
found a new local minima! distance=0.36 w energy -0.02
found a new local minima! distance=0.30 w energy 0.17
found a new local minima! distance=0.25 w energy 0.17
found a new local minima! distance=2.51 w energy 0.11
found a new local minima! distance=0.32 w energy 0.11
found a new local minima! distance=2.18 w energy -0.01
found a new local minima! distance=0.27 w energy -0.01
found a new local minima! distance=5.60 w energy -0.00
found a new local minima! distance=5.53 w energy 0.21
found a new local minima! distance=0.23 w energy 0.21
found a new local minima! distance=3.19 w energy 0.29
found a new local minima! distance=3.20 w energy 0.01
found a new local mi

Episodes:   0%|          | 4/1000 [05:58, reward=26278.95, ts/ep=400, sec/ep=48.63, ms/ts=121.6, agent=3.1%]

found a new local minima! distance=0.17 w energy -0.01
found a new local minima! distance=5.35 w energy 0.18
found a new local minima! distance=0.29 w energy -0.01
found a new local minima! distance=3.11 w energy 0.27
found a new local minima! distance=3.21 w energy -0.01
found a new local minima! distance=5.06 w energy 0.20
found a new local minima! distance=5.45 w energy -0.01
found a new local minima! distance=6.18 w energy 0.20
found a new local minima! distance=0.23 w energy 0.20
found a new local minima! distance=0.31 w energy 0.20
found a new local minima! distance=5.63 w energy -0.00
found a new local minima! distance=0.20 w energy 0.00
found a new local minima! distance=4.41 w energy 0.19
found a new local minima! distance=0.80 w energy 0.20
found a new local minima! distance=0.24 w energy 0.20
found a new local minima! distance=5.35 w energy -0.01
found a new local minima! distance=2.51 w energy 0.11
found a new local minima! distance=2.73 w energy -0.03
found a new local min

# Run the DRL method in parallel (multiple environments)

In [None]:
# from tensorforce.execution import Runner

# runner = Runner(
#     agent=agent_spec,
#     environments=[setup_env(), setup_env()],
#     num_parallel=2,
#     max_episode_timesteps=400,
# )

# runner.run(num_episodes=1000)
# runner.run(num_episodes=100, evaluation=True)
# runner.close()