In [1]:
# Automatically reload imported modules before each cell runs, so edits to
# project code take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Optional CUDA environment settings, currently disabled; uncomment to apply.
# %env CUDA_DEVICE_ORDER=PCI_BUS_ID
# %env CUDA_VISIBLE_DEVICES=1
# %env CUDA_LAUNCH_BLOCKING=1

# Set up the gym and wrap a monitor around it that will periodically record movies as it learns

In [2]:
import gym
from surface_seg.envs.mcs_env import MCSEnv
import gym.wrappers
import numpy as np
import tensorforce 
import copy


In [3]:
def setup_env(recording=False, max_episode_timesteps=400,
              video_dir="./vid", video_every=50):
    """Build the MCS gym and wrap it as a Tensorforce environment.

    Args:
        recording: If True, wrap the gym in ``gym.wrappers.Monitor`` so that
            videos of selected episodes are written to ``video_dir``.
        max_episode_timesteps: Per-episode step limit handed to the
            Tensorforce ``OpenAIGym`` wrapper.
        video_dir: Directory the monitor writes to. The monitor is created
            with ``force=True``, so earlier monitor output in this directory
            may be overwritten.
        video_every: Record every ``video_every``-th episode. The check is
            ``episode_id % video_every == 0``, so with the default of 50 the
            recorded episode ids are 0, 50, 100, ...

    Returns:
        The ``tensorforce.environments.OpenAIGym`` instance wrapping the gym.

    Raises:
        ValueError: If ``recording`` is True and ``video_every`` is < 1.
    """
    # Set up gym
    MCS_gym = MCSEnv(fingerprints=True,
                     permute_seed=None)

    if recording:
        # A non-positive interval would only fail later, inside the monitor
        # callback (modulo by zero), so reject it up front.
        if video_every < 1:
            raise ValueError("video_every must be a positive integer")

        # Wrap the gym to render a video for every `video_every`-th episode
        # (the callback receives an episode id, not a step count).
        MCS_gym = gym.wrappers.Monitor(
            MCS_gym,
            video_dir,
            force=True,
            video_callable=lambda episode_id: episode_id % video_every == 0,
        )

    # Convert gym to tensorforce environment
    env = tensorforce.environments.OpenAIGym(
        MCS_gym,
        max_episode_timesteps=max_episode_timesteps,
        visualize=False,
    )

    return env


# Set up the gym and agent in tensorforce

In [4]:
from tensorforce.agents import Agent

# Exploration schedule passed to the agent: a decaying value measured in
# timesteps, exponential decay from 0.3 with decay_rate 0.5 and
# decay_steps 1000.
exploration_schedule = dict(
    type='decaying',
    unit='timesteps',
    decay='exponential',
    initial_value=0.3,
    decay_steps=1000,
    decay_rate=0.5,
)

# Create a TRPO agent against a freshly built (non-recording) environment.
agent = Agent.create(
    agent='trpo',
    environment=setup_env(),
    batch_size=10,
    learning_rate=1e-2,
    memory=40000,
    max_episode_timesteps=400,
    exploration=exploration_schedule,
)

# Keep the agent's specification so later cells can refer to it.
agent_spec = agent.spec

Instructions for updating:
If using Keras pass *_constraint arguments to layers.




Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.while_loop(c, b, vars, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.while_loop(c, b, vars))
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


# Run the DRL method on a single instance

In [5]:
from tensorforce.execution import Runner

# Build a runner from the agent specification and a fresh environment.
runner = Runner(
    agent=agent_spec,
    environment=setup_env(),
    max_episode_timesteps=400,
)

# Training is long-running and is often interrupted by hand; the finally
# clause makes sure the runner is closed even when a run raises
# (e.g. KeyboardInterrupt), instead of being left open in the kernel.
try:
    # Training phase.
    runner.run(num_episodes=1000)
    # Evaluation phase.
    runner.run(num_episodes=100, evaluation=True)
finally:
    runner.close()

INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


Episodes:   0%|          | 0/1000 [00:00, reward=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

found a new local minima! distance=0.12 w energy -0.01
found a new local minima! distance=1.39 w energy 0.11
found a new local minima! distance=1.91 w energy 1.29
found a new local minima! distance=2.31 w energy -0.02


Episodes:   0%|          | 1/1000 [01:43, reward=3545.63, ts/ep=400, sec/ep=103.58, ms/ts=259.0, agent=11.4%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=3.06 w energy 1.79
found a new local minima! distance=2.54 w energy 1.57
found a new local minima! distance=0.97 w energy 1.61
found a new local minima! distance=1.00 w energy 1.81
found a new local minima! distance=4.12 w energy -0.03


Episodes:   0%|          | 2/1000 [02:09, reward=3631.96, ts/ep=400, sec/ep=25.69, ms/ts=64.2, agent=6.9%]   

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.96 w energy 0.18


Episodes:   0%|          | 3/1000 [02:25, reward=1795.46, ts/ep=400, sec/ep=16.59, ms/ts=41.5, agent=10.4%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=2.10 w energy 0.20
found a new local minima! distance=4.21 w energy -0.02


Episodes:   0%|          | 4/1000 [02:42, reward=2711.76, ts/ep=400, sec/ep=16.58, ms/ts=41.5, agent=10.4%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=2.12 w energy 0.19


Episodes:   0%|          | 5/1000 [02:53, reward=1978.76, ts/ep=400, sec/ep=11.50, ms/ts=28.7, agent=14.7%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.88 w energy 0.19


Episodes:   1%|          | 6/1000 [03:07, reward=1896.35, ts/ep=400, sec/ep=13.14, ms/ts=32.9, agent=12.7%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.96 w energy 0.18


Episodes:   1%|          | 7/1000 [03:17, reward=1987.30, ts/ep=400, sec/ep=10.41, ms/ts=26.0, agent=16.4%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=2.10 w energy 0.20


Episodes:   1%|          | 8/1000 [03:28, reward=1924.44, ts/ep=400, sec/ep=11.21, ms/ts=28.0, agent=15.3%]

found a new local minima! distance=0.08 w energy -0.01
found a new local minima! distance=1.91 w energy 0.18


Episodes:   1%|          | 9/1000 [03:39, reward=1978.65, ts/ep=400, sec/ep=10.73, ms/ts=26.8, agent=15.9%]

found a new local minima! distance=0.08 w energy -0.01


KeyboardInterrupt: 

# Run the DRL method in parallel (multiple environments)

In [None]:
# from tensorforce.execution import Runner

# runner = Runner(
#     agent=agent_spec,
#     environments=[setup_env(), setup_env()],
#     num_parallel=2,
#     max_episode_timesteps=400,
# )

# runner.run(num_episodes=1000)
# runner.run(num_episodes=100, evaluation=True)
# runner.close()