In [8]:
# Copyright 2020 Tensorforce Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import gym
from threading import Thread

from tensorforce import Environment, Runner

In [29]:
import numpy as np

In [51]:
import time

In [63]:
class CustomEnvironment(Environment):

    def __init__(self,number):
        self.number=number
        super().__init__()

    def states(self):
        return dict(type='float', shape=(8,))

    def actions(self):
        return dict(type='int', num_values=4)

    # Optional: should only be defined if environment has a natural fixed
    # maximum episode length; otherwise specify maximum number of training
    # timesteps via Environment.create(..., max_episode_timesteps=???)
    def max_episode_timesteps(self):
        return super().max_episode_timesteps()

    # Optional additional steps to close environment
    def close(self):
        super().close()

    def reset(self):
        state = np.random.random(size=(8,))
        return state

    def execute(self, actions):
        next_state = np.random.random(size=(8,))
        terminal = False  # Always False if no "natural" terminal state
        time.sleep(0.0007)
        reward = self.number
        return next_state, terminal, reward

In [66]:
    """
    Train agent on experience collected in parallel from 4 local CartPole environments.
    Typical use case:
        time for batched agent.act() ~ time for agent.act() > time for environment.execute()
    """
agent = 'tensorforce/benchmarks/configs/ppo.json'
runner = Runner(agent=agent, environment=dict(environment=CustomEnvironment,number=2), num_parallel=4,max_episode_timesteps=100)
# Batch act/observe calls to agent, unless environment.is_vectorizable()
# (otherwise essentially equivalent to single environment)
runner.run(num_episodes=100, batch_agent_calls=True)
runner.close()



Episodes:   0%|          | 0/100 [00:00, return=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

In [65]:
    """
    Train agent on experience collected in parallel from 4 local CartPole environments.
    Typical use case:
        time for batched agent.act() ~ time for agent.act() > time for environment.execute()
    """
agent = 'tensorforce/benchmarks/configs/ppo.json'
runner = Runner(agent=agent, environment=dict(environment=CustomEnvironment,number=2),max_episode_timesteps=100)
# Batch act/observe calls to agent, unless environment.is_vectorizable()
# (otherwise essentially equivalent to single environment)
runner.run(num_episodes=100)
runner.close()



Episodes:   0%|          | 0/100 [00:00, return=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

In [23]:
# OpenAI-Gym environment specification
environment1 = Environment.create(
       environment='gym', level='CartPole-v1')

environment2 = Environment.create(
       environment='gym', level='CartPole-v1')

In [28]:
    """
    Train agent on experience collected in parallel from 4 local CartPole environments.
    Typical use case:
        time for batched agent.act() ~ time for agent.act() > time for environment.execute()
    """
agent = 'tensorforce/benchmarks/configs/ppo.json'
environment = 'tensorforce/benchmarks/configs/cartpole.json'
runner = Runner(agent=agent, environment=environment, num_parallel=2)
# Batch act/observe calls to agent, unless environment.is_vectorizable()
# (otherwise essentially equivalent to single environment)
runner.run(num_episodes=100, batch_agent_calls=True)
runner.close()

Episodes:   0%|          | 0/100 [00:00, return=0.00, ts/ep=0, sec/ep=0.00, ms/ts=0.0, agent=0.0%]

In [None]:
def local_vectorized():
    """
    Train agent on experience collected in parallel from one vectorized CartPole environment.
    Typical use case:
        time for vectorized environment < time for sequential execution
    """
    agent = 'benchmarks/configs/ppo.json'
    environment = 'custom_cartpole'
    runner = Runner(agent=agent, environment=environment, max_episode_timesteps=500, num_parallel=4)
    runner.run(num_episodes=100)
    runner.close()

In [22]:
"""
Train agent on experience collected in parallel from 4 CartPole environments running in
separate processes.
Typical use case:
    (a) time for batched agent.act() ~ time for agent.act()
                    > time for environment.execute() + remote communication
        --> batch_agent_calls = True
    (b) time for environment.execute() > time for agent.act() + process communication
        --> batch_agent_calls = False
"""
agent = 'benchmarks/configs/ppo.json'
environment = 'benchmarks/configs/cartpole.json'
runner = Runner(agent=agent, environment=environment, num_parallel=4, remote='multiprocessing')
runner.run(num_episodes=100, batch_agent_calls=True)  # optional: batch_agent_calls=True
runner.close()

TensorforceError: 
  File "/home/olaf/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/environments/environment.py", line 683, in remote
    env = Environment.create(
  File "/home/olaf/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/environments/environment.py", line 223, in create
    return Environment.create(
  File "/home/olaf/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/environments/environment.py", line 204, in create
    return Environment.create(
  File "/home/olaf/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/environments/environment.py", line 160, in create
    environment = environment(**kwargs)
  File "/home/olaf/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/environments/openai_gym.py", line 170, in __init__
    self.environment, self._max_episode_timesteps = self.__class__.create_level(
  File "/home/olaf/Desktop/Semantic-Reasoning-in-Reinforcement-Learning/tensorforce/tensorforce/environments/openai_gym.py", line 70, in create_level
    if level not in gym.envs.registry.env_specs:
  File "/home/olaf/anaconda3/envs/master/lib/python3.9/site-packages/gym/envs/registration.py", line 409, in __contains__
    namespace, name, version = parse_env_id(key)
  File "/home/olaf/anaconda3/envs/master/lib/python3.9/site-packages/gym/envs/registration.py", line 71, in parse_env_id
    raise error.Error(

<class 'gym.error.Error'>: Malformed environment ID: benchmarks/configs/cartpole.json.(Currently all IDs must be of the form re.compile('^(?:(?P<namespace>[\\w:-]+)\\/)?(?:(?P<name>[\\w:.-]+?))(?:-v(?P<version>\\d+))?$').)`.

In [None]:
def socket():
    """
    Train agent on experience collected in parallel from 2 CartPole environments running on
    another machine.
    Typical use case: same as mode 2, but generally remote communication socket > process
    Simulate remote environment, usually run on another machine via:
        python run.py --environment gym --level CartPole-v1 --remote socket-server --port 65432
    """
    agent = 'benchmarks/configs/ppo.json'
    environment = 'benchmarks/configs/cartpole.json'

    def server(port):
        Environment.create(environment=environment, remote='socket-server', port=port)

    server1 = Thread(target=server, kwargs=dict(port=65432))
    server2 = Thread(target=server, kwargs=dict(port=65433))
    server1.start()
    server2.start()

    runner = Runner(
        agent=agent, num_parallel=2, remote='socket-client', host='127.0.0.1', port=65432
    )
    runner.run(num_episodes=100)  # optional: batch_agent_calls=True
    runner.close()

    server1.join()
    server2.join()


if __name__ == '__main__':
    main()