## Toy example - random action, random reward

In [2]:
import random

class Environment:
    """Toy environment with a fixed-length episode and random rewards.

    The observation never changes and the reward does not depend on the
    action taken; the class only illustrates the agent/environment API.
    """

    def __init__(self, steps=10):
        """Start an episode that lasts `steps` steps (10 by default)."""
        self.steps_left = steps

    def get_observation(self):
        """Return the current observation, which is always three zeros."""
        return [0., 0., 0.]

    def get_actions(self):
        """Return the list of actions the agent may choose from."""
        return [0, 1]

    def is_done(self):
        """Return True once no steps remain in the episode."""
        # `<=` rather than `==`: a zero or negative step budget must end the
        # episode instead of never matching zero and looping forever.
        return self.steps_left <= 0

    def action(self, action):
        """Consume one step and return a random reward in [0, 1).

        The `action` argument is accepted but has no effect on the reward.

        Raises:
            RuntimeError: if the episode is already over.
        """
        if self.is_done():
            # RuntimeError is still caught by callers using `except Exception`.
            raise RuntimeError("Game is over")
        self.steps_left -= 1
        return random.random()

class Agent:
    """Agent that acts at random and keeps a running total of its rewards."""

    def __init__(self):
        # Sum of every reward received so far.
        self.total_reward = 0.0

    def step(self, env):
        """Play a single step against `env` with a randomly chosen action.

        The reward returned by `env.action` is added to `self.total_reward`.
        """
        # Observe the environment; this random agent does not use what it sees.
        _observation = env.get_observation()
        # Decide which action to take: pick uniformly among those on offer.
        available = env.get_actions()
        chosen = random.choice(available)
        # Submit the action to the environment and collect the reward.
        gained = env.action(chosen)
        self.total_reward += gained
        
# Wire a fresh environment to a fresh agent.
env = Environment()
agent = Agent()

# Keep stepping until the environment reports that the episode has ended.
while True:
    if env.is_done():
        break
    agent.step(env)

print(f"Total reward got: {round(agent.total_reward,2)}")

Total reward got: 3.51


## OpenAI Gym API

In [3]:
import gym

### Toy example

In [4]:
# Play one CartPole episode with uniformly random actions and report its length.
env = gym.make("CartPole-v0")
total_reward = 0.
total_steps = 0
try:
    obs = env.reset()
    while True:
        action = env.action_space.sample()
        # The fourth element of the step tuple is unused; it is bound to
        # `_info` rather than `_` so IPython's last-output variable is untouched.
        obs, reward, done, _info = env.step(action)
        total_reward += reward
        total_steps += 1
        if done:
            break
finally:
    # Release the environment even if a step raises.
    env.close()
print(f"Episode done in {total_steps} steps, total reward {int(total_reward)}")

Episode done in 16 steps, total reward 16


### Wrapper

In [19]:
import random
class RandomActionWrapper(gym.ActionWrapper):
    """Action wrapper that, with probability `epsilon`, swaps in a sampled action."""

    def __init__(self, env, epsilon=0.1):
        super().__init__(env)
        # Chance, per call to `action`, of replacing the requested action.
        self.epsilon = epsilon

    def action(self, action):
        """Return `action` unchanged, or a sample from the action space instead."""
        keep_requested = not (random.random() < self.epsilon)
        if keep_requested:
            return action
        print("Random action!")
        return self.env.action_space.sample()

class RandomObservationWrapper(gym.ObservationWrapper):
    """Observation wrapper that, with probability `epsilon`, swaps in a sampled observation."""

    def __init__(self, env, epsilon=0.1):
        super().__init__(env)
        # Chance, per call to `observation`, of replacing the real observation.
        self.epsilon = epsilon

    def observation(self, obs):
        """Return `obs` unchanged, or a sample from the observation space instead."""
        keep_real = not (random.random() < self.epsilon)
        if keep_real:
            return obs
        print("Random obs!")
        return self.env.observation_space.sample()
    
# Run one CartPole episode that always requests action 0, through both wrappers.
env = RandomObservationWrapper(RandomActionWrapper(gym.make("CartPole-v0")))
total_reward = 0.
try:
    obs = env.reset()
    while True:
        # The fourth element of the step tuple is unused; it is bound to
        # `_info` rather than `_` so IPython's last-output variable is untouched.
        obs, reward, done, _info = env.step(0)
        total_reward += reward
        if done:
            break
finally:
    # Release the environment even if a step raises.
    env.close()
print(f"Reward got: {round(total_reward,2)}")

Random obs!
Random action!
Reward got: 9.0
