# In [8]:
import gym
from typing import TypeVar
import random

"""
TypeVar: Type variable

Type variables exist primarily for the benefit of static type checkers. They serve as 
the parameters for generic types as well as for generic function definitions. See class 
Generic for more information on generic types.
"""

# Unconstrained type variable, so it can stand for any type (int, float,
# tuple, list, dict, ...). Annotating a function's parameter and return value
# with ``Action`` tells a static type checker that both share the same type.
Action = TypeVar('Action')


class RandomActionWrapper(gym.ActionWrapper):
    """Action wrapper that sometimes swaps the agent's action for a random one.

    With probability ``epsilon`` the action handed to :meth:`action` is
    discarded and a sample drawn from the wrapped environment's action space
    is returned in its place; otherwise the action passes through untouched.
    """

    def __init__(self, env, epsilon=0.4):
        """Wrap ``env`` and remember the replacement probability.

        Args:
            env: Environment forwarded to the parent wrapper's constructor.
            epsilon: Probability of replacing an incoming action with a
                random sample from the action space.
        """
        super().__init__(env)
        self.epsilon = epsilon

    def action(self, action: Action) -> Action:
        """Return ``action`` or, with probability ``epsilon``, a random one.

        This overrides the parent class's hook for adjusting the agent's
        actions. Only ``action_space.sample()`` of the wrapped environment is
        used, so nothing here is specific to one particular environment.

        Args:
            action: The action the agent asked for.

        Returns:
            The unchanged ``action``, or a random sample from the wrapped
            environment's action space when the roll falls below ``epsilon``.
        """
        roll = random.random()
        explore = roll < self.epsilon
        if not explore:
            return action

        # Announce every substitution so it is easy to verify that the
        # wrapper is actually taking effect.
        print("Random!")
        return self.env.action_space.sample()


if __name__ == "__main__":
    # Demo: build a plain CartPole environment and hand it to the wrapper's
    # constructor. From here on the wrapper is used in place of the original
    # environment; it exposes the same reset()/step() interface, so wrappers
    # can be nested in any combination.
    env = RandomActionWrapper(gym.make("CartPole-v0"))

    try:
        obs = env.reset()
        total_reward = 0.0

        # The "agent" always requests action 0; the wrapper occasionally
        # replaces it with a random action. Run until the episode ends.
        done = False
        while not done:
            obs, reward, done, _ = env.step(0)
            total_reward += reward
    finally:
        # Release the environment's resources even if the episode raised.
        env.close()

    print("Reward got: %.2f" % total_reward)

# Output:
# Random!
# Reward got: 9.00
