In [2]:
import gym
import gym.envs
print(gym.__version__)
all_env = list(gym.envs.registry.all())
print("number of envs:", len(all_env))

# You could loop through and list all environments if you wanted
# [print(e) for e in all_env]
envs_starting_with_f = [e for e in all_env if str(e).startswith("EnvSpec(Frozen")]
envs_starting_with_f

0.22.0
number of envs: 35





[]

In [3]:
env_name = "FrozenLake-v1"

env = gym.make(env_name, is_slippery=False)
print(env)

print(f"env: {env}")
env_spec = env.spec
print(f"env_spec: {env_spec}")

<TimeLimit<OrderEnforcing<FrozenLakeEnv<FrozenLake-v1>>>>
env: <TimeLimit<OrderEnforcing<FrozenLakeEnv<FrozenLake-v1>>>>
env_spec: EnvSpec(entry_point='gym.envs.toy_text:FrozenLakeEnv', reward_threshold=0.7, nondeterministic=False, max_episode_steps=100, order_enforce=True, kwargs={'map_name': '4x4', 'is_slippery': False}, namespace=None, name='FrozenLake', version=1)


In [4]:
#check if gym is a instance
if isinstance(env, gym.Env):
    print("this is gym environment")

    print(env.action_space)
    print(env.observation_space)



this is gym environment
Discrete(4)
Discrete(16)


In [5]:
# inspect env.spec parameters
 
# View default env spec params that are hard-coded in Gym code itself
# Default parameters are fixed
print("Default spec params...")
print(f"id: {env_spec.id}")
# rewards above this value considered "success"
print(f"reward_threshold: {env_spec.reward_threshold}")
# env is deterministic or stochastic
print(f"nondeterministic: {env_spec.nondeterministic}")
# number of time steps per episode
print(f"max_episode_steps: {env_spec.max_episode_steps}")
# must reset before step or render
print(f"order_enforce: {env_spec.order_enforce}") 

# View runtime **kwargs .spec params.  These params set after env instantiated.
# print(f"type(env_spec._kwargs): {type(env_spec._kwargs)}") #dict
print()
print("Runtime spec params...")
# Note: gym > v21 use just .kwargs instead of ._kwargs
[print(f"{k}: {v}") for k,v in env_spec.kwargs.items()]
print()

# Note:  We can tell that our frozen lake environment is: 
# 1) Success criteria is rewards >= 0.7
# 2) Deterministic
# 3) Episode terminates when number time_steps = 100

Default spec params...
id: FrozenLake-v1
reward_threshold: 0.7
nondeterministic: False
max_episode_steps: 100
order_enforce: True

Runtime spec params...
map_name: 4x4
is_slippery: False



In [9]:
print(env.reset())
print(env.render())

0
None


In [10]:
# Take an action
# Recall the possible actions are: 0: LEFT, 1: DOWN, 2: RIGHT, 3: UP
new_obs, reward, done, info = env.step(2)
print(f"obs: {new_obs}, reward: {reward}, done: {done}")
env.render()
new_obs, reward, done, _ = env.step(1) #Down
print(f"obs: {new_obs}, reward: {reward}, done: {done}")
env.render()


obs: 1, reward: 0.0, done: False
obs: 5, reward: 0.0, done: True


In [13]:
from ipywidgets import Output
from IPython import display
import time

# The following three lines are for rendering purposes only.
# They allow us to render the env frame-by-frame in-place
# (w/o creating a huge output which we would then have to scroll through).
out = Output()
display.display(out)
with out:

    # Putting the Gym simple API methods together.
    # Here is a pattern for running a bunch of episodes.
    num_episodes = 5 # Number of episodes you want to run the agent
    total_reward = 0.0  # Initialize reward to 0

    # Loop through episodes
    for ep in range(num_episodes):

        # Reset the environment at the start of each episode
        obs = env.reset()
        done = False

        # Loop through time steps per episode
        while True:
            # take random action, but you can also do something more intelligent 
            action = env.action_space.sample()

            # apply the action
            new_obs, reward, done, info = env.step(action)
            total_reward += reward

            # If the epsiode is up, then start another one
            if done:
                break

            # Render the env (in place).
            time.sleep(0.3)
            out.clear_output(wait=True)
            print(f"episode: {ep}")
            print(f"obs: {new_obs}, reward: {total_reward}, done: {done}")
            env.render()

Output()

: 