# Frozenlake
- https://github.com/openai/gym/wiki/Table-of-environments 
- https://github.com/openai/gym/wiki/FrozenLake-v0
- https://github.com/openai/gym/blob/master/gym/envs/__init__.py
- https://github.com/openai/gym/blob/master/gym/envs/toy_text/frozen_lake.py

- the 4x4 map is fixed
- the result of an action is probabilistic due to the slippery environment
- solved requirements: 
    - reaching the goal without falling into the hole over 100 consecutive trials.
    
- You can play it for yourself: frozenlake_game.py

In [1]:
# Show the version string of the interpreter running this notebook.
import sys
sys.version

'3.8.10 (tags/v3.8.10:3d8993a, May  3 2021, 11:48:03) [MSC v.1928 64 bit (AMD64)]'

In [2]:
import numpy as np
import gym
from gym.envs.registration import register

In [3]:
"""
 Set up a non-slippery variant.
"""
# Registers a new environment id that builds FrozenLakeEnv on the 4x4 map
# with is_slippery=False.
register(
    id='FrozenLakeNotSlippery-v1',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name' : '4x4', 'is_slippery': False},
)

In [4]:
# Action mapping predefined by OpenAI Gym (integer codes 0..3).
LEFT, DOWN, RIGHT, UP = range(4)

# Deterministic environment. 

- Each action moves the agent exactly to the intended destination state.

In [5]:
# Take up to three randomly sampled actions in the non-slippery environment
# and print each transition.
env = gym.make('FrozenLakeNotSlippery-v1')
state = env.reset()  # initial state should be 0
for _ in range(3):
    action = env.action_space.sample()  # pick a random action
    newstate, reward, done, info = env.step(action)
    print(action, newstate, reward, done, info)
    if done:
        # The episode is over; stop instead of stepping a finished episode
        # (matches the rollout loops used for the slippery environment).
        break

3 0 0.0 False {'prob': 1.0}
1 4 0.0 False {'prob': 1.0}
2 5 0.0 True {'prob': 1.0}


In [6]:
# Try to move right up to three times in the non-slippery environment,
# rendering the map and printing each transition.
env = gym.make('FrozenLakeNotSlippery-v1')
state = env.reset()  # initial state should be 0
action = RIGHT  # named constant instead of the magic number 2
for _ in range(3):
    newstate, reward, done, info = env.step(action)
    env.render()
    print(action, newstate, reward, done, info)
    if done:
        # The episode is over; stop instead of stepping a finished episode
        # (matches the rollout loops used for the slippery environment).
        break

  (Right)
S[41mF[0mFF
FHFH
FFFH
HFFG
2 1 0.0 False {'prob': 1.0}
  (Right)
SF[41mF[0mF
FHFH
FFFH
HFFG
2 2 0.0 False {'prob': 1.0}
  (Right)
SFF[41mF[0m
FHFH
FFFH
HFFG
2 3 0.0 False {'prob': 1.0}


# Stochastic Environment

- Because the environment responds stochastically, the agent may or may not end up where it intended to go.

In [8]:
# Sample up to three random actions in the stock 'FrozenLake-v1' environment,
# rendering the map and printing each transition.
env = gym.make('FrozenLake-v1')
state = env.reset()  # initial state should be 0

for _step in range(3):
    action = env.action_space.sample()
    outcome = env.step(action)
    newstate, reward, done, info = outcome
    env.render()
    print(action, *outcome)
    if done:
        break

  (Up)
[41mS[0mFFF
FHFH
FFFH
HFFG
3 0 0.0 False {'prob': 0.3333333333333333}
  (Right)
S[41mF[0mFF
FHFH
FFFH
HFFG
2 1 0.0 False {'prob': 0.3333333333333333}
  (Down)
[41mS[0mFFF
FHFH
FFFH
HFFG
1 0 0.0 False {'prob': 0.3333333333333333}


## Just try to move right.

In [9]:
# Attempt the same "right" action up to three times in 'FrozenLake-v1',
# rendering the map and printing each transition.
env = gym.make('FrozenLake-v1')
state = env.reset()  # initial state should be 0
action = 2  # 2 is the code for "right"
for _attempt in range(3):
    newstate, reward, done, info = env.step(action)
    env.render()
    print(action, newstate, reward, done, info)
    if done:
        break

  (Right)
S[41mF[0mFF
FHFH
FFFH
HFFG
2 1 0.0 False {'prob': 0.3333333333333333}
  (Right)
SF[41mF[0mF
FHFH
FFFH
HFFG
2 2 0.0 False {'prob': 0.3333333333333333}
  (Right)
SFF[41mF[0m
FHFH
FFFH
HFFG
2 3 0.0 False {'prob': 0.3333333333333333}


## Now let's see the probability of transition from S0.

In [10]:
# Estimate empirically where one "right" move from a freshly reset
# environment lands: tally the resulting state over many trials.
ntrials = 30000
dstate = np.zeros((16,))
action = 2  # go right
for _trial in range(ntrials):
    state = env.reset()  # initial state should be 0
    newstate, reward, done, info = env.step(action)
    dstate[newstate] += 1

# Lay the 16 per-state frequencies out as a 4x4 grid.
prob = dstate.reshape(4, 4) / ntrials
print(f'{prob}')

[[0.33133333 0.33283333 0.         0.        ]
 [0.33583333 0.         0.         0.        ]
 [0.         0.         0.         0.        ]
 [0.         0.         0.         0.        ]]


## Let's build up the full state-transition probability matrix.
- action policy is 'random'
    - it may not reach the goal :)

In [11]:
# Accumulate empirical transition counts while acting with randomly sampled
# actions: stm[s, s2] is how many times a step from s ended in s2.
stm = np.zeros((16, 16))
# counts[s] tallies steps taken from s. It starts at 1 rather than 0,
# presumably so that states never stepped from can still be used as a divisor.
counts = np.ones((16,), dtype=int)

nepisodes = 100_000
for _ in range(nepisodes):
    state = env.reset()
    done = False
    # Run one episode until the environment reports it has ended.
    while not done:
        counts[state] += 1

        action = env.action_space.sample()
        newstate, reward, done, info = env.step(action)

        stm[state, newstate] += 1
        state = newstate

In [12]:
# Reset and draw the map, then display the cell numbers 1..16 laid out as a
# 4x4 grid for reference.
env.reset()
env.render()
np.arange(1, 17).reshape(4, 4)


[41mS[0mFFF
FHFH
FFFH
HFFG


array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12],
       [13, 14, 15, 16]])

In [13]:
# Print the empirical transition probabilities, one line per source state
# (labelled from 1).
#
# Each row is normalised by its own total number of observed transitions so
# that every visited row sums to 1. Dividing by `counts` would bias the
# estimates low, because `counts` is initialised to 1 rather than 0.
# Rows with no observed transitions are printed as all zeros.
row_totals = stm.sum(axis=1, keepdims=True)
probs = np.divide(stm, row_totals, out=np.zeros_like(stm), where=row_totals > 0)
for label, row in enumerate(probs, start=1):
    cells = ' '.join(f'{value:5.2f}' for value in row)
    print(f'{label:2}:  {cells} ')

 1:   0.50  0.25  0.00  0.00  0.25  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00 
 2:   0.25  0.25  0.25  0.00  0.00  0.25  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00 
 3:   0.00  0.25  0.25  0.25  0.00  0.00  0.25  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00 
 4:   0.00  0.00  0.25  0.50  0.00  0.00  0.00  0.25  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00 
 5:   0.25  0.00  0.00  0.00  0.25  0.25  0.00  0.00  0.25  0.00  0.00  0.00  0.00  0.00  0.00  0.00 
 6:   0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00 
 7:   0.00  0.00  0.25  0.00  0.00  0.25  0.00  0.25  0.00  0.00  0.25  0.00  0.00  0.00  0.00  0.00 
 8:   0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00  0.00 
 9:   0.00  0.00  0.00  0.00  0.25  0.00  0.00  0.00  0.25  0.25  0.00  0.00  0.25  0.00  0.00  0.00 
10:   0.00  0.00  0.00  0.00  0.00  0.25  0.00  0.00  0.25  0.00  0.25  0.00  0.00

In [14]:
# Raw transition counts: stm[s, s2] is how many times a step from s ended in s2.
stm

array([[162674.,  81574.,      0.,      0.,  81472.,      0.,      0.,
             0.,      0.,      0.,      0.,      0.,      0.,      0.,
             0.,      0.],
       [ 32166.,  32026.,  31873.,      0.,      0.,  31959.,      0.,
             0.,      0.,      0.,      0.,      0.,      0.,      0.,
             0.,      0.],
       [     0.,  14424.,  14328.,  14532.,      0.,      0.,  14342.,
             0.,      0.,      0.,      0.,      0.,      0.,      0.,
             0.,      0.],
       [     0.,      0.,   7232.,  14732.,      0.,      0.,      0.,
          7300.,      0.,      0.,      0.,      0.,      0.,      0.,
             0.,      0.],
       [ 30880.,      0.,      0.,      0.,  31212.,  31308.,      0.,
             0.,  31040.,      0.,      0.,      0.,      0.,      0.,
             0.,      0.],
       [     0.,      0.,      0.,      0.,      0.,      0.,      0.,
             0.,      0.,      0.,      0.,      0.,      0.,      0.,
             

In [15]:
# Per-state step tallies; initialised to 1, so each entry is (steps taken from that state) + 1.
counts

array([325721, 128025,  57627,  29265, 124441,      1,  16775,      1,
        46467,  15646,   9540,      1,      1,   7081,   5536,      1])