In [31]:
import gymnasium as gym
import numpy as np

### Create the training environment

In [32]:
# Training environment: non-slippery FrozenLake with rendering switched off.
env = gym.make(
    "FrozenLake-v1",
    is_slippery=False,
    render_mode=None,
)
# reset() hands back the starting observation together with an info value.
state, info = env.reset()

In [33]:
# Table dimensions are taken from the environment's observation/action spaces.
state_size, action_size = env.observation_space.n, env.action_space.n
# Q-table: one row per state, one column per action, all entries start at zero.
q_table = np.zeros(shape=(state_size, action_size))
# Episode-finished flag, initially cleared.
done = False

### Learning parameters

In [34]:
# Q-learning hyperparameters.
LEARNING_RATE, DISCOUNT_FACTOR = 0.8, 0.95  # passed as alpha / gamma to training
TOTAL_EPISODES = 5_000                      # number of training episodes
MAX_STEPS_PER_EPISODE = 30                  # step cap inside one training episode
episodes = 0                                # episode counter, starts at zero

### Epsilon-greedy exploration parameters

In [35]:
# Exploration schedule: epsilon = eps_start * eps_decay**episodes,
# never allowed to drop below eps_end.
eps_start, eps_decay, eps_end = 1, 0.999, 0.05
epsilon = max(eps_end, eps_start * eps_decay ** episodes)


### Function to choose action





In [36]:
def selectAction(Q, S, epsilon):
    """Choose an action for state ``S`` with an epsilon-greedy rule.

    Args:
        Q: Q-table indexed as ``Q[state, action]``.
        S: index of the current state (row of ``Q``).
        epsilon: probability of exploring instead of exploiting.

    Returns:
        A random sample from the global ``env``'s action space when the
        uniform draw falls below ``epsilon``; otherwise the ``np.argmax`` of
        row ``S`` of ``Q`` (ties resolve to the lowest action index).
    """
    explore = np.random.random() < epsilon
    if not explore:
        # Exploit: best-known action for this state.
        return np.argmax(Q[S, :])
    # Explore: uniformly sampled action from the environment.
    return env.action_space.sample()


### Training Function

In [37]:
def epsGreedyQLearning(alpha, gamma):
    """Train a tabular Q-learning agent on the global ``env``.

    Args:
        alpha: learning rate applied to the temporal-difference error.
        gamma: discount factor applied to the best next-state value.

    Returns:
        The learned Q-table, a NumPy array of shape
        ``(state_size, action_size)``.

    Reads the module-level ``env``, ``state_size``, ``action_size``,
    ``TOTAL_EPISODES``, ``MAX_STEPS_PER_EPISODE``, ``eps_start``,
    ``eps_decay`` and ``eps_end``. The table and exploration rate used here
    are locals, so the globals ``q_table`` and ``epsilon`` are left untouched.
    """
    table = np.zeros((state_size, action_size))
    explore_prob = eps_start

    for ep in range(TOTAL_EPISODES):
        current, _ = env.reset()

        for _step in range(MAX_STEPS_PER_EPISODE):
            chosen = selectAction(table, current, explore_prob)
            nxt, reward, terminated, truncated, _ = env.step(chosen)

            # One-step Q-learning: move Q(s, a) toward r + gamma * max_a' Q(s', a').
            td_target = reward + gamma * table[nxt, :].max()
            table[current, chosen] += alpha * (td_target - table[current, chosen])

            current = nxt
            if terminated or truncated:
                break

        # Exploration rate for the next episode is derived from the index of
        # the episode that just finished, floored at eps_end.
        explore_prob = max(eps_end, eps_start * eps_decay ** ep)

    return table


### Train the agent and create the testing environment

In [38]:
# Learn the Q-table using whatever environment `env` currently refers to.
q_table = epsGreedyQLearning(LEARNING_RATE, DISCOUNT_FACTOR)

# Release the environment used for training before `env` is rebound;
# otherwise the old instance is dropped without ever being cleaned up.
env.close()

# Evaluation environment: same non-slippery map, rendered in a window.
env = gym.make("FrozenLake-v1", is_slippery=False, render_mode="human")
state, info = env.reset()

### Testing episodes

In [39]:
# Run the greedy policy from the learned Q-table for a few rendered episodes
# and print the final reward of each one.
testing_ep = 3
for episodes in range(testing_ep):
    # Every episode must start from a fresh reset with `done` cleared.
    # Without this, `done` stays True after the first episode, the inner loop
    # is skipped, and the first episode's reward is printed again.
    state, info = env.reset()
    done = False
    while not done:
        # Greedy action: no exploration during evaluation.
        action = np.argmax(q_table[state, :])
        new_state, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        state = new_state
    print(reward)

1
1
1
