# Mountain Car

Q Learning to play Mountain Car, using q table. Based on the tutorial by  sentdex [here](https://www.youtube.com/watch?v=yMk_XtIEzH8&list=PLQVvvaa0QuDezJFIOU5wDdfy4e9vdnx-7). Code updated from the tutorial to comply with changes in the latest version of OpenAI gym

In [1]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [2]:
import gym
import numpy as np
import pickle
import os.path
np.bool = np.bool_

env = gym.make("MountainCar-v0",render_mode='human')
#Discretize the observation space to make it manageable 
DISCRETE_OS_SIZE = [20] * len((env.observation_space.high))

# Just like any DL
LEARNING_RATE = 0.1

# A measure of how future rewards are valued 
DISCOUNT = 0.95

EPISODES = 5000 #0

epsilon = 1
START_EPSILON_DECAYING = 1
END_EPSILON_DECAYING = EPISODES//2

epsilon_decay_value = epsilon/(END_EPSILON_DECAYING - START_EPSILON_DECAYING)

show_every = 100
discrete_obs_win_size = (env.observation_space.high - env.observation_space.low) / DISCRETE_OS_SIZE
q_table_file = "mountain_car_q_table.pkl"
if os.path.isfile(q_table_file):
    with open(q_table_file, "rb") as fp:   #Pickling
        q_table = pickle.load(fp)
else:
    q_table = np.random.uniform(low=-2, high=0, size=(DISCRETE_OS_SIZE + [env.action_space.n]))
print(q_table.shape)
print(q_table)

(20, 20, 3)
[[[-7.53240742e-01 -8.15045646e-01 -1.51088096e+00]
  [-7.57390838e+00 -4.90005799e+00 -6.85065727e+00]
  [-4.28239330e+01 -4.64317950e+01 -4.63791979e+01]
  ...
  [-1.44497650e+00 -1.27341686e-01 -1.74576352e+00]
  [-4.65108777e-01 -8.95860805e-02 -1.71996091e+00]
  [-1.24931005e-01 -1.88223196e+00 -7.70006183e-01]]

 [[-8.69083502e-01 -1.79682836e+00 -1.04324814e+00]
  [-3.80901702e+01 -3.75474884e+01 -3.80009233e+01]
  [-4.71482686e+01 -4.70927041e+01 -4.54978956e+01]
  ...
  [-1.68907936e+00 -1.15445862e+00 -1.78680844e+00]
  [-9.87051365e-01 -1.16424217e+00 -1.16226603e+00]
  [-3.03265671e-01 -5.95276882e-01 -1.56838826e+00]]

 [[-1.14501971e+00 -1.09670117e+00 -1.46494966e+00]
  [-4.07605171e+01 -4.15748579e+01 -4.19538369e+01]
  [-4.86832205e+01 -4.80779995e+01 -4.87174953e+01]
  ...
  [-2.92553078e-01 -1.57994165e-02 -5.58328177e-01]
  [-4.07022288e-01 -1.50811461e+00 -6.83982275e-01]
  [-4.68175456e-01 -1.64706724e+00 -1.91034396e+00]]

 ...

 [[-9.07669691e-01 -1.

In [3]:
def get_discerete_state(state):
    discrete_state = (state-env.observation_space.low)/discrete_obs_win_size
    return tuple(discrete_state.astype(np.int32))

In [4]:
obs,_ = env.reset()
ds = get_discerete_state(obs)

print(ds)

(7, 10)


In [5]:
for episode in range(EPISODES):
    if episode%show_every==0:
        render_mode = "human"
    else:
        render_mode = None
    env = gym.make("MountainCar-v0", render_mode=render_mode)
    
    
    init_state,_ = env.reset()
    discrete_state = get_discerete_state(init_state)
    done = False
    while not done:
        if np.random.random() > epsilon:
            # Get action from Q table
            action = np.argmax(q_table[discrete_state])
        else:
            # Get random action
            action = np.random.randint(0, env.action_space.n)

        obs, reward, terminated, truncated, info = env.step(action)
        new_discrete_state = get_discerete_state(obs)
        done = (terminated or truncated)
        if not done:
            max_future_q = np.max(q_table[new_discrete_state])
            current_q = q_table[discrete_state + (action, )]
            new_q = (1 - LEARNING_RATE) * current_q + LEARNING_RATE * (reward * DISCOUNT + max_future_q)
            q_table[discrete_state+(action,)] = new_q
        elif obs[0] >= env.goal_position:
            q_table[discrete_state + (action,)] = 0
            print(f"Made it on episode {episode}")
        discrete_state = new_discrete_state
    # Decaying is being done every episode if episode number is within decaying range
    if END_EPSILON_DECAYING >= episode >= START_EPSILON_DECAYING:
        epsilon -= epsilon_decay_value
env.close()        

Made it on episode 887
Made it on episode 1170
Made it on episode 1205
Made it on episode 1212


### Save the Q-Table 

In [None]:
with open(q_table_file, "wb") as fp:   #Pickling
    pickle.dump(q_table, fp)

## Let's run the game a few times with the learned Q-Table 

In [None]:
for i in range(10):
    env = gym.make("MountainCar-v0", render_mode='human')
    init_state, _ = env.reset()
    discrete_state = get_discerete_state(init_state)

    done = False
    while not done:
        action = np.argmax(q_table[discrete_state])
        obs, reward, terminated, truncated, info = env.step(action)
        new_discrete_state = get_discerete_state(obs)
        done = (terminated or truncated)
        if not done:
            max_future_q = np.max(q_table[new_discrete_state])
            current_q = q_table[discrete_state + (action, )]
        elif obs[0] >= env.goal_position:
            print(f"Made it on iteration {i}")
        discrete_state = new_discrete_state
    env.close()