# Mountain Car

Q Learning to play Mountain Car, using q table. Based on the tutorial by  sentdex [here](https://www.youtube.com/watch?v=yMk_XtIEzH8&list=PLQVvvaa0QuDezJFIOU5wDdfy4e9vdnx-7). Code updated from the tutorial to comply with changes in the latest version of OpenAI gym

In [1]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [6]:
import gym
import numpy as np
import pickle
import os.path

env = gym.make("MountainCar-v0",render_mode='human')
#Discretize the observation space to make it manageable 
DISCRETE_OS_SIZE = [20] * len((env.observation_space.high))

# Just like any DL
LEARNING_RATE = 0.1

# A measure of how future rewards are valued 
DISCOUNT = 0.95

EPISODES = 5000 #0

show_every = 100
discrete_obs_win_size = (env.observation_space.high - env.observation_space.low) / DISCRETE_OS_SIZE
q_table_file = "mountain_car_q_table.pkl"
if os.path.isfile(q_table_file):
    with open(q_table_file, "rb") as fp:   #Pickling
        q_table = pickle.load(fp)
else:
    q_table = np.random.uniform(low=-2, high=0, size=(DISCRETE_OS_SIZE + [env.action_space.n]))
print(q_table.shape)
print(q_table)

(20, 20, 3)
[[[-0.75324074 -0.81504565 -1.51088096]
  [-0.80147867 -0.85887103 -0.16999227]
  [-0.19877145 -0.31998135 -0.80462146]
  ...
  [-1.4449765  -0.12734169 -1.74576352]
  [-0.46510878 -0.08958608 -1.71996091]
  [-0.12493101 -1.88223196 -0.77000618]]

 [[-0.8690835  -1.79682836 -1.04324814]
  [-0.60512106 -0.39698977 -1.44613054]
  [-1.68504171 -1.63493646 -0.77641199]
  ...
  [-1.68907936 -1.15445862 -1.78680844]
  [-0.98705137 -1.16424217 -1.16226603]
  [-0.30326567 -0.59527688 -1.56838826]]

 [[-1.14501971 -0.56630539 -0.87542822]
  [-1.47824842 -0.92931215 -0.72224223]
  [-0.2639277  -1.47197087 -0.98570558]
  ...
  [-0.29255308 -0.01579942 -0.55832818]
  [-0.40702229 -1.50811461 -0.68398227]
  [-0.46817546 -1.64706724 -1.91034396]]

 ...

 [[-0.90766969 -0.16628387 -0.254933  ]
  [-0.37304599 -1.63205892 -1.86523563]
  [-1.12832657 -1.8264033  -1.38019099]
  ...
  [-0.36055478 -1.7476436  -0.61293489]
  [-0.88279645 -1.00797498 -0.60685957]
  [-1.46897767 -0.35385577 -0.37

In [7]:
def get_discerete_state(state):
    discrete_state = (state-env.observation_space.low)/discrete_obs_win_size
    return tuple(discrete_state.astype(np.int32))

In [8]:
obs,_ = env.reset()
ds = get_discerete_state(obs)

print(ds)

(7, 10)


In [9]:
for episode in range(EPISODES):
    if episode%show_every==0:
        render_mode = "human"
    else:
        render_mode = None
    env = gym.make("MountainCar-v0", render_mode=render_mode)
    
    
    init_state,_ = env.reset()
    discrete_state = get_discerete_state(init_state)
    done = False
    while not done:
        action = np.argmax(q_table[discrete_state])
        obs, reward, terminated, truncated, info = env.step(action)
        new_discrete_state = get_discerete_state(obs)
        done = (terminated or truncated)
        if not done:
            max_future_q = np.max(q_table[new_discrete_state])
            current_q = q_table[discrete_state + (action, )]
            new_q = (1 - LEARNING_RATE) * current_q + LEARNING_RATE * (reward * DISCOUNT + max_future_q)
            q_table[discrete_state+(action,)] = new_q
        elif obs[0] >= env.goal_position:
            q_table[discrete_state + (action,)] = 0
            print(f"Made it on episode {episode}")
        discrete_state = new_discrete_state
env.close()        

  if not isinstance(terminated, (bool, np.bool8)):


Made it on episode 295
Made it on episode 401
Made it on episode 493
Made it on episode 543
Made it on episode 545
Made it on episode 555
Made it on episode 558
Made it on episode 584
Made it on episode 590
Made it on episode 641
Made it on episode 643
Made it on episode 647
Made it on episode 703
Made it on episode 721
Made it on episode 723
Made it on episode 725
Made it on episode 727
Made it on episode 729
Made it on episode 731
Made it on episode 826
Made it on episode 839
Made it on episode 843
Made it on episode 846
Made it on episode 855
Made it on episode 857
Made it on episode 863
Made it on episode 878
Made it on episode 882
Made it on episode 883
Made it on episode 886
Made it on episode 892
Made it on episode 894
Made it on episode 895
Made it on episode 941
Made it on episode 946
Made it on episode 947
Made it on episode 949
Made it on episode 951
Made it on episode 952
Made it on episode 954
Made it on episode 955
Made it on episode 956
Made it on episode 957
Made it on 

### Save the Q-Table 

In [10]:
with open(q_table_file, "wb") as fp:   #Pickling
    pickle.dump(q_table, fp)

## Let's run the game a few times with the learned Q-Table 

In [21]:
for i in range(10):
    env = gym.make("MountainCar-v0", render_mode='human')
    init_state, _ = env.reset()
    discrete_state = get_discerete_state(init_state)

    done = False
    while not done:
        action = np.argmax(q_table[discrete_state])
        obs, reward, terminated, truncated, info = env.step(action)
        new_discrete_state = get_discerete_state(obs)
        done = (terminated or truncated)
        if not done:
            max_future_q = np.max(q_table[new_discrete_state])
            current_q = q_table[discrete_state + (action, )]
        elif obs[0] >= env.goal_position:
            print(f"Made it on iteration {i}")
        discrete_state = new_discrete_state
    env.close()

  if not isinstance(terminated, (bool, np.bool8)):


Made it on iteration 0
Made it on iteration 1
Made it on iteration 2
Made it on iteration 3
Made it on iteration 4
