# OpenAI Gym CartPole

In [1]:
import gym

import numpy as np
from IPython.display import clear_output
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def plot(frame, rewards):
    """Replace the cell's current output with a fresh reward curve.

    frame   -- value shown in the title as the frame counter.
    rewards -- sequence of per-episode rewards; the full sequence is drawn
               and the title reports the mean of its last ten entries.
    """
    clear_output(wait=True)
    fig = plt.figure(figsize=(20, 5))
    # Left-most panel of a 1x3 grid, same slot as subplot(131).
    ax = fig.add_subplot(1, 3, 1)
    recent_mean = np.mean(rewards[-10:])
    ax.set_title("frame %s. reward: %s" % (frame, recent_mean))
    ax.plot(rewards)
    plt.show()

### Random Action Policy

In [None]:
# Random-action baseline: run CartPole with uniformly sampled actions and
# record the reward collected in each episode.
N_EPISODES = 100   # number of episodes to run
MAX_STEPS = 100    # hard cap on steps taken within one episode
PLOT_EVERY = 200   # redraw the reward curve every this many frames

env = gym.make('CartPole-v0')

all_rewards = []   # one entry per episode, in the order they were played
frame = 0          # total environment steps taken across all episodes

# NOTE(review): reset()/step() are used with the single-value / 4-tuple
# signatures; newer gym releases return (obs, info) and a 5-tuple instead.
# Confirm the installed gym version matches.
try:
    for i_episode in range(N_EPISODES):
        observation = env.reset()
        episode_reward = 0
        for t in range(MAX_STEPS):
            frame += 1
            env.render()
            # random action taken
            action = env.action_space.sample()
            observation, reward, done, info = env.step(action)
            # Use the reward the environment returned instead of assuming
            # a fixed +1 per step.
            episode_reward += reward
            # Plot before the `done` check so a redraw is never skipped when
            # an episode happens to end exactly on a plotting frame. Only
            # completed episodes are shown; skip while there are none yet.
            if frame % PLOT_EVERY == 0 and all_rewards:
                plot(frame, all_rewards)
            if done:
                break
        # Record every episode, including ones cut off by MAX_STEPS before
        # the environment reported `done`.
        all_rewards.append(episode_reward)
finally:
    # Release the environment (and its render window) even if the loop is
    # interrupted or raises.
    env.close()

[ 0.02610482  0.02360697 -0.03798513  0.01484781]
1
[ 0.02657696  0.21925251 -0.03768818 -0.28957374]
0
[ 0.03096201  0.02468768 -0.04347965 -0.00901145]
1
[ 0.03145576  0.22040535 -0.04365988 -0.31508948]
0
[ 0.03586387  0.02593163 -0.04996167 -0.03648881]
0
[ 0.0363825  -0.16843958 -0.05069145  0.24002157]
0
[ 0.03301371 -0.36280213 -0.04589102  0.51629398]
0
[ 0.02575767 -0.55724884 -0.03556514  0.79416976]
0
[ 0.01461269 -0.75186504 -0.01968174  1.07545551]
1
[-4.24611198e-04 -5.56488645e-01  1.82736869e-03  7.76661617e-01]
1
[-0.01155438 -0.36139187  0.0173606   0.48455419]
0
[-0.01878222 -0.55675447  0.02705168  0.78265769]
0
[-0.02991731 -0.75223756  0.04270484  1.08372727]
1
[-0.04496206 -0.55770434  0.06437938  0.80474488]
0
[-0.05611615 -0.75364705  0.08047428  1.11696453]
0
[-0.07118909 -0.94972758  0.10281357  1.43376737]
1
[-0.09018364 -0.75601323  0.13148892  1.17490421]
0
[-0.10530391 -0.95257545  0.154987    1.50574885]
1
[-0.12435541 -0.75963577  0.18510198  1.26519229

### Rule-Based Policy

In [1]:
def get_action(observation):
    """Choose an action from the sign of the pole angle.

    Parameters
    ----------
    observation : indexable of numbers
        Laid out as (cart position, cart velocity, pole angle,
        pole velocity at tip). Only the pole angle at index 2 is read.

    Returns
    -------
    int
        0 (push cart to left) when the pole angle is negative,
        1 (push cart to right) otherwise, including an angle of exactly 0.
    """
    pole_angle = observation[2]
    return 0 if pole_angle < 0 else 1

In [None]:
# Rule-based rollout: run CartPole choosing each action with get_action()
# and record the reward collected in each episode.
N_EPISODES = 100   # number of episodes to run
MAX_STEPS = 100    # hard cap on steps taken within one episode
PLOT_EVERY = 200   # redraw the reward curve every this many frames

env = gym.make('CartPole-v0')

all_rewards = []   # one entry per episode, in the order they were played
frame = 0          # total environment steps taken across all episodes

# NOTE(review): reset()/step() are used with the single-value / 4-tuple
# signatures; newer gym releases return (obs, info) and a 5-tuple instead.
# Confirm the installed gym version matches.
try:
    for i_episode in range(N_EPISODES):
        observation = env.reset()
        episode_reward = 0
        for t in range(MAX_STEPS):
            frame += 1
            env.render()
            # follow our rule
            action = get_action(observation)
            observation, reward, done, info = env.step(action)
            # Use the reward the environment returned instead of assuming
            # a fixed +1 per step.
            episode_reward += reward
            # Plot before the `done` check so a redraw is never skipped when
            # an episode happens to end exactly on a plotting frame. Only
            # completed episodes are shown; skip while there are none yet.
            if frame % PLOT_EVERY == 0 and all_rewards:
                plot(frame, all_rewards)
            if done:
                break
        # Record every episode, including ones cut off by MAX_STEPS before
        # the environment reported `done`.
        all_rewards.append(episode_reward)
finally:
    # Release the environment (and its render window) even if the loop is
    # interrupted or raises.
    env.close()