### Reinforcement learning using OpenAI Gym

- Control theory problems from the classic RL literature.
- Links: 
    - *https://gym.openai.com/docs/*
    - *https://gym.openai.com/envs/#classic_control*
    - *https://towardsdatascience.com/reinforcement-learning-with-openai-d445c2c687d2*


In [1]:
import gym

In [2]:
# Build the CartPole environment with OpenAI gym.
# Each gym environment exposes an action_space and an observation_space.
env_cp = gym.make('CartPole-v0')

# Print the two spaces and the observation bounds, one per line.
print(
    # Discrete(2): two valid actions, 0 or 1 - applying force to the
    # left or to the right to balance the pole.
    env_cp.action_space,
    # Box(4,): a 4-dimensional array; the box's bounds delimit the
    # valid observations.
    env_cp.observation_space,
    # Upper bounds of cart_position, cart_velocity, pole_angle and
    # pole_velocity.
    env_cp.observation_space.high,
    # Lower bounds of the same four quantities.
    env_cp.observation_space.low,
    sep='\n',
)

Discrete(2)
Box(4,)
[4.8000002e+00 3.4028235e+38 4.1887903e-01 3.4028235e+38]
[-4.8000002e+00 -3.4028235e+38 -4.1887903e-01 -3.4028235e+38]


In [3]:
import gym

def main(env_name='CartPole-v0', episodes=20, max_steps=100):
    """Run random-action episodes in a gym environment, printing every step.

    For each episode the environment is reset, then stepped with actions
    sampled from its action space until it reports ``done`` or
    ``max_steps`` steps have been taken. Each step's
    ``(observation, reward, done, info)`` is printed, and a summary line
    is printed when an episode finishes early.

    Args:
        env_name: Environment id passed to ``gym.make``.
        episodes: Number of episodes to run.
        max_steps: Maximum number of steps taken per episode.
    """
    env = gym.make(env_name)
    try:
        for _ in range(episodes):
            # The initial observation is not used; only the reset matters.
            env.reset()

            for step in range(max_steps):
                env.render()
                action = env.action_space.sample()  # take a random action
                observation, reward, done, info = env.step(action)
                print(observation, reward, done, info)

                if done:
                    print("Finished after {} timesteps".format(step + 1))
                    break
    finally:
        # Always close the environment so whatever render() opened is
        # cleaned up, even if an episode raises.
        env.close()


if __name__ == "__main__":
    main()

[ 0.01382191 -0.15482845  0.03193579  0.32155089] 1.0 False {}
[0.01072534 0.03982451 0.0383668  0.03910786] 1.0 False {}
[ 0.01152183  0.23437588  0.03914896 -0.24122736] 1.0 False {}
[0.01620935 0.0387172  0.03432441 0.06354264] 1.0 False {}
[ 0.01698369  0.23333064  0.03559527 -0.21811618] 1.0 False {}
[ 0.0216503   0.42792615  0.03123294 -0.49936175] 1.0 False {}
[ 0.03020883  0.62259417  0.02124571 -0.78204038] 1.0 False {}
[ 0.04266071  0.81741776  0.0056049  -1.06796405] 1.0 False {}
[ 0.05900907  1.01246512 -0.01575438 -1.35888266] 1.0 False {}
[ 0.07925837  1.20778105 -0.04293203 -1.65645179] 1.0 False {}
[ 0.10341399  1.4033767  -0.07606107 -1.96219306] 1.0 False {}
[ 0.13148152  1.20913801 -0.11530493 -1.69401781] 1.0 False {}
[ 0.15566428  1.0155205  -0.14918529 -1.43934463] 1.0 False {}
[ 0.17597469  1.21213215 -0.17797218 -1.77468427] 1.0 False {}
[ 0.20021734  1.01940702 -0.21346587 -1.54221084] 1.0 True {}
Finished after 15 timesteps
[-0.04291758 -0.19054267 -0.04384089

[ 0.05339781  1.15998904 -0.1411701  -1.82912332] 1.0 False {}
[ 0.07659759  1.35636506 -0.17775257 -2.16212333] 1.0 False {}
[ 0.10372489  1.16337405 -0.22099504 -1.9291826 ] 1.0 True {}
Finished after 10 timesteps
[ 0.02379324  0.21542018 -0.03364288 -0.34301239] 1.0 False {}
[ 0.02810165  0.4110042  -0.04050312 -0.64611156] 1.0 False {}
[ 0.03632173  0.21646934 -0.05342536 -0.36645324] 1.0 False {}
[ 0.04065112  0.02214568 -0.06075442 -0.09108322] 1.0 False {}
[ 0.04109403  0.21808345 -0.06257608 -0.40229844] 1.0 False {}
[ 0.0454557   0.02390228 -0.07062205 -0.12998214] 1.0 False {}
[ 0.04593375 -0.17014068 -0.0732217   0.13961113] 1.0 False {}
[ 0.04253093 -0.36414169 -0.07042947  0.40832497] 1.0 False {}
[ 0.0352481  -0.55819805 -0.06226297  0.67799871] 1.0 False {}
[ 0.02408414 -0.36226887 -0.048703    0.36638094] 1.0 False {}
[ 0.01683876 -0.16648988 -0.04137538  0.05874765] 1.0 False {}
[ 0.01350896 -0.36099492 -0.04020043  0.33809461] 1.0 False {}
[ 0.00628906 -0.55552245 -0.

[ 0.03727599  0.73956029 -0.10300782 -1.26995223] 1.0 False {}
[ 0.05206719  0.93583545 -0.12840687 -1.59303518] 1.0 False {}
[ 0.0707839   1.13222609 -0.16026757 -1.92284381] 1.0 False {}
[ 0.09342842  0.9391476  -0.19872445 -1.68385485] 1.0 False {}
[ 0.11221138  0.74680362 -0.23240154 -1.45905831] 1.0 True {}
Finished after 20 timesteps
[-0.01542636  0.14911407  0.01875544 -0.24772357] 1.0 False {}
[-0.01244408  0.34396321  0.01380097 -0.5344321 ] 1.0 False {}
[-0.00556481  0.14864993  0.00311232 -0.23743272] 1.0 False {}
[-0.00259182 -0.04651635 -0.00163633  0.0562303 ] 1.0 False {}
[-0.00352214 -0.2416148  -0.00051172  0.3483965 ] 1.0 False {}
[-0.00835444 -0.04648558  0.00645621  0.05555225] 1.0 False {}
[-0.00928415  0.14854321  0.00756725 -0.23508672] 1.0 False {}
[-0.00631329 -0.04668603  0.00286552  0.05997352] 1.0 False {}
[-0.00724701  0.14839471  0.00406499 -0.23180394] 1.0 False {}
[-0.00427911  0.34345835 -0.00057109 -0.52320187] 1.0 False {}
[ 0.00259005  0.53858833 -0.

[ 0.09925408  0.82075888 -0.16936469 -1.42763897] 1.0 False {}
[ 0.11566926  1.01751939 -0.19791747 -1.76811238] 1.0 False {}
[ 0.13601965  1.21424996 -0.23327972 -2.11525604] 1.0 True {}
Finished after 22 timesteps
