-
Notifications
You must be signed in to change notification settings - Fork 6.4k
/
Copy pathgym_review.py
65 lines (47 loc) · 1.44 KB
/
gym_review.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# https://deeplearningcourses.com/c/cutting-edge-artificial-intelligence
import gym
import numpy as np
import matplotlib.pyplot as plt

# This script relies on the pre-0.20 Gym API (env.reset() returns just the
# observation; env.step() returns a 4-tuple), so refuse to run on newer
# releases. NOTE(review): the check previously used `>= 19`, which rejected
# gym 0.19.x even though the message below says 0.19.0 or earlier is fine;
# `> 19` makes the condition agree with the message.
# (Assumes a "0.x.y" version string — gym never shipped a 1.x at the time.)
gym_minor_version = int(gym.__version__.split('.')[1])
if gym_minor_version > 19:
  exit("Please install OpenAI Gym 0.19.0 or earlier")
def get_action(s, w):
  """Linear policy: return action 1 when the state-weight projection is
  positive, otherwise action 0."""
  return int(s.dot(w) > 0)
def play_one_episode(env, params):
  """Run one episode with fixed linear-policy weights `params`.

  Returns the total (undiscounted) reward collected, capped at 10000 steps.
  Uses the old Gym API: reset() -> obs, step() -> (obs, reward, done, info).
  """
  state = env.reset()
  total_reward = 0
  # at most 10000 environment steps, stopping early once the episode ends
  for _ in range(10000):
    state, reward, done, _info = env.step(get_action(state, params))
    total_reward += reward
    if done:
      break
  return total_reward
def play_multiple_episodes(env, T, params):
  """Evaluate weights `params` over T episodes; print and return the mean
  episode reward."""
  rewards = np.array([play_one_episode(env, params) for _ in range(T)])
  avg_reward = rewards.mean()
  print("avg reward:", avg_reward)
  return avg_reward
def random_search(env):
  """Random search over linear-policy weights.

  Samples 100 candidate weight vectors uniformly from [-1, 1)^4, scores
  each by its average reward over 100 episodes, and keeps the best.

  Returns:
    (episode_rewards, params): the 100 per-candidate average rewards, and
    the best-scoring weight vector.
  """
  episode_rewards = []
  # was `best = 0`: if every candidate averaged <= 0 reward, `params`
  # stayed None and the caller's final evaluation run crashed. Starting at
  # -inf guarantees the first candidate is always recorded.
  best = float('-inf')
  params = None
  for t in range(100):
    new_params = np.random.random(4)*2 - 1
    avg_reward = play_multiple_episodes(env, 100, new_params)
    episode_rewards.append(avg_reward)
    if avg_reward > best:
      params = new_params
      best = avg_reward
  return episode_rewards, params
def _main():
  """Run the random search on CartPole-v0, plot the per-candidate average
  rewards, then re-evaluate the best weights over 100 more episodes."""
  env = gym.make('CartPole-v0')
  search_rewards, best_params = random_search(env)
  plt.plot(search_rewards)
  plt.show()
  # play a final set of episodes
  print("***Final run with final weights***")
  play_multiple_episodes(env, 100, best_params)

if __name__ == '__main__':
  _main()