In [8]:
import gym

In [9]:
env = gym.make("SpaceInvaders-v0")

In [10]:
episodes = 10

for episode in range(1,episodes):
    state = env.reset()
    done = False
    score = 0
    
    while not done:
        env.render()
        state, reward, done, info = env.step(env.action_space.sample())
        score += reward
    print('Episode: {}\nScore: {}'.format(episode,score))

env.close()

Episode: 1
Score: 195.0
Episode: 2
Score: 320.0
Episode: 3
Score: 290.0
Episode: 4
Score: 145.0
Episode: 5
Score: 320.0
Episode: 6
Score: 110.0
Episode: 7
Score: 155.0
Episode: 8
Score: 5.0
Episode: 9
Score: 180.0


In [12]:
#Import Packages
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras.optimizers import Adam

In [13]:
def build_model(height, width, channels, actions):
    model = Sequential()
    model.add(Conv2D(32, (8,8), strides=(4,4), activation="relu", input_shape=(3, height, width, channels)))
    model.add(Conv2D(64,(4,4), strides=(2,2), activation="relu"))
    model.add(Flatten())
    model.add(Dense(512, activation="relu"))
    model.add(Dense(256, activation="relu"))
    model.add(Dense(actions, activation="linear"))
    return model

In [14]:
height, width, channels = env.observation_space.shape
actions = env.action_space.n

In [15]:
model = build_model(height, width, channels, actions)

In [16]:
#Import keras-rl2 reinforcment learning functions
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [26]:
def build_agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1, value_min=1, value_test=2, nb_steps = 10000)
    memory = SequentialMemory(limit = 2000, window_length=3)
    dqn = DQNAgent(model=model, memory=memory, policy=policy,
                    enable_dueling_network=True, dueling_type='avg',
                    nb_actions=actions, nb_steps_warmup=1000)
    return dqn

In [27]:
dqn = build_agent(model, actions)

In [28]:
dqn.compile(Adam(lr=0.0001))

In [31]:
dqn.fit(env, nb_steps=40000, visualize=False, verbose=1)

Training for 40000 steps ...
Interval 1 (0 steps performed)
14 episodes - episode_reward: 173.929 [20.000, 515.000] - loss: 1.139 - mean_q: 12.489 - mean_eps: 1.000 - ale.lives: 2.022

Interval 2 (10000 steps performed)
15 episodes - episode_reward: 163.667 [45.000, 445.000] - loss: 0.904 - mean_q: 14.082 - mean_eps: 1.000 - ale.lives: 2.098

Interval 3 (20000 steps performed)
12 episodes - episode_reward: 223.333 [75.000, 465.000] - loss: 0.807 - mean_q: 14.042 - mean_eps: 1.000 - ale.lives: 2.177

Interval 4 (30000 steps performed)
done, took 22675.582 seconds


<tensorflow.python.keras.callbacks.History at 0x7f83000eca00>

In [32]:
scores = dqn.test(env, nb_episodes=10, visualize=True)
print(np.mean(scores.history['episode_reward']))

Testing for 10 episodes ...
Episode 1: reward: 285.000, steps: 1400
Episode 2: reward: 180.000, steps: 866
Episode 3: reward: 265.000, steps: 1091
Episode 4: reward: 180.000, steps: 823
Episode 5: reward: 160.000, steps: 802
Episode 6: reward: 180.000, steps: 811
Episode 7: reward: 140.000, steps: 849
Episode 8: reward: 235.000, steps: 976
Episode 9: reward: 155.000, steps: 815
Episode 10: reward: 180.000, steps: 815
196.0


In [None]:
dqn.save_weights('models/dqn.h5')