## Learn to play Breakout

### Requirements

- [install `keras-rl`](https://github.com/keras-rl/keras-rl#installation)

      pip install keras-rl
      
- install the `gym_breakout_pygame` package

      pip install gym_breakout_pygame
      

In [1]:
import numpy as np
import gym
from gym_breakout_pygame.wrappers.observation_space import BreakoutN

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory


pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


Using TensorFlow backend.


In [2]:
# Create the Breakout environment (the wrapper is asked not to encode its
# observations) and seed both NumPy and the environment with the same value
# to make runs more repeatable.
SEED = 123
env = BreakoutN(encode=False)
np.random.seed(SEED)
env.seed(SEED)

# One network output (Q-value) per discrete action of the environment.
nb_actions = env.action_space.n

# Next, we build a very simple model: the observation is flattened, passed
# through two 64-unit ReLU layers and mapped to one linear output per action.
# The leading (1,) axis of the input shape is an extra length-1 dimension in
# front of the observation shape; Flatten removes it again.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))

# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
model.summary()


W0629 23:48:49.540298 140024841986176 deprecation_wrapper.py:119] From /home/marcofavorito/.virtualenvs/gym-breakout-pygame-7UQzWS9l/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0629 23:48:49.551870 140024841986176 deprecation_wrapper.py:119] From /home/marcofavorito/.virtualenvs/gym-breakout-pygame-7UQzWS9l/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0629 23:48:49.561582 140024841986176 deprecation_wrapper.py:119] From /home/marcofavorito/.virtualenvs/gym-breakout-pygame-7UQzWS9l/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 4)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                320       
_________________________________________________________________
activation_1 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 64)                4160      
_________________________________________________________________
activation_2 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 195       
_________________________________________________________________
activation_3 (Activation)    (None, 3)                 0         
Total para

In [4]:
# Configure and compile the RL agent.
# window_length=1 mirrors the leading (1,) axis of the model's input shape;
# the memory is capped at 50000 entries.
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Learn. Rendering is disabled during training; verbose=2 produces the
# per-episode log lines.
dqn.fit(env, nb_steps=30000, visualize=False, verbose=2)

# Save the learned weights, replacing any file left over from a previous run.
dqn.save_weights('dqn_{}_weights.h5f'.format("breakout-n"), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes, rendering the game.
# The trailing semicolon keeps the returned History object from being
# echoed as the cell's output.
dqn.test(env, nb_episodes=5, visualize=True);

Training for 10000 steps ...




  151/10000: episode: 1, duration: 1.291s, episode steps: 151, steps per second: 117, episode reward: 5.000, mean reward: 0.033 [0.000, 5.000], mean action: 1.265 [0.000, 2.000], mean observation: 12.472 [0.000, 47.000], loss: 0.139938, mean_absolute_error: 6.834894, mean_q: 10.322103
  302/10000: episode: 2, duration: 0.695s, episode steps: 151, steps per second: 217, episode reward: 5.000, mean reward: 0.033 [0.000, 5.000], mean action: 1.212 [0.000, 2.000], mean observation: 13.139 [2.000, 47.000], loss: 0.302389, mean_absolute_error: 6.738727, mean_q: 10.205843
  453/10000: episode: 3, duration: 0.721s, episode steps: 151, steps per second: 209, episode reward: 5.000, mean reward: 0.033 [0.000, 5.000], mean action: 1.166 [0.000, 2.000], mean observation: 12.513 [0.000, 47.000], loss: 0.212746, mean_absolute_error: 6.262754, mean_q: 9.478497
 1004/10000: episode: 4, duration: 2.522s, episode steps: 551, steps per second: 218, episode reward: 20.000, mean reward: 0.036 [0.000, 5.000]

 7620/10000: episode: 30, duration: 0.839s, episode steps: 151, steps per second: 180, episode reward: 5.000, mean reward: 0.033 [0.000, 5.000], mean action: 0.940 [0.000, 2.000], mean observation: 12.500 [0.000, 47.000], loss: 0.099431, mean_absolute_error: 5.778156, mean_q: 8.661280
 7771/10000: episode: 31, duration: 1.003s, episode steps: 151, steps per second: 151, episode reward: 5.000, mean reward: 0.033 [0.000, 5.000], mean action: 1.026 [0.000, 2.000], mean observation: 12.709 [2.000, 47.000], loss: 0.099584, mean_absolute_error: 5.764544, mean_q: 8.633820
 7924/10000: episode: 32, duration: 0.724s, episode steps: 153, steps per second: 211, episode reward: 5.000, mean reward: 0.033 [0.000, 5.000], mean action: 0.876 [0.000, 2.000], mean observation: 11.891 [0.000, 47.000], loss: 0.096048, mean_absolute_error: 5.745294, mean_q: 8.614525
 8213/10000: episode: 33, duration: 1.584s, episode steps: 289, steps per second: 182, episode reward: 10.000, mean reward: 0.035 [0.000, 5.00

<keras.callbacks.History at 0x7f59802d95f8>