## Learn to play Breakout

### Requirements

- In the repo root directory, run `pipenv install --dev`.
- Alternatively, install the required packages directly with `pip`:

      pip install keras-rl gym_breakout_pygame keras tensorflow-cpu


In [3]:
from gym.wrappers import Monitor
from gym_breakout_pygame.wrappers.normal_space import BreakoutNMultiDiscrete
from keras.layers import Dense, Activation, Flatten
from keras.models import Sequential
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy
import numpy as np


pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


Using TensorFlow backend.


In [5]:
# Create the Breakout environment and seed both NumPy and the environment
# with the same fixed value so that runs are repeatable.
env = BreakoutNMultiDiscrete()
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

# Next, we build a very simple model: the observation is flattened, passed
# through two 64-unit ReLU hidden layers, and mapped by a linear output layer
# to one value per action.
model = Sequential()
# The leading 1 in the input shape presumably corresponds to the replay
# memory's window_length of 1 -- TODO confirm against the keras-rl docs.
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
# `summary()` prints the table itself and returns None, so wrapping it in
# `print()` would only append a stray "None" line to the output.
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 4)                 0         
_________________________________________________________________
dense_4 (Dense)              (None, 64)                320       
_________________________________________________________________
activation_4 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 64)                4160      
_________________________________________________________________
activation_5 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 3)                 195       
_________________________________________________________________
activation_6 (Activation)    (None, 3)                 0         
Total para

In [9]:
# Set up the DQN agent: exploration policy, replay memory, then the agent itself.
policy = BoltzmannQPolicy()
memory = SequentialMemory(window_length=1, limit=50000)
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    policy=policy,
    nb_steps_warmup=10,
    target_model_update=1e-2,
)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Train the agent on the environment without rendering; verbose=2 is the
# logging level passed to keras-rl.
dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)

# Persist the learned weights, replacing any file left by a previous run.
weights_path = 'dqn_{}_weights.h5f'.format("breakout-n")
dqn.save_weights(weights_path, overwrite=True)


Training for 40000 steps ...
   151/40000: episode: 1, duration: 1.333s, episode steps: 151, steps per second: 113, episode reward: 5.000, mean reward: 0.033 [0.000, 5.000], mean action: 1.013 [0.000, 2.000], mean observation: 12.293 [0.000, 47.000], loss: 0.028942, mean_absolute_error: 6.766470, mean_q: 10.180932
   570/40000: episode: 2, duration: 1.926s, episode steps: 419, steps per second: 218, episode reward: 15.000, mean reward: 0.036 [0.000, 5.000], mean action: 1.086 [0.000, 2.000], mean observation: 11.298 [0.000, 47.000], loss: 0.070822, mean_absolute_error: 6.393366, mean_q: 9.582656
   851/40000: episode: 3, duration: 1.333s, episode steps: 281, steps per second: 211, episode reward: 10.000, mean reward: 0.036 [0.000, 5.000], mean action: 0.961 [0.000, 2.000], mean observation: 10.996 [0.000, 47.000], loss: 0.082614, mean_absolute_error: 6.643369, mean_q: 9.987751
  1002/40000: episode: 4, duration: 0.697s, episode steps: 151, steps per second: 217, episode reward: 5.000, 



In [10]:

# Evaluate for 5 episodes.
# Evaluation runs on a fresh, seeded environment instead of re-wrapping the
# training `env` in a throwaway Monitor. The recorded output of this cell
# ("display Surface quit") suggests a pygame display that had already been
# shut down was being reused -- NOTE(review): presumably because closing a
# Monitor also closes the environment it wraps; confirm the root cause.
eval_env = BreakoutNMultiDiscrete()
eval_env.seed(123)
monitor = Monitor(eval_env, ".", force=True)
try:
    dqn.test(monitor, nb_episodes=5, visualize=False)
finally:
    # Close explicitly so the monitor is shut down at a well-defined point,
    # even if testing fails, rather than whenever it is garbage-collected.
    monitor.close()

Testing for 5 episodes ...




error: display Surface quit


Now check the `examples/` folder: you should see the recordings of the learned policy there.
