## Learn to play Breakout

### Requirements

- In the repo root directory, run `pipenv install --dev`
- Or, install the needed packages:

      pip install keras-rl gym_breakout_pygame keras tensorflow-cpu


In [16]:
import numpy as np
import gym
from gym.wrappers import Monitor
from gym_breakout_pygame.wrappers.observation_space import BreakoutN

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory


In [17]:
# Create the Breakout environment, then seed NumPy and the environment with
# the same fixed value.
env = BreakoutN(encode=False)
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n

# Next, we build a very simple model: the observation is flattened, passed
# through two 64-unit ReLU layers, and mapped to one linear output per action.
# The leading (1,) in the input shape matches the window_length=1 used by the
# agent's replay memory.
model = Sequential([
    Flatten(input_shape=(1,) + env.observation_space.shape),
    Dense(64),
    Activation('relu'),
    Dense(64),
    Activation('relu'),
    Dense(nb_actions),
    Activation('linear'),
])

# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_3 (Flatten)          (None, 4)                 0         
_________________________________________________________________
dense_7 (Dense)              (None, 64)                320       
_________________________________________________________________
activation_7 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 64)                4160      
_________________________________________________________________
activation_8 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_9 (Dense)              (None, 3)                 195       
_________________________________________________________________
activation_9 (Activation)    (None, 3)                 0         
Total para

In [18]:
# --- Agent configuration -----------------------------------------------------
# Replay memory and exploration policy are built first, then handed to the DQN
# agent together with the Q-network defined above.
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    nb_steps_warmup=10,
    target_model_update=1e-2,
    policy=policy,
)
optimizer = Adam(lr=1e-3)
dqn.compile(optimizer, metrics=['mae'])

# --- Training ----------------------------------------------------------------
# verbose=2 logs one line per finished episode; rendering is disabled.
dqn.fit(env, nb_steps=30000, visualize=False, verbose=2)

# --- Persist the learned weights ---------------------------------------------
weights_path = 'dqn_{}_weights.h5f'.format("breakout-n")
dqn.save_weights(weights_path, overwrite=True)


Training for 30000 steps ...
   151/30000: episode: 1, duration: 1.470s, episode steps: 151, steps per second: 103, episode reward: 5.000, mean reward: 0.033 [0.000, 5.000], mean action: 0.325 [0.000, 1.000], mean observation: 12.275 [0.000, 47.000], loss: 0.479828, mean_absolute_error: 2.318425, mean_q: 1.703398
   172/30000: episode: 2, duration: 0.116s, episode steps: 21, steps per second: 180, episode reward: 0.000, mean reward: 0.000 [0.000, 0.000], mean action: 0.476 [0.000, 1.000], mean observation: 15.274 [1.000, 47.000], loss: 0.487880, mean_absolute_error: 2.015612, mean_q: 0.596119
   193/30000: episode: 3, duration: 0.116s, episode steps: 21, steps per second: 181, episode reward: 0.000, mean reward: 0.000 [0.000, 0.000], mean action: 0.857 [0.000, 1.000], mean observation: 15.024 [0.000, 47.000], loss: 0.435630, mean_absolute_error: 2.082524, mean_q: 0.867428
   214/30000: episode: 4, duration: 0.112s, episode steps: 21, steps per second: 187, episode reward: 0.000, mean r



In [19]:

# Evaluate for 5 episodes, wrapping the environment in gym's Monitor so the
# run is recorded into the current working directory (".").
# NOTE(review): force=True lets Monitor overwrite its own earlier output in
# that directory -- confirm this is the intended location.
monitored_env = Monitor(env, ".", force=True)
try:
    # Keep the returned History in a variable so its repr is not echoed as
    # the cell output.
    test_history = dqn.test(monitored_env, nb_episodes=5, visualize=False)
finally:
    # close() flushes the monitor's data to disk. It also closes the wrapped
    # `env`, so re-create the environment before evaluating again.
    monitored_env.close()

Testing for 5 episodes ...
Episode 1: reward: 45.000, steps: 1819
Episode 2: reward: 45.000, steps: 1819
Episode 3: reward: 45.000, steps: 1819
Episode 4: reward: 45.000, steps: 1819
Episode 5: reward: 45.000, steps: 1819




<keras.callbacks.History at 0x7fa6382a82e8>


Now check the `examples/` folder: you should be able to see the recordings of the learned policy there (the `Monitor` wrapper above writes them to the current working directory, `.`).
