## Learn to play Breakout

### Requirements

- In the repo root directory, run `pipenv install --dev`.
- Alternatively, install the required packages directly:

      pip install keras-rl gym_breakout_pygame keras tensorflow-cpu


In [5]:
from gym.wrappers import Monitor
from gym_breakout_pygame.wrappers.normal_space import BreakoutNMultiDiscrete
from keras.layers import Dense, Activation, Flatten
from keras.models import Sequential
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy
import numpy as np


In [6]:
# Create the Breakout environment and seed NumPy's global RNG and the
# environment with the same value.
SEED = 123
env = BreakoutNMultiDiscrete()
np.random.seed(SEED)
env.seed(SEED)
nb_actions = env.action_space.n

# Next, we build a very simple model: flatten the observation, pass it through
# two 64-unit ReLU layers, and emit one linear output per action.
# The leading (1,) in the input shape presumably corresponds to the
# window_length=1 used by the agent's memory in the training cell.
model = Sequential([
    Flatten(input_shape=(1,) + env.observation_space.shape),
    Dense(64),
    Activation('relu'),
    Dense(64),
    Activation('relu'),
    Dense(nb_actions),
    Activation('linear'),
])

# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
model.summary()


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 4)                 0         
_________________________________________________________________
dense_4 (Dense)              (None, 64)                320       
_________________________________________________________________
activation_4 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 64)                4160      
_________________________________________________________________
activation_5 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 3)                 195       
_________________________________________________________________
activation_6 (Activation)    (None, 3)                

In [None]:
# Configure the RL agent: replay memory, Boltzmann action-selection policy,
# and the DQN agent wrapping the model built in the previous cell.
memory = SequentialMemory(window_length=1, limit=50000)
policy = BoltzmannQPolicy()
agent_options = dict(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    policy=policy,
    nb_steps_warmup=10,
    # NOTE(review): a value below 1 is presumably treated by keras-rl as a
    # soft target-network update rate -- confirm against the keras-rl docs.
    target_model_update=1e-2,
)
dqn = DQNAgent(**agent_options)

# Compile with Adam and track mean absolute error during training.
optimizer = Adam(lr=1e-3)
dqn.compile(optimizer, metrics=['mae'])

# Learn: train on the environment for 50000 steps without rendering;
# verbose=2 produces the per-episode log lines shown in the cell output.
dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)

# Save the trained weights, replacing any existing file of the same name.
weights_path = 'dqn_{}_weights.h5f'.format("breakout-n")
dqn.save_weights(weights_path, overwrite=True)


Training for 50000 steps ...
   151/50000: episode: 1, duration: 1.793s, episode steps: 151, steps per second: 84, episode reward: 5.000, mean reward: 0.033 [0.000, 5.000], mean action: 1.073 [0.000, 2.000], mean observation: 12.233 [2.000, 47.000], loss: 0.127331, mae: 0.433560, mean_q: -0.177764
   172/50000: episode: 2, duration: 0.151s, episode steps: 21, steps per second: 139, episode reward: 0.000, mean reward: 0.000 [0.000, 0.000], mean action: 0.952 [0.000, 2.000], mean observation: 15.452 [2.000, 47.000], loss: 0.060645, mae: 0.358177, mean_q: -0.180976
   453/50000: episode: 3, duration: 1.962s, episode steps: 281, steps per second: 143, episode reward: 10.000, mean reward: 0.036 [0.000, 5.000], mean action: 1.078 [0.000, 2.000], mean observation: 11.617 [0.000, 47.000], loss: 0.054258, mae: 0.315772, mean_q: -0.111330
   604/50000: episode: 4, duration: 1.099s, episode steps: 151, steps per second: 137, episode reward: 5.000, mean reward: 0.033 [0.000, 5.000], mean action: 1



In [None]:

# Evaluate for 5 episodes, wrapping the environment in gym's Monitor so the
# episodes are recorded. Monitor output goes to the current working
# directory ("."); force=True lets it reuse a directory that already holds
# monitor files from an earlier run.
monitored_env = Monitor(env, ".", force=True)
try:
    dqn.test(monitored_env, nb_episodes=5, visualize=False)
finally:
    # Close explicitly so the last episode's recording is flushed to disk
    # rather than relying on garbage collection of a temporary wrapper.
    # NOTE(review): closing the wrapper presumably also closes the wrapped
    # `env`; re-run the environment cell before reusing it.
    monitored_env.close()

In [None]:

Now check the `examples/` folder: you should see the recordings of the learned policy there. (The `Monitor` wrapper above writes them to the notebook's working directory, `.`.)
