## Learn to play at Breakout 

### Requirements

- In the repo root directory, do `pipenv install --dev` 
- Or, install the needed packages:

      pip install keras-rl gym_breakout_pygame keras tensorflow-cpu


In [1]:
from gym.wrappers import Monitor
from gym_breakout_pygame.wrappers.normal_space import BreakoutNMultiDiscrete
from keras.layers import Dense, Activation, Flatten
from keras.models import Sequential
from keras.optimizers import Adam
from rl.agents.dqn import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import BoltzmannQPolicy
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
import numpy as np


pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html


Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
env = BreakoutNMultiDiscrete()
np.random.seed(123)
env.seed(123)
nb_actions = env.action_space.n
print(env.observation_space.shape)
window_length = 4
# Next, we build a very simple model.
model = Sequential()
model.add(Flatten(input_shape=(window_length,) + env.observation_space.shape))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())


(5,)
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 5)                 0         
_________________________________________________________________
dense_4 (Dense)              (None, 64)                384       
_________________________________________________________________
activation_4 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_5 (Dense)              (None, 64)                4160      
_________________________________________________________________
activation_5 (Activation)    (None, 64)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 3)                 195       
_________________________________________________________________
activation_6 (Activation)    (None, 3)           

In [4]:
# Configure and compile the RL agent
nb_steps = 60000
memory = SequentialMemory(limit=50000, window_length=window_length)
# policy = BoltzmannQPolicy()
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.0, nb_steps=nb_steps * 0.4)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# learn
dqn.fit(env, nb_steps=nb_steps, visualize=False, verbose=2)

# save 
dqn.save_weights('dqn_{}_weights.h5f'.format("breakout-n"), overwrite=True)


Training for 50000 steps ...

   106/50000: episode: 1, duration: 1.571s, episode steps: 106, steps per second: 67, episode reward: 5.000, mean reward: 0.047 [0.000, 5.000], mean action: 1.208 [0.000, 2.000], mean observation: 8.772 [0.000, 47.000], loss: 0.310973, mae: 0.631189, mean_q: -0.047010
   313/50000: episode: 2, duration: 1.383s, episode steps: 207, steps per second: 150, episode reward: 10.000, mean reward: 0.048 [0.000, 5.000], mean action: 1.019 [0.000, 2.000], mean observation: 8.131 [0.000, 47.000], loss: 0.167612, mae: 0.392361, mean_q: 0.022715
   328/50000: episode: 3, duration: 0.101s, episode steps: 15, steps per second: 149, episode reward: 0.000, mean reward: 0.000 [0.000, 0.000], mean action: 1.067 [0.000, 2.000], mean observation: 11.307 [1.000, 47.000], loss: 0.101809, mae: 0.352385, mean_q: 0.123184
   343/50000: episode: 4, duration: 0.106s, episode steps: 15, steps per second: 142, episode reward: 0.000, mean reward: 0.000 [0.000, 0.000], mean action: 1.000



In [11]:

# Evaluate for 5 episodes.
dqn.test(Monitor(env, ".", force=True), nb_episodes=5, visualize=False)

Testing for 5 episodes ...


Exception ignored in: <function Monitor.__del__ at 0x7fc077ecb048>
Traceback (most recent call last):
  File "/home/marcofavorito/.virtualenvs/gym-breakout-pygame-7UQzWS9l/lib/python3.7/site-packages/gym/wrappers/monitor.py", line 229, in __del__
    self.close()
  File "/home/marcofavorito/.virtualenvs/gym-breakout-pygame-7UQzWS9l/lib/python3.7/site-packages/gym/wrappers/monitor.py", line 140, in close
    self._close_video_recorder()
  File "/home/marcofavorito/.virtualenvs/gym-breakout-pygame-7UQzWS9l/lib/python3.7/site-packages/gym/wrappers/monitor.py", line 212, in _close_video_recorder
    self.video_recorder.close()
  File "/home/marcofavorito/.virtualenvs/gym-breakout-pygame-7UQzWS9l/lib/python3.7/site-packages/gym/wrappers/monitoring/video_recorder.py", line 129, in close
    os.remove(self.path)
FileNotFoundError: [Errno 2] No such file or directory: '/home/marcofavorito/workfolder/gym-breakout-pygame/examples/openaigym.video.2.7936.video000000.mp4'
Exception ignored in: <fun

error: display Surface quit


Now check the `examples/` folder, you should be able to see the recordings of the learned policy.
