# Space Invaders v01 Model Training

Install dependencies

In [None]:
!pip install -I gym==0.17.3
!pip install keras
!pip install keras-rl2
!pip install atari-py
!pip install autorom


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gym==0.17.3
  Downloading gym-0.17.3.tar.gz (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 5.5 MB/s 
[?25hCollecting scipy
  Downloading scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (38.1 MB)
[K     |████████████████████████████████| 38.1 MB 1.6 MB/s 
[?25hCollecting numpy>=1.10.4
  Downloading numpy-1.21.6-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (15.7 MB)
[K     |████████████████████████████████| 15.7 MB 34.6 MB/s 
[?25hCollecting pyglet<=1.5.0,>=1.4.0
  Downloading pyglet-1.5.0-py2.py3-none-any.whl (1.0 MB)
[K     |████████████████████████████████| 1.0 MB 45.5 MB/s 
[?25hCollecting cloudpickle<1.7.0,>=1.2.0
  Downloading cloudpickle-1.6.0-py3-none-any.whl (23 kB)
Collecting future
  Downloading future-0.18.2.tar.gz (829 kB)
[K     |████████████████████████████████| 829 kB 45.7 MB/s 
[?25hBuilding wheels for colle

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting keras-rl2
  Downloading keras_rl2-1.0.5-py3-none-any.whl (52 kB)
[K     |████████████████████████████████| 52 kB 725 kB/s 
Installing collected packages: keras-rl2
Successfully installed keras-rl2-1.0.5
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting autorom
  Downloading AutoROM-0.4.2-py3-none-any.whl (16 kB)
Installing collected packages: autorom
Successfully installed autorom-0.4.2


Install Atari ROMs

In [None]:
!mkdir sample_data/ROMS
!AutoROM --install-dir sample_data/ROMS --accept-license
!python -m atari_py.import_roms sample_data/ROMS

AutoROM will download the Atari 2600 ROMs.
They will be installed to:
	/content/sample_data/ROMS

Existing ROMs will be overwritten.
Installed sample_data/ROMS/adventure.bin
Installed sample_data/ROMS/air_raid.bin
Installed sample_data/ROMS/alien.bin
Installed sample_data/ROMS/amidar.bin
Installed sample_data/ROMS/assault.bin
Installed sample_data/ROMS/asterix.bin
Installed sample_data/ROMS/asteroids.bin
Installed sample_data/ROMS/atlantis.bin
Installed sample_data/ROMS/atlantis2.bin
Installed sample_data/ROMS/backgammon.bin
Installed sample_data/ROMS/bank_heist.bin
Installed sample_data/ROMS/basic_math.bin
Installed sample_data/ROMS/battle_zone.bin
Installed sample_data/ROMS/beam_rider.bin
Installed sample_data/ROMS/berzerk.bin
Installed sample_data/ROMS/blackjack.bin
Installed sample_data/ROMS/bowling.bin
Installed sample_data/ROMS/boxing.bin
Installed sample_data/ROMS/breakout.bin
Installed sample_data/ROMS/carnival.bin
Installed sample_data/ROMS/casino.bin
Installed sample_data/ROM

In [None]:
import gym
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent  # pip install keras-rl2
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy




In [None]:
# Gym id of the environment the agent is trained on.
ENV_ID = "SpaceInvaders-v0"
env = gym.make(ENV_ID)


def build_model(height, width, channels, actions, window_length=3):
    """Build the convolutional Q-network used by the DQN agent.

    Args:
        height: Height of a single observation frame.
        width: Width of a single observation frame.
        channels: Number of channels in a single observation frame.
        actions: Number of discrete actions; sets the size of the output layer.
        window_length: Number of stacked frames fed to the network per step.
            Must match the ``window_length`` of the agent's replay memory.
            Defaults to 3, the value previously hard-coded here.

    Returns:
        An uncompiled ``Sequential`` model with one linear output per action.
    """
    # NOTE(review): Conv2D receives a rank-5 input (batch, window, h, w, c)
    # here; presumably the window axis is treated as an extra batch
    # dimension by Keras -- confirm against the installed Keras version.
    frame_stack_shape = (window_length, height, width, channels)

    model = Sequential()
    model.add(Conv2D(32, (8, 8), strides=(4, 4), activation='relu', input_shape=frame_stack_shape))
    model.add(Conv2D(64, (4, 4), strides=(2, 2), activation='relu'))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dense(256, activation='relu'))
    # Linear head: one unbounded output per action.
    model.add(Dense(actions, activation='linear'))
    return model


# Size the network from the environment itself: one frame's dimensions and
# the number of discrete actions the agent can choose from.
frame_shape = env.observation_space.shape
height, width, channels = frame_shape
actions = env.action_space.n

model = build_model(height=height, width=width, channels=channels, actions=actions)


def build_agent(model, actions):
    """Assemble a dueling DQN agent around ``model``.

    Args:
        model: Keras model mapping a stack of frames to one value per action.
        actions: Number of discrete actions available in the environment.

    Returns:
        An uncompiled ``DQNAgent``; call ``compile`` on it before ``fit``.
    """
    # Epsilon-greedy exploration, annealed linearly from 1.0 down to 0.1 over
    # the first 10,000 steps, with eps=0.2 used in test mode. The previous
    # values (value_min=1, value_test=2) kept eps pinned at 1.0, so the agent
    # only ever took random actions.
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.0, value_min=0.1, value_test=0.2,
                                  nb_steps=10000)
    # Replay buffer; window_length must match the frame-stack size the model
    # was built with.
    memory = SequentialMemory(limit=2000, window_length=3)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, enable_dueling_network=True, dueling_type='avg',
                   nb_actions=actions, nb_steps_warmup=1000)
    return dqn


dqn = build_agent(model, actions)

# `learning_rate` is the supported keyword for the optimizer; the `lr` alias
# used previously is deprecated and triggers a warning on every run.
dqn.compile(Adam(learning_rate=0.0001))

# Train, then persist the learned weights so they can be reloaded later.
dqn.fit(env, nb_steps=20000, visualize=False, verbose=1)
dqn.save_weights('sample_data/SpaceInvaders-v0.h5f', overwrite=True)
print('Saved model to sample_data/SpaceInvaders-v0.h5f')




  super(Adam, self).__init__(name, **kwargs)


Training for 20000 steps ...
Interval 1 (0 steps performed)


  updates=self.state_updates,


12 episodes - episode_reward: 223.750 [95.000, 515.000] - loss: 2.869 - mean_q: 14.237 - mean_eps: 1.000 - ale.lives: 1.976

Interval 2 (10000 steps performed)

# New section

# New section