# Space Invaders

## Dependencies

In [1]:
!pip install tensorflow==2.9.1 gym keras-rl2 gym[atari]

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow==2.9.1
  Downloading tensorflow-2.9.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (511.7 MB)
[K     |████████████████████████████████| 511.7 MB 6.5 kB/s 
Collecting keras-rl2
  Downloading keras_rl2-1.0.5-py3-none-any.whl (52 kB)
[K     |████████████████████████████████| 52 kB 849 kB/s 
[?25hCollecting gast<=0.4.0,>=0.2.1
  Downloading gast-0.4.0-py3-none-any.whl (9.8 kB)
Collecting flatbuffers<2,>=1.12
  Downloading flatbuffers-1.12-py2.py3-none-any.whl (15 kB)
Collecting keras<2.10.0,>=2.9.0rc0
  Downloading keras-2.9.0-py2.py3-none-any.whl (1.6 MB)
[K     |████████████████████████████████| 1.6 MB 35.7 MB/s 
[?25hCollecting tensorboard<2.10,>=2.9
  Downloading tensorboard-2.9.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 45.7 MB/s 
Collecting tensorflow-estimator<2.10.0,>=2.9.0rc0
  Downloading tensorflow_estimator

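ROM instructions: the Atari ROMs have to be obtained separately (see https://github.com/openai/atari-py#roms) and placed in a local `roms/` directory before running the next cell.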

In [2]:
# Import the ROM files found in the local `roms` directory into atari_py.
!python -m atari_py.import_roms roms

copying space_invaders.bin from roms/Space Invaders.bin to /usr/local/lib/python3.7/dist-packages/atari_py/atari_roms/space_invaders.bin


## Exploration and baseline

In [3]:
import gym
import random
import numpy as np

In [4]:
# Create the Space Invaders environment and print the shape of one observation.
env = gym.make("SpaceInvaders-v4")
print(env.observation_space.shape)

(210, 160, 3)


In [5]:
# Show the action names reported by the underlying (unwrapped) environment.
env.unwrapped.get_action_meanings()

['NOOP', 'FIRE', 'RIGHT', 'LEFT', 'RIGHTFIRE', 'LEFTFIRE']

In [6]:
# Random-policy baseline: play EPISODES complete episodes, picking a uniformly
# random action index at every step, and record each episode's total reward.
EPISODES = 100
scores = []
for episode in range(1, EPISODES + 1):
    env.reset()
    done = False
    score = 0

    while not done:
        action = random.randrange(env.action_space.n)
        # Only the reward and the termination flag are needed here.
        _, reward, done, _ = env.step(action)
        score += reward

    scores.append(score)
    print(f"Episode {episode}: Reward == {score}")

avg = np.mean(scores)
print(f"Average reward: {avg}")
# `env` is intentionally NOT closed here: later cells read its observation and
# action spaces and train/test the agent on this same instance.

Episode 1: Reward == 260.0
Episode 2: Reward == 210.0
Episode 3: Reward == 200.0
Episode 4: Reward == 230.0
Episode 5: Reward == 180.0
Episode 6: Reward == 210.0
Episode 7: Reward == 165.0
Episode 8: Reward == 55.0
Episode 9: Reward == 45.0
Episode 10: Reward == 340.0
Episode 11: Reward == 155.0
Episode 12: Reward == 80.0
Episode 13: Reward == 285.0
Episode 14: Reward == 60.0
Episode 15: Reward == 5.0
Episode 16: Reward == 110.0
Episode 17: Reward == 180.0
Episode 18: Reward == 210.0
Episode 19: Reward == 120.0
Episode 20: Reward == 170.0
Episode 21: Reward == 180.0
Episode 22: Reward == 135.0
Episode 23: Reward == 120.0
Episode 24: Reward == 75.0
Episode 25: Reward == 225.0
Episode 26: Reward == 105.0
Episode 27: Reward == 155.0
Episode 28: Reward == 395.0
Episode 29: Reward == 485.0
Episode 30: Reward == 385.0
Episode 31: Reward == 80.0
Episode 32: Reward == 15.0
Episode 33: Reward == 75.0
Episode 34: Reward == 155.0
Episode 35: Reward == 50.0
Episode 36: Reward == 110.0
Episode 37: 

So the baseline — the average reward per episode of a purely random policy — is around 150.

## Model

In [7]:
from tensorflow.keras import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D, Resizing, Rescaling, Reshape
from tensorflow.python.keras.layers.core import Lambda
from tensorflow.keras.optimizers import Adam
from tensorflow.image import rgb_to_grayscale
from tensorflow.keras.layers import Layer
from tensorflow.keras.utils import register_keras_serializable

In [8]:
@register_keras_serializable("atari")
class GrayscaleLayer(Layer):
    """Keras layer that converts its input to grayscale.

    The layer has no weights and no configuration of its own; it simply
    forwards its input through ``tensorflow.image.rgb_to_grayscale``. It is
    registered with Keras under the ``atari`` package name.
    """

    def call(self, input):
        """Return the grayscale version of ``input``."""
        grayscale = rgb_to_grayscale(input)
        return grayscale


In [39]:
def build_model(window_size, height, width, channels, actions):
    """Assemble the convolutional network used by the agent.

    The input is a window of ``window_size`` frames, each of shape
    ``(height, width, channels)``. The frames are stacked along the row axis so
    the 2-D image layers (grayscale, resize, rescale) can process them in one
    pass, then split back into per-frame images at half resolution before the
    convolutional stack.

    Args:
        window_size: number of frames in one input window.
        height: frame height in pixels.
        width: frame width in pixels.
        channels: number of colour channels per frame.
        actions: number of outputs of the final linear layer.

    Returns:
        The assembled ``Sequential`` model.
    """
    stacked_rows = window_size * height
    half_width = width // 2

    preprocessing = [
        Input(shape=(window_size, height, width, channels)),
        # Stack the frames vertically into a single tall image.
        Reshape((stacked_rows, width, channels)),
        GrayscaleLayer(name="grayscale"),
        Resizing(stacked_rows // 2, half_width),
        Rescaling(1. / 255),  # normalize to [0, 1]
        # Split the tall image back into one half-size image per frame.
        Reshape((window_size, height // 2, half_width, 1)),
    ]
    convolutions = [
        Convolution2D(32, (8, 8), strides=(4, 4), activation='relu'),
        Convolution2D(64, (4, 4), strides=(2, 2), activation='relu'),
        Convolution2D(64, (3, 3), activation='relu'),
    ]
    head = [
        Flatten(),
        Dense(512, activation='relu'),
        Dense(actions, activation='linear'),
    ]
    return Sequential(preprocessing + convolutions + head)

In [40]:
# Number of consecutive frames that make up one model input window.
WINDOW_SIZE = 4
# Per-frame dimensions, taken from the environment's observation space.
height, width, channels = env.observation_space.shape
# Number of discrete actions; used as the size of the model's output layer.
actions = env.action_space.n

In [41]:
# Build the network for WINDOW_SIZE-frame windows of environment observations.
model = build_model(WINDOW_SIZE, height, width, channels, actions)

In [42]:
# Print each layer's output shape and parameter count as a sanity check.
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 reshape_4 (Reshape)         (None, 840, 160, 3)       0         
                                                                 
 grayscale (GrayscaleLayer)  (None, 840, 160, 1)       0         
                                                                 
 resizing_6 (Resizing)       (None, 420, 80, 1)        0         
                                                                 
 rescaling_5 (Rescaling)     (None, 420, 80, 1)        0         
                                                                 
 reshape_5 (Reshape)         (None, 4, 105, 80, 1)     0         
                                                                 
 conv2d_12 (Conv2D)          (None, 4, 25, 19, 32)     2080      
                                                                 
 conv2d_13 (Conv2D)          (None, 4, 11, 8, 64)     

## Agent

In [43]:
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy

In [44]:
def build_agent(model, actions, window_length=4, memory_limit=1000,
                warmup_steps=1000, anneal_steps=10000):
    """Wrap ``model`` in a dueling DQN agent with an annealed epsilon-greedy policy.

    Args:
        model: the Keras network the agent uses.
        actions: number of discrete actions available to the agent.
        window_length: number of consecutive observations the replay memory
            groups into one state. This has to agree with the window size the
            model was built for, so pass the same value to both.
        memory_limit: ``limit`` passed to the replay memory.
        warmup_steps: ``nb_steps_warmup`` passed to the agent.
        anneal_steps: ``nb_steps`` over which the policy's ``eps`` attribute is
            annealed from 1.0 down to 0.1 (0.2 is configured as the test value).

    Returns:
        An uncompiled ``DQNAgent``.
    """
    policy = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.0,
        value_min=0.1,
        value_test=0.2,
        nb_steps=anneal_steps,
    )
    memory = SequentialMemory(
        limit=memory_limit,
        window_length=window_length,
    )
    dqn = DQNAgent(
        model=model,
        memory=memory,
        policy=policy,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=actions,
        nb_steps_warmup=warmup_steps,
    )
    return dqn

In [45]:
dqn = build_agent(model, actions)
# `lr` is a deprecated alias that triggers a warning; `learning_rate` is the
# supported keyword for the same setting.
dqn.compile(Adam(learning_rate=1e-4))

  super(Adam, self).__init__(name, **kwargs)


## Train

In [46]:
# Train the agent on the environment for 10,000 steps with rendering disabled.
dqn.fit(env, nb_steps=10000, visualize=False, verbose=2)

Training for 10000 steps ...


  updates=self.state_updates,


done, took 8.612 seconds


<keras.callbacks.History at 0x7f3df8c0eb10>

In [None]:
# Evaluate the trained agent over 20 episodes and report the mean episode reward.
# NOTE(review): this rebinds `scores` — previously the list of random-baseline
# rewards — to the object returned by `dqn.test`; consider a distinct name.
scores = dqn.test(env, nb_episodes=20, visualize=False)
np.mean(scores.history["episode_reward"])