# DQN with Atari Space Invaders

In [1]:
# Bit of formatting because inline code is not styled very good by default:
from IPython.core.display import HTML
HTML("""<style> .rendered_html code { 
    padding: 2px 4px;
    color: #c7254e;
    background-color: #f9f2f4;
    border-radius: 4px;
} </style>""")

In [2]:
# Get necessary libraries
import tensorflow as tf   
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np           
import retro                 

from skimage import transform 
from skimage.color import rgb2gray 

import matplotlib.pyplot as plt 

from collections import deque

import random

import warnings
warnings.filterwarnings('ignore')

## Step 1: Create Environment from OpenAI Retro
Note: Need to download <a href="http://www.atarimania.com/rom_collection_archive_atari_2600_roms.html">Atari ROM</a> before initializing gym environment



Command Line to Import ROM: python -m retro.import ./path/to/your/ROMs/directory/

In [3]:
env = retro.make(game='SpaceInvaders-Atari2600')
env.reset()                    

print("Action space: ", env.action_space.n)
print("Observation space: ", env.observation_space)

# Here we create an hot encoded version of our actions
possible_actions = np.array(np.identity(env.action_space.n,dtype=int).tolist())

Action space:  8
Observation space:  Box(210, 160, 3)


## Step 2: Define Preprocessing Functions
- RGB to Grayscale
- Crop Frame
- Normalize Pixel Values
- Resize Frame

In [4]:
def preprocess_frame(frame):
    # Greyscale frame 
    gray = rgb2gray(frame)
    
    # Crop the screen (remove the part below the player)
    # [Up: Down, Left: right]
    cropped_frame = gray[8:-12,4:-12]
    
    # Normalize Pixel Values
    normalized_frame = cropped_frame/255.0
    
    # Resize
    preprocessed_frame = transform.resize(normalized_frame, [110,84])
    
    return preprocessed_frame # 110x84x1 frame

## Step 3: Stack Frames to Give a Sense of Motion

In [5]:
stack_size = 4 

# Initialize deque with zero-images one array for each image
stacked_frames  =  deque([np.zeros((110,84), dtype=np.int) for i in range(stack_size)], maxlen=4)

def stack_frames(stacked_frames, state, is_new_episode):
    # Preprocess frame
    frame = preprocess_frame(state)
    
    if is_new_episode:
        # Clear our stacked_frames
        stacked_frames = deque([np.zeros((110,84), dtype=np.int) for i in range(stack_size)], maxlen=4)
        
        # Because we're in a new episode, copy the same frame 4x
        stacked_frames.append(frame)
        stacked_frames.append(frame)
        stacked_frames.append(frame)
        stacked_frames.append(frame)
        
        # Stack the frames
        stacked_state = np.stack(stacked_frames, axis=2)
        
    else:
        # Append frame to deque, automatically removes the oldest frame
        stacked_frames.append(frame)

        # Build the stacked state (first dimension specifies different frames)
        stacked_state = np.stack(stacked_frames, axis=2) 
    
    return stacked_state, stacked_frames

## Step 4: Set Hyperparameters

In [6]:
# MODEL HYPERPARAMETERS
state_size = [110, 84, 4]      
action_size = env.action_space.n 
learning_rate =  0.001

# TRAINING HYPERPARAMETERS
total_episodes = 50          
max_steps = 50000              
batch_size = 64               

# Exploration parameters for epsilon greedy strategy
explore_start = 1.0            
explore_stop = 0.01             
decay_rate = 0.001

# Q learning hyperparameters
gamma = 0.9                  # Discounting rate

# MEMORY HYPERPARAMETERS
pretrain_length = batch_size # Number of experiences stored in the Memory when initialized for the first time
memory_size = 1000000        # Number of experiences the Memory can keep

# PREPROCESSING HYPERPARAMETERS
stack_size = 4               # Number of frames stacked

# MODIFY THIS TO FALSE IF YOU JUST WANT TO SEE THE TRAINED AGENT
training = False

# TURN THIS TO TRUE IF YOU WANT TO RENDER THE ENVIRONMENT
episode_render = False

## Step 5: Create Deep Q-learning Model
1. Stack 4 frames as input
2. Add 3 Convolutional Layers
3. Flatten Layer
4. Add 2 Fully Connected Layers
5. Outputs a Q Value for each Action

In [7]:
# Define Model
inputs = keras.Input(shape=(84,84,4), dtype="float32", name="inputs")
        
conv1 = layers.Conv2D(32, 8, strides=(4,4), activation='elu')(inputs)
conv2 = layers.Conv2D(64, 4, strides=(2,2), activation='elu')(conv1)
conv3 = layers.Conv2D(64, 2, strides=(2,2), activation='elu')(conv2)
        
flat = layers.Flatten()(conv3)
        
fc = layers.Dense(512, activation='elu')(flat)
out = layers.Dense(action_size)(fc)
        
model = keras.Model(inputs=inputs, outputs=out) 
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          [(None, 84, 84, 4)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 20, 20, 32)        8224      
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 9, 9, 64)          32832     
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 4, 4, 64)          16448     
_________________________________________________________________
flatten (Flatten)            (None, 1024)              0         
_________________________________________________________________
dense (Dense)                (None, 512)               524800    
_________________________________________________________________
dense_1 (Dense)              (None, 8)                 4104  

In [None]:
#actions = tf.Variable(tf.zeros(shape=(None, action_size)), name="actions")
            
# Remember that target_Q is the R(s,a) + ymax Qhat(s', a')
#targetQ = tf.Variable(tf.zeros(shape=(None)), name="target")
                     
# Q is our predicted Q value.
#Q = tf.math.reduce_mean(tf.math.multiply(out, actions))
            
# The loss is the difference between our predicted Q_values and the Q_target
# Sum(Qtarget - Q)^2
#loss = tf.math.reduce_mean(tf.math.square(target_Q - Q))
#optimizer = keras.optimizers.Adam(learning_rate)