# DQN with Images

**imports**

In [7]:
import numpy as np
from PIL import Image

import gym

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Convolution2D, Permute
from tensorflow.keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor
from rl.callbacks import FileLogger, ModelIntervalCheckpoint

  and should_run_async(code)


**setup env**

In [2]:
env = gym.make("BreakoutDeterministic-v4")

nb_actions = env.action_space.n
nb_actions

  and should_run_async(code)
  deprecation(
  deprecation(


4

**const**

In [6]:
IMG_SHAPE = (84, 84)
WINDOW_LENGTH = 4
INPUT_SHAPE = (WINDOW_LENGTH, IMG_SHAPE[0], IMG_SHAPE[1])

  and should_run_async(code)


**image preprocessing**

In [5]:
class ImageProcessor(Processor):

    def process_observation(self, observation):
        img = Image.fromarray(observation)
        img = img.resize(IMG_SHAPE)
        img = img.convert("L")
        img = np.array(img)
        return img.astype('uint8')

    def process_state_batch(self, batch):
        processed_batch = batch.astype('float32') / 255.0
        return processed_batch

    def process_reward(self, reward):
        return np.clip(reward, -1.0, 1.0)

  and should_run_async(code)


**create CNN**

In [9]:
# Con2D <--- (Batch, 84, 84, 4)
model = Sequential()

# reshape image -> (2,3,1) means: grap 2 then 3 and then 1 index from input_shape
model.add(Permute((2,3,1), input_shape=INPUT_SHAPE))

model.add(Convolution2D(filters=32, kernel_size=(8,8), strides=(4,4), kernel_initializer='he_normal'))
model.add(Activation('relu'))

model.add(Convolution2D(filters=64, kernel_size=(4,4), strides=(2,2), kernel_initializer='he_normal'))
model.add(Activation('relu'))

model.add(Convolution2D(filters=64, kernel_size=(3,3), strides=(1,1), kernel_initializer='he_normal'))
model.add(Activation('relu'))

model.add(Flatten())

model.add(Dense(512))
model.add(Activation('relu'))

model.add(Dense(nb_actions))
model.add(Activation('linear'))

  and should_run_async(code)


In [10]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
permute_1 (Permute)          (None, 84, 84, 4)         0         
_________________________________________________________________
conv2d (Conv2D)              (None, 20, 20, 32)        8224      
_________________________________________________________________
activation (Activation)      (None, 20, 20, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 9, 9, 64)          32832     
_________________________________________________________________
activation_1 (Activation)    (None, 9, 9, 64)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 7, 7, 64)          36928     
_________________________________________________________________
activation_2 (Activation)    (None, 7, 7, 64)         

**create DQN-Agent**

In [11]:
memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)

  and should_run_async(code)


In [12]:
processor = ImageProcessor()

  and should_run_async(code)


In [13]:
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), 
                                attr='eps',
                                value_max=1.0,
                                value_min=0.1,
                                value_test=0.05,
                                nb_steps=1000000)

  and should_run_async(code)


In [14]:
dqn = DQNAgent(model=model, policy=policy, memory=memory,
                nb_actions=nb_actions, processor=processor, nb_steps_warmup=50000, 
                gamma=0.99, target_model_update=10000, train_interval=4,
                delta_clip=1)

  and should_run_async(code)


In [15]:
dqn.compile(Adam(lr=0.00025), metrics=['mae'])

  and should_run_async(code)


In [16]:
weights_filename = "DQN_BO.h5f"
checkpoint_filename = "DQN_CHECKPOINT.h5f"

checkpoint_callback = ModelIntervalCheckpoint(checkpoint_filename, interval=100000)

  and should_run_async(code)


In [None]:
#dqn.fit(env, nb_steps=1000, callbacks=[checkpoint_callback], log_interval=500, visualize=False)

**laoding startpoint**

In [18]:
# if loading + training -> eps to 0.2 or some like that
model.load_weights("../COURSE_NOTEBOOKS/09-Deep-Q-Learning-On-Images/weights/dqn_BreakoutDeterministic-v4_weights_900000.h5f")

  and should_run_async(code)


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x1b616c1d130>

In [19]:
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), 
                                attr='eps',
                                value_max=0.2,
                                value_min=0.1,
                                value_test=0.05,
                                nb_steps=1000000)

dqn = DQNAgent(model=model, policy=policy, memory=memory,
                nb_actions=nb_actions, processor=processor, nb_steps_warmup=50000, 
                gamma=0.99, target_model_update=10000, train_interval=4,
                delta_clip=1)

dqn.compile(Adam(lr=0.00025), metrics=['mae'])



In [None]:
dqn.fit(env, nb_steps=1000, callbacks=[checkpoint_callback], log_interval=500, visualize=False)

**use agent**

In [22]:
env = gym.make("BreakoutDeterministic-v4", render_mode='human')
dqn.test(env, nb_episodes=1, visualize=True)
env.close()

Testing for 1 episodes ...


Error: render(mode='human') is deprecated. Please supply `render_mode` when constructing your environment, e.g., gym.make(ID, render_mode='human'). The new `render_mode` keyword argument supports DPI scaling, audio, and native framerates.

In [23]:
model.load_weights("../COURSE_NOTEBOOKS/09-Deep-Q-Learning-On-Images/weights/dqn_BreakoutDeterministic-v4_weights_1200000.h5f")

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x1b616c71700>

In [24]:
policy = EpsGreedyQPolicy(0.05)

  and should_run_async(code)


In [25]:
dqn = DQNAgent(model=model,
                nb_actions=nb_actions,
                memory=memory,
                processor=processor)

dqn.compile(Adam(lr=0.00025), metrics=['mae'])

  and should_run_async(code)


: 

In [None]:
env = gym.make("BreakoutDeterministic-v4", render_mode='human')
dqn.test(env, nb_episodes=1, visualize=True)
env.close()