<a href="https://colab.research.google.com/github/wisrovi/RedesNeuronales/blob/master/AprendizajePorRefuerzo-Breakout.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Pinned dependencies for this notebook (shell commands run through IPython's `!`).
!pip install tensorflow==1.13.1
!pip install keras-rl==0.4.2
# NOTE(review): the `Processor` base class used below is imported from `rl.core`
# (keras-rl); nothing visible in this notebook imports a `processor` module, so this
# unpinned install may be unnecessary — confirm before removing.
!pip install processor
# Disabled Keras pin kept for reference; note that pip needs `==` (not `=`) for a version pin.
#!pip install keras=2.0.0

In [0]:
from __future__ import division
from PIL import Image

import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Convolution2D, Permute
from keras.optimizers import Adam

import keras.backend as K

from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy, LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.core import Processor

from rl.callbacks import FileLogger, ModelIntervalCheckpoint

In [0]:
# Shape of each preprocessed frame; AtariProcessor resizes observations to this
# shape and asserts that the resulting array matches it.
INPUT_SHAPE = (84, 84)
# Number of consecutive frames grouped together; used as the replay-memory
# window length and as the leading dimension of the model input shape.
WINDOW_LENGTH = 4

In [0]:
# In this example, we need to preprocess the observations
class AtariProcessor(Processor):
    """Preprocessing hooks applied to observations, state batches and rewards.

    Observations are shrunk to INPUT_SHAPE grayscale frames and kept as uint8;
    scaling to [0, 1] floats is deferred to `process_state_batch`; rewards are
    clipped to [-1, 1].
    """

    def process_observation(self, observation):
        """Convert one colour frame into a grayscale uint8 array of INPUT_SHAPE.

        Args:
            observation: 3-D array laid out as (height, width, channel).

        Returns:
            A uint8 numpy array whose shape equals INPUT_SHAPE.

        Raises:
            AssertionError: if the input is not 3-D or the resized frame does
                not have shape INPUT_SHAPE.
        """
        assert observation.ndim == 3  # (height, width, channel)
        # INPUT_SHAPE is compared against a numpy shape below, i.e. it is in
        # (height, width) order, whereas PIL's resize() expects (width, height).
        # Swap explicitly so a non-square INPUT_SHAPE also passes the check.
        height, width = INPUT_SHAPE
        img = Image.fromarray(observation)
        img = img.resize((width, height)).convert('L')  # 'L' = 8-bit grayscale
        processed_observation = np.array(img)
        assert processed_observation.shape == INPUT_SHAPE
        return processed_observation.astype('uint8')

    def process_state_batch(self, batch):
        """Scale a batch of uint8 frames to float32 values in [0, 1].

        Doing the conversion here rather than in `process_observation` lets
        individual observations stay uint8 (a quarter of the size of float32).
        """
        processed_batch = batch.astype('float32') / 255.
        return processed_batch

    def process_reward(self, reward):
        """Clip the reward to the interval [-1, 1]."""
        return np.clip(reward, -1., 1.)

In [0]:
# Gym environment id used throughout the notebook (environment creation and
# the weight/log filenames).
# Earlier candidates that were immediately overwritten and never took effect:
# 'PongDeterministic-v0', 'CartPole-v0'.
ENV_NAME = 'BreakoutDeterministic-v4'

In [21]:
# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
# Size of the action space; createModel() uses it as the width of the output layer.
nb_actions = env.action_space.n

# random seed
# Seed both NumPy's global RNG and the environment with the same fixed value.
np.random.seed(123)
env.seed(123)

[123, 151010689]

In [0]:
# Shape of one model input: WINDOW_LENGTH frames of INPUT_SHAPE each,
# with the frame index as the leading dimension.
input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE

In [25]:
# Next, we build our model. We use the same model that was described by Mnih et al. (2015).
def createModel():
    """Build the convolutional Q-network used by the DQN agent.

    Reads the module-level `input_shape` (frames-first window of frames) and
    `nb_actions` (width of the linear output layer).

    Returns:
        An uncompiled keras `Sequential` model: three Conv2D+ReLU stages, a
        512-unit dense ReLU layer and a linear layer with one output per action.

    Raises:
        RuntimeError: if the backend reports an unrecognised image data format.
    """
    model = Sequential()

    # K.image_data_format() is the public backend API; it replaces the legacy
    # K.common.image_dim_ordering() getter ('tf' -> 'channels_last',
    # 'th' -> 'channels_first').
    data_format = K.image_data_format()
    if data_format == 'channels_last':
        # (width, height, channels): move the frame axis to the end.
        model.add(Permute((2, 3, 1), input_shape=input_shape))
    elif data_format == 'channels_first':
        # (channels, width, height): identity permutation, input is already frames-first.
        model.add(Permute((1, 2, 3), input_shape=input_shape))
    else:
        raise RuntimeError('Unknown image_dim_ordering.')

    # Convolutional feature extractor.
    model.add(Convolution2D(32, (8, 8), strides=(4, 4)))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, (4, 4), strides=(2, 2)))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, (3, 3), strides=(1, 1)))
    model.add(Activation('relu'))

    # Fully connected head: one linear output per action.
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dense(nb_actions))
    model.add(Activation('linear'))
    return model
  
model = createModel()
# summary() prints the layer table itself and returns None, so wrapping it in
# print() would append a stray "None" line to the output.
model.summary()



Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
permute_1 (Permute)          (None, 84, 84, 4)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 20, 20, 32)        8224      
_________________________________________________________________
activation_1 (Activation)    (None, 20, 20, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 9, 9, 64)          32832     
_________________________________________________________________
activation_2 (Activation)    (None, 9, 9, 64)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 7, 7, 64)          36928     
_________________________________________________________________
activation_3 (Activation)    (None, 7, 7, 64)       

In [0]:
# Let's define the memory for storing the experience
# The limit is 1,000,000 entries; window_length reuses WINDOW_LENGTH so it
# agrees with the leading dimension of the model's input shape.
memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH)
# Preprocessing hooks (frame resize/grayscale, batch scaling, reward clipping)
# handed to the agent below.
processor = AtariProcessor()

In [0]:
# Define the policy that our agent will follow
# Wraps an epsilon-greedy policy and anneals its `eps` attribute; going by the
# argument names, eps moves from value_max=1.0 to value_min=0.1 over
# nb_steps=1,000,000 steps and value_test=0.05 is used when testing
# (NOTE(review): confirm against the keras-rl LinearAnnealedPolicy docs).
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05,
                              nb_steps=1000000)

In [30]:
# Define the agent
# Wire the Q-network, exploration policy, replay memory and processor together.
# nb_steps_warmup, gamma, target_model_update and train_interval are passed
# straight through to keras-rl's DQNAgent; see its documentation for semantics.
dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory,
               processor=processor, nb_steps_warmup=50000, gamma=.99, target_model_update=10000,
               train_interval=20)
# Compile with the Adam optimizer (learning rate 0.00025) and track the 'mae' metric.
dqn.compile(Adam(lr=.00025), metrics=['mae'])
# NOTE(review): the output saved with this cell is a TypeError, so `dqn` may not
# exist when later cells run — presumably an installed-version incompatibility;
# TODO confirm and re-run.

TypeError: ignored

In [32]:
# Training part
# Output locations: training log, final weights and periodic checkpoints.
log_filename = 'dqn_{}_log.json'.format(ENV_NAME)
weights_filename = 'dqn_{}_weights.h5f'.format(ENV_NAME)
# '{step}' is deliberately left unformatted here so it stays a placeholder
# inside the checkpoint filename.
checkpoint_weights_filename = 'dqn_' + ENV_NAME + '_weights_{step}.h5f'

# Checkpoint the weights every 250,000 steps and log to file every 100.
callbacks = [
    ModelIntervalCheckpoint(checkpoint_weights_filename, interval=250000),
    FileLogger(log_filename, interval=100),
]

dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000, visualize=True)

# Persist the final weights once training has finished.
dqn.save_weights(weights_filename, overwrite=True)

NameError: ignored

In [0]:
# Finally, evaluate our algorithm for 10 episodes.
# Testing part
# The filename is built from the ENV_NAME constant: the lowercase `env_name`
# is never defined in this notebook and raised a NameError.
weights_filename = 'dqn_{}_weights.h5f'.format(ENV_NAME)
dqn.load_weights(weights_filename)
dqn.test(env, nb_episodes=10, visualize=True)

Testing for 5 episodes ...


NameError: ignored