In [6]:
# Import necessary libraries
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

In [15]:
# Create the environment from OpenAI Gym's registry of pre-defined environments.
# The environment used here is 'FrozenLake-v0'.
SEED = 123

env = gym.make("FrozenLake-v0")

# Training and evaluation in this notebook are stochastic (the agent explores
# with an epsilon-greedy policy), so fix the NumPy and environment seeds to make
# repeated runs more comparable.
# NOTE(review): neural-network weight initialisation and anything drawing from
# Python's built-in `random` module are not seeded here - confirm whether full
# determinism is required.
np.random.seed(SEED)
env.seed(SEED)

# Number of discrete actions available in the environment
nb_actions = env.action_space.n


In [16]:
# Build the feed-forward Q-network as a single Keras Sequential layer stack

# Input shape is (1,) + the environment's observation shape.
# NOTE(review): the leading 1 presumably mirrors window_length=1 used by the
# replay memory - confirm if the window length is ever changed.
state_shape = (1,) + env.observation_space.shape

# Layer order: flatten the input, two ReLU hidden layers (the 128/256 sizes are
# a free choice), then a linear output layer with one unit per action.
model = Sequential([
    Flatten(input_shape=state_shape),
    Dense(128),
    Activation('relu'),
    Dense(256),
    Activation('relu'),
    Dense(nb_actions),
    Activation('linear'),
])


In [0]:
# Experience storage: a SequentialMemory with limit=500000 and a window length of 1
memory = SequentialMemory(window_length=1, limit=500000)

# Action-sampling strategy: epsilon-greedy, constructed with the library's default settings
policy = EpsGreedyQPolicy()


### **DQN AGENT**

<img src="./images/dqn.png" alt="DQN Agent" style="width:700px">

In [25]:

# Assemble the DQN agent from the network, the replay memory and the
# epsilon-greedy policy defined in the earlier cells.
# nb_steps_warmup is set to 100 and target_model_update to 1e-2.
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    policy=policy,
    nb_steps_warmup=100,
    target_model_update=1e-2,
)

# Compile the agent with an Adam optimizer (learning rate 1e-3), tracking mse as a metric
optimizer = Adam(lr=1e-3)
dqn.compile(optimizer, metrics=['mse'])

# Train the agent on the environment for 5000 steps.
# visualize=True would render the environment during training, which slows
# training down and is not recommended for EdStem.
# Set verbose=2 to see the complete training details.
# The trailing semicolon suppresses the returned object's repr in the cell output.
dqn.fit(env, nb_steps=5000, visualize=False, verbose=0);




In [26]:
# Evaluate the trained agent on the environment for 10 episodes, without rendering
dqn.test(
    env,
    nb_episodes=10,
    visualize=False,
)


Testing for 10 episodes ...
Episode 1: reward: 0.000, steps: 10
Episode 2: reward: 0.000, steps: 59
Episode 3: reward: 0.000, steps: 15
Episode 4: reward: 0.000, steps: 15
Episode 5: reward: 0.000, steps: 17
Episode 6: reward: 0.000, steps: 8
Episode 7: reward: 1.000, steps: 19
Episode 8: reward: 0.000, steps: 61
Episode 9: reward: 0.000, steps: 24
Episode 10: reward: 1.000, steps: 26


<tensorflow.python.keras.callbacks.History at 0x7f4903b70250>