In [10]:
import numpy as np
import gymnasium as gym
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
from rl.agents.dqn import DQNAgent 
from rl.policy import BoltzmannQPolicy
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory


ENV_NAME = 'LunarLander-v2'


class _LegacyGymAPI(gym.Wrapper):
    """Expose the legacy Gym ``reset``/``step`` return shapes on a Gymnasium env.

    Gymnasium's ``reset`` returns ``(observation, info)`` and ``step`` returns
    ``(observation, reward, terminated, truncated, info)``.  This adapter
    converts both back to the older ``observation`` / 4-tuple forms.
    NOTE(review): keras-rl's training loop is expected to unpack the legacy
    forms -- confirm against the installed keras-rl version.
    """

    def reset(self, **kwargs):
        """Reset the wrapped env and return only the observation."""
        observation, _info = self.env.reset(**kwargs)
        return observation

    def step(self, action):
        """Step the wrapped env and merge terminated/truncated into ``done``."""
        observation, reward, terminated, truncated, info = self.env.step(action)
        return observation, reward, terminated or truncated, info


# Gymnasium has no ``wrappers.Monitor``; ``RecordVideo`` is its replacement and
# needs an environment created with ``render_mode='rgb_array'``.
env = gym.make(ENV_NAME, render_mode='rgb_array')
env = gym.wrappers.RecordVideo(env, video_folder='recordings12')
env = _LegacyGymAPI(env)

# Re-seed NumPy from OS entropy (no fixed seed, so runs are not reproducible).
np.random.seed()
# ``Env.seed()`` does not exist in Gymnasium; seeding is done through
# ``env.reset(seed=...)``.  The original call passed no seed, so leaving the
# environment unseeded preserves the intended behaviour.
nb_actions = env.action_space.n
# Q-network: flatten the (window_length=1, *observation_shape) input, pass it
# through three ReLU hidden layers, and emit one linear output per action.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
for hidden_units in (128, 64, 32):
    model.add(Dense(hidden_units))
    model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())
# --- Hyperparameters -------------------------------------------------------
gamma = .99
lr = .0001
nb_steps_warmup = 1000
target_model_update = .2
training_steps = 4000000

# NOTE(review): `epochs` and `decay` are computed but not used anywhere in the
# visible code -- confirm whether a learning-rate decay was meant to be applied.
epochs = training_steps/1000
decay = float(lr/epochs)

# --- Agent components ------------------------------------------------------
memory = SequentialMemory(limit=1000000, window_length=1)
policy = EpsGreedyQPolicy()

# NOTE(review): `callbacks` is built here but is not passed to `dqn.fit` in the
# visible code, so this early-stopping rule currently has no effect.
earlystop = EarlyStopping(
    monitor = 'episode_reward',
    min_delta=.1,
    patience=5,
    verbose=1,
    mode='auto',
)
callbacks = [earlystop]

# --- Build and compile the DQN agent ---------------------------------------
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    nb_steps_warmup=nb_steps_warmup,
    target_model_update = target_model_update,
    policy=policy,
    gamma = gamma,
)
dqn.compile(Adam(lr=lr), metrics=['mae'])
# Train the agent on the environment for `training_steps` steps, without
# rendering (visualize=False).
dqn.fit(env, nb_steps=training_steps, visualize=False, verbose=1)

# After training is done, we save the final weights.
# (This call was previously fused onto the comment line above and therefore
# never executed.)
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

ModuleNotFoundError: No module named 'keras.utils.generic_utils'