In [8]:
import gym
import random
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [9]:
# Seed the Python, NumPy and environment random number generators so that
# repeated runs start from the same random state.
# NOTE(review): TensorFlow's own generator (weight initialisation) is not
# seeded here, so runs can still differ; `Env.seed` assumes the classic gym
# API that the rest of this notebook already relies on - confirm the version.
SIEMEN = 0
random.seed(SIEMEN)
np.random.seed(SIEMEN)

# Create the CartPole environment and read the problem dimensions from it.
ymparisto = gym.make('CartPole-v0')
ymparisto.seed(SIEMEN)
tilat = ymparisto.observation_space.shape[0]  # first dimension of the observation shape
liikkeet = ymparisto.action_space.n  # size of the action space

In [10]:
def mallin_rakennus(tilat, liikkeet):
    """Build the feed-forward network used by the agent.

    The network flattens an input of shape ``(1, tilat)``, passes it through
    two 24-unit ReLU layers and ends in a linear layer with ``liikkeet`` units.

    Args:
        tilat: Size of the last input dimension.
        liikkeet: Number of units in the output layer.

    Returns:
        The uncompiled ``Sequential`` model.
    """
    kerrokset = [
        Flatten(input_shape=(1, tilat)),
        Dense(24, activation='relu'),
        Dense(24, activation='relu'),
        Dense(liikkeet, activation='linear'),
    ]
    return Sequential(kerrokset)
    

In [11]:
# Build the network for this environment's dimensions and print its layers.
malli = mallin_rakennus(tilat=tilat, liikkeet=liikkeet)
malli.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 4)                 0         
_________________________________________________________________
dense_3 (Dense)              (None, 24)                120       
_________________________________________________________________
dense_4 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_5 (Dense)              (None, 2)                 50        
Total params: 770
Trainable params: 770
Non-trainable params: 0
_________________________________________________________________


In [12]:
def agentin_rakennus(malli, liikkeet):
    """Create a DQN agent around the given model.

    The agent is given a Boltzmann Q policy and a sequential replay memory
    (limit 50000, window length 1).

    Args:
        malli: Model handed to ``DQNAgent`` as ``model``.
        liikkeet: Number of actions, handed to ``DQNAgent`` as ``nb_actions``.

    Returns:
        The ``DQNAgent`` instance; it is not compiled here.
    """
    return DQNAgent(
        model=malli,
        nb_actions=liikkeet,
        policy=BoltzmannQPolicy(),
        memory=SequentialMemory(limit=50000, window_length=1),
        nb_steps_warmup=20,
        target_model_update=1e-2,
    )

In [13]:
# Wrap the model in a DQN agent, compile it and train for 50000 steps.
dqn = agentin_rakennus(malli, liikkeet)
# `learning_rate` is the supported argument name; the `lr` alias is deprecated
# in tf.keras optimizers.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(ymparisto, nb_steps=50000, visualize=False, verbose=1)

Training for 50000 steps ...
Interval 1 (0 steps performed)
110 episodes - episode_reward: 90.409 [11.000, 200.000] - loss: 3.207 - mae: 19.216 - mean_q: 38.966

Interval 2 (10000 steps performed)
55 episodes - episode_reward: 179.982 [150.000, 200.000] - loss: 4.534 - mae: 37.533 - mean_q: 75.768

Interval 3 (20000 steps performed)
58 episodes - episode_reward: 174.672 [125.000, 200.000] - loss: 4.203 - mae: 39.288 - mean_q: 78.997

Interval 4 (30000 steps performed)
54 episodes - episode_reward: 185.056 [131.000, 200.000] - loss: 3.310 - mae: 37.348 - mean_q: 75.120

Interval 5 (40000 steps performed)
done, took 206.235 seconds


<tensorflow.python.keras.callbacks.History at 0x1fc73d16cf8>

In [14]:
# Write the agent's weights to disk, replacing any existing file of that name.
dqn.save_weights(filepath='dqn_weights.h5f', overwrite=True)

In [15]:
# Build a second environment plus a new model and agent for evaluation.
# Note that this rebinds `liikkeet`, `tilat`, `malli` and `dqn`.
ymparisto2 = gym.make('CartPole-v0')
liikkeet = ymparisto2.action_space.n
tilat = ymparisto2.observation_space.shape[0]
malli = mallin_rakennus(tilat, liikkeet)
dqn = agentin_rakennus(malli, liikkeet)
# `learning_rate` is the supported argument name; the `lr` alias is deprecated
# in tf.keras optimizers.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

In [16]:
# Load the weights stored in 'dqn_weights.h5f' into the agent.
dqn.load_weights(filepath='dqn_weights.h5f')

In [17]:
# Run 10 rendered evaluation episodes. The environment is closed afterwards,
# even if the test raises, so the rendering resources opened by
# `visualize=True` are released.
try:
    testi = dqn.test(ymparisto2, nb_episodes=10, visualize=True)
finally:
    ymparisto2.close()

Testing for 10 episodes ...
Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 200.000, steps: 200
Episode 6: reward: 200.000, steps: 200
Episode 7: reward: 200.000, steps: 200
Episode 8: reward: 200.000, steps: 200
Episode 9: reward: 200.000, steps: 200
Episode 10: reward: 200.000, steps: 200
