In [10]:
import gym
import random
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [11]:
# Create the CartPole environment and read the problem dimensions from it.
ymparisto = gym.make('CartPole-v0')
# Size of the action space (`.n` of the environment's action space).
liikkeet = ymparisto.action_space.n
# Length of the first axis of the observation shape.
tilat = ymparisto.observation_space.shape[0]

In [12]:
def mallin_rakennus(tilat, liikkeet):
    """Build the feed-forward network used as the agent's model.

    Args:
        tilat: Observation size; the network input shape is ``(1, tilat)``.
        liikkeet: Number of units in the final linear layer.

    Returns:
        An uncompiled ``Sequential`` model consisting of
        Flatten -> Dense(24, relu) -> Dense(24, relu) -> Dense(liikkeet, linear).
    """
    kerrokset = [
        # Flatten the (1, tilat) input window into a flat vector.
        Flatten(input_shape=(1, tilat)),
        Dense(24, activation='relu'),
        Dense(24, activation='relu'),
        # One linear output per action.
        Dense(liikkeet, activation='linear'),
    ]
    return Sequential(kerrokset)
    

In [13]:
# Build the network from the dimensions read from the environment.
# The notebook binds these as `tilat` and `liikkeet`; the previous call used
# `states` / `actions`, which are not defined in the visible notebook and
# raise NameError on a fresh kernel.
malli = mallin_rakennus(tilat, liikkeet)
malli.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 4)                 0         
_________________________________________________________________
dense_6 (Dense)              (None, 24)                120       
_________________________________________________________________
dense_7 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_8 (Dense)              (None, 2)                 50        
Total params: 770
Trainable params: 770
Non-trainable params: 0
_________________________________________________________________


In [14]:
def agentin_rakennus(malli, liikkeet):
    """Wrap a Keras model in a DQN agent.

    Args:
        malli: The Keras model handed to the agent as ``model``.
        liikkeet: Number of actions, passed to the agent as ``nb_actions``.

    Returns:
        An uncompiled ``DQNAgent`` configured with a ``BoltzmannQPolicy``,
        a ``SequentialMemory`` (limit 50000, window length 1), 20 warm-up
        steps and ``target_model_update=1e-2``.
    """
    return DQNAgent(
        model=malli,
        nb_actions=liikkeet,
        memory=SequentialMemory(limit=50000, window_length=1),
        policy=BoltzmannQPolicy(),
        nb_steps_warmup=20,
        target_model_update=1e-2,
    )

In [15]:
# Build the agent around the network and compile it with Adam; mean absolute
# error is tracked as an additional metric.
dqn = agentin_rakennus(malli, liikkeet)
# `learning_rate` is the current keyword; `lr` is a deprecated alias.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
# Train without rendering; verbose=2 prints one summary line per episode.
dqn.fit(ymparisto, nb_steps=50000, visualize=False, verbose=2)
# Persist the trained weights so they can be reloaded later from
# 'dqn_weights.h5f'. overwrite=True avoids an interactive overwrite prompt
# when the cell is re-run.
dqn.save_weights('dqn_weights.h5f', overwrite=True)

Training for 1000 steps ...
  17/1000: episode: 1, duration: 0.047s, episode steps:  17, steps per second: 359, episode reward: 17.000, mean reward:  1.000 [ 1.000,  1.000], mean action: 0.647 [0.000, 1.000],  loss: --, mae: --, mean_q: --
  35/1000: episode: 2, duration: 0.416s, episode steps:  18, steps per second:  43, episode reward: 18.000, mean reward:  1.000 [ 1.000,  1.000], mean action: 0.611 [0.000, 1.000],  loss: 0.562151, mae: 0.574139, mean_q: 0.092179
  75/1000: episode: 3, duration: 0.135s, episode steps:  40, steps per second: 296, episode reward: 40.000, mean reward:  1.000 [ 1.000,  1.000], mean action: 0.425 [0.000, 1.000],  loss: 0.342263, mae: 0.548839, mean_q: 0.314357
  94/1000: episode: 4, duration: 0.066s, episode steps:  19, steps per second: 288, episode reward: 19.000, mean reward:  1.000 [ 1.000,  1.000], mean action: 0.474 [0.000, 1.000],  loss: 0.145960, mae: 0.602300, mean_q: 0.727255
 112/1000: episode: 5, duration: 0.062s, episode steps:  18, steps per

<tensorflow.python.keras.callbacks.History at 0x208de442630>

In [16]:
# Recreate the environment, network and agent from scratch so that stored
# weights can be loaded into a fresh agent.
ymparisto = gym.make('CartPole-v0')
liikkeet = ymparisto.action_space.n
tilat = ymparisto.observation_space.shape[0]
malli = mallin_rakennus(tilat, liikkeet)
# Use the names bound in this cell (`malli`, `liikkeet`); the previous call
# passed `model` / `actions`, which are not defined in the visible notebook.
dqn = agentin_rakennus(malli, liikkeet)
# `learning_rate` is the current keyword; `lr` is a deprecated alias.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

In [17]:
# Load weights into the compiled agent from 'dqn_weights.h5f'
# (the file must already exist on disk).
dqn.load_weights(filepath='dqn_weights.h5f')

In [18]:
# Evaluate the agent for 10 episodes with rendering enabled.
# The environment is bound to `ymparisto` in this notebook; the previous call
# passed `env`, which is not defined in the visible notebook.
testi = dqn.test(ymparisto, nb_episodes=10, visualize=True)

Testing for 10 episodes ...
Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 200.000, steps: 200


KeyboardInterrupt: 