In [None]:
import gym
import random
import numpy as np

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [None]:
# Seed the Python and NumPy global RNGs so repeated runs start from the same
# random state.
# NOTE(review): TensorFlow weight initialisation and the gym environment keep
# their own random state and are not seeded here — seed those as well if fully
# repeatable training is required.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Environment the agent is trained on.
ymparisto = gym.make('CartPole-v0')
# First dimension of the observation space shape, used as the network input size.
tilat = ymparisto.observation_space.shape[0]
# `n` of the action space, used as the number of network outputs.
liikkeet = ymparisto.action_space.n

In [None]:
def mallin_rakennus(tilat, liikkeet, piilokerrokset=(24, 24)):
    """Build the fully connected network used as the agent's model.

    The network flattens an input of shape ``(1, tilat)``, applies one ReLU
    ``Dense`` layer per entry of ``piilokerrokset`` and finishes with a linear
    ``Dense`` layer of ``liikkeet`` units.

    Args:
        tilat: Second dimension of the ``(1, tilat)`` input shape.
        liikkeet: Number of units in the linear output layer.
        piilokerrokset: Iterable of hidden-layer widths, applied in order.
            The default ``(24, 24)`` reproduces the original architecture of
            two 24-unit hidden layers.

    Returns:
        The assembled, uncompiled ``Sequential`` model.
    """
    malli = Sequential()
    # NOTE(review): the leading 1 in the input shape presumably corresponds to
    # the replay memory's window_length — confirm if that value ever changes.
    malli.add(Flatten(input_shape=(1, tilat)))
    for yksikot in piilokerrokset:
        malli.add(Dense(yksikot, activation='relu'))
    malli.add(Dense(liikkeet, activation='linear'))
    return malli
    

In [None]:
# Instantiate the network for this environment's sizes and print its layer summary.
malli = mallin_rakennus(tilat=tilat, liikkeet=liikkeet)
malli.summary()

In [None]:
def agentin_rakennus(malli, liikkeet):
    """Wrap a network in a ``DQNAgent``.

    The agent is given a ``BoltzmannQPolicy``, a ``SequentialMemory`` with
    ``limit=60000`` and ``window_length=1``, 20 warm-up steps and a
    ``target_model_update`` of ``1e-2``.

    Args:
        malli: Model passed to the agent as ``model``.
        liikkeet: Value passed to the agent as ``nb_actions``.

    Returns:
        The constructed ``DQNAgent``; it is not compiled here.
    """
    return DQNAgent(
        model=malli,
        policy=BoltzmannQPolicy(),
        memory=SequentialMemory(limit=60000, window_length=1),
        nb_actions=liikkeet,
        nb_steps_warmup=20,
        target_model_update=1e-2,
    )

In [None]:
# Build the agent around the network, compile it and train on the environment.
dqn = agentin_rakennus(malli, liikkeet)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras optimizers warn about or reject.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(ymparisto, nb_steps=60000, visualize=False, verbose=1)

In [None]:
# Write the agent's weights to disk with overwriting enabled.
painotiedosto = 'dqn_weights.h5f'
dqn.save_weights(painotiedosto, overwrite=True)

In [None]:
# Rebuild the environment, network and agent from scratch for evaluation.
# Note that this rebinds `liikkeet`, `tilat`, `malli` and `dqn`, so the objects
# created during training are no longer reachable under those names.
ymparisto2 = gym.make('CartPole-v0')
liikkeet = ymparisto2.action_space.n
tilat = ymparisto2.observation_space.shape[0]
malli = mallin_rakennus(tilat, liikkeet)
dqn = agentin_rakennus(malli, liikkeet)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras optimizers warn about or reject.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

In [None]:
# Load previously saved weights from disk into the current agent.
painotiedosto = 'dqn_weights.h5f'
dqn.load_weights(painotiedosto)

In [None]:
# Run 10 rendered evaluation episodes. The environment is closed afterwards,
# even if the run is interrupted, so the render window opened by
# visualize=True does not linger.
try:
    testi = dqn.test(ymparisto2, nb_episodes=10, visualize=True)
finally:
    ymparisto2.close()