In [None]:
import gym
from tensorflow import keras
from tensorflow.keras.optimizers.legacy import Adam
from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory
import matplotlib.pyplot as plt
#from keras.metrics import AUC

import numpy as np

from env.env import KeibaEnv


# Build the game environment from the per-year training files.
# Every year contributes one `<year>_train_binary.csv` file (first argument)
# and one `<year>_result_wide.csv` file (second argument); both lists are
# generated from the same year range so they cannot drift out of sync.
TRAIN_DATA_DIR = "./../data/train"
TRAIN_YEARS = range(2003, 2013)  # 2003 through 2012 inclusive

env = KeibaEnv(
    [f"{TRAIN_DATA_DIR}/{year}_train_binary.csv" for year in TRAIN_YEARS],
    [f"{TRAIN_DATA_DIR}/{year}_result_wide.csv" for year in TRAIN_YEARS],
)

# Reset the environment before training starts.
observation = env.reset()

# Number of discrete actions, read from the environment's action space.
# The recorded run of this notebook shows an 11-unit output layer, i.e. 11 actions.
nb_actions = env.action_space.n

# Resume from a previously saved model when one exists; otherwise build a
# fresh Q-network.
try:
    model = keras.models.load_model('./../model/binary_model_wide')
except OSError:
    # Only the "nothing saved at this path yet" case falls through to model
    # creation (Keras 2 reports a missing path as OSError/IOError). Any other
    # failure -- a corrupt file, an interrupted kernel, a genuine bug -- now
    # propagates instead of silently replacing the saved model with an
    # untrained one.
    print("create")
    # Fully connected Q-network: the (window_length=1, *observation_shape)
    # input is flattened, passed through three 128-unit ReLU layers, and
    # mapped to one linear output per action.
    model = keras.models.Sequential([
        keras.layers.Flatten(input_shape=(1,) + env.observation_space.shape),
        keras.layers.Dense(128, activation="relu"),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(nb_actions, activation="linear"),
    ])

model.summary()

# Replay memory holding past transitions; sampling from it is meant to
# stabilise training. window_length=1 matches the leading 1 in the network's
# input shape.
memory = SequentialMemory(limit=50000, window_length=1)

# Exploration policy: epsilon-greedy with the library's default settings.
policy = EpsGreedyQPolicy()

# Assemble the DQN agent from the network, the replay memory and the policy.
# NOTE(review): keras-rl appears to treat target_model_update values below 1
# as a soft-update rate -- confirm against the library documentation.
agent_settings = {
    "model": model,
    "nb_actions": nb_actions,
    "memory": memory,
    "target_model_update": 1e-2,
    "policy": policy,
}
dqn = DQNAgent(**agent_settings)

# Compile the agent with Adam; binary cross-entropy (on logits) and accuracy
# are tracked as additional metrics alongside the agent's own loss.
optimizer = Adam(learning_rate=1e-4)
tracked_metrics = [
    keras.losses.BinaryCrossentropy(from_logits=True, name='binary_crossentropy'),
    'accuracy',
]
dqn.compile(optimizer, metrics=tracked_metrics)

# Start training. Progress is logged every 34462 steps, which in the recorded
# output of this notebook coincides with one episode per interval.
history = dqn.fit(
    env,
    nb_steps=5000000,
    visualize=False,
    log_interval=34462,
    verbose=1,
)



RANK_ONE_TWO_HORSE: 0
RANK_ONE_THREE_HORSE: 0
RANK_ONE_FOUR_HORSE: 0
RANK_ONE_FIVE_HORSE: 0
RANK_TWO_THREE_HORSE: 0
RANK_TWO_FOUR_HORSE: 0
RANK_TWO_FIVE_HORSE: 0
RANK_THREE_FOUR_HORSE: 0
RANK_THREE_FIVE_HORSE: 0
RANK_FOUR_FIVE_HORSE: 0
NO_ACITON: 0
TOTAL_REWARD: 0

create
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 10)                0         
                                                                 
 dense (Dense)               (None, 128)               1408      
                                                                 
 dense_1 (Dense)             (None, 128)               16512     
                                                                 
 dense_2 (Dense)             (None, 128)               16512     
                                                                 
 dense_3 (Dense)             (None, 11)       

  updates=self.state_updates,


  995/34462 [..............................] - ETA: 3:31 - reward: -0.8472

  updates=self.state_updates,



RANK_ONE_TWO_HORSE: 1111
RANK_ONE_THREE_HORSE: 960
RANK_ONE_FOUR_HORSE: 1129
RANK_ONE_FIVE_HORSE: 936
RANK_TWO_THREE_HORSE: 1267
RANK_TWO_FOUR_HORSE: 889
RANK_TWO_FIVE_HORSE: 692
RANK_THREE_FOUR_HORSE: 897
RANK_THREE_FIVE_HORSE: 758
RANK_FOUR_FIVE_HORSE: 744
NO_ACITON: 25079
TOTAL_REWARD: -2346700

1 episodes - episode_reward: -6888.000 [-6888.000, -6888.000] - loss: 0.467 - binary_crossentropy: 0.348 - accuracy: 0.310 - mean_q: 0.075

Interval 2 (34462 steps performed)

RANK_ONE_TWO_HORSE: 611
RANK_ONE_THREE_HORSE: 1078
RANK_ONE_FOUR_HORSE: 426
RANK_ONE_FIVE_HORSE: 877
RANK_TWO_THREE_HORSE: 665
RANK_TWO_FOUR_HORSE: 458
RANK_TWO_FIVE_HORSE: 474
RANK_THREE_FOUR_HORSE: 425
RANK_THREE_FIVE_HORSE: 383
RANK_FOUR_FIVE_HORSE: 613
NO_ACITON: 28452
TOTAL_REWARD: -1508800

1 episodes - episode_reward: -2986.000 [-2986.000, -2986.000] - loss: 0.467 - binary_crossentropy: 0.803 - accuracy: 0.617 - mean_q: 1.328

Interval 3 (68924 steps performed)

RANK_ONE_TWO_HORSE: 744
RANK_ONE_THREE_HORSE: 140

In [1]:
# This cell depends on the training cell: `model` and `history` are created
# there, so running this cell on a fresh kernel raises NameError.

# Save the trained model (architecture and weights) so that the next run of
# the training cell can load it and continue from there.
# Alternative kept for reference -- save the weights only:
#   dqn.save_weights('./../model/moving_test.hdf5', overwrite=True)
model.save("./../model/binary_model_wide")

# Plot the reward obtained in each training episode.
fig, ax = plt.subplots()
ax.plot(history.history['episode_reward'], label='episode_reward')
ax.set_xlabel("episode")
ax.set_ylabel("episode reward")
ax.set_title("DQN training: reward per episode")
ax.legend()
plt.show()

NameError: name 'model' is not defined