In [None]:
from generate import *
from keras.layers import ZeroPadding2D
from keras.callbacks import ModelCheckpoint, TensorBoard

## Supervised-learning (SL) policy network

In [None]:
# Build the (untrained) policy network, compile it and write it to disk so
# that the training cell can pick it up with load_model().
# NOTE(review): 225 == 15 * 15, presumably one output class per board cell --
# confirm against INPUT_SHAPE in generate.py.
nb_classes = 225

policy_layers = [
    # Padding by 2 followed by a 'valid' 5x5 convolution keeps the spatial size.
    ZeroPadding2D(padding=(2, 2), input_shape=INPUT_SHAPE),
    Convolution2D(16, 5, padding='valid'),
    Activation('relu'),
    Dropout(0.1),
    # Unpadded 'valid' convolution: shrinks each spatial dimension by 4
    # (the original author noted 'same' as an alternative).
    Convolution2D(32, 5, padding='valid'),
    Activation('relu'),
    Dropout(0.1),
    Convolution2D(32, 5, padding='same'),
    Activation('relu'),
    BatchNormalization(),
    Convolution2D(48, 5, padding='same'),
    Activation('relu'),
    Dropout(0.2),
    # Fully connected head ending in a softmax over nb_classes outputs.
    Flatten(),
    Dense(2048),
    Activation('relu'),
    Dropout(0.1),
    Dense(1024),
    Activation('relu'),
    Dropout(0.1),
    Dense(nb_classes),
    Activation('softmax'),
]

model = Sequential()
for policy_layer in policy_layers:
    model.add(policy_layer)

model.compile(optimizer='adadelta',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# Caution: this overwrites any previously saved 'large_policy_model'.
model.save('large_policy_model')

In [None]:
# Load the saved policy checkpoints as (label, network) pairs; the labels are
# what the evaluation cell prints in front of each accuracy row.
checkpoint_table = (
    ('all', 'large_policy_model'),
    ('+10', 'large_policy_model_10'),
    ('+10+1', 'large_policy_model_10_1'),
)
models = [(tag, load_model(path)) for tag, path in checkpoint_table]

In [None]:
# Evaluate every loaded checkpoint on the test file for three `last_n`
# settings and print one row of rounded percentages per checkpoint.
# NOTE(review): `last_n` presumably restricts samples to the last n moves of
# each game -- confirm against generate_batch in generate.py.
#
# The loop unpacks into `label` / `net` instead of reusing the name `model`,
# so the notebook-global `model` is not overwritten with a (label, network)
# tuple as a side effect of running this cell.
for label, net in models:
    # Index [1] picks the first metric after the loss; this is accuracy when
    # the network was compiled with metrics=['accuracy'].
    acc = [net.evaluate_generator(generator=generate_batch('aug_test.renju', 4096, last_n=n), steps=32)[1]
           for n in [None, 10, 1]]
    print('{}: {}%, {}%, {}%'.format(label, round(acc[0]*100), round(acc[1]*100), round(acc[2]*100)))

## Reusable training cell (re-run to continue training):

In [None]:
# One training session for the policy network: load the saved model, train it
# from the batch generators, then save it back under the same name.

# Session hyper-parameters.
EPOCHS = 40
BATCH_SIZE = 2048
EPOCH_SIZE = 131072      # other values noted by the author: 1048576, 8388608
EPOCH_VAL_SIZE = 131072  # other value noted by the author: 262144
# Number of generator batches that make up one training / validation epoch.
STEPS, VAL_STEPS = EPOCH_SIZE // BATCH_SIZE, EPOCH_VAL_SIZE // BATCH_SIZE

train_file, test_file = 'aug_train.renju', 'aug_test.renju'
callbacks = [
    # Checkpoint to the same file the model is loaded from, every 5 epochs.
    ModelCheckpoint('large_policy_model', period=5),
    TensorBoard(log_dir='./tensorboard_policy'),
]

print('Start time:', time.ctime())

model = load_model('large_policy_model')
fit_options = dict(
    generator=generate_batch(train_file, BATCH_SIZE, last_n=1),
    steps_per_epoch=STEPS,
    epochs=EPOCHS,
    verbose=1,
    validation_data=generate_batch(test_file, BATCH_SIZE, last_n=1),
    validation_steps=VAL_STEPS,
    callbacks=callbacks,
)
history = model.fit_generator(**fit_options)
model.save('large_policy_model')

print('Finish time:', time.ctime())
    

## Value network

In [None]:
# Build the (untrained) value network: two 5x5 convolutions, one hidden dense
# layer and a single tanh output, trained with mean squared error.
value_layers = [
    # Padding by 2 followed by a 'valid' 5x5 convolution keeps the spatial size.
    ZeroPadding2D(padding=(2, 2), input_shape=INPUT_SHAPE),
    Convolution2D(16, (5, 5), padding='valid'),
    Activation('relu'),
    Dropout(0.1),
    # Unpadded 'valid' convolution (the original author noted 'same' as an
    # alternative).
    Convolution2D(16, (5, 5), padding='valid'),
    Activation('relu'),
    Dropout(0.1),
    Flatten(),
    Dense(512),  # the original author noted 1024 as an alternative
    Activation('relu'),
    Dropout(0.1),
    # Scalar output squashed into [-1, 1] by tanh.
    Dense(1),
    Activation('tanh'),
]

model = Sequential()
for value_layer in value_layers:
    model.add(value_layer)

model.compile(optimizer="adadelta", loss='mse')
# Caution: this overwrites any previously saved 'value_model'.
model.save('value_model')

In [None]:
# One training session for the value network: load the saved model, train it
# from the batch generators, then save it back under the same name.

# Session hyper-parameters.
EPOCHS = 40
BATCH_SIZE = 4096
EPOCH_SIZE = 524288
EPOCH_VAL_SIZE = 131072
# Number of generator batches that make up one training / validation epoch.
STEPS = EPOCH_SIZE // BATCH_SIZE
VAL_STEPS = EPOCH_VAL_SIZE // BATCH_SIZE

train_file = 'aug_train.renju'
test_file = 'aug_test.renju'

# histogram_freq must stay 0 here: the validation data is a generator, and
# the standalone Keras TensorBoard callback raises
# "If printing histograms, validation_data must be provided, and cannot be a
# generator." at the end of the first epoch when histograms are requested.
# Scalar loss curves are still logged.
callbacks = [TensorBoard(log_dir='tensorboard', histogram_freq=0)]

print('Start time:', time.ctime())

model = load_model('value_model')
history = model.fit_generator(generator=generate_batch(train_file, BATCH_SIZE, mark_player=True, last_n=20),
                    steps_per_epoch=STEPS, epochs=EPOCHS, verbose=1,
                    validation_data=generate_batch(test_file, BATCH_SIZE, mark_player=True, last_n=20),
                    validation_steps=VAL_STEPS, callbacks=callbacks)
model.save('value_model')

print('Finish time:', time.ctime())
    
# plt.figure(figsize=(30, 5))
# acc_history = list(map(lambda x: x, history.history['loss']))
# val_acc_history = list(map(lambda x: x, history.history['val_loss']))
# y_ticks = np.linspace(0, 2, 11)
# y_labels = np.vectorize(lambda x: str(round(x, 1)) + "%")(y_ticks)
# plt.plot(range(1, EPOCHS+1), acc_history, 'b', label='loss')
# plt.plot(range(1, EPOCHS+1), val_acc_history, 'r', label='val_loss')
# plt.yticks(y_ticks, y_labels)
# plt.grid(True)
# plt.legend(loc='upper left', fontsize=16)
# plt.savefig('value_history.png')
# plt.show()

In [None]:
# Single-sample generator (batch size 1) over the training file, used below
# to spot-check individual predictions.
gen = generate_batch(
    'aug_train.renju',
    1,
    mark_player=True,
    last_n=10,
)

In [None]:
# Draw the next (input, target) pair and show the target followed by what the
# currently loaded `model` predicts for that input.
x1, y1 = next(gen)
print(y1)
predicted = model.predict(x1)
print(predicted)

In [None]:
# Run the dataset measurement helper on the training file with the same
# mark_player / last_n settings used for value-network training.
measure_dataset(
    'aug_train.renju',
    mark_player=True,
    last_n=20,
    nb_games=10000,
)