In [25]:
import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import datetime
import os

# tensorboard in util
import sys
sys.path.append("/home/yasuhiko.iwase/jupyter/util")
import tensorboard as tb 

np.random.seed(0)
tf.set_random_seed(1234)

# モデル保存用
MODEL_DIR = os.path.join('./model')
if os.path.exists(MODEL_DIR) is False:
    os.mkdir(MODEL_DIR)
    

In [2]:
def inference(x, y, n_batch, is_training,
              input_digits=None,
              output_digits=None,
              n_hidden=None,
              n_out=None):
    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.01)
        return tf.Variable(initial, name='w')

    def bias_variable(shape):
        initial = tf.zeros(shape, dtype=tf.float32)
        return tf.Variable(initial, name='b')

    # Encode
    encoder = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    encoder = rnn.AttentionCellWrapper(encoder,
                                       input_digits,
                                       state_is_tuple=True)
    state = encoder.zero_state(n_batch, tf.float32)
    encoder_outputs = []
    encoder_states = []

    with tf.variable_scope('Encoder'):
        for t in range(input_digits):
            if t > 0:
                tf.get_variable_scope().reuse_variables()
            (output, state) = encoder(x[:, t, :], state)
            encoder_outputs.append(output)
            encoder_states.append(state)

    # Decode
    decoder = rnn.BasicLSTMCell(n_hidden, forget_bias=1.0)
    decoder = rnn.AttentionCellWrapper(decoder,
                                       input_digits,
                                       state_is_tuple=True)
    state = encoder_states[-1]
    decoder_outputs = [encoder_outputs[-1]]

    # 出力層の重みとバイアスを事前に定義
    V = weight_variable([n_hidden, n_out])
    c = bias_variable([n_out])
    outputs = []

    with tf.variable_scope('Decoder'):
        for t in range(1, output_digits):
            if t > 1:
                tf.get_variable_scope().reuse_variables()

            if is_training is True:
                (output, state) = decoder(y[:, t-1, :], state)
            else:
                # 直前の出力を求める
                linear = tf.matmul(decoder_outputs[-1], V) + c
                out = tf.nn.softmax(linear)
                outputs.append(out)
                out = tf.one_hot(tf.argmax(out, -1), depth=output_digits)

                (output, state) = decoder(out, state)

            decoder_outputs.append(output)

    if is_training is True:
        output = tf.reshape(tf.concat(decoder_outputs, axis=1),
                            [-1, output_digits, n_hidden])

        linear = tf.einsum('ijk,kl->ijl', output, V) + c
        return tf.nn.softmax(linear)
    else:
        # 最後の出力を求める
        linear = tf.matmul(decoder_outputs[-1], V) + c
        out = tf.nn.softmax(linear)
        outputs.append(out)

        output = tf.reshape(tf.concat(outputs, axis=1),
                            [-1, output_digits, n_out])
        return output


In [3]:

def loss(y, t):
    cross_entropy = \
        tf.reduce_mean(-tf.reduce_sum(
                       t * tf.log(tf.clip_by_value(y, 1e-10, 1.0)),
                       reduction_indices=[1]))
    return cross_entropy


def training(loss):
    optimizer = \
        tf.train.AdamOptimizer(learning_rate=0.001, beta1=0.9, beta2=0.999)
    train_step = optimizer.minimize(loss)
    return train_step


def accuracy(y, t):
    correct_prediction = tf.equal(tf.argmax(y, -1), tf.argmax(t, -1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    return accuracy



In [4]:
def n(digits=3):
    number = ''
    for i in range(np.random.randint(1, digits + 1)):
        number += np.random.choice(list('0123456789'))
    return int(number)

def padding(chars, maxlen):
    return chars + ' ' * (maxlen - len(chars))


In [5]:

'''
データの生成
'''
N = 20000
N_train = int(N * 0.9)
N_validation = N - N_train

digits = 4  # 最大の桁数
input_digits = digits * 2 + 1  # 例： 1234+5678
output_digits = digits + 1  # 5000+5000 = 10000 以上で５桁になる

added = set()
questions = []
answers = []

chars = '0123456789+ '
char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

while len(questions) < N:
    a, b = n(digits), n(digits)  # 適当な数を２つ生成

    pair = tuple(sorted((a, b)))
    if pair in added:
        continue

    question = '{}+{}'.format(a, b)
    question = padding(question, input_digits)  # 足りない桁を穴埋め
    answer = str(a + b)
    answer = padding(answer, output_digits)  # 足りない桁を穴埋め

    added.add(pair)
    questions.append(question)
    answers.append(answer)

X = np.zeros((len(questions), input_digits, len(chars)), dtype=np.integer)
Y = np.zeros((len(questions), digits + 1, len(chars)), dtype=np.integer)

for i in range(N):
    for t, char in enumerate(questions[i]):
        X[i, t, char_indices[char]] = 1
    for t, char in enumerate(answers[i]):
        Y[i, t, char_indices[char]] = 1

X_train, X_validation, Y_train, Y_validation = \
    train_test_split(X, Y, train_size=N_train)



In [6]:

'''
モデル設定
'''
n_in = len(chars)
n_hidden = 128
n_out = len(chars)

x = tf.placeholder(tf.float32, shape=[None, input_digits, n_in])
t = tf.placeholder(tf.float32, shape=[None, output_digits, n_out])
n_batch = tf.placeholder(tf.int32, shape=[])
is_training = tf.placeholder(tf.bool)

y = inference(x, t, n_batch, is_training,
              input_digits=input_digits,
              output_digits=output_digits,
              n_hidden=n_hidden, n_out=n_out)
loss = loss(y, t)
train_step = training(loss)

acc = accuracy(y, t)

history = {
    'val_loss': [],
    'val_acc': []
}




In [None]:
# モデルの初期化（初回のみ）

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

In [35]:
'''
保存済データの利用
'''

MODEL_DIR = os.path.join('./model')
saver = tf.train.Saver()  # モデル保存用

saver.restore(sess, MODEL_DIR + '/02_attention_tensorflow_model.ckpt')


In [None]:

'''
モデル学習
'''
epochs = 10
batch_size = 200

n_batches = N_train // batch_size # 切り捨て除算

for epoch in range(epochs):
    print(datetime.datetime.now().strftime("%Y/%m/%d %H:%M:%S"))
    print('Epoch:', epoch+1)
    print('=' * 10)

    X_, Y_ = shuffle(X_train, Y_train)

    for i in range(n_batches):
        start = i * batch_size
        end = start + batch_size

        sess.run(train_step, feed_dict={
            x: X_[start:end],
            t: Y_[start:end],
            n_batch: batch_size,
            is_training: True
        })

    # 検証データを用いた評価
    val_loss = loss.eval(session=sess, feed_dict={
        x: X_validation,
        t: Y_validation,
        n_batch: N_validation,
        is_training: False
    })
    val_acc = acc.eval(session=sess, feed_dict={
        x: X_validation,
        t: Y_validation,
        n_batch: N_validation,
        is_training: False
    })

    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

    print('validation loss:', val_loss,' validation acc: ', val_acc)


    # 検証データからランダムに問題を選んで答え合わせ
    for i in range(10):
        index = np.random.randint(0, N_validation)
        question = X_validation[np.array([index])]
        answer = Y_validation[np.array([index])]
        prediction = y.eval(session=sess, feed_dict={
            x: question,
            # t: answer,
            n_batch: 1,
            is_training: False
        })
        question = question.argmax(axis=-1)
        answer = answer.argmax(axis=-1)
        prediction = np.argmax(prediction, -1)

        q = ''.join(indices_char[i] for i in question[0])
        a = ''.join(indices_char[i] for i in answer[0])
        p = ''.join(indices_char[i] for i in prediction[0])

        print('Q:  ', q, 'A:  ', p, 'T/F:', end=' ')
        if a == p:
            print('TRUE!!!!!')
        else:
            print('FALSE')
        
    print('-' * 10)

2017/07/27 16:52:55
Epoch: 1
validation loss: 0.0407438  validation acc:  0.9707
Q:   17+8986   A:   9903  T/F: FALSE
Q:   50+867    A:   917   T/F: TRUE!!!!!
Q:   820+27    A:   847   T/F: TRUE!!!!!
Q:   27+72     A:   99    T/F: TRUE!!!!!
Q:   6265+12   A:   6277  T/F: TRUE!!!!!
Q:   812+28    A:   840   T/F: TRUE!!!!!
Q:   51+89     A:   140   T/F: TRUE!!!!!
Q:   9846+1063 A:   10909 T/F: TRUE!!!!!
Q:   3+8011    A:   8014  T/F: TRUE!!!!!
Q:   233+69    A:   302   T/F: TRUE!!!!!
----------
2017/07/27 16:54:11
Epoch: 2
validation loss: 0.0404406  validation acc:  0.9719
Q:   2688+94   A:   2782  T/F: TRUE!!!!!
Q:   25+1427   A:   1452  T/F: TRUE!!!!!
Q:   545+184   A:   729   T/F: TRUE!!!!!
Q:   7153+1    A:   7154  T/F: TRUE!!!!!
Q:   666+9238  A:   9904  T/F: TRUE!!!!!
Q:   131+6424  A:   6555  T/F: TRUE!!!!!
Q:   0+5371    A:   5371  T/F: TRUE!!!!!
Q:   1577+3365 A:   4942  T/F: TRUE!!!!!
Q:   135+1     A:   136   T/F: TRUE!!!!!
Q:   682+398   A:   1080  T/F: TRUE!!!!!
----------


In [34]:
'''
モデルファイル保存
'''
saver = tf.train.Saver()  # モデル保存用
model_path = saver.save(sess, MODEL_DIR + '/02_attention_tensorflow_model.ckpt')
print('Model saved to:', model_path)

Model saved to: ./model/02_attention_tensorflow_model.ckpt


In [33]:
'''
予測精度の評価
'''

questions_count = 100
success_count = 0

# 検証データからランダムに問題を選んで答え合わせ
for i in range(questions_count):
    index = np.random.randint(0, N_validation)
    question = X_validation[np.array([index])]
    answer = Y_validation[np.array([index])]
    prediction = y.eval(session=sess, feed_dict={
        x: question,
        # t: answer,
        n_batch: 1,
        is_training: False
    })
    question = question.argmax(axis=-1)
    answer = answer.argmax(axis=-1)
    prediction = np.argmax(prediction, -1)

    q = ''.join(indices_char[i] for i in question[0])
    a = ''.join(indices_char[i] for i in answer[0])
    p = ''.join(indices_char[i] for i in prediction[0])

    if a == p:
        success_count += 1 
    else:
        print('Q:  ', q, 'A:  ', p, 'T/F:', end=' ')
        print('FALSE')
        
success_rate = success_count / questions_count
print("success_count: ",success_count, "/",questions_count)
print("success_rate: ",success_rate)

Q:   6643+197  A:   6830  T/F: FALSE
Q:   586+395   A:   980   T/F: FALSE
Q:   27+182    A:   219   T/F: FALSE
Q:   14+8955   A:   8979  T/F: FALSE
Q:   3098+3027 A:   6215  T/F: FALSE
Q:   169+9519  A:   9698  T/F: FALSE
Q:   27+182    A:   219   T/F: FALSE
Q:   5223+106  A:   5339  T/F: FALSE
success_count:  92 / 100
success_rate:  0.92


In [26]:
tb.show_graph(tf.get_default_graph().as_graph_def())