In [1]:
import math
import os
import random

import numpy as np
from keras.layers import Dense, LSTM, Dropout
from keras.models import Sequential
from sklearn.metrics import accuracy_score, mean_squared_error

Using TensorFlow backend.


In [2]:
# Number of decimal digit positions in every encoded sequence.
MAX_NUM_LEN = 3
# Operands are drawn from the (MAX_NUM_LEN - 1)-digit numbers, so the sum of two
# operands always fits in MAX_NUM_LEN digits.
RAND_MIN = int(10 ** (MAX_NUM_LEN - 2))
RAND_MAX = int(10 ** (MAX_NUM_LEN - 1)) - 1

def cal_seq(n):
    """Split ``n`` into its decimal digits, least-significant digit first.

    Args:
        n: Number to decompose.

    Returns:
        A float array of length ``MAX_NUM_LEN`` where index 0 holds the ones
        digit, index 1 the tens digit, and so on. Positions above the number's
        highest digit stay 0; digits beyond ``MAX_NUM_LEN`` are dropped.
    """
    digits = np.zeros((MAX_NUM_LEN,))
    remaining = n
    for pos in range(MAX_NUM_LEN):
        # divmod peels off the current lowest digit and keeps the rest.
        remaining, digits[pos] = divmod(remaining, 10)
    return digits

def gen(batch_size=16):
    """Build one random batch of digit-wise addition problems.

    Each sample draws two operands uniformly from ``[RAND_MIN, RAND_MAX]`` and
    encodes both operands and their sum with ``cal_seq`` (least-significant
    digit first).

    Args:
        batch_size: Number of addition problems to generate.

    Returns:
        A tuple ``(X, y)``:
        X has shape ``(batch_size, MAX_NUM_LEN, 2)``; the last axis holds the
        digit of the first and second operand at each position.
        y has shape ``(batch_size, MAX_NUM_LEN, 1)`` and holds the digits of
        the sum.
        Both arrays are divided by 10, so every digit lies in ``[0, 0.9]``.
    """
    inputs = np.zeros((batch_size, MAX_NUM_LEN, 2))
    targets = np.zeros((batch_size, MAX_NUM_LEN, 1))

    for row in range(batch_size):
        first = random.randint(RAND_MIN, RAND_MAX)
        second = random.randint(RAND_MIN, RAND_MAX)
        inputs[row, :, 0] = cal_seq(first)
        inputs[row, :, 1] = cal_seq(second)
        targets[row, :, 0] = cal_seq(first + second)

    # Scale digits 0..9 down to 0..0.9.
    scale = 10
    return (inputs / scale, targets / scale)

In [48]:
def get_model(batch_size=1):
    """Create and compile the stateful two-layer LSTM used for digit-wise addition.

    The network consumes one time step per call: its input shape is
    ``(batch_size, 1, 2)``, i.e. a single step with two features. Both LSTM
    layers are stateful, so the batch size is fixed at construction time and
    the caller is responsible for calling ``reset_states()`` between sequences.

    Args:
        batch_size: Fixed batch size baked into ``batch_input_shape``.

    Returns:
        A compiled ``Sequential`` model (MSE loss, Adam optimizer) whose output
        is a single sigmoid unit per sample.
    """
    layers = [
        LSTM(8, batch_input_shape=(batch_size, 1, 2), return_sequences=True,
             stateful=True, activation='relu'),
        LSTM(8, return_sequences=False, stateful=True, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='sigmoid'),
    ]
    network = Sequential(layers)
    network.compile(loss='mse', optimizer='adam')
    return network

In [45]:
def evaluate(model, batch_size):
    """Score the model on one freshly generated random batch.

    The batch is fed to the model one time step at a time; the stateful LSTM
    carries its state from one ``predict`` call to the next, and the state is
    reset once the whole sequence has been consumed.

    Args:
        model: Stateful model as returned by ``get_model``.
        batch_size: Number of problems to generate. It has to match the batch
            size the model was built with, since the model's input shape is
            fixed.

    Returns:
        A tuple ``(loss, acc)``: ``loss`` is the mean squared error over all
        scaled digits, and ``acc`` is the fraction of individual digits (not
        whole numbers) that are correct after un-scaling and rounding.
    """
    X, y = gen(batch_size)
    time_steps = X.shape[1]

    # Size the buffer from the argument, not from the notebook-global BATCH_SIZE,
    # so the function works for any batch size and before that global exists.
    preds = np.zeros((batch_size, time_steps, 1))
    for t in range(time_steps):
        # expand_dims turns (batch, 2) into the (batch, 1, 2) single-step input.
        preds[:, t] = model.predict(np.expand_dims(X[:, t], axis=1))
    model.reset_states()

    y, preds = y.flatten(), preds.flatten()
    loss = mean_squared_error(y, preds)
    # Digits were scaled by 1/10 in gen(); undo that and round to compare as integers.
    # The builtin int replaces the np.int alias, which was removed from NumPy.
    acc = accuracy_score(np.around(y * 10).astype(int), np.around(preds * 10).astype(int))

    return loss, acc

In [None]:
BATCH_SIZE = 16
model = get_model(BATCH_SIZE)

EPOCHS = 100000
CV_NUM = 10
min_loss = float('inf')

# save_weights does not create missing directories; make sure the target exists
# before the first checkpoint is written.
os.makedirs('./weights', exist_ok=True)

for e in range(EPOCHS):
    X, y = gen(BATCH_SIZE)

    # Train on one digit position per call; the stateful LSTM carries its state
    # across the calls, and it is reset once the whole sequence has been seen.
    for t in range(X.shape[1]):
        model.train_on_batch(np.expand_dims(X[:, t], axis=1), y[:, t])
    model.reset_states()

    # Every 100 epochs, average loss/accuracy over CV_NUM fresh random batches.
    if (e + 1) % 100 == 0:
        curr_loss, curr_acc = 0.0, 0.0
        for _ in range(CV_NUM):
            loss, acc = evaluate(model, BATCH_SIZE)
            curr_loss += loss / CV_NUM
            curr_acc += acc / CV_NUM

        print('epoch {}, loss {:.4f}, acc {:.4f}'.format(e+1, curr_loss, curr_acc))
        # Checkpoint only when the averaged evaluation loss improves.
        if min_loss > curr_loss:
            min_loss = curr_loss
            model.save_weights('./weights/weights-{}-{:.4f}-{:.4f}.h5'.format(e+1, curr_loss, curr_acc))
            print('model saved')

In [42]:
def num(x):
    """Decode a scaled, least-significant-first digit sequence into an integer.

    Args:
        x: 1-D array of digits divided by 10 (index 0 is the ones digit).

    Returns:
        The integer obtained by rounding each ``x[i] * 10`` to the nearest
        whole number and combining the results as decimal digits.
    """
    value = 0
    # Walk from the most significant position down, accumulating Horner-style.
    for pos in reversed(range(x.shape[0])):
        value = value * 10 + round(x[pos] * 10)
    return int(value)

def test(model, batch_size):
    """Print the model's predicted sum next to the true sum for one random batch.

    The batch is fed to the stateful model one time step at a time and the
    model's state is reset afterwards. Operands, predictions and targets are
    decoded back to integers with ``num`` before printing.

    Args:
        model: Stateful model as returned by ``get_model``.
        batch_size: Number of problems to generate and print. It has to match
            the batch size the model was built with, since the model's input
            shape is fixed.
    """
    X, y = gen(batch_size)
    time_steps = X.shape[1]

    # Size the buffer from the argument rather than the notebook-global BATCH_SIZE.
    preds = np.zeros((batch_size, time_steps, 1))
    for t in range(time_steps):
        # expand_dims turns (batch, 2) into the (batch, 1, 2) single-step input.
        preds[:, t] = model.predict(np.expand_dims(X[:, t], axis=1))
    model.reset_states()

    for b in range(batch_size):
        x1 = num(X[b, :, 0])
        x2 = num(X[b, :, 1])
        y_ = num(y[b, :, 0])
        preds_ = num(preds[b, :, 0])
        print("{} + {} = {}(true:{})".format(x1, x2, preds_, y_))
        
# Build a fresh model, restore a saved checkpoint and print sample predictions.
# The checkpoint file name follows the training cell's pattern:
# weights-<epoch>-<loss>-<acc>.h5
BATCH_SIZE = 16
WEIGHTS_PATH = './weights/weights-79400-0.0005-0.9771.h5'

model = get_model(BATCH_SIZE)
model.load_weights(WEIGHTS_PATH)
test(model, BATCH_SIZE)

46 + 57 = 103(true:103)
20 + 23 = 43(true:43)
60 + 43 = 113(true:103)
84 + 46 = 130(true:130)
13 + 16 = 29(true:29)
20 + 72 = 92(true:92)
77 + 32 = 109(true:109)
91 + 14 = 105(true:105)
32 + 87 = 119(true:119)
81 + 46 = 127(true:127)
57 + 66 = 123(true:123)
80 + 15 = 95(true:95)
87 + 28 = 115(true:115)
20 + 44 = 64(true:64)
97 + 55 = 152(true:152)
85 + 32 = 117(true:117)
