In [1]:
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

Using TensorFlow backend.


# Parameter Configuration

In [2]:
class colors:
    """ANSI terminal escape sequences used to colour printed check marks."""

    ok = '\x1b[92m'     # bright green foreground
    fail = '\x1b[91m'   # bright red foreground
    close = '\x1b[0m'   # reset, ends the coloured span

In [3]:
# --- Dataset shape -------------------------------------------------------
TRAINING_SIZE = 80000   # number of unique addition questions to generate
DIGITS = 3              # maximum number of digits in each operand
REVERSE = False         # when True, each padded query string is reversed
# Longest possible query: DIGITS digits, a '+', then DIGITS more digits.
MAXLEN = 2 * DIGITS + 1
# Vocabulary: the ten digits, the plus sign and the space used for padding.
chars = '0123456789+ '

# --- Network hyper-parameters --------------------------------------------
RNN = layers.LSTM       # recurrent layer class used for encoder and decoder
HIDDEN_SIZE = 128       # units in every recurrent layer
BATCH_SIZE = 128        # mini-batch size passed to model.fit
LAYERS = 1              # number of stacked decoder layers

In [4]:
class CharacterTable(object):
    """Two-way lookup between characters and one-hot rows.

    Given a set of characters the table can
      * encode a string into a one-hot matrix, and
      * decode a one-hot / probability matrix (or a vector of character
        indices) back into a string.
    """

    def __init__(self, chars):
        """Build the lookup tables.

        Args:
            chars: Iterable of the characters that may appear in the data.
                Duplicates are dropped and the rest sorted, which fixes the
                index assigned to each character.
        """
        self.chars = sorted(set(chars))
        self.char_indices = {ch: idx for idx, ch in enumerate(self.chars)}
        self.indices_char = dict(enumerate(self.chars))

    def encode(self, C, num_rows):
        """One-hot encode the string ``C``.

        Args:
            C: String to encode; each character must be in the table.
            num_rows: Number of rows in the returned matrix.

        Returns:
            A float array of shape ``(num_rows, len(self.chars))`` in which
            row ``i`` has a single 1 in the column of ``C[i]``; rows past
            the end of ``C`` stay all-zero.
        """
        onehot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            onehot[row, self.char_indices[ch]] = 1
        return onehot

    def decode(self, x, calc_argmax=True):
        """Turn an encoded array back into its string.

        Args:
            x: Either a 2-D array of per-character scores (one row per
                position) or, when ``calc_argmax`` is False, a sequence of
                character indices.
            calc_argmax: Whether to take the argmax over the last axis of
                ``x`` to obtain the character indices first.

        Returns:
            The decoded string, one character per position.
        """
        positions = x.argmax(axis=-1) if calc_argmax else x
        return "".join([self.indices_char[idx] for idx in positions])

In [5]:
# Character table built from the configured vocabulary; it is used below to
# one-hot encode the questions/answers and to decode predictions.
ctable = CharacterTable(chars)

In [6]:
# Inspect the index -> character mapping held by the table.
ctable.indices_char

{0: ' ',
 1: '+',
 2: '0',
 3: '1',
 4: '2',
 5: '3',
 6: '4',
 7: '5',
 8: '6',
 9: '7',
 10: '8',
 11: '9'}

# Data Generation

In [7]:
def _random_operand():
    """Return a random non-negative integer of 1 to DIGITS decimal digits.

    Each digit is drawn independently, so leading zeros can occur; they
    vanish when the digit string is converted to an int (e.g. '007' -> 7).
    """
    return int(''.join(np.random.choice(list('0123456789'))
                       for _ in range(np.random.randint(1, DIGITS + 1))))


# Operands range over 0 .. 10**DIGITS - 1, and a+b / b+a count as the same
# question, so only N * (N + 1) / 2 distinct questions exist. Fail fast
# instead of letting the loop below spin forever on an impossible request.
_num_operands = 10 ** DIGITS
_max_questions = _num_operands * (_num_operands + 1) // 2
if TRAINING_SIZE > _max_questions:
    raise ValueError(
        'TRAINING_SIZE={} exceeds the {} unique questions available with '
        'DIGITS={}'.format(TRAINING_SIZE, _max_questions, DIGITS))

questions = []
expected = []
seen = set()
print('Generating data...')
while len(questions) < TRAINING_SIZE:
    a, b = _random_operand(), _random_operand()
    # Skip questions already generated; sorting the pair also treats
    # b+a as a duplicate of a+b.
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # Right-pad the question with spaces to a fixed width of MAXLEN.
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    # Right-pad the answer to DIGITS + 1 characters, the longest sum possible.
    ans = str(a + b)
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        # Only the query is reversed; the answer keeps its natural order.
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))

Generating data...
Total addition questions: 80000


In [8]:
# Peek at the first five padded questions and their padded answers.
print(questions[:5], expected[:5])

['28+245 ', '5+93   ', '8+16   ', '0+36   ', '2+531  '] ['273 ', '98  ', '24  ', '36  ', '533 ']


# Processing

In [9]:
print('Vectorization...')
# One-hot tensors indexed as (sample, position, character).
# The builtin `bool` is used as dtype: the `np.bool` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=bool)
# encode() returns a float 0/1 matrix; assigning it casts to boolean.
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [10]:
# Shuffle questions and answers in unison with one random ordering.
shuffled_order = np.arange(len(y))
np.random.shuffle(shuffled_order)
x, y = x[shuffled_order], y[shuffled_order]

# Train/test split: the first 75% is used for training, the rest for testing.
split_size = int(TRAINING_SIZE*0.75)
train_x, test_x = x[:split_size], x[split_size:]
train_y, test_y = y[:split_size], y[split_size:]

# Set the last 10% of the training portion aside as validation data.
split_at = len(train_x) - len(train_x) // 10
x_train, x_val = train_x[:split_at], train_x[split_at:]
y_train, y_val = train_y[:split_at], train_y[split_at:]

# Report the shape of every subset (inputs first, then labels).
for title, inputs, labels in (('Training Data:', x_train, y_train),
                              ('Validation Data:', x_val, y_val),
                              ('Testing Data:', test_x, test_y)):
    print(title)
    print(inputs.shape)
    print(labels.shape)

Training Data:
(54000, 7, 12)
(54000, 4, 12)
Validation Data:
(6000, 7, 12)
(6000, 4, 12)
Testing Data:
(20000, 7, 12)
(20000, 4, 12)


In [11]:
# Show the one-hot encoding of the first three training inputs and labels.
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False False False False False  True False False
   False]
  [False False  True False False False False False False False False
   False]
  [False False False False False False False False False False False
    True]
  [False  True False False False False False False False False False
   False]
  [False False  True False False False False False False False False
   False]
  [ True False False False False False False False False False False
   False]
  [ True False False False False False False False False False False
   False]]

 [[False False False False False False  True False False False False
   False]
  [False False False False False False  True False False False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False False False False False
    True]
  [False False False False False False False False False  True False
   False]
  [False False False False  True False Fal

# Build Model

In [12]:
print('Build model...')
model = Sequential()
# Encoder: read the whole one-hot question with an RNN and keep only its
# final output, a vector of size HIDDEN_SIZE.
# Note: for variable-length input sequences, use
# input_shape=(None, num_feature) instead.
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
# Feed that encoder output to the decoder at every output time step. It is
# repeated DIGITS + 1 times because that is the maximum answer length,
# e.g. with DIGITS=3 the largest answer is 999+999=1998.
model.add(layers.RepeatVector(DIGITS + 1))
# The decoder can be a single RNN layer or several stacked ones.
for _ in range(LAYERS):
    # return_sequences=True makes the layer return its output at every time
    # step, shaped (num_samples, timesteps, output_dim), rather than only
    # the last one; the TimeDistributed layer below needs that per-step
    # sequence.
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

# Apply the same dense layer to every time step of the decoder output, then
# a softmax, so each output position gets a distribution over characters.
model.add(layers.TimeDistributed(layers.Dense(len(chars))))
model.add(layers.Activation('softmax'))
# Each output position is a multi-class choice among len(chars) characters.
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

model.summary()

Build model...
Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 12)             1548      
_________________________________________________________________
activation_1 (Activation)    (None, 4, 12)             0         
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


# Training

In [13]:
# Train one epoch at a time so that sample predictions can be inspected
# after every pass over the training data.
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    # Spot-check 10 validation samples picked at random (with replacement).
    for _ in range(10):
        ind = np.random.randint(0, len(x_val))
        # Slice rather than index so the batch dimension of size 1 is kept.
        rowx, rowy = x_val[ind:ind + 1], y_val[ind:ind + 1]
        # `Sequential.predict_classes` was removed in TensorFlow 2.6. For a
        # multi-class output it was the argmax over the last axis of
        # `predict`, which works on old and new Keras alike.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        # preds already holds character indices, so skip decode's argmax.
        guess = ctable.decode(preds[0], calc_argmax=False)
        # Undo the input reversal (if any) so the question reads naturally.
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 226+15  T 241  [91m☒[0m 225 
Q 213+730 T 943  [91m☒[0m 111 
Q 668+65  T 733  [91m☒[0m 105 
Q 580+54  T 634  [91m☒[0m 105 
Q 576+665 T 1241 [91m☒[0m 1105
Q 53+340  T 393  [91m☒[0m 325 
Q 437+477 T 914  [91m☒[0m 110 
Q 891+10  T 901  [91m☒[0m 101 
Q 2+951   T 953  [91m☒[0m 21  
Q 381+684 T 1065 [91m☒[0m 100 

--------------------------------------------------
Iteration 1
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 69+440  T 509  [91m☒[0m 557 
Q 595+40  T 635  [91m☒[0m 657 
Q 986+328 T 1314 [91m☒[0m 1489
Q 113+486 T 599  [91m☒[0m 387 
Q 42+70   T 112  [91m☒[0m 11  
Q 81+862  T 943  [91m☒[0m 907 
Q 95+717  T 812  [91m☒[0m 777 
Q 89+912  T 1001 [91m☒[0m 900 
Q 2+754   T 756  [91m☒[0m 41  
Q

Q 499+40  T 539  [92m☑[0m 539 
Q 98+726  T 824  [92m☑[0m 824 
Q 679+58  T 737  [92m☑[0m 737 
Q 36+95   T 131  [92m☑[0m 131 
Q 7+1     T 8    [91m☒[0m 1   
Q 31+252  T 283  [92m☑[0m 283 
Q 75+439  T 514  [92m☑[0m 514 
Q 3+136   T 139  [92m☑[0m 139 
Q 43+279  T 322  [92m☑[0m 322 
Q 574+82  T 656  [92m☑[0m 656 

--------------------------------------------------
Iteration 14
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 7+660   T 667  [92m☑[0m 667 
Q 11+813  T 824  [92m☑[0m 824 
Q 42+375  T 417  [92m☑[0m 417 
Q 470+90  T 560  [92m☑[0m 560 
Q 431+445 T 876  [92m☑[0m 876 
Q 384+76  T 460  [92m☑[0m 460 
Q 719+55  T 774  [92m☑[0m 774 
Q 802+2   T 804  [92m☑[0m 804 
Q 7+698   T 705  [92m☑[0m 705 
Q 624+430 T 1054 [92m☑[0m 1054

--------------------------------------------------
Iteration 15
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 230+50  T 280  [92m☑[0m 280 
Q 732+858 T 1590 [92m☑[0m 1590
Q 85+99   T 184  [92m☑[0

Q 39+5    T 44   [92m☑[0m 44  
Q 879+814 T 1693 [92m☑[0m 1693
Q 729+97  T 826  [92m☑[0m 826 
Q 852+148 T 1000 [92m☑[0m 1000
Q 84+75   T 159  [92m☑[0m 159 
Q 67+551  T 618  [92m☑[0m 618 
Q 798+47  T 845  [92m☑[0m 845 
Q 226+365 T 591  [92m☑[0m 591 
Q 74+672  T 746  [92m☑[0m 746 
Q 719+32  T 751  [92m☑[0m 751 

--------------------------------------------------
Iteration 28
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 802+2   T 804  [92m☑[0m 804 
Q 68+68   T 136  [92m☑[0m 136 
Q 306+89  T 395  [92m☑[0m 395 
Q 40+428  T 468  [92m☑[0m 468 
Q 400+700 T 1100 [91m☒[0m 1200
Q 91+602  T 693  [92m☑[0m 693 
Q 880+46  T 926  [92m☑[0m 926 
Q 70+398  T 468  [92m☑[0m 468 
Q 137+656 T 793  [91m☒[0m 893 
Q 856+174 T 1030 [92m☑[0m 1030

--------------------------------------------------
Iteration 29
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 88+874  T 962  [92m☑[0m 962 
Q 17+766  T 783  [92m☑[0m 783 
Q 64+265  T 329  [92m☑[0

Q 43+524  T 567  [92m☑[0m 567 
Q 88+961  T 1049 [92m☑[0m 1049
Q 238+11  T 249  [92m☑[0m 249 
Q 893+58  T 951  [92m☑[0m 951 
Q 975+68  T 1043 [92m☑[0m 1043
Q 480+572 T 1052 [92m☑[0m 1052
Q 4+5     T 9    [92m☑[0m 9   
Q 132+6   T 138  [92m☑[0m 138 
Q 890+809 T 1699 [92m☑[0m 1699
Q 53+783  T 836  [92m☑[0m 836 

--------------------------------------------------
Iteration 42
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 53+783  T 836  [92m☑[0m 836 
Q 609+901 T 1510 [92m☑[0m 1510
Q 374+221 T 595  [92m☑[0m 595 
Q 327+34  T 361  [92m☑[0m 361 
Q 158+46  T 204  [92m☑[0m 204 
Q 78+697  T 775  [92m☑[0m 775 
Q 595+709 T 1304 [92m☑[0m 1304
Q 92+235  T 327  [92m☑[0m 327 
Q 3+879   T 882  [92m☑[0m 882 
Q 881+68  T 949  [92m☑[0m 949 

--------------------------------------------------
Iteration 43
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 824+897 T 1721 [92m☑[0m 1721
Q 765+244 T 1009 [92m☑[0m 1009
Q 24+17   T 41   [92m☑[0

Q 149+619 T 768  [92m☑[0m 768 
Q 78+135  T 213  [92m☑[0m 213 
Q 784+999 T 1783 [92m☑[0m 1783
Q 831+75  T 906  [92m☑[0m 906 
Q 643+477 T 1120 [92m☑[0m 1120
Q 89+436  T 525  [92m☑[0m 525 
Q 364+32  T 396  [92m☑[0m 396 
Q 27+916  T 943  [92m☑[0m 943 
Q 71+825  T 896  [92m☑[0m 896 
Q 60+729  T 789  [92m☑[0m 789 

--------------------------------------------------
Iteration 56
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 930+148 T 1078 [92m☑[0m 1078
Q 181+908 T 1089 [92m☑[0m 1089
Q 46+753  T 799  [91m☒[0m 899 
Q 42+149  T 191  [92m☑[0m 191 
Q 576+525 T 1101 [92m☑[0m 1101
Q 87+242  T 329  [92m☑[0m 329 
Q 674+980 T 1654 [92m☑[0m 1654
Q 768+937 T 1705 [92m☑[0m 1705
Q 74+904  T 978  [92m☑[0m 978 
Q 322+48  T 370  [92m☑[0m 370 

--------------------------------------------------
Iteration 57
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 36+48   T 84   [92m☑[0m 84  
Q 65+641  T 706  [92m☑[0m 706 
Q 37+792  T 829  [92m☑[0

Q 557+932 T 1489 [92m☑[0m 1489
Q 366+173 T 539  [92m☑[0m 539 
Q 162+577 T 739  [92m☑[0m 739 
Q 957+391 T 1348 [91m☒[0m 1338
Q 203+34  T 237  [92m☑[0m 237 
Q 38+383  T 421  [92m☑[0m 421 
Q 19+580  T 599  [92m☑[0m 599 
Q 975+712 T 1687 [92m☑[0m 1687
Q 782+18  T 800  [92m☑[0m 800 
Q 35+616  T 651  [92m☑[0m 651 

--------------------------------------------------
Iteration 70
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 582+333 T 915  [92m☑[0m 915 
Q 630+611 T 1241 [92m☑[0m 1241
Q 130+250 T 380  [92m☑[0m 380 
Q 332+459 T 791  [92m☑[0m 791 
Q 199+44  T 243  [92m☑[0m 243 
Q 84+43   T 127  [92m☑[0m 127 
Q 32+967  T 999  [92m☑[0m 999 
Q 814+85  T 899  [91m☒[0m 999 
Q 40+409  T 449  [92m☑[0m 449 
Q 983+78  T 1061 [92m☑[0m 1061

--------------------------------------------------
Iteration 71
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 450+45  T 495  [92m☑[0m 495 
Q 857+680 T 1537 [92m☑[0m 1537
Q 591+54  T 645  [92m☑[0

Q 371+25  T 396  [92m☑[0m 396 
Q 344+63  T 407  [92m☑[0m 407 
Q 943+694 T 1637 [92m☑[0m 1637
Q 594+27  T 621  [92m☑[0m 621 
Q 95+953  T 1048 [92m☑[0m 1048
Q 19+540  T 559  [92m☑[0m 559 
Q 865+776 T 1641 [92m☑[0m 1641
Q 461+154 T 615  [92m☑[0m 615 
Q 945+69  T 1014 [92m☑[0m 1014
Q 65+884  T 949  [92m☑[0m 949 

--------------------------------------------------
Iteration 84
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 572+142 T 714  [92m☑[0m 714 
Q 592+37  T 629  [92m☑[0m 629 
Q 160+319 T 479  [92m☑[0m 479 
Q 88+785  T 873  [92m☑[0m 873 
Q 978+43  T 1021 [92m☑[0m 1021
Q 872+22  T 894  [92m☑[0m 894 
Q 249+18  T 267  [92m☑[0m 267 
Q 93+202  T 295  [92m☑[0m 295 
Q 940+0   T 940  [92m☑[0m 940 
Q 485+460 T 945  [92m☑[0m 945 

--------------------------------------------------
Iteration 85
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 884+471 T 1355 [91m☒[0m 1255
Q 50+17   T 67   [92m☑[0m 67  
Q 31+884  T 915  [92m☑[0

Q 799+560 T 1359 [92m☑[0m 1359
Q 356+877 T 1233 [92m☑[0m 1233
Q 36+198  T 234  [92m☑[0m 234 
Q 435+676 T 1111 [92m☑[0m 1111
Q 853+499 T 1352 [92m☑[0m 1352
Q 875+718 T 1593 [92m☑[0m 1593
Q 344+377 T 721  [92m☑[0m 721 
Q 625+874 T 1499 [92m☑[0m 1499
Q 808+264 T 1072 [92m☑[0m 1072
Q 27+682  T 709  [92m☑[0m 709 

--------------------------------------------------
Iteration 98
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 770+830 T 1600 [92m☑[0m 1600
Q 21+35   T 56   [92m☑[0m 56  
Q 67+617  T 684  [92m☑[0m 684 
Q 673+289 T 962  [92m☑[0m 962 
Q 825+44  T 869  [92m☑[0m 869 
Q 412+61  T 473  [92m☑[0m 473 
Q 555+200 T 755  [92m☑[0m 755 
Q 113+243 T 356  [92m☑[0m 356 
Q 149+619 T 768  [92m☑[0m 768 
Q 86+1    T 87   [92m☑[0m 87  

--------------------------------------------------
Iteration 99
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 262+316 T 578  [92m☑[0m 578 
Q 416+5   T 421  [92m☑[0m 421 
Q 304+536 T 840  [92m☑[0

# Testing

In [14]:
print("MSG : Prediction")
#####################################################
## Test and evaluate the trained model.
## ex. questions: ["555+175", "860+7  ", "340+29 "]
## ex. answers:   ["730 ", "867 ", "369 "]
## (test_x / test_y hold the one-hot encodings of such strings.)
#####################################################

# `Sequential.predict_classes` was removed in TensorFlow 2.6. Taking the
# argmax over the character axis of `predict` yields the same per-position
# character indices and works on old and new Keras alike.
predictions = model.predict(test_x, verbose=0).argmax(axis=-1)

MSG : Prediction


In [15]:
# Exact-match accuracy: a test sample counts as correct only when the whole
# decoded prediction string equals the decoded label string.
num_samples = test_y.shape[0]
predict_right = sum(
    1
    for idx in range(num_samples)
    # predictions already holds character indices, so decode skips its argmax.
    if ctable.decode(predictions[idx], calc_argmax=False) == ctable.decode(test_y[idx])
)
accuracy = predict_right / num_samples
print("addition model accuracy:",accuracy)
    

addition model accuracy: 0.9925
