In [1]:
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

Using TensorFlow backend.


# Parameters Config

In [2]:
class colors:
    ok = '\033[92m'
    fail = '\033[91m'
    close = '\033[0m'

In [3]:
TRAINING_SIZE = 80000
DIGITS = 3
REVERSE = False
MAXLEN = DIGITS + 1 + DIGITS
chars = '0123456789+- '
RNN = layers.LSTM
HIDDEN_SIZE = 128
BATCH_SIZE = 128
LAYERS = 1

In [4]:
class CharacterTable(object):
    def __init__(self, chars):
        self.chars = sorted(set(chars))
        self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
        self.indices_char = dict((i, c) for i, c in enumerate(self.chars))
    
    def encode(self, C, num_rows):
        x = np.zeros((num_rows, len(self.chars)))
        for i, c in enumerate(C):
            x[i, self.char_indices[c]] = 1
        return x
    
    def decode(self, x, calc_argmax=True):
        if calc_argmax:
            x = x.argmax(axis=-1)
        return "".join(self.indices_char[i] for i in x)

In [5]:
ctable = CharacterTable(chars)

In [6]:
ctable.indices_char

{0: ' ',
 1: '+',
 2: '-',
 3: '0',
 4: '1',
 5: '2',
 6: '3',
 7: '4',
 8: '5',
 9: '6',
 10: '7',
 11: '8',
 12: '9'}

# Data Generation

In [7]:
questions = []
expected = []
seen = set()
is_even = True
print('Generating data...')
while len(questions) < TRAINING_SIZE:
    f = lambda: int(''.join(np.random.choice(list('0123456789')) for i in range(np.random.randint(1, DIGITS + 1))))
    a, b = f(), f()
    
    if b > a:
        tmp = a
        a = b
        b = tmp
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    
    if is_even:
        q = '{}+{}'.format(a, b)
        query = q + ' ' * (MAXLEN - len(q))
        ans = str(a + b)
        is_even = False
    else:
        q = '{}-{}'.format(a, b)
        query = q + ' ' * (MAXLEN - len(q))
        ans = str(a - b)
        is_even = True
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))

Generating data...
Total addition questions: 80000


In [8]:
print(questions[:5], expected[:5])

['529+3  ', '979-77 ', '567+44 ', '44-4   ', '353+48 '] ['532 ', '902 ', '611 ', '40  ', '401 ']


In [9]:
import numpy as np
np.random.choice(list('01'))
print(questions)

['529+3  ', '979-77 ', '567+44 ', '44-4   ', '353+48 ', '5-1    ', '56+4   ', '292-68 ', '830+330', '318-39 ', '7+6    ', '16-7   ', '55+4   ', '652-49 ', '9+7    ', '415-2  ', '916+675', '94-39  ', '998+54 ', '857-19 ', '79+14  ', '166-22 ', '435+333', '700-231', '11+4   ', '111-36 ', '428+19 ', '9-2    ', '437+7  ', '4-4    ', '963+93 ', '522-462', '2+1    ', '543-486', '784+95 ', '99-30  ', '98+69  ', '600-71 ', '366+0  ', '60-13  ', '88+8   ', '28-4   ', '879+771', '422-6  ', '6+3    ', '43-3   ', '898+1  ', '92-7   ', '121+6  ', '962-7  ', '788+416', '56-1   ', '88+80  ', '79-6   ', '223+0  ', '693-512', '86+7   ', '558-6  ', '905+414', '40-9   ', '98+9   ', '87-7   ', '856+278', '85-3   ', '40+2   ', '772-2  ', '937+26 ', '533-2  ', '904+6  ', '305-71 ', '902+81 ', '9-8    ', '44+8   ', '84-2   ', '34+31  ', '385-93 ', '880+43 ', '40-21  ', '245+37 ', '96-3   ', '695+3  ', '644-285', '383+2  ', '8-0    ', '920+90 ', '41-15  ', '533+50 ', '77-29  ', '21+3   ', '959-97 ', '700+7  '

# Processing

In [10]:
print('Vectorization...')
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=np.bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [11]:
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]

# train_test_split
split_size = int(TRAINING_SIZE*0.75)
train_x = x[:split_size]
train_y = y[:split_size]
test_x = x[split_size:]
test_y = y[split_size:]

split_at = len(train_x) - len(train_x) // 10
(x_train, x_val) = train_x[:split_at], train_x[split_at:]
(y_train, y_val) = train_y[:split_at], train_y[split_at:]

print('Training Data:')
print(x_train.shape)
print(y_train.shape)

print('Validation Data:')
print(x_val.shape)
print(y_val.shape)

print('Testing Data:')
print(test_x.shape)
print(test_y.shape)

Training Data:
(54000, 7, 13)
(54000, 4, 13)
Validation Data:
(6000, 7, 13)
(6000, 4, 13)
Testing Data:
(20000, 7, 13)
(20000, 4, 13)


In [12]:
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False False False  True False False False False
   False False]
  [False False False False False False False  True False False False
   False False]
  [False False False False False False  True False False False False
   False False]
  [False False  True False False False False False False False False
   False False]
  [False False False False False False False False False  True False
   False False]
  [False False False False False False  True False False False False
   False False]
  [ True False False False False False False False False False False
   False False]]

 [[False False False False False False False False False False  True
   False False]
  [False False False False False False False False False False False
    True False]
  [False False False False False False False  True False False False
   False False]
  [False  True False False False False False False False False False
   False False]
  [False False False False False False False False Fals

# Build Model

In [13]:
print('Build model...')
model = Sequential()
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE.
# Note: In a situation where your input sequences have a variable length,
# use input_shape=(None, num_feature).
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
# As the decoder RNN's input, repeatedly provide with the last hidden state of
# RNN for each time step. Repeat 'DIGITS + 1' times as that's the maximum
# length of output, e.g., when DIGITS=3, max output is 999+999=1998.
model.add(layers.RepeatVector(DIGITS + 1))
# The decoder RNN could be multiple layers stacked or a single layer.
for _ in range(LAYERS):
    # By setting return_sequences to True, return not only the last output but
    # all the outputs so far in the form of (num_samples, timesteps,
    # output_dim). This is necessary as TimeDistributed in the below expects
    # the first dimension to be the timesteps.
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

# Apply a dense layer to the every temporal slice of an input. For each of step
# of the output sequence, decide which character should be chosen.
model.add(layers.TimeDistributed(layers.Dense(len(chars))))
model.add(layers.Activation('softmax'))
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

model.summary()

Build model...
Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72704     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 13)             1677      
_________________________________________________________________
activation_1 (Activation)    (None, 4, 13)             0         
Total params: 205,965
Trainable params: 205,965
Non-trainable params: 0
_________________________________________________________________


# Training

In [14]:
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        preds = model.predict_classes(rowx, verbose=0)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 864+39  T 903  [91m☒[0m 101 
Q 773-7   T 766  [91m☒[0m 77  
Q 737+384 T 1121 [91m☒[0m 111 
Q 536-17  T 519  [91m☒[0m 33  
Q 111+48  T 159  [91m☒[0m 106 
Q 715+0   T 715  [91m☒[0m 87  
Q 173+54  T 227  [91m☒[0m 106 
Q 432-87  T 345  [91m☒[0m 33  
Q 885+3   T 888  [91m☒[0m 100 
Q 823-21  T 802  [91m☒[0m 336 

--------------------------------------------------
Iteration 1
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 695+57  T 752  [91m☒[0m 802 
Q 918+57  T 975  [91m☒[0m 101 
Q 370-328 T 42   [91m☒[0m 24  
Q 136+4   T 140  [91m☒[0m 127 
Q 628-36  T 592  [91m☒[0m 547 
Q 238+97  T 335  [91m☒[0m 249 
Q 822-10  T 812  [91m☒[0m 737 
Q 304+66  T 370  [91m☒[0m 449 
Q 524-214 T 310  [91m☒[0m 349 
Q

Q 310+288 T 598  [91m☒[0m 500 
Q 954+13  T 967  [91m☒[0m 977 
Q 185-120 T 65   [91m☒[0m 15  
Q 526+196 T 722  [91m☒[0m 612 
Q 386+15  T 401  [91m☒[0m 419 
Q 822-309 T 513  [91m☒[0m 512 
Q 35+25   T 60   [91m☒[0m 62  
Q 644-73  T 571  [91m☒[0m 582 
Q 80+77   T 157  [92m☑[0m 157 
Q 76+22   T 98   [91m☒[0m 90  

--------------------------------------------------
Iteration 14
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 859+79  T 938  [91m☒[0m 943 
Q 225+4   T 229  [91m☒[0m 220 
Q 714-10  T 704  [91m☒[0m 702 
Q 773-13  T 760  [91m☒[0m 757 
Q 593-205 T 388  [91m☒[0m 372 
Q 947-49  T 898  [91m☒[0m 892 
Q 443+146 T 589  [91m☒[0m 582 
Q 232-46  T 186  [92m☑[0m 186 
Q 733+144 T 877  [91m☒[0m 862 
Q 708-112 T 596  [91m☒[0m 580 

--------------------------------------------------
Iteration 15
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 230-6   T 224  [91m☒[0m 236 
Q 389+3   T 392  [91m☒[0m 381 
Q 949-91  T 858  [91m☒[0

Q 308+73  T 381  [92m☑[0m 381 
Q 762-91  T 671  [92m☑[0m 671 
Q 231-5   T 226  [92m☑[0m 226 
Q 882-545 T 337  [92m☑[0m 337 
Q 813+48  T 861  [92m☑[0m 861 
Q 149-18  T 131  [92m☑[0m 131 
Q 47-10   T 37   [92m☑[0m 37  
Q 57+26   T 83   [92m☑[0m 83  
Q 208+41  T 249  [91m☒[0m 259 
Q 958-66  T 892  [92m☑[0m 892 

--------------------------------------------------
Iteration 28
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 815-96  T 719  [92m☑[0m 719 
Q 609+87  T 696  [92m☑[0m 696 
Q 472-93  T 379  [91m☒[0m 389 
Q 944-721 T 223  [92m☑[0m 223 
Q 50-49   T 1    [92m☑[0m 1   
Q 714-322 T 392  [91m☒[0m 382 
Q 277+37  T 314  [92m☑[0m 314 
Q 646+85  T 731  [92m☑[0m 731 
Q 442+94  T 536  [92m☑[0m 536 
Q 876-40  T 836  [92m☑[0m 836 

--------------------------------------------------
Iteration 29
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 440+202 T 642  [92m☑[0m 642 
Q 198+26  T 224  [92m☑[0m 224 
Q 693-24  T 669  [92m☑[0

Q 131-61  T 70   [91m☒[0m 60  
Q 65+60   T 125  [92m☑[0m 125 
Q 50+38   T 88   [92m☑[0m 88  
Q 478-101 T 377  [92m☑[0m 377 
Q 374+8   T 382  [92m☑[0m 382 
Q 902+33  T 935  [92m☑[0m 935 
Q 205+89  T 294  [92m☑[0m 294 
Q 889-729 T 160  [91m☒[0m 150 
Q 302+26  T 328  [92m☑[0m 328 
Q 389-56  T 333  [92m☑[0m 333 

--------------------------------------------------
Iteration 42
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 834+624 T 1458 [92m☑[0m 1458
Q 567+53  T 620  [92m☑[0m 620 
Q 980+558 T 1538 [92m☑[0m 1538
Q 944+424 T 1368 [92m☑[0m 1368
Q 467+47  T 514  [92m☑[0m 514 
Q 757+458 T 1215 [92m☑[0m 1215
Q 606+3   T 609  [92m☑[0m 609 
Q 681-661 T 20   [91m☒[0m 10  
Q 439-50  T 389  [92m☑[0m 389 
Q 866-444 T 422  [92m☑[0m 422 

--------------------------------------------------
Iteration 43
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 952-66  T 886  [92m☑[0m 886 
Q 910+80  T 990  [92m☑[0m 990 
Q 196-184 T 12   [91m☒[0

Q 668+560 T 1228 [92m☑[0m 1228
Q 570+149 T 719  [92m☑[0m 719 
Q 976-289 T 687  [92m☑[0m 687 
Q 329+74  T 403  [92m☑[0m 403 
Q 861-99  T 762  [92m☑[0m 762 
Q 727+265 T 992  [92m☑[0m 992 
Q 468-40  T 428  [92m☑[0m 428 
Q 919-276 T 643  [92m☑[0m 643 
Q 833-94  T 739  [92m☑[0m 739 
Q 206+66  T 272  [92m☑[0m 272 

--------------------------------------------------
Iteration 56
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 823-748 T 75   [92m☑[0m 75  
Q 283-64  T 219  [92m☑[0m 219 
Q 978+38  T 1016 [92m☑[0m 1016
Q 413-30  T 383  [92m☑[0m 383 
Q 868+85  T 953  [92m☑[0m 953 
Q 918+57  T 975  [92m☑[0m 975 
Q 92+80   T 172  [92m☑[0m 172 
Q 544+50  T 594  [92m☑[0m 594 
Q 742+12  T 754  [92m☑[0m 754 
Q 766+42  T 808  [92m☑[0m 808 

--------------------------------------------------
Iteration 57
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 541+3   T 544  [92m☑[0m 544 
Q 550-480 T 70   [91m☒[0m 60  
Q 820+44  T 864  [92m☑[0

Q 668+1   T 669  [92m☑[0m 669 
Q 312-36  T 276  [92m☑[0m 276 
Q 589-9   T 580  [92m☑[0m 580 
Q 431-10  T 421  [92m☑[0m 421 
Q 282-212 T 70   [91m☒[0m 60  
Q 662-91  T 571  [92m☑[0m 571 
Q 506-61  T 445  [92m☑[0m 445 
Q 316-7   T 309  [92m☑[0m 309 
Q 304-52  T 252  [92m☑[0m 252 
Q 842-1   T 841  [92m☑[0m 841 

--------------------------------------------------
Iteration 70
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 572-169 T 403  [92m☑[0m 403 
Q 51+40   T 91   [92m☑[0m 91  
Q 532+99  T 631  [92m☑[0m 631 
Q 835+7   T 842  [92m☑[0m 842 
Q 697+4   T 701  [92m☑[0m 701 
Q 746+48  T 794  [92m☑[0m 794 
Q 348-10  T 338  [92m☑[0m 338 
Q 958-866 T 92   [92m☑[0m 92  
Q 884-21  T 863  [92m☑[0m 863 
Q 89-62   T 27   [92m☑[0m 27  

--------------------------------------------------
Iteration 71
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 590+120 T 710  [92m☑[0m 710 
Q 990+347 T 1337 [92m☑[0m 1337
Q 889+17  T 906  [92m☑[0

Q 469-68  T 401  [92m☑[0m 401 
Q 455+105 T 560  [92m☑[0m 560 
Q 283-51  T 232  [92m☑[0m 232 
Q 105-23  T 82   [92m☑[0m 82  
Q 231+90  T 321  [92m☑[0m 321 
Q 321+223 T 544  [92m☑[0m 544 
Q 859-299 T 560  [92m☑[0m 560 
Q 525-0   T 525  [92m☑[0m 525 
Q 340+4   T 344  [92m☑[0m 344 
Q 417+338 T 755  [92m☑[0m 755 

--------------------------------------------------
Iteration 84
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 124-96  T 28   [91m☒[0m 38  
Q 726-333 T 393  [92m☑[0m 393 
Q 262-28  T 234  [92m☑[0m 234 
Q 740-735 T 5    [91m☒[0m 2   
Q 711-464 T 247  [92m☑[0m 247 
Q 553-2   T 551  [92m☑[0m 551 
Q 985-751 T 234  [92m☑[0m 234 
Q 104-3   T 101  [91m☒[0m 10  
Q 841+204 T 1045 [92m☑[0m 1045
Q 640+14  T 654  [92m☑[0m 654 

--------------------------------------------------
Iteration 85
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 626-356 T 270  [92m☑[0m 270 
Q 979+406 T 1385 [92m☑[0m 1385
Q 465+34  T 499  [92m☑[0

Q 345-4   T 341  [92m☑[0m 341 
Q 42-41   T 1    [91m☒[0m 2   
Q 267-44  T 223  [92m☑[0m 223 
Q 172-91  T 81   [92m☑[0m 81  
Q 307+48  T 355  [92m☑[0m 355 
Q 805+6   T 811  [92m☑[0m 811 
Q 302-76  T 226  [92m☑[0m 226 
Q 694-672 T 22   [92m☑[0m 22  
Q 942+768 T 1710 [92m☑[0m 1710
Q 804+213 T 1017 [92m☑[0m 1017

--------------------------------------------------
Iteration 98
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 532-82  T 450  [92m☑[0m 450 
Q 821+70  T 891  [92m☑[0m 891 
Q 733+46  T 779  [92m☑[0m 779 
Q 793-243 T 550  [92m☑[0m 550 
Q 225+4   T 229  [92m☑[0m 229 
Q 820-476 T 344  [92m☑[0m 344 
Q 117+56  T 173  [92m☑[0m 173 
Q 75+62   T 137  [92m☑[0m 137 
Q 86+2    T 88   [92m☑[0m 88  
Q 82+73   T 155  [92m☑[0m 155 

--------------------------------------------------
Iteration 99
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 972-680 T 292  [92m☑[0m 292 
Q 881+27  T 908  [92m☑[0m 908 
Q 266+62  T 328  [92m☑[0

# Testing

In [15]:
print("MSG : Prediction")
#####################################################
## Try to test and evaluate your model ##############
## ex. test_x = ["555+175", "860+7  ", "340+29 "]
## ex. test_y = ["730 ", "867 ", "369 "] 
#####################################################

predictions = model.predict_classes(test_x)
print(predictions)

MSG : Prediction
[[ 4  7 11  5]
 [ 4  6  3 11]
 [ 8  3  3  0]
 ...
 [11  5 11  0]
 [11  3 10  0]
 [ 4  6  0  0]]


In [16]:
predict_right = 0
for i in range(test_y.shape[0]):
    predict_y = ctable.decode(predictions[i],calc_argmax=False)
    correct_y = ctable.decode(test_y[i])
    if predict_y == correct_y:
        predict_right+=1
accuracy = predict_right / test_y.shape[0]
print("combine model accuracy:",accuracy)
    

combine model accuracy: 0.9254
