# An implementation of sequence to sequence learning for performind addition

Build a RNN sequence to sequence (encoder-decoder) model to learn addition.

The code are pretty much copied from
https://github.com/keras-team/keras/blob/master/examples/addition_rnn.py

In [1]:
import tensorflow as tf
sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))


  from ._conv import register_converters as _register_converters


In [2]:
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range

Using TensorFlow backend.


### define classes for one-hot encoding

In [3]:
class CharacterTable(object):
    ''' Given a set of characters:
        + Encode them into a one hot interger representation
        + Decode the one hot integer representation to their character output
        + Decode a vector of probablities to their character output
    '''
    def __init__(self, chars):
        ''' 
        # Arguments:
            chars: Characters that can apeear in the input.
        '''
        self.chars = sorted(set(chars))
        self.char_indices = dict((c,i) for i,c in enumerate(self.chars))
        self.indices_char = dict((i,c) for i,c in enumerate(self.chars))
        
    def encode(self, C, num_rows):
        '''One hot encode given string C
        #Arguments:
            num_rows: number of rows int he returned on hot encoding.
        '''
        x = np.zeros((num_rows, len(self.chars)))
        for i,c in enumerate(C):
            x[i, self.char_indices[c]] = 1
        return x
    
    def decode(self, x, calc_argmax = True):
        if calc_argmax:
            x = x.argmax(axis=-1)
        return ''.join(self.indices_char[ic] for ic in x)

In [4]:
class colors:
    ok = '\033[92m'
    fail = '\033[91m'
    close = '\033[0m'

## Generate training/testing data

In [5]:
# Parameters for the model and dataset.
TRAINING_SIZE = 50000
DIGITS = 3
REVERSE = False

In [6]:
# Maximum length of input is 'int + int' (e.g., '345+678'). Maximum length of
# int is DIGITS.
MAXLEN = DIGITS + 1 + DIGITS

In [7]:
chars = '0123456789+ '
ctable = CharacterTable(chars)

In [8]:
print('Generating data...')
questions = []
expected = []
seen = set()
while len(questions)<TRAINING_SIZE:
    if (len(questions)%10000==0):
        print('%d/%d data has been generated' % (len(questions),TRAINING_SIZE))
    f = lambda: int(''.join(np.random.choice(list('0123456789'))
                           for i in range(np.random.randint(1,DIGITS+1))))
    a, b = f(), f()
    
    #skip addition questions we've already seen
    key = tuple(sorted((a,b)))
    if key in seen:
        continue
    seen.add(key)
    
    #pad the data with spaces such that it is always MAXLEN
    q = '{}+{}'.format(a,b)
    query = q+' '*(MAXLEN-len(q))
    ans = str(a+b)
    ans += ''*(DIGITS+1-len(ans))
    if REVERSE:
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition question:', len(questions))

Generating data...
0/50000 data has been generated
10000/50000 data has been generated
20000/50000 data has been generated
20000/50000 data has been generated
20000/50000 data has been generated
30000/50000 data has been generated
40000/50000 data has been generated
Total addition question: 50000


In [11]:
print('vectorization..')
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype = np.bool)
y = np.zeros((len(questions), DIGITS+1, len(chars)), dtype = np.bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS+1)

vectorization..


In [12]:
#shuffle (x,y)
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]
# Explicitly set apart 10% for validation data that we never train over.
split_at = len(x) - len(x) // 10
(x_train, x_val) = x[:split_at], x[split_at:]
(y_train, y_val) = y[:split_at], y[split_at:]


print('Training Data:')
print(x_train.shape)
print(y_train.shape)

print('Validation Data:')
print(x_val.shape)
print(y_val.shape)


Training Data:
(45000, 7, 12)
(45000, 4, 12)
Validation Data:
(5000, 7, 12)
(5000, 4, 12)


### Build models

In [13]:
# Try replacing GRU, or SimpleRNN.
RNN = layers.SimpleRNN
HIDDEN_SIZE = 64
BATCH_SIZE = 128
LAYERS = 1

In [14]:
print('Build model...')
model = Sequential()

#encode the input using a RNN, producing an output of HIDDEN_SIZE
# Note: In a situation where your input sequences have a variable length,
# use input_shape=(None, num_feature).
model.add(RNN(HIDDEN_SIZE, input_shape = (MAXLEN, len(chars))))

#As the decoder RNN's input, repeatedly provide with the last hidden state of
# RNN for each time step. Repeat 'DIGITS + 1' times as that's the maximum
# length of output, e.g., when DIGITS=3, max output is 999+999=1998.
model.add(layers.RepeatVector(DIGITS + 1))

# The decoder RNN could be multiple layers staked or a single layer
for _ in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences = True))

model.add(layers.TimeDistributed(layers.Dense(len(chars))))
model.add(layers.Activation('softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer = 'adam',
              metrics = ['accuracy'])
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
simple_rnn_1 (SimpleRNN)     (None, 64)                4928      
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 64)             0         
_________________________________________________________________
simple_rnn_2 (SimpleRNN)     (None, 4, 64)             8256      
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 12)             780       
_________________________________________________________________
activation_1 (Activation)    (None, 4, 12)             0         
Total params: 13,964
Trainable params: 13,964
Non-trainable params: 0
_________________________________________________________________


In [15]:
for iteration in range(1,50):
    print('-'*20+'Interation:%d' % iteration + '-'*20)
    model.fit(x_train,y_train,batch_size = BATCH_SIZE,
                epochs = 10,
                validation_data = (x_val,y_val))
    
    #select 5 samples fromthe validation set at random to visualize erros
    #if iteration % 20!=0:
    #    continue
    for i in range(5):
        ind = np.random.randint(0,len(x_val))
        rowx,rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        preds = model.predict_classes(rowx, verbose = 0)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax = False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok+ '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)

--------------------Interation:1--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 481+745 T 1226 [91m☒[0m 1235
Q 782+803 T 1585 [91m☒[0m 1695
Q 61+787  T 848  [91m☒[0m 8490
Q 9+69    T 78   [91m☒[0m 6046
Q 942+667 T 1609 [91m☒[0m 1510
--------------------Interation:2--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 959+913 T 1872 [91m☒[0m 1860
Q 46+381  T 427  [91m☒[0m 4286
Q 81+977  T 1058 [91m☒[0m 1068
Q 69+899  T 968  [91m☒[0m 9688
Q 4+518   T 522  [91m☒[0m 5224
--------------------Interation:3--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 305+47  T 352  [91m☒[0m 352

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 79+247  T 326  [91m☒[0m 3266
Q 900+767 T 1667 [91m☒[0m 1668
Q 981+801 T 1782 [92m☑[0m 1782
Q 133+363 T 496  [91m☒[0m 4965
Q 115+432 T 547  [91m☒[0m 5476
--------------------Interation:7--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 6+195   T 201  [91m☒[0m 2019
Q 648+7   T 655  [91m☒[0m 6565
Q 710+1   T 711  [91m☒[0m 7110
Q 100+6   T 106  [91m☒[0m 1068
Q 134+106 T 240  [91m☒[0m 2400
--------------------Interation:8--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 304+385 T 689  [91m☒[0m 6898
Q 2+392   T 394  [91m☒[0m 3945
Q 658+35  T 693  [91m☒[0m 6935
Q 300+95  T 395  [91m☒[0m 3954
Q 294+8   T 

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 31+447  T 478  [91m☒[0m 4786
Q 98+425  T 523  [91m☒[0m 5234
Q 437+984 T 1421 [92m☑[0m 1421
Q 929+50  T 979  [91m☒[0m 9798
Q 618+590 T 1208 [91m☒[0m 1229
--------------------Interation:12--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 226+624 T 850  [91m☒[0m 8409
Q 154+99  T 253  [91m☒[0m 2534
Q 883+5   T 888  [91m☒[0m 8888
Q 915+19  T 934  [91m☒[0m 9345
Q 96+798  T 894  [91m☒[0m 8845
--------------------Interation:13--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 751+7   T 758  [91m☒[0m 7588
Q 481+45  T 526  [91m☒[0m 5265
Q 19+278  T 297  [91m☒[0m 2976
Q 190+975 T 1165 [91m☒[0m 1175
Q 468+18  

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 65+439  T 504  [91m☒[0m 5045
Q 35+914  T 949  [91m☒[0m 9499
Q 52+550  T 602  [91m☒[0m 6021
Q 34+222  T 256  [91m☒[0m 2565
Q 796+5   T 801  [91m☒[0m 8010
--------------------Interation:17--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 735+387 T 1122 [92m☑[0m 1122
Q 29+90   T 119  [91m☒[0m 1198
Q 584+80  T 664  [91m☒[0m 6648
Q 511+4   T 515  [91m☒[0m 5154
Q 513+46  T 559  [91m☒[0m 5596
--------------------Interation:18--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 960+624 T 1584 [91m☒[0m 1684
Q 52+722  T 774  [91m☒[0m 7744
Q 358+956 T 1314 [92m☑[0m 1314
Q 304+38  T 342  [91m☒[0m 3426
Q 407+95  

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 505+932 T 1437 [92m☑[0m 1437
Q 339+95  T 434  [91m☒[0m 4345
Q 41+88   T 129  [91m☒[0m 1290
Q 477+716 T 1193 [92m☑[0m 1193
Q 55+99   T 154  [91m☒[0m 1545
--------------------Interation:22--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 7+821   T 828  [91m☒[0m 8284
Q 173+41  T 214  [91m☒[0m 2143
Q 19+149  T 168  [91m☒[0m 1682
Q 531+553 T 1084 [92m☑[0m 1084
Q 599+2   T 601  [91m☒[0m 6010
--------------------Interation:23--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 652+458 T 1110 [92m☑[0m 1110
Q 21+40   T 61   [91m☒[0m 6111
Q 635+87  T 722  [91m☒[0m 7224
Q 425+67  T 492  [91m☒[0m 4928
Q 584+37  

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 61+391  T 452  [91m☒[0m 4523
Q 66+68   T 134  [91m☒[0m 1345
Q 310+59  T 369  [91m☒[0m 3691
Q 21+40   T 61   [91m☒[0m 6122
Q 773+49  T 822  [91m☒[0m 8223
--------------------Interation:27--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 960+186 T 1146 [92m☑[0m 1146
Q 0+575   T 575  [91m☒[0m 5756
Q 470+25  T 495  [91m☒[0m 4852
Q 856+62  T 918  [91m☒[0m 9189
Q 744+738 T 1482 [92m☑[0m 1482
--------------------Interation:28--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 880+71  T 951  [91m☒[0m 9510
Q 679+517 T 1196 [91m☒[0m 1296
Q 413+79  T 492  [91m☒[0m 4923
Q 39+58   T 97   [91m☒[0m 9775
Q 135+86  

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 5+733   T 738  [91m☒[0m 7389
Q 1+171   T 172  [91m☒[0m 1720
Q 819+884 T 1703 [92m☑[0m 1703
Q 413+79  T 492  [91m☒[0m 4923
Q 771+28  T 799  [91m☒[0m 7990
--------------------Interation:32--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 64+993  T 1057 [92m☑[0m 1057
Q 5+37    T 42   [91m☒[0m 4208
Q 98+58   T 156  [91m☒[0m 1565
Q 249+5   T 254  [91m☒[0m 2545
Q 533+77  T 610  [91m☒[0m 6000
--------------------Interation:33--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 7+990   T 997  [91m☒[0m 9978
Q 515+8   T 523  [91m☒[0m 5237
Q 78+432  T 510  [91m☒[0m 5103
Q 31+715  T 746  [91m☒[0m 7468
Q 832+941 

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 49+956  T 1005 [92m☑[0m 1005
Q 534+670 T 1204 [92m☑[0m 1204
Q 27+150  T 177  [91m☒[0m 1673
Q 811+958 T 1769 [91m☒[0m 1779
Q 95+918  T 1013 [92m☑[0m 1013
--------------------Interation:37--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 655+84  T 739  [91m☒[0m 7390
Q 620+886 T 1506 [92m☑[0m 1506
Q 700+147 T 847  [91m☒[0m 8476
Q 7+322   T 329  [91m☒[0m 3294
Q 80+38   T 118  [91m☒[0m 1186
--------------------Interation:38--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 988+61  T 1049 [92m☑[0m 1049
Q 359+729 T 1088 [92m☑[0m 1088
Q 582+593 T 1175 [91m☒[0m 1176
Q 5+772   T 777  [91m☒[0m 7777
Q 796+47  

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 945+355 T 1300 [91m☒[0m 1200
Q 361+57  T 418  [91m☒[0m 4183
Q 238+385 T 623  [91m☒[0m 6239
Q 149+868 T 1017 [91m☒[0m 1007
Q 785+20  T 805  [91m☒[0m 7057
--------------------Interation:42--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 849+33  T 882  [91m☒[0m 8825
Q 417+2   T 419  [91m☒[0m 4194
Q 1+794   T 795  [91m☒[0m 7957
Q 617+408 T 1025 [92m☑[0m 1025
Q 29+893  T 922  [91m☒[0m 9222
--------------------Interation:43--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 492+867 T 1359 [92m☑[0m 1359
Q 883+63  T 946  [91m☒[0m 9466
Q 753+9   T 762  [91m☒[0m 7622
Q 801+31  T 832  [91m☒[0m 8328
Q 373+70  

Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 716+636 T 1352 [92m☑[0m 1352
Q 87+700  T 787  [91m☒[0m 7871
Q 69+692  T 761  [91m☒[0m 7610
Q 76+369  T 445  [91m☒[0m 4454
Q 532+496 T 1028 [92m☑[0m 1028
--------------------Interation:47--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 81+340  T 421  [91m☒[0m 4213
Q 81+658  T 739  [91m☒[0m 7390
Q 432+25  T 457  [91m☒[0m 4575
Q 6+270   T 276  [91m☒[0m 2761
Q 768+43  T 811  [91m☒[0m 8114
--------------------Interation:48--------------------
Train on 45000 samples, validate on 5000 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Q 911+5   T 916  [91m☒[0m 9166
Q 0+263   T 263  [91m☒[0m 2632
Q 695+3   T 698  [91m☒[0m 7987
Q 233+919 T 1152 [92m☑[0m 1152
Q 699+48  

In [None]:
from keras import backend as K
K.tensorflow_backend._get_available_gpus()