In [1]:
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


# Parameters Config

In [2]:
class colors:
    ok = '\033[92m'
    fail = '\033[91m'
    close = '\033[0m'

In [3]:
TRAINING_SIZE = 8000
DIGITS = 3
REVERSE = False
MAXLEN = DIGITS + 1 + DIGITS
chars = '0123456789+ '
RNN = layers.LSTM
HIDDEN_SIZE = 128
BATCH_SIZE = 128
LAYERS = 1

In [4]:
class CharacterTable(object):
    def __init__(self, chars):
        self.chars = sorted(set(chars))
        self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
        self.indices_char = dict((i, c) for i, c in enumerate(self.chars))
    
    def encode(self, C, num_rows):
        x = np.zeros((num_rows, len(self.chars)))
        for i, c in enumerate(C):
            x[i, self.char_indices[c]] = 1
        return x
    
    def decode(self, x, calc_argmax=True):
        if calc_argmax:
            x = x.argmax(axis=-1)
        return "".join(self.indices_char[i] for i in x)

In [5]:
ctable = CharacterTable(chars)

In [6]:
ctable.indices_char

{0: ' ',
 1: '+',
 2: '0',
 3: '1',
 4: '2',
 5: '3',
 6: '4',
 7: '5',
 8: '6',
 9: '7',
 10: '8',
 11: '9'}

# Data Generation

In [7]:
questions = []
expected = []
seen = set()
print('Generating data...')
while len(questions) < TRAINING_SIZE:
    f = lambda: int(''.join(np.random.choice(list('0123456789')) for i in range(np.random.randint(1, DIGITS + 1))))
    a, b = f(), f()
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a + b)
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))

Generating data...
Total addition questions: 8000


In [8]:
print(questions[:5], expected[:5])

['796+3  ', '4+3    ', '7+48   ', '631+47 ', '8+3    '] ['799 ', '7   ', '55  ', '678 ', '11  ']


# Processing

In [9]:
print('Vectorization...')
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=np.bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [10]:
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]

# train_test_split
train_x = x[:20000]
train_y = y[:20000]
test_x = x[20000:]
test_y = y[20000:]

split_at = len(train_x) - len(train_x) // 10
(x_train, x_val) = train_x[:split_at], train_x[split_at:]
(y_train, y_val) = train_y[:split_at], train_y[split_at:]

print('Training Data:')
print(x_train.shape)
print(y_train.shape)

print('Validation Data:')
print(x_val.shape)
print(y_val.shape)

print('Testing Data:')
print(test_x.shape)
print(test_y.shape)

Training Data:
(7200, 7, 12)
(7200, 4, 12)
Validation Data:
(800, 7, 12)
(800, 4, 12)
Testing Data:
(0, 7, 12)
(0, 4, 12)


In [11]:
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False  True False False False False False False False
   False]
  [False False False False False False False False  True False False
   False]
  [False False False False False False False False False  True False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False  True False False False
   False]
  [ True False False False False False False False False False False
   False]
  [ True False False False False False False False False False False
   False]]

 [[False False False False False  True False False False False False
   False]
  [False False False False False False False False False False False
    True]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False  True False False False
   False]
  [False False False False False False False False False  True False
   False]
  [False False False False False False  Tr

# Build Model

In [12]:
print('Build model...')
model = Sequential()
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
model.add(layers.RepeatVector(DIGITS + 1))
for _ in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

model.add(layers.TimeDistributed(layers.Dense(len(chars))))
model.add(layers.Activation('softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 12)             1548      
_________________________________________________________________
activation_1 (Activation)    (None, 4, 12)             0         
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


# Training

In [13]:
for iteration in range(50):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        preds = model.predict_classes(rowx, verbose=0)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Train on 7200 samples, validate on 800 samples
Epoch 1/1
Q 14+19   T 33   [91m☒[0m 1   
Q 8+27    T 35   [91m☒[0m 1   
Q 61+117  T 178  [91m☒[0m 11  
Q 621+359 T 980  [91m☒[0m 11  
Q 755+90  T 845  [91m☒[0m 1   
Q 70+461  T 531  [91m☒[0m 11  
Q 911+326 T 1237 [91m☒[0m 11  
Q 90+4    T 94   [91m☒[0m 1   
Q 483+7   T 490  [91m☒[0m 1   
Q 396+1   T 397  [91m☒[0m 1   

--------------------------------------------------
Iteration 1
Train on 7200 samples, validate on 800 samples
Epoch 1/1
Q 0+394   T 394  [91m☒[0m 11  
Q 85+502  T 587  [91m☒[0m 116 
Q 9+800   T 809  [91m☒[0m 114 
Q 4+152   T 156  [91m☒[0m 11  
Q 465+0   T 465  [91m☒[0m 11  
Q 498+963 T 1461 [91m☒[0m 1136
Q 899+8   T 907  [91m☒[0m 116 
Q 613+15  T 628  [91m☒[0m 116 
Q 66+47   T 113  [91m☒[0m 11  
Q 142+114 T 256  [91m☒[0m 116 

--------------------------------------------------
Iteration 2
Train on 7200 samples, validate on

Q 30+57   T 87   [91m☒[0m 47  
Q 56+97   T 153  [91m☒[0m 663 
Q 25+48   T 73   [91m☒[0m 55  
Q 422+4   T 426  [91m☒[0m 45  
Q 58+23   T 81   [91m☒[0m 33  
Q 0+394   T 394  [91m☒[0m 33  
Q 92+23   T 115  [91m☒[0m 33  
Q 571+88  T 659  [91m☒[0m 854 
Q 218+63  T 281  [91m☒[0m 229 
Q 4+767   T 771  [91m☒[0m 742 

--------------------------------------------------
Iteration 15
Train on 7200 samples, validate on 800 samples
Epoch 1/1
Q 60+34   T 94   [91m☒[0m 639 
Q 0+995   T 995  [91m☒[0m 900 
Q 69+58   T 127  [91m☒[0m 663 
Q 547+333 T 880  [91m☒[0m 119 
Q 668+70  T 738  [91m☒[0m 763 
Q 93+19   T 112  [92m☑[0m 112 
Q 75+79   T 154  [91m☒[0m 164 
Q 898+6   T 904  [91m☒[0m 986 
Q 18+56   T 74   [91m☒[0m 111 
Q 96+210  T 306  [91m☒[0m 109 

--------------------------------------------------
Iteration 16
Train on 7200 samples, validate on 800 samples
Epoch 1/1
Q 1+561   T 562  [91m☒[0m 11  
Q 48+38   T 86   [91m☒[0m 48  
Q 804+76  T 880  [91m☒[0m 14

Q 8+249   T 257  [91m☒[0m 255 
Q 96+7    T 103  [91m☒[0m 10  
Q 196+8   T 204  [91m☒[0m 197 
Q 79+745  T 824  [91m☒[0m 722 
Q 63+740  T 803  [91m☒[0m 700 
Q 399+97  T 496  [91m☒[0m 405 
Q 52+7    T 59   [91m☒[0m 72  
Q 725+12  T 737  [91m☒[0m 777 
Q 58+798  T 856  [91m☒[0m 864 
Q 52+7    T 59   [91m☒[0m 72  

--------------------------------------------------
Iteration 29
Train on 7200 samples, validate on 800 samples
Epoch 1/1
Q 1+277   T 278  [91m☒[0m 284 
Q 675+91  T 766  [91m☒[0m 761 
Q 595+34  T 629  [91m☒[0m 691 
Q 44+31   T 75   [91m☒[0m 45  
Q 550+9   T 559  [92m☑[0m 559 
Q 12+1    T 13   [91m☒[0m 22  
Q 9+869   T 878  [91m☒[0m 964 
Q 203+85  T 288  [91m☒[0m 292 
Q 40+370  T 410  [91m☒[0m 487 
Q 63+86   T 149  [91m☒[0m 141 

--------------------------------------------------
Iteration 30
Train on 7200 samples, validate on 800 samples
Epoch 1/1
Q 626+604 T 1230 [91m☒[0m 1211
Q 860+7   T 867  [91m☒[0m 866 
Q 30+30   T 60   [91m☒[0m 33

Q 95+21   T 116  [91m☒[0m 117 
Q 10+37   T 47   [91m☒[0m 78  
Q 7+917   T 924  [91m☒[0m 720 
Q 4+74    T 78   [92m☑[0m 78  
Q 0+920   T 920  [91m☒[0m 900 
Q 21+281  T 302  [91m☒[0m 203 
Q 338+345 T 683  [91m☒[0m 621 
Q 52+32   T 84   [91m☒[0m 85  
Q 1+641   T 642  [91m☒[0m 652 
Q 278+367 T 645  [91m☒[0m 965 

--------------------------------------------------
Iteration 44
Train on 7200 samples, validate on 800 samples
Epoch 1/1
Q 66+393  T 459  [91m☒[0m 411 
Q 84+13   T 97   [91m☒[0m 107 
Q 890+0   T 890  [91m☒[0m 808 
Q 2+222   T 224  [91m☒[0m 233 
Q 793+8   T 801  [91m☒[0m 791 
Q 83+778  T 861  [92m☑[0m 861 
Q 899+8   T 907  [92m☑[0m 907 
Q 898+6   T 904  [91m☒[0m 974 
Q 8+747   T 755  [92m☑[0m 755 
Q 77+672  T 749  [91m☒[0m 830 

--------------------------------------------------
Iteration 45
Train on 7200 samples, validate on 800 samples
Epoch 1/1
Q 9+315   T 324  [91m☒[0m 335 
Q 81+24   T 105  [91m☒[0m 106 
Q 550+14  T 564  [91m☒[0m 55

In [14]:
print("MSG : Prediction")
test_x = ["555+275", "860+7  ", "340+29 "]
test_y = ["830 ", "867 ", "369 "]
x = np.zeros((len(test_x), MAXLEN, len(chars)), dtype=np.bool)
y = np.zeros((len(test_y), DIGITS + 1, len(chars)), dtype=np.bool)
for j, (i, c) in enumerate(zip(test_x, test_y)):
    x[j] = ctable.encode(i, MAXLEN)
    y[j] = ctable.encode(c, DIGITS + 1)

MSG : Prediction


# Validation

In [15]:
right = 0
preds = model.predict_classes(test_x, verbose=0)
for i in range(len(preds)):
    q = ctable.decode(test_x[i])
    correct = ctable.decode(test_y[i])
    guess = ctable.decode(preds[i], calc_argmax=False)
    print('Q', q[::-1] if REVERSE else q, end=' ')
    print('T', correct, end=' ')
    if correct == guess:
        print(colors.ok + '☑' + colors.close, end=' ')
        right += 1
    else:
        print(colors.fail + '☒' + colors.close, end=' ')
    print(guess)
print("MSG : Accuracy is {}".format(right / len(preds)))

AttributeError: 'str' object has no attribute 'ndim'