In [1]:
from keras.models import Sequential
from keras import layers
import numpy as np
from six.moves import range
import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

Using TensorFlow backend.


# Parameters Config

In [2]:
class colors:
    """ANSI escape sequences used to colour the per-sample result marks."""
    ok = '\x1b[92m'     # bright green, printed before a correct-guess mark
    fail = '\x1b[91m'   # bright red, printed before a wrong-guess mark
    close = '\x1b[0m'   # reset, printed after the mark

In [3]:
# Total number of unique questions to generate (split into train/val/test later).
TRAINING_SIZE = 80000
# Maximum number of decimal digits in each operand.
DIGITS = 3
# When True, the padded question string is reversed before encoding.
REVERSE = False
# Fixed question width: two operands of up to DIGITS digits plus the '*' sign.
MAXLEN = DIGITS + 1 + DIGITS
# Vocabulary: the ten digits, the operator, and the space used for padding.
chars = '0123456789* '
# Recurrent layer class used for both the encoder and the decoder.
RNN = layers.LSTM
# Number of units in every recurrent layer.
HIDDEN_SIZE = 128
# Mini-batch size passed to model.fit.
BATCH_SIZE = 128
# Number of stacked recurrent layers in the decoder.
LAYERS = 1

In [4]:
class CharacterTable(object):
    """Two-way mapping between characters and one-hot encoded rows.

    The vocabulary is the sorted list of distinct characters in ``chars``;
    a character's index is its position in that sorted list.
    """

    def __init__(self, chars):
        """Build the char -> index and index -> char lookup tables."""
        self.chars = sorted(set(chars))
        self.char_indices = {ch: idx for idx, ch in enumerate(self.chars)}
        self.indices_char = dict(enumerate(self.chars))

    def encode(self, C, num_rows):
        """One-hot encode string ``C`` into a ``(num_rows, vocab_size)`` array.

        Row ``k`` has a single 1 at the index of ``C[k]``; rows past the end
        of ``C`` are left all-zero.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            one_hot[row, self.char_indices[ch]] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn an encoded sequence back into a string.

        With ``calc_argmax=True`` ``x`` is treated as rows of scores and the
        arg-max of each row is used; otherwise ``x`` must already be a
        sequence of character indices.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return "".join([self.indices_char[idx] for idx in indices])

In [5]:
# Character table over the vocabulary in `chars`; used below to encode and decode sequences.
ctable = CharacterTable(chars)

In [6]:
# Display the index -> character lookup table.
ctable.indices_char

{0: ' ',
 1: '*',
 2: '0',
 3: '1',
 4: '2',
 5: '3',
 6: '4',
 7: '5',
 8: '6',
 9: '7',
 10: '8',
 11: '9'}

# Data Generation

In [7]:
def _random_operand():
    """Return a random non-negative integer built from 1 to DIGITS random digits.

    Digits are drawn independently, so leading zeros can occur and the
    resulting value may have fewer digits than were drawn.
    """
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(list('0123456789')) for _ in range(n_digits)))


questions = []
expected = []
seen = set()

# The product of two numbers with at most DIGITS digits has at most
# 2 * DIGITS digits (999 * 999 = 998001 for DIGITS = 3).
answer_len = 2 * DIGITS

# Operands range over 0 .. 10**DIGITS - 1 and (a, b) / (b, a) are treated as
# the same question, so only this many distinct questions exist. Asking for
# more would make the loop below spin forever.
n_values = 10 ** DIGITS
max_unique = n_values * (n_values + 1) // 2
if TRAINING_SIZE > max_unique:
    raise ValueError(
        'TRAINING_SIZE={} exceeds the {} unique questions available for DIGITS={}'
        .format(TRAINING_SIZE, max_unique, DIGITS))

print('Generating data...')
while len(questions) < TRAINING_SIZE:
    a, b = _random_operand(), _random_operand()
    # Multiplication is commutative: skip any pair already seen in either order.
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    # Right-pad the question and the answer with spaces to their fixed widths.
    query = '{}*{}'.format(a, b).ljust(MAXLEN)
    ans = str(a * b).ljust(answer_len)
    if REVERSE:
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total multiplication questions:', len(questions))

Generating data...
Total addition questions: 80000


In [8]:
# Sanity check: show the first five padded questions and their padded answers.
print(questions[:5], expected[:5])

['974*848', '8*49   ', '756*88 ', '76*53  ', '8*232  '] ['825952', '392   ', '66528 ', '4028  ', '1856  ']


# Processing

In [9]:
print('Vectorization...')
# Answers are one-hot encoded over 2 * DIGITS time steps, the maximum length
# of a product of two DIGITS-digit operands (6 for DIGITS = 3).
answer_len = 2 * DIGITS
# Use the builtin `bool`: the `np.bool` alias was deprecated in NumPy 1.20
# and removed in NumPy 1.24, where it raises AttributeError.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), answer_len, len(chars)), dtype=bool)
# One-hot encode every question into x and every answer into y.
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, answer_len)

Vectorization...


In [10]:
# Shuffle inputs and labels with one shared permutation so pairs stay aligned.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# train_test_split: the first 75% is the training portion, the rest is for testing.
split_size = int(TRAINING_SIZE*0.75)
train_x, test_x = x[:split_size], x[split_size:]
train_y, test_y = y[:split_size], y[split_size:]

# Hold out the last tenth of the training portion as the validation set.
split_at = len(train_x) - len(train_x) // 10
x_train, x_val = train_x[:split_at], train_x[split_at:]
y_train, y_val = train_y[:split_at], train_y[split_at:]

# Report the shape of each split (inputs first, then labels).
for title, inputs, labels in (
    ('Training Data:', x_train, y_train),
    ('Validation Data:', x_val, y_val),
    ('Testing Data:', test_x, test_y),
):
    print(title)
    print(inputs.shape)
    print(labels.shape)

Training Data:
(54000, 7, 12)
(54000, 6, 12)
Validation Data:
(6000, 7, 12)
(6000, 6, 12)
Testing Data:
(20000, 7, 12)
(20000, 6, 12)


In [11]:
# Inspect the first three one-hot encoded training inputs and their labels.
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False False False False  True False False False
   False]
  [False False False  True False False False False False False False
   False]
  [False False False False  True False False False False False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False  True False False False
   False]
  [False False False False False False  True False False False False
   False]
  [False False False False False False False False  True False False
   False]]

 [[False False False False False False False  True False False False
   False]
  [False False False False False False False False False False False
    True]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False False False False  True
   False]
  [False False False False False False False  True False False False
   False]
  [False False False False False False Fal

# Build Model

In [12]:
print('Build model...')
model = Sequential()
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE.
# Note: In a situation where your input sequences have a variable length,
# use input_shape=(None, num_feature).
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
# As the decoder RNN's input, repeatedly provide the encoder's final output
# for each time step. Repeat 2 * DIGITS times as that's the maximum length of
# a product, e.g., when DIGITS=3, max output is 999*999=998001 (6 characters).
model.add(layers.RepeatVector(2 * DIGITS))
# The decoder RNN could be multiple layers stacked or a single layer.
for _ in range(LAYERS):
    # By setting return_sequences to True, return not only the last output but
    # all the outputs so far in the form of (num_samples, timesteps,
    # output_dim). This is necessary as TimeDistributed in the below expects
    # the first dimension to be the timesteps.
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

# Apply a dense layer to every temporal slice of the input. For each step of
# the output sequence, decide which character should be chosen.
model.add(layers.TimeDistributed(layers.Dense(len(chars))))
model.add(layers.Activation('softmax'))
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])

model.summary()

Build model...
Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 6, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 6, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 6, 12)             1548      
_________________________________________________________________
activation_1 (Activation)    (None, 6, 12)             0         
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


# Training

In [13]:
# Train one epoch at a time; after each epoch spot-check a few validation
# samples so progress can be followed in the output.
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    # Show predictions for 10 randomly chosen validation samples.
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        # Index with a 1-element array to keep the leading batch dimension.
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        # Sequential.predict_classes() was removed in TensorFlow 2.6; taking
        # the arg-max over the softmax output gives the same class indices.
        preds = model.predict(rowx, verbose=0).argmax(axis=-1)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        # preds already holds class indices, so skip the arg-max in decode.
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 460*238 T 109480 [91m☒[0m 166660
Q 886*31  T 27466  [91m☒[0m 14744 
Q 62*945  T 58590  [91m☒[0m 15550 
Q 482*856 T 412592 [91m☒[0m 155520
Q 352*84  T 29568  [91m☒[0m 15524 
Q 673*75  T 50475  [91m☒[0m 15555 
Q 21*262  T 5502   [91m☒[0m 1264  
Q 70*559  T 39130  [91m☒[0m 15555 
Q 51*888  T 45288  [91m☒[0m 14664 
Q 899*16  T 14384  [91m☒[0m 14774 

--------------------------------------------------
Iteration 1
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 93*47   T 4371   [91m☒[0m 1996  
Q 56*64   T 3584   [91m☒[0m 1444  
Q 59*159  T 9381   [91m☒[0m 14955 
Q 218*19  T 4142   [91m☒[0m 1196  
Q 785*0   T 0      [91m☒[0m 200   
Q 975*411 T 400725 [91m☒[0m 144475
Q 768*503 T 386304 [91m☒[0m 344970

Q 18*340  T 6120   [91m☒[0m 6440  
Q 39*968  T 37752  [91m☒[0m 37048 
Q 348*298 T 103704 [91m☒[0m 100088
Q 308*89  T 27412  [91m☒[0m 27448 
Q 740*41  T 30340  [91m☒[0m 30440 
Q 573*64  T 36672  [91m☒[0m 36044 
Q 22*372  T 8184   [91m☒[0m 8444  
Q 7*191   T 1337   [91m☒[0m 1307  
Q 245*98  T 24010  [91m☒[0m 24890 
Q 913*34  T 31042  [91m☒[0m 31444 

--------------------------------------------------
Iteration 13
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 959*3   T 2877   [91m☒[0m 2981  
Q 387*72  T 27864  [91m☒[0m 28176 
Q 705*96  T 67680  [91m☒[0m 66120 
Q 38*91   T 3458   [91m☒[0m 3584  
Q 945*950 T 897750 [91m☒[0m 837750
Q 54*964  T 52056  [91m☒[0m 53154 
Q 0*880   T 0      [92m☑[0m 0     
Q 57*82   T 4674   [91m☒[0m 4718  
Q 392*538 T 210896 [91m☒[0m 219772
Q 906*12  T 10872  [91m☒[0m 11774 

--------------------------------------------------
Iteration 14
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 19*350  T 

Q 954*39  T 37206  [91m☒[0m 36966 
Q 57*30   T 1710   [91m☒[0m 1790  
Q 4*374   T 1496   [91m☒[0m 1456  
Q 22*252  T 5544   [91m☒[0m 5224  
Q 74*286  T 21164  [91m☒[0m 21104 
Q 766*56  T 42896  [91m☒[0m 42956 
Q 9*139   T 1251   [91m☒[0m 1211  
Q 31*782  T 24242  [91m☒[0m 23902 
Q 44*425  T 18700  [91m☒[0m 18400 
Q 406*2   T 812    [92m☑[0m 812   

--------------------------------------------------
Iteration 26
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 61*286  T 17446  [91m☒[0m 17426 
Q 2*972   T 1944   [91m☒[0m 1964  
Q 99*266  T 26334  [91m☒[0m 26764 
Q 940*85  T 79900  [91m☒[0m 79300 
Q 402*0   T 0      [92m☑[0m 0     
Q 427*789 T 336903 [91m☒[0m 331373
Q 150*176 T 26400  [91m☒[0m 23300 
Q 546*190 T 103740 [91m☒[0m 102060
Q 650*26  T 16900  [91m☒[0m 17100 
Q 67*791  T 52997  [91m☒[0m 53737 

--------------------------------------------------
Iteration 27
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 409*760 T 

Q 58*389  T 22562  [91m☒[0m 22202 
Q 848*8   T 6784   [91m☒[0m 6704  
Q 7*151   T 1057   [91m☒[0m 1037  
Q 146*16  T 2336   [91m☒[0m 2476  
Q 179*805 T 144095 [91m☒[0m 142835
Q 489*700 T 342300 [91m☒[0m 347700
Q 596*90  T 53640  [91m☒[0m 53840 
Q 954*195 T 186030 [91m☒[0m 183930
Q 115*58  T 6670   [91m☒[0m 6970  
Q 38*807  T 30666  [91m☒[0m 30906 

--------------------------------------------------
Iteration 40
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 504*56  T 28224  [91m☒[0m 27784 
Q 2*411   T 822    [92m☑[0m 822   
Q 693*57  T 39501  [91m☒[0m 39991 
Q 48*804  T 38592  [91m☒[0m 38252 
Q 410*981 T 402210 [91m☒[0m 403310
Q 5*69    T 345    [92m☑[0m 345   
Q 438*70  T 30660  [91m☒[0m 30260 
Q 740*28  T 20720  [92m☑[0m 20720 
Q 417*567 T 236439 [91m☒[0m 239999
Q 30*507  T 15210  [91m☒[0m 15010 

--------------------------------------------------
Iteration 41
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 32*536  T 

Q 74*664  T 49136  [91m☒[0m 48256 
Q 428*657 T 281196 [91m☒[0m 279336
Q 769*252 T 193788 [91m☒[0m 193348
Q 81*547  T 44307  [91m☒[0m 44737 
Q 697*333 T 232101 [91m☒[0m 231331
Q 617*669 T 412773 [91m☒[0m 419133
Q 85*151  T 12835  [92m☑[0m 12835 
Q 565*99  T 55935  [91m☒[0m 55985 
Q 196*396 T 77616  [91m☒[0m 79936 
Q 109*973 T 106057 [91m☒[0m 104887

--------------------------------------------------
Iteration 53
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 831*52  T 43212  [91m☒[0m 42412 
Q 81*984  T 79704  [91m☒[0m 79564 
Q 97*751  T 72847  [91m☒[0m 72927 
Q 531*30  T 15930  [91m☒[0m 15830 
Q 90*305  T 27450  [91m☒[0m 27950 
Q 90*103  T 9270   [91m☒[0m 8870  
Q 97*799  T 77503  [91m☒[0m 77333 
Q 37*459  T 16983  [91m☒[0m 16523 
Q 966*627 T 605682 [91m☒[0m 605822
Q 94*771  T 72474  [91m☒[0m 72514 

--------------------------------------------------
Iteration 54
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 175*236 T 

Q 381*71  T 27051  [91m☒[0m 27911 
Q 81*705  T 57105  [91m☒[0m 57465 
Q 21*536  T 11256  [91m☒[0m 11136 
Q 96*301  T 28896  [91m☒[0m 28416 
Q 716*853 T 610748 [91m☒[0m 611888
Q 463*6   T 2778   [91m☒[0m 2758  
Q 23*822  T 18906  [91m☒[0m 18326 
Q 212*242 T 51304  [91m☒[0m 51044 
Q 43*121  T 5203   [91m☒[0m 5283  
Q 21*223  T 4683   [91m☒[0m 4623  

--------------------------------------------------
Iteration 67
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 732*740 T 541680 [91m☒[0m 542280
Q 152*24  T 3648   [91m☒[0m 3608  
Q 921*654 T 602334 [91m☒[0m 601334
Q 229*54  T 12366  [91m☒[0m 12386 
Q 65*548  T 35620  [91m☒[0m 35420 
Q 39*755  T 29445  [91m☒[0m 29845 
Q 996*26  T 25896  [91m☒[0m 26996 
Q 277*140 T 38780  [91m☒[0m 38880 
Q 226*676 T 152776 [91m☒[0m 153976
Q 95*833  T 79135  [91m☒[0m 78385 

--------------------------------------------------
Iteration 68
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 437*37  T 

Q 14*17   T 238    [92m☑[0m 238   
Q 97*121  T 11737  [92m☑[0m 11737 
Q 564*0   T 0      [92m☑[0m 0     
Q 25*107  T 2675   [91m☒[0m 2025  
Q 572*99  T 56628  [91m☒[0m 56048 
Q 39*26   T 1014   [91m☒[0m 1024  
Q 306*131 T 40086  [91m☒[0m 40426 
Q 207*942 T 194994 [91m☒[0m 192234
Q 203*653 T 132559 [91m☒[0m 133499
Q 909*581 T 528129 [91m☒[0m 526489

--------------------------------------------------
Iteration 80
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 7*340   T 2380   [92m☑[0m 2380  
Q 645*82  T 52890  [91m☒[0m 53690 
Q 221*576 T 127296 [91m☒[0m 127516
Q 28*600  T 16800  [92m☑[0m 16800 
Q 138*288 T 39744  [91m☒[0m 40644 
Q 309*426 T 131634 [91m☒[0m 120654
Q 96*301  T 28896  [91m☒[0m 28676 
Q 331*874 T 289294 [91m☒[0m 287014
Q 226*58  T 13108  [91m☒[0m 13068 
Q 106*80  T 8480   [91m☒[0m 8580  

--------------------------------------------------
Iteration 81
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 769*507 T 

Q 792*7   T 5544   [91m☒[0m 5524  
Q 90*130  T 11700  [92m☑[0m 11700 
Q 535*37  T 19795  [91m☒[0m 29345 
Q 902*611 T 551122 [91m☒[0m 541222
Q 79*515  T 40685  [91m☒[0m 40285 
Q 705*13  T 9165   [92m☑[0m 9165  
Q 656*5   T 3280   [92m☑[0m 3280  
Q 492*101 T 49692  [91m☒[0m 49912 
Q 87*54   T 4698   [91m☒[0m 4778  
Q 96*253  T 24288  [91m☒[0m 24408 

--------------------------------------------------
Iteration 94
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 28*399  T 11172  [91m☒[0m 11152 
Q 5*271   T 1355   [92m☑[0m 1355  
Q 397*106 T 42082  [91m☒[0m 42862 
Q 628*576 T 361728 [91m☒[0m 364128
Q 47*845  T 39715  [91m☒[0m 39415 
Q 243*431 T 104733 [91m☒[0m 105733
Q 27*976  T 26352  [91m☒[0m 26472 
Q 509*904 T 460136 [91m☒[0m 459716
Q 90*492  T 44280  [92m☑[0m 44280 
Q 34*20   T 680    [92m☑[0m 680   

--------------------------------------------------
Iteration 95
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
Q 2*537   T 

# Testing

In [14]:
print("MSG : Prediction")
#####################################################
## Predict the held-out test set.
## test_x holds one-hot encoded questions (e.g. "974*848"); the result is an
## array of predicted character indices, one row per question.
#####################################################

# Sequential.predict_classes() was removed in TensorFlow 2.6; the arg-max over
# the softmax output yields the same class indices.
predictions = model.predict(test_x, verbose=0).argmax(axis=-1)

MSG : Prediction


In [16]:
# Exact-match accuracy: a prediction counts only when the whole decoded
# answer string equals the decoded label string.
predict_right = sum(
    ctable.decode(predicted, calc_argmax=False) == ctable.decode(truth)
    for predicted, truth in zip(predictions, test_y)
)
accuracy = predict_right / test_y.shape[0]
print("multiplication model accuracy:",accuracy)
    

multiplication model accuracy: 0.1781
