# 載入套件

In [1]:
from keras.models import Sequential
from keras import layers
from keras import regularizers
import keras
import numpy as np
from six.moves import range

Using TensorFlow backend.


# 加法器

### Parameters Config 參數設定

設定shell輸出文字的顏色。另外還能設定輸出文字的背景、底線、粗體等(參考:http://inpega.blogspot.com/2015/07/shell.html)

In [2]:
class colors:
    ok = '\033[92m'
    fail = '\033[91m'
    close = '\033[0m'

In [3]:
TRAINING_SIZE = 80000             #總樣本數
DIGITS = 3                        #所產生數字的最大位數
REVERSE = False                   #是否將數字從尾到頭反過來寫
MAXLEN = DIGITS + 1 + DIGITS      #兩個數字相加的字串最長長度
chars = '0123456789+ '            #訓練時會出現的所有字串
RNN = layers.LSTM                 #將RNN設定為LSTM
HIDDEN_SIZE = 128                 #隱藏層Neoren數
BATCH_SIZE = 128                  #Batch數
LAYERS = 1                        #層數

#### 整理個別字串成dictionary形式/對字串做encoding/對字串做decoding

In [4]:
class CharacterTable(object):
    def __init__(self, chars):
        self.chars = sorted(set(chars))
        self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
        self.indices_char = dict((i, c) for i, c in enumerate(self.chars))
    
    def encode(self, C, num_rows):
        x = np.zeros((num_rows, len(self.chars)))
        for i, c in enumerate(C):
            x[i, self.char_indices[c]] = 1
        return x
    
    def decode(self, x, calc_argmax=True):
        if calc_argmax:
            x = x.argmax(axis=-1)
        return "".join(self.indices_char[i] for i in x)

In [5]:
ctable = CharacterTable(chars)

In [6]:
ctable.indices_char

{0: ' ',
 1: '+',
 2: '0',
 3: '1',
 4: '2',
 5: '3',
 6: '4',
 7: '5',
 8: '6',
 9: '7',
 10: '8',
 11: '9'}

### Data Generation 資料產生

In [7]:
questions = []       #questions: 兩個數字相加
expected = []        #expected:  兩個數字相加的和
seen = set()         #檢查新產生的數字組合是否出現過
print('Generating data...')
while len(questions) < TRAINING_SIZE:
    # 隨機產生最高三位數的數字(0~999)
    f = lambda: int(''.join(np.random.choice(list('0123456789')) for i in range(np.random.randint(1, DIGITS + 1))))
    a, b = f(), f()
    key = tuple(sorted((a, b)))
    if key in seen:                        #若新產生的數字組合已經出現過了，則不加入樣本中
        continue
    seen.add(key)
    q = '{}+{}'.format(a, b)               #將題目轉成字串形式
    query = q + ' ' * (MAXLEN - len(q))    #補上最後面的空格，使所有題目的字串長度一致
    ans = str(a + b)                       #將答案轉成字串形式
    ans += ' ' * (DIGITS + 1 - len(ans))   #補上最後面的空格，使所有答案的字串長度一致
    if REVERSE:                            #若REVERSE=TRUE，則將兩數相加的字串反過來寫
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))

Generating data...
Total addition questions: 80000


In [8]:
#檢查產生出的資料是否正確合理
print(questions[:5], expected[:5])

['944+3  ', '8+52   ', '516+5  ', '163+8  ', '26+204 '] ['947 ', '60  ', '521 ', '171 ', '230 ']


### Processing 資料前處理

#### 將題目字串及答案字串重新編碼

In [9]:
print('Vectorization...')
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=np.bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


#### 將所有資料分成1/4的訓練資料和3/4的測試資料，在訓練資料中再分成90%的訓練集與10%的驗證集

In [10]:
#隨機打散
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]

# train_test_split
train_x = x[:20000]
train_y = y[:20000]
test_x = x[20000:]
test_y = y[20000:]

#把training data的90%作為訓練集，10%作為驗證集
split_at = len(train_x) - len(train_x) // 10
(x_train, x_val) = train_x[:split_at], train_x[split_at:]
(y_train, y_val) = train_y[:split_at], train_y[split_at:]

#分別印出訓練集、驗證集及測試集的x和y的大小
print('Training Data:')
print(x_train.shape)
print(y_train.shape)

print('Validation Data:')
print(x_val.shape)
print(y_val.shape)

print('Testing Data:')
print(test_x.shape)
print(test_y.shape)

Training Data:
(18000, 7, 12)
(18000, 4, 12)
Validation Data:
(2000, 7, 12)
(2000, 4, 12)
Testing Data:
(60000, 7, 12)
(60000, 4, 12)


#### 查看輸入及輸出資料格式

In [11]:
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False False  True False False False False False
   False]
  [False False  True False False False False False False False False
   False]
  [False False False False  True False False False False False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False False False False  True
   False]
  [False False False False False  True False False False False False
   False]
  [ True False False False False False False False False False False
   False]]

 [[False False False False False False False False  True False False
   False]
  [False False False False False False False False False False  True
   False]
  [False False False  True False False False False False False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False False  True False False
   False]
  [False False False False False False Fal

### Build Model 模型建立

In [12]:
print('Build model...')

############################################
##### Build your own model here ############
model = Sequential()
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
model.add(layers.RepeatVector(DIGITS + 1))
for _ in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

model.add(layers.TimeDistributed(layers.Dense(len(chars), activation='softmax')))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

############################################

model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 12)             1548      
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


### Training 模型訓練

In [13]:
for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    #隨機取10筆樣本印出來，看有沒有回答正確
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        preds = model.predict_classes(rowx, verbose=0)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 780+57  T 837  [91m☒[0m 110 
Q 790+0   T 790  [91m☒[0m 100 
Q 810+56  T 866  [91m☒[0m 120 
Q 21+81   T 102  [91m☒[0m 13  
Q 425+77  T 502  [91m☒[0m 100 
Q 909+36  T 945  [91m☒[0m 110 
Q 42+389  T 431  [91m☒[0m 120 
Q 575+693 T 1268 [91m☒[0m 1101
Q 223+93  T 316  [91m☒[0m 120 
Q 318+708 T 1026 [91m☒[0m 110 

--------------------------------------------------
Iteration 1
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 61+8    T 69   [91m☒[0m 34  
Q 88+47   T 135  [91m☒[0m 107 
Q 712+420 T 1132 [91m☒[0m 104 
Q 414+312 T 726  [91m☒[0m 124 
Q 575+693 T 1268 [91m☒[0m 1117
Q 438+402 T 840  [91m☒[0m 111 
Q 20+225  T 245  [91m☒[0m 154 
Q 316+27  T 343  [91m☒[0m 154 
Q 532+725 T 1257 [91m☒[0m 111 
Q 75+155  T 230  [91m☒[0m 107 

--------------------------------------------------
Iteration 2
Train on 18000 samples, valida

Q 34+912  T 946  [91m☒[0m 943 
Q 649+156 T 805  [91m☒[0m 711 
Q 475+950 T 1425 [91m☒[0m 1436
Q 190+62  T 252  [91m☒[0m 241 
Q 199+165 T 364  [91m☒[0m 216 
Q 23+441  T 464  [91m☒[0m 465 
Q 479+1   T 480  [91m☒[0m 560 
Q 98+333  T 431  [92m☑[0m 431 
Q 94+996  T 1090 [91m☒[0m 1055
Q 50+373  T 423  [91m☒[0m 416 

--------------------------------------------------
Iteration 15
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 35+660  T 695  [91m☒[0m 699 
Q 176+32  T 208  [91m☒[0m 291 
Q 296+24  T 320  [91m☒[0m 322 
Q 1+946   T 947  [92m☑[0m 947 
Q 311+157 T 468  [91m☒[0m 411 
Q 177+307 T 484  [91m☒[0m 475 
Q 88+121  T 209  [91m☒[0m 290 
Q 7+243   T 250  [91m☒[0m 232 
Q 675+57  T 732  [92m☑[0m 732 
Q 787+780 T 1567 [91m☒[0m 1545

--------------------------------------------------
Iteration 16
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 50+775  T 825  [91m☒[0m 738 
Q 252+966 T 1218 [91m☒[0m 1200
Q 956+706 T 1662 [91m☒[0

Q 75+292  T 367  [92m☑[0m 367 
Q 19+511  T 530  [91m☒[0m 518 
Q 67+793  T 860  [92m☑[0m 860 
Q 950+939 T 1889 [91m☒[0m 1890
Q 5+405   T 410  [91m☒[0m 411 
Q 130+21  T 151  [91m☒[0m 222 
Q 923+34  T 957  [91m☒[0m 967 
Q 583+11  T 594  [92m☑[0m 594 
Q 50+218  T 268  [92m☑[0m 268 
Q 953+936 T 1889 [91m☒[0m 1899

--------------------------------------------------
Iteration 29
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 642+872 T 1514 [91m☒[0m 1513
Q 223+880 T 1103 [92m☑[0m 1103
Q 349+10  T 359  [92m☑[0m 359 
Q 68+447  T 515  [92m☑[0m 515 
Q 69+348  T 417  [91m☒[0m 416 
Q 551+3   T 554  [92m☑[0m 554 
Q 51+488  T 539  [92m☑[0m 539 
Q 898+509 T 1407 [91m☒[0m 1417
Q 25+84   T 109  [92m☑[0m 109 
Q 99+817  T 916  [91m☒[0m 917 

--------------------------------------------------
Iteration 30
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 775+167 T 942  [91m☒[0m 922 
Q 28+311  T 339  [91m☒[0m 349 
Q 621+949 T 1570 [92m☑[0

Q 833+847 T 1680 [91m☒[0m 1670
Q 90+851  T 941  [92m☑[0m 941 
Q 51+198  T 249  [92m☑[0m 249 
Q 621+149 T 770  [91m☒[0m 760 
Q 435+193 T 628  [91m☒[0m 638 
Q 223+880 T 1103 [92m☑[0m 1103
Q 523+704 T 1227 [92m☑[0m 1227
Q 2+814   T 816  [92m☑[0m 816 
Q 80+881  T 961  [92m☑[0m 961 
Q 693+39  T 732  [92m☑[0m 732 

--------------------------------------------------
Iteration 43
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 246+80  T 326  [92m☑[0m 326 
Q 989+2   T 991  [92m☑[0m 991 
Q 948+176 T 1124 [92m☑[0m 1124
Q 208+50  T 258  [92m☑[0m 258 
Q 771+55  T 826  [92m☑[0m 826 
Q 360+887 T 1247 [92m☑[0m 1247
Q 504+758 T 1262 [92m☑[0m 1262
Q 13+424  T 437  [91m☒[0m 447 
Q 46+3    T 49   [91m☒[0m 59  
Q 672+34  T 706  [92m☑[0m 706 

--------------------------------------------------
Iteration 44
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 802+604 T 1406 [91m☒[0m 1416
Q 91+77   T 168  [92m☑[0m 168 
Q 99+607  T 706  [92m☑[0

Q 118+274 T 392  [92m☑[0m 392 
Q 224+29  T 253  [92m☑[0m 253 
Q 27+328  T 355  [92m☑[0m 355 
Q 426+895 T 1321 [92m☑[0m 1321
Q 268+444 T 712  [92m☑[0m 712 
Q 687+38  T 725  [92m☑[0m 725 
Q 611+12  T 623  [92m☑[0m 623 
Q 21+515  T 536  [92m☑[0m 536 
Q 787+212 T 999  [91m☒[0m 909 
Q 719+516 T 1235 [92m☑[0m 1235

--------------------------------------------------
Iteration 57
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 269+54  T 323  [92m☑[0m 323 
Q 67+671  T 738  [91m☒[0m 739 
Q 33+59   T 92   [91m☒[0m 90  
Q 494+483 T 977  [91m☒[0m 967 
Q 387+428 T 815  [92m☑[0m 815 
Q 294+362 T 656  [92m☑[0m 656 
Q 146+107 T 253  [92m☑[0m 253 
Q 447+733 T 1180 [91m☒[0m 1170
Q 472+910 T 1382 [92m☑[0m 1382
Q 984+973 T 1957 [92m☑[0m 1957

--------------------------------------------------
Iteration 58
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 262+17  T 279  [91m☒[0m 289 
Q 202+816 T 1018 [92m☑[0m 1018
Q 758+73  T 831  [92m☑[0

Q 45+319  T 364  [92m☑[0m 364 
Q 48+257  T 305  [92m☑[0m 305 
Q 794+635 T 1429 [92m☑[0m 1429
Q 820+54  T 874  [92m☑[0m 874 
Q 87+808  T 895  [92m☑[0m 895 
Q 712+420 T 1132 [92m☑[0m 1132
Q 11+840  T 851  [91m☒[0m 841 
Q 6+931   T 937  [92m☑[0m 937 
Q 656+158 T 814  [92m☑[0m 814 
Q 122+869 T 991  [92m☑[0m 991 

--------------------------------------------------
Iteration 71
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 84+590  T 674  [92m☑[0m 674 
Q 403+759 T 1162 [92m☑[0m 1162
Q 389+264 T 653  [92m☑[0m 653 
Q 816+320 T 1136 [92m☑[0m 1136
Q 166+41  T 207  [92m☑[0m 207 
Q 298+796 T 1094 [91m☒[0m 1084
Q 288+64  T 352  [92m☑[0m 352 
Q 634+44  T 678  [92m☑[0m 678 
Q 26+23   T 49   [91m☒[0m 59  
Q 388+45  T 433  [92m☑[0m 433 

--------------------------------------------------
Iteration 72
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 842+58  T 900  [91m☒[0m 800 
Q 1+202   T 203  [92m☑[0m 203 
Q 21+114  T 135  [92m☑[0

Q 40+19   T 59   [92m☑[0m 59  
Q 34+370  T 404  [92m☑[0m 404 
Q 14+833  T 847  [92m☑[0m 847 
Q 435+8   T 443  [92m☑[0m 443 
Q 373+78  T 451  [92m☑[0m 451 
Q 700+4   T 704  [92m☑[0m 704 
Q 574+804 T 1378 [92m☑[0m 1378
Q 17+11   T 28   [91m☒[0m 37  
Q 548+51  T 599  [92m☑[0m 599 
Q 52+904  T 956  [92m☑[0m 956 

--------------------------------------------------
Iteration 85
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 723+22  T 745  [92m☑[0m 745 
Q 788+40  T 828  [92m☑[0m 828 
Q 13+654  T 667  [92m☑[0m 667 
Q 2+956   T 958  [92m☑[0m 958 
Q 277+3   T 280  [92m☑[0m 280 
Q 34+942  T 976  [92m☑[0m 976 
Q 389+53  T 442  [92m☑[0m 442 
Q 272+29  T 301  [92m☑[0m 301 
Q 478+595 T 1073 [92m☑[0m 1073
Q 807+137 T 944  [91m☒[0m 934 

--------------------------------------------------
Iteration 86
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 507+383 T 890  [91m☒[0m 990 
Q 162+712 T 874  [92m☑[0m 874 
Q 46+899  T 945  [92m☑[0

Q 690+60  T 750  [92m☑[0m 750 
Q 19+137  T 156  [92m☑[0m 156 
Q 39+835  T 874  [92m☑[0m 874 
Q 734+736 T 1470 [91m☒[0m 1460
Q 938+32  T 970  [91m☒[0m 960 
Q 84+360  T 444  [92m☑[0m 444 
Q 156+407 T 563  [92m☑[0m 563 
Q 475+950 T 1425 [92m☑[0m 1425
Q 664+13  T 677  [92m☑[0m 677 
Q 317+22  T 339  [92m☑[0m 339 

--------------------------------------------------
Iteration 99
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 54+748  T 802  [92m☑[0m 802 
Q 91+908  T 999  [92m☑[0m 999 
Q 952+35  T 987  [92m☑[0m 987 
Q 416+641 T 1057 [92m☑[0m 1057
Q 321+2   T 323  [92m☑[0m 323 
Q 971+755 T 1726 [92m☑[0m 1726
Q 831+30  T 861  [92m☑[0m 861 
Q 438+402 T 840  [92m☑[0m 840 
Q 658+738 T 1396 [92m☑[0m 1396
Q 124+36  T 160  [92m☑[0m 160 


### Testing 模型測試

In [14]:
print("MSG : Prediction")
#####################################################
## Try to test and evaluate your model ##############
## ex. test_x = ["555+175", "860+7  ", "340+29 "]
## ex. test_y = ["730 ", "867 ", "369 "] 
right = 0
predictions = model.predict_classes(test_x, verbose = 1) 
for i in range(60000):
    correct = ctable.decode(test_y[i])
    guess = ctable.decode(predictions[i], calc_argmax=False)
    if correct == guess:
        right = right + 1

acc = right/60000
print("Accuracy : ", acc)
#####################################################

MSG : Prediction
Accuracy :  0.9351833333333334


Validation Accuracy為0.9826，Test Accuracy為0.9352，可以發現模型配得還不錯。

***

# 減法器

### Parameters Config

In [15]:
chars = '0123456789- '
ctable = CharacterTable(chars)
ctable.indices_char

{0: ' ',
 1: '-',
 2: '0',
 3: '1',
 4: '2',
 5: '3',
 6: '4',
 7: '5',
 8: '6',
 9: '7',
 10: '8',
 11: '9'}

### Data Generation

#### 產生資料方式跟加法器相同
#### 唯一差別是:由於規定a必須大於等於b，使相減後的差大於等於0，故將a小於b的組合作a,b互換的動作

In [16]:
questions = []
expected = []
seen = set()
print('Generating data...')
while len(questions) < TRAINING_SIZE:
    f = lambda: int(''.join(np.random.choice(list('0123456789')) for i in range(np.random.randint(1, DIGITS + 1))))
    a, b = f(), f()
    #若a小於b，則令a、b互換
    if a < b :
        a = a + b
        b = a - b
        a = a - b
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    q = '{}-{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a - b)
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        query = query[::-1]
    questions.append(query)
    expected.append(ans)
print('Total substraction questions:', len(questions))

Generating data...
Total substraction questions: 80000


In [17]:
print(questions[:5], expected[:5])

['9-5    ', '107-14 ', '272-19 ', '8-0    ', '293-1  '] ['4   ', '93  ', '253 ', '8   ', '292 ']


### Processing

In [18]:
print('Vectorization...')
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=np.bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [19]:
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]

# train_test_split
train_x = x[:20000]
train_y = y[:20000]
test_x = x[20000:]
test_y = y[20000:]

split_at = len(train_x) - len(train_x) // 10
(x_train, x_val) = train_x[:split_at], train_x[split_at:]
(y_train, y_val) = train_y[:split_at], train_y[split_at:]

print('Training Data:')
print(x_train.shape)
print(y_train.shape)

print('Validation Data:')
print(x_val.shape)
print(y_val.shape)

print('Testing Data:')
print(test_x.shape)
print(test_y.shape)

Training Data:
(18000, 7, 12)
(18000, 4, 12)
Validation Data:
(2000, 7, 12)
(2000, 4, 12)
Testing Data:
(60000, 7, 12)
(60000, 4, 12)


In [20]:
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False  True False False False False False False
   False]
  [False False False False False  True False False False False False
   False]
  [False False False False  True False False False False False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False  True False False False
   False]
  [False False False False False False False False  True False False
   False]
  [ True False False False False False False False False False False
   False]]

 [[False False False False False False False False False False  True
   False]
  [False False False False False  True False False False False False
   False]
  [False False False False False False False  True False False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False False  True False False False False
   False]
  [False False False False False  True Fal

### Build Model

In [21]:
print('Build model...')

############################################
##### Build your own model here ############
model = Sequential()
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
model.add(layers.RepeatVector(DIGITS + 1))
for _ in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

model.add(layers.TimeDistributed(layers.Dense(len(chars), activation='softmax')))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

############################################

model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_3 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_2 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_4 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_2 (TimeDist (None, 4, 12)             1548      
Total params: 205,324
Trainable params: 205,324
Non-trainable params: 0
_________________________________________________________________


### Training

模型在減法器的效果沒有很好，故將迭代次數調高至150

In [22]:
for iteration in range(150):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        preds = model.predict_classes(rowx, verbose=0)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 899-301 T 598  [91m☒[0m 13  
Q 290-48  T 242  [91m☒[0m 13  
Q 958-391 T 567  [91m☒[0m 13  
Q 536-55  T 481  [91m☒[0m 13  
Q 579-4   T 575  [91m☒[0m 13  
Q 374-42  T 332  [91m☒[0m 13  
Q 611-568 T 43   [91m☒[0m 13  
Q 175-45  T 130  [91m☒[0m 13  
Q 303-4   T 299  [91m☒[0m 13  
Q 543-72  T 471  [91m☒[0m 13  

--------------------------------------------------
Iteration 1
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 844-202 T 642  [91m☒[0m 131 
Q 596-45  T 551  [91m☒[0m 130 
Q 674-477 T 197  [91m☒[0m 131 
Q 377-180 T 197  [91m☒[0m 131 
Q 944-27  T 917  [91m☒[0m 131 
Q 890-720 T 170  [91m☒[0m 160 
Q 734-508 T 226  [91m☒[0m 331 
Q 732-2   T 730  [91m☒[0m 131 
Q 159-53  T 106  [91m☒[0m 131 
Q 994-547 T 447  [91m☒[0m 160 

--------------------------------------------------
Iteration 2
Train on 18000 samples, valida

Q 430-51  T 379  [91m☒[0m 375 
Q 897-858 T 39   [91m☒[0m 10  
Q 795-22  T 773  [91m☒[0m 772 
Q 166-58  T 108  [91m☒[0m 110 
Q 141-108 T 33   [91m☒[0m 13  
Q 659-38  T 621  [91m☒[0m 625 
Q 177-13  T 164  [91m☒[0m 155 
Q 993-25  T 968  [91m☒[0m 953 
Q 630-73  T 557  [91m☒[0m 555 
Q 521-18  T 503  [91m☒[0m 505 

--------------------------------------------------
Iteration 15
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 300-71  T 229  [91m☒[0m 246 
Q 546-519 T 27   [91m☒[0m 1   
Q 632-73  T 559  [91m☒[0m 567 
Q 937-412 T 525  [91m☒[0m 583 
Q 439-80  T 359  [91m☒[0m 355 
Q 815-5   T 810  [91m☒[0m 813 
Q 944-39  T 905  [91m☒[0m 906 
Q 194-20  T 174  [91m☒[0m 171 
Q 591-80  T 511  [92m☑[0m 511 
Q 219-58  T 161  [91m☒[0m 164 

--------------------------------------------------
Iteration 16
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 876-785 T 91   [91m☒[0m 104 
Q 125-70  T 55   [91m☒[0m 42  
Q 353-72  T 281  [91m☒[0

Q 671-603 T 68   [91m☒[0m 77  
Q 249-43  T 206  [91m☒[0m 202 
Q 515-43  T 472  [91m☒[0m 471 
Q 323-27  T 296  [92m☑[0m 296 
Q 614-4   T 610  [91m☒[0m 618 
Q 616-52  T 564  [92m☑[0m 564 
Q 153-70  T 83   [91m☒[0m 73  
Q 670-440 T 230  [91m☒[0m 237 
Q 777-713 T 64   [91m☒[0m 15  
Q 297-231 T 66   [91m☒[0m 15  

--------------------------------------------------
Iteration 29
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 655-64  T 591  [91m☒[0m 590 
Q 308-9   T 299  [91m☒[0m 290 
Q 947-263 T 684  [91m☒[0m 781 
Q 583-81  T 502  [91m☒[0m 500 
Q 784-522 T 262  [91m☒[0m 341 
Q 942-310 T 632  [91m☒[0m 630 
Q 683-268 T 415  [91m☒[0m 406 
Q 754-38  T 716  [92m☑[0m 716 
Q 675-91  T 584  [91m☒[0m 587 
Q 655-64  T 591  [91m☒[0m 590 

--------------------------------------------------
Iteration 30
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 260-70  T 190  [91m☒[0m 199 
Q 658-364 T 294  [91m☒[0m 394 
Q 576-95  T 481  [91m☒[0

Q 980-2   T 978  [92m☑[0m 978 
Q 290-175 T 115  [91m☒[0m 104 
Q 510-33  T 477  [92m☑[0m 477 
Q 223-71  T 152  [91m☒[0m 143 
Q 208-80  T 128  [92m☑[0m 128 
Q 870-46  T 824  [92m☑[0m 824 
Q 822-784 T 38   [91m☒[0m 49  
Q 924-64  T 860  [92m☑[0m 860 
Q 509-51  T 458  [92m☑[0m 458 
Q 389-41  T 348  [92m☑[0m 348 

--------------------------------------------------
Iteration 43
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 187-27  T 160  [92m☑[0m 160 
Q 222-0   T 222  [92m☑[0m 222 
Q 836-48  T 788  [92m☑[0m 788 
Q 344-94  T 250  [92m☑[0m 250 
Q 733-697 T 36   [91m☒[0m 55  
Q 719-62  T 657  [92m☑[0m 657 
Q 326-62  T 264  [92m☑[0m 264 
Q 329-35  T 294  [92m☑[0m 294 
Q 368-38  T 330  [91m☒[0m 331 
Q 862-16  T 846  [92m☑[0m 846 

--------------------------------------------------
Iteration 44
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 636-353 T 283  [92m☑[0m 283 
Q 742-664 T 78   [91m☒[0m 79  
Q 569-90  T 479  [92m☑[0

Q 795-282 T 513  [92m☑[0m 513 
Q 689-82  T 607  [92m☑[0m 607 
Q 202-7   T 195  [92m☑[0m 195 
Q 680-78  T 602  [92m☑[0m 602 
Q 709-6   T 703  [92m☑[0m 703 
Q 306-55  T 251  [92m☑[0m 251 
Q 675-25  T 650  [92m☑[0m 650 
Q 26-8    T 18   [92m☑[0m 18  
Q 175-45  T 130  [92m☑[0m 130 
Q 883-9   T 874  [92m☑[0m 874 

--------------------------------------------------
Iteration 57
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 628-368 T 260  [91m☒[0m 250 
Q 675-520 T 155  [92m☑[0m 155 
Q 206-38  T 168  [92m☑[0m 168 
Q 189-7   T 182  [92m☑[0m 182 
Q 576-95  T 481  [92m☑[0m 481 
Q 414-16  T 398  [92m☑[0m 398 
Q 647-83  T 564  [92m☑[0m 564 
Q 91-59   T 32   [92m☑[0m 32  
Q 790-70  T 720  [92m☑[0m 720 
Q 380-203 T 177  [92m☑[0m 177 

--------------------------------------------------
Iteration 58
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 639-42  T 597  [92m☑[0m 597 
Q 978-40  T 938  [92m☑[0m 938 
Q 299-18  T 281  [92m☑[0

Q 222-8   T 214  [92m☑[0m 214 
Q 430-284 T 146  [92m☑[0m 146 
Q 993-25  T 968  [92m☑[0m 968 
Q 406-12  T 394  [92m☑[0m 394 
Q 620-7   T 613  [92m☑[0m 613 
Q 13-5    T 8    [91m☒[0m 1   
Q 318-52  T 266  [92m☑[0m 266 
Q 803-78  T 725  [92m☑[0m 725 
Q 676-583 T 93   [91m☒[0m 92  
Q 814-564 T 250  [92m☑[0m 250 

--------------------------------------------------
Iteration 71
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 979-90  T 889  [92m☑[0m 889 
Q 502-50  T 452  [92m☑[0m 452 
Q 499-55  T 444  [92m☑[0m 444 
Q 869-41  T 828  [92m☑[0m 828 
Q 578-417 T 161  [92m☑[0m 161 
Q 666-9   T 657  [92m☑[0m 657 
Q 878-779 T 99   [91m☒[0m 999 
Q 994-51  T 943  [92m☑[0m 943 
Q 820-74  T 746  [92m☑[0m 746 
Q 566-91  T 475  [92m☑[0m 475 

--------------------------------------------------
Iteration 72
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 641-7   T 634  [92m☑[0m 634 
Q 443-72  T 371  [92m☑[0m 371 
Q 533-0   T 533  [92m☑[0

Q 656-56  T 600  [92m☑[0m 600 
Q 300-71  T 229  [92m☑[0m 229 
Q 935-36  T 899  [91m☒[0m 999 
Q 636-353 T 283  [92m☑[0m 283 
Q 280-63  T 217  [92m☑[0m 217 
Q 481-75  T 406  [92m☑[0m 406 
Q 561-430 T 131  [92m☑[0m 131 
Q 33-1    T 32   [91m☒[0m 31  
Q 448-392 T 56   [91m☒[0m 66  
Q 964-16  T 948  [92m☑[0m 948 

--------------------------------------------------
Iteration 85
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 836-97  T 739  [92m☑[0m 739 
Q 946-622 T 324  [92m☑[0m 324 
Q 891-72  T 819  [92m☑[0m 819 
Q 833-7   T 826  [92m☑[0m 826 
Q 556-62  T 494  [92m☑[0m 494 
Q 91-47   T 44   [92m☑[0m 44  
Q 698-83  T 615  [92m☑[0m 615 
Q 956-705 T 251  [92m☑[0m 251 
Q 299-59  T 240  [92m☑[0m 240 
Q 237-18  T 219  [92m☑[0m 219 

--------------------------------------------------
Iteration 86
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 494-142 T 352  [91m☒[0m 342 
Q 248-17  T 231  [92m☑[0m 231 
Q 522-14  T 508  [92m☑[0

Q 991-25  T 966  [92m☑[0m 966 
Q 940-20  T 920  [92m☑[0m 920 
Q 226-72  T 154  [92m☑[0m 154 
Q 403-155 T 248  [92m☑[0m 248 
Q 600-191 T 409  [92m☑[0m 409 
Q 514-72  T 442  [92m☑[0m 442 
Q 763-179 T 584  [92m☑[0m 584 
Q 251-54  T 197  [92m☑[0m 197 
Q 867-0   T 867  [92m☑[0m 867 
Q 161-15  T 146  [92m☑[0m 146 

--------------------------------------------------
Iteration 99
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 86-36   T 50   [92m☑[0m 50  
Q 945-26  T 919  [92m☑[0m 919 
Q 619-14  T 605  [92m☑[0m 605 
Q 334-55  T 279  [92m☑[0m 279 
Q 921-833 T 88   [92m☑[0m 88  
Q 499-55  T 444  [92m☑[0m 444 
Q 196-21  T 175  [92m☑[0m 175 
Q 634-47  T 587  [92m☑[0m 587 
Q 952-72  T 880  [92m☑[0m 880 
Q 546-75  T 471  [92m☑[0m 471 

--------------------------------------------------
Iteration 100
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 143-123 T 20   [91m☒[0m 10  
Q 109-24  T 85   [92m☑[0m 85  
Q 146-27  T 119  [92m☑[

Q 733-647 T 86   [92m☑[0m 86  
Q 692-63  T 629  [92m☑[0m 629 
Q 939-145 T 794  [92m☑[0m 794 
Q 753-430 T 323  [92m☑[0m 323 
Q 561-220 T 341  [92m☑[0m 341 
Q 812-213 T 599  [91m☒[0m 609 
Q 953-32  T 921  [92m☑[0m 921 
Q 962-17  T 945  [92m☑[0m 945 
Q 876-785 T 91   [92m☑[0m 91  
Q 558-345 T 213  [92m☑[0m 213 

--------------------------------------------------
Iteration 113
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 375-0   T 375  [92m☑[0m 375 
Q 145-81  T 64   [92m☑[0m 64  
Q 574-295 T 279  [92m☑[0m 279 
Q 896-45  T 851  [92m☑[0m 851 
Q 42-23   T 19   [92m☑[0m 19  
Q 752-21  T 731  [92m☑[0m 731 
Q 88-46   T 42   [92m☑[0m 42  
Q 941-90  T 851  [92m☑[0m 851 
Q 867-576 T 291  [92m☑[0m 291 
Q 914-24  T 890  [91m☒[0m 880 

--------------------------------------------------
Iteration 114
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 960-22  T 938  [92m☑[0m 938 
Q 851-6   T 845  [92m☑[0m 845 
Q 987-234 T 753  [92m☑

Q 391-192 T 199  [91m☒[0m 198 
Q 790-70  T 720  [92m☑[0m 720 
Q 364-90  T 274  [92m☑[0m 274 
Q 600-191 T 409  [92m☑[0m 409 
Q 977-4   T 973  [92m☑[0m 973 
Q 967-334 T 633  [92m☑[0m 633 
Q 719-671 T 48   [91m☒[0m 58  
Q 987-31  T 956  [92m☑[0m 956 
Q 972-52  T 920  [92m☑[0m 920 
Q 892-811 T 81   [91m☒[0m 71  

--------------------------------------------------
Iteration 127
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 633-355 T 278  [92m☑[0m 278 
Q 116-3   T 113  [92m☑[0m 113 
Q 532-85  T 447  [92m☑[0m 447 
Q 698-83  T 615  [91m☒[0m 605 
Q 725-102 T 623  [92m☑[0m 623 
Q 736-733 T 3    [92m☑[0m 3   
Q 747-33  T 714  [92m☑[0m 714 
Q 213-69  T 144  [92m☑[0m 144 
Q 917-807 T 110  [91m☒[0m 11  
Q 453-77  T 376  [91m☒[0m 366 

--------------------------------------------------
Iteration 128
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 263-98  T 165  [92m☑[0m 165 
Q 143-123 T 20   [91m☒[0m 30  
Q 924-548 T 376  [92m☑

Q 664-27  T 637  [92m☑[0m 637 
Q 468-16  T 452  [91m☒[0m 442 
Q 902-137 T 765  [91m☒[0m 775 
Q 326-52  T 274  [92m☑[0m 274 
Q 170-6   T 164  [91m☒[0m 165 
Q 848-733 T 115  [92m☑[0m 115 
Q 205-44  T 161  [92m☑[0m 161 
Q 609-0   T 609  [92m☑[0m 609 
Q 979-90  T 889  [91m☒[0m 899 
Q 867-589 T 278  [92m☑[0m 278 

--------------------------------------------------
Iteration 141
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 570-16  T 554  [92m☑[0m 554 
Q 62-62   T 0    [92m☑[0m 0   
Q 656-65  T 591  [92m☑[0m 591 
Q 879-254 T 625  [92m☑[0m 625 
Q 611-128 T 483  [91m☒[0m 493 
Q 94-81   T 13   [92m☑[0m 13  
Q 723-93  T 630  [92m☑[0m 630 
Q 63-20   T 43   [92m☑[0m 43  
Q 753-607 T 146  [92m☑[0m 146 
Q 414-53  T 361  [92m☑[0m 361 

--------------------------------------------------
Iteration 142
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 620-7   T 613  [92m☑[0m 613 
Q 140-69  T 71   [92m☑[0m 71  
Q 722-309 T 413  [92m☑

### Testing

In [23]:
print("MSG : Prediction")
#####################################################
## Try to test and evaluate your model ##############
## ex. test_x = ["555+175", "860+7  ", "340+29 "]
## ex. test_y = ["730 ", "867 ", "369 "] 
right = 0
predictions = model.predict_classes(test_x, verbose = 1) 
for i in range(60000):
    correct = ctable.decode(test_y[i])
    guess = ctable.decode(predictions[i], calc_argmax=False)
    if correct == guess:
        right = right + 1

acc = right/60000
print("Accuracy : ", acc)
#####################################################

MSG : Prediction
Accuracy :  0.9229666666666667


|TRAINING_SIZE|Iterarions|Validation Accuracy|Test Accuracy|
|---|---|---|---|
|80000|100|0.9682|0.8855|
|80000|150|0.9799|0.9230|

由於迭代次數 = 100的模型準確率沒有很高，所以將迭代次數調高至150，準確率也因此提高。

***

# 加法+減法

### Parameters Config

將原本的80000筆樣本分成40000筆加法和40000筆減法去建立模型與訓練會得到蠻低的準確率(約0.58)，故將樣本數增加至160000筆(80000筆加法和80000筆減法)

In [24]:
TRAINING_SIZE = 160000
chars = '0123456789+- '
ctable = CharacterTable(chars)
ctable.indices_char

{0: ' ',
 1: '+',
 2: '-',
 3: '0',
 4: '1',
 5: '2',
 6: '3',
 7: '4',
 8: '5',
 9: '6',
 10: '7',
 11: '8',
 12: '9'}

### Data Generation

In [25]:
questions = []
expected = []
seen = set()
print('Generating data...')
while len(questions) < (TRAINING_SIZE/2):
    f = lambda: int(''.join(np.random.choice(list('0123456789')) for i in range(np.random.randint(1, DIGITS + 1))))
    a, b = f(), f()
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    q = '{}+{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a + b)
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        query = query[::-1]
    questions.append(query)
    expected.append(ans)

while len(questions) < TRAINING_SIZE:
    f = lambda: int(''.join(np.random.choice(list('0123456789')) for i in range(np.random.randint(1, DIGITS + 1))))
    a, b = f(), f()
    #若a小於b，則令a、b互換
    if a < b :
        a = a + b
        b = a - b
        a = a - b
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    q = '{}-{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a - b)
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        query = query[::-1]
    questions.append(query)
    expected.append(ans)

print('Total addition&substraction questions:', len(questions))

Generating data...
Total addition&substraction questions: 160000


### Processing

In [26]:
print('Vectorization...')
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=np.bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [27]:
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]

# train_test_split
train_x = np.concatenate([x[0:20000], x[80000:100000]])
train_y = np.concatenate([y[0:20000], y[80000:100000]])
test_x = np.concatenate([x[20000:80000], x[100000:160000]])
test_y = np.concatenate([y[20000:80000], y[100000:160000]])

#split_at = len(train_x) - len(train_x) // 10
(x_train, x_val) = np.concatenate([train_x[0:18000], train_x[20000:38000]]), np.concatenate([train_x[18000:20000], train_x[38000:40000]])
(y_train, y_val) = np.concatenate([train_y[0:18000], train_y[20000:38000]]), np.concatenate([train_y[18000:20000], train_y[38000:40000]])

print('Training Data:')
print(x_train.shape)
print(y_train.shape)

print('Validation Data:')
print(x_val.shape)
print(y_val.shape)

print('Testing Data:')
print(test_x.shape)
print(test_y.shape)

Training Data:
(36000, 7, 13)
(36000, 4, 13)
Validation Data:
(4000, 7, 13)
(4000, 4, 13)
Testing Data:
(120000, 7, 13)
(120000, 4, 13)


In [28]:
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False False False False False False False False
   False  True]
  [False False False  True False False False False False False False
   False False]
  [False False False False False False False False  True False False
   False False]
  [False False  True False False False False False False False False
   False False]
  [False False False False False False False False False  True False
   False False]
  [False False False False False False False  True False False False
   False False]
  [False False False False False False False False False  True False
   False False]]

 [[False False False False  True False False False False False False
   False False]
  [False False False False False False False False False False  True
   False False]
  [False False False False False False False  True False False False
   False False]
  [False False  True False False False False False False False False
   False False]
  [False False False False False False False False Fals

### Build Model

In [29]:
print('Build model...')

############################################
##### Build your own model here ############
model = Sequential()
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
model.add(layers.RepeatVector(DIGITS + 1))
for _ in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))

model.add(layers.TimeDistributed(layers.Dense(len(chars), activation='softmax')))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

############################################

model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 128)               72704     
_________________________________________________________________
repeat_vector_3 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_6 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_3 (TimeDist (None, 4, 13)             1677      
Total params: 205,965
Trainable params: 205,965
Non-trainable params: 0
_________________________________________________________________


### Training

In [30]:
for iteration in range(150):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        preds = model.predict_classes(rowx, verbose=0)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 485+24  T 509  [91m☒[0m 100 
Q 446-264 T 182  [91m☒[0m 10  
Q 211-66  T 145  [91m☒[0m 10  
Q 334-151 T 183  [91m☒[0m 10  
Q 542-64  T 478  [91m☒[0m 10  
Q 289+49  T 338  [91m☒[0m 101 
Q 892+22  T 914  [91m☒[0m 100 
Q 460-85  T 375  [91m☒[0m 10  
Q 512+726 T 1238 [91m☒[0m 100 
Q 768-754 T 14   [91m☒[0m 101 

--------------------------------------------------
Iteration 1
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 495+571 T 1066 [91m☒[0m 100 
Q 633+28  T 661  [91m☒[0m 405 
Q 651-451 T 200  [91m☒[0m 24  
Q 775+23  T 798  [91m☒[0m 805 
Q 821-232 T 589  [91m☒[0m 24  
Q 809-143 T 666  [91m☒[0m 40  
Q 65+145  T 210  [91m☒[0m 405 
Q 302+129 T 431  [91m☒[0m 104 
Q 804+770 T 1574 [91m☒[0m 100 
Q 963-89  T 874  [91m☒[0m 485 

--------------------------------------------------
Iteration 2
Train on 36000 samples, valida

Q 895-49  T 846  [91m☒[0m 820 
Q 470-11  T 459  [91m☒[0m 447 
Q 400+30  T 430  [91m☒[0m 452 
Q 586+907 T 1493 [91m☒[0m 1465
Q 586+907 T 1493 [91m☒[0m 1465
Q 423+44  T 467  [92m☑[0m 467 
Q 960-785 T 175  [91m☒[0m 179 
Q 102+2   T 104  [91m☒[0m 111 
Q 731-451 T 280  [91m☒[0m 287 
Q 757-54  T 703  [91m☒[0m 710 

--------------------------------------------------
Iteration 15
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 474-298 T 176  [91m☒[0m 186 
Q 88+979  T 1067 [91m☒[0m 1072
Q 443+658 T 1101 [91m☒[0m 1190
Q 455+58  T 513  [91m☒[0m 524 
Q 34+431  T 465  [91m☒[0m 477 
Q 357+468 T 825  [91m☒[0m 811 
Q 749+526 T 1275 [91m☒[0m 1279
Q 869-681 T 188  [91m☒[0m 199 
Q 79+235  T 314  [91m☒[0m 311 
Q 655-499 T 156  [91m☒[0m 197 

--------------------------------------------------
Iteration 16
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 839-817 T 22   [91m☒[0m 50  
Q 921+10  T 931  [91m☒[0m 921 
Q 462-12  T 450  [91m☒[0

Q 849-414 T 435  [91m☒[0m 445 
Q 64+382  T 446  [92m☑[0m 446 
Q 855-676 T 179  [91m☒[0m 188 
Q 710-475 T 235  [91m☒[0m 236 
Q 974-696 T 278  [92m☑[0m 278 
Q 609+90  T 699  [91m☒[0m 690 
Q 851-610 T 241  [92m☑[0m 241 
Q 3+195   T 198  [92m☑[0m 198 
Q 989-149 T 840  [91m☒[0m 839 
Q 688-23  T 665  [92m☑[0m 665 

--------------------------------------------------
Iteration 29
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 689+767 T 1456 [91m☒[0m 1455
Q 646-147 T 499  [92m☑[0m 499 
Q 962-815 T 147  [92m☑[0m 147 
Q 937-594 T 343  [91m☒[0m 353 
Q 206+379 T 585  [91m☒[0m 575 
Q 663-196 T 467  [92m☑[0m 467 
Q 31+72   T 103  [91m☒[0m 113 
Q 648+101 T 749  [91m☒[0m 758 
Q 7+786   T 793  [91m☒[0m 783 
Q 916-202 T 714  [92m☑[0m 714 

--------------------------------------------------
Iteration 30
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 5+459   T 464  [92m☑[0m 464 
Q 15+137  T 152  [91m☒[0m 142 
Q 117+673 T 790  [92m☑[0

Q 235+319 T 554  [92m☑[0m 554 
Q 927-96  T 831  [92m☑[0m 831 
Q 320+9   T 329  [92m☑[0m 329 
Q 4+605   T 609  [92m☑[0m 609 
Q 99+352  T 451  [92m☑[0m 451 
Q 882-56  T 826  [92m☑[0m 826 
Q 847-288 T 559  [92m☑[0m 559 
Q 159+196 T 355  [92m☑[0m 355 
Q 611-133 T 478  [92m☑[0m 478 
Q 341-88  T 253  [92m☑[0m 253 

--------------------------------------------------
Iteration 43
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 394-27  T 367  [92m☑[0m 367 
Q 693+229 T 922  [91m☒[0m 912 
Q 773-32  T 741  [92m☑[0m 741 
Q 867-803 T 64   [91m☒[0m 54  
Q 314-93  T 221  [92m☑[0m 221 
Q 24+812  T 836  [92m☑[0m 836 
Q 559+44  T 603  [91m☒[0m 613 
Q 594-454 T 140  [92m☑[0m 140 
Q 66+369  T 435  [92m☑[0m 435 
Q 558-250 T 308  [91m☒[0m 208 

--------------------------------------------------
Iteration 44
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 327-147 T 180  [92m☑[0m 180 
Q 273-66  T 207  [92m☑[0m 207 
Q 542+952 T 1494 [92m☑[0

Q 208+4   T 212  [92m☑[0m 212 
Q 945+28  T 973  [92m☑[0m 973 
Q 505+48  T 553  [92m☑[0m 553 
Q 956-113 T 843  [92m☑[0m 843 
Q 38+917  T 955  [92m☑[0m 955 
Q 952+9   T 961  [92m☑[0m 961 
Q 895-576 T 319  [92m☑[0m 319 
Q 397+119 T 516  [92m☑[0m 516 
Q 337+13  T 350  [91m☒[0m 340 
Q 77+701  T 778  [92m☑[0m 778 

--------------------------------------------------
Iteration 57
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 548+577 T 1125 [92m☑[0m 1125
Q 157+664 T 821  [92m☑[0m 821 
Q 587-46  T 541  [92m☑[0m 541 
Q 102+7   T 109  [92m☑[0m 109 
Q 344-92  T 252  [92m☑[0m 252 
Q 89+528  T 617  [92m☑[0m 617 
Q 3+195   T 198  [92m☑[0m 198 
Q 383+635 T 1018 [92m☑[0m 1018
Q 58+155  T 213  [92m☑[0m 213 
Q 209+47  T 256  [91m☒[0m 257 

--------------------------------------------------
Iteration 58
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 20+530  T 550  [92m☑[0m 550 
Q 876+76  T 952  [92m☑[0m 952 
Q 242+8   T 250  [92m☑[0

Q 662+520 T 1182 [92m☑[0m 1182
Q 671-95  T 576  [92m☑[0m 576 
Q 53+43   T 96   [92m☑[0m 96  
Q 976-56  T 920  [92m☑[0m 920 
Q 64+698  T 762  [92m☑[0m 762 
Q 608-326 T 282  [92m☑[0m 282 
Q 24+18   T 42   [92m☑[0m 42  
Q 376+23  T 399  [92m☑[0m 399 
Q 57+391  T 448  [92m☑[0m 448 
Q 585+937 T 1522 [92m☑[0m 1522

--------------------------------------------------
Iteration 71
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 501-470 T 31   [92m☑[0m 31  
Q 228-200 T 28   [91m☒[0m 29  
Q 831+50  T 881  [92m☑[0m 881 
Q 344-92  T 252  [92m☑[0m 252 
Q 70+290  T 360  [92m☑[0m 360 
Q 462+160 T 622  [92m☑[0m 622 
Q 955-546 T 409  [92m☑[0m 409 
Q 311+415 T 726  [92m☑[0m 726 
Q 991-687 T 304  [92m☑[0m 304 
Q 456+169 T 625  [92m☑[0m 625 

--------------------------------------------------
Iteration 72
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 28+61   T 89   [92m☑[0m 89  
Q 158+201 T 359  [92m☑[0m 359 
Q 131-67  T 64   [92m☑[0

Q 886+59  T 945  [92m☑[0m 945 
Q 244-26  T 218  [92m☑[0m 218 
Q 417-69  T 348  [92m☑[0m 348 
Q 454-90  T 364  [92m☑[0m 364 
Q 823-700 T 123  [92m☑[0m 123 
Q 392-38  T 354  [92m☑[0m 354 
Q 49+901  T 950  [92m☑[0m 950 
Q 54+185  T 239  [92m☑[0m 239 
Q 845-14  T 831  [92m☑[0m 831 
Q 680+615 T 1295 [92m☑[0m 1295

--------------------------------------------------
Iteration 85
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 511-94  T 417  [92m☑[0m 417 
Q 513+31  T 544  [92m☑[0m 544 
Q 700+444 T 1144 [92m☑[0m 1144
Q 2+658   T 660  [92m☑[0m 660 
Q 961-98  T 863  [92m☑[0m 863 
Q 735-407 T 328  [92m☑[0m 328 
Q 288-256 T 32   [92m☑[0m 32  
Q 454+492 T 946  [92m☑[0m 946 
Q 781+882 T 1663 [92m☑[0m 1663
Q 120+7   T 127  [92m☑[0m 127 

--------------------------------------------------
Iteration 86
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 117+922 T 1039 [92m☑[0m 1039
Q 285-16  T 269  [92m☑[0m 269 
Q 577-70  T 507  [92m☑[0

Q 586-57  T 529  [92m☑[0m 529 
Q 801+255 T 1056 [91m☒[0m 1046
Q 802-87  T 715  [92m☑[0m 715 
Q 380-52  T 328  [92m☑[0m 328 
Q 623-397 T 226  [92m☑[0m 226 
Q 794+65  T 859  [92m☑[0m 859 
Q 543+128 T 671  [92m☑[0m 671 
Q 335-40  T 295  [92m☑[0m 295 
Q 545+97  T 642  [92m☑[0m 642 
Q 20+530  T 550  [92m☑[0m 550 

--------------------------------------------------
Iteration 99
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 297+30  T 327  [92m☑[0m 327 
Q 884+702 T 1586 [92m☑[0m 1586
Q 883-550 T 333  [92m☑[0m 333 
Q 559-30  T 529  [92m☑[0m 529 
Q 83+620  T 703  [92m☑[0m 703 
Q 945-301 T 644  [92m☑[0m 644 
Q 91+464  T 555  [92m☑[0m 555 
Q 335-40  T 295  [91m☒[0m 395 
Q 906+843 T 1749 [92m☑[0m 1749
Q 950-31  T 919  [92m☑[0m 919 

--------------------------------------------------
Iteration 100
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 177-79  T 98   [92m☑[0m 98  
Q 235-153 T 82   [92m☑[0m 82  
Q 394-203 T 191  [92m☑[

Q 884-450 T 434  [92m☑[0m 434 
Q 95+841  T 936  [92m☑[0m 936 
Q 183+90  T 273  [92m☑[0m 273 
Q 176+77  T 253  [92m☑[0m 253 
Q 700+444 T 1144 [92m☑[0m 1144
Q 42+583  T 625  [92m☑[0m 625 
Q 4+948   T 952  [92m☑[0m 952 
Q 45+415  T 460  [92m☑[0m 460 
Q 56+608  T 664  [92m☑[0m 664 
Q 789+25  T 814  [92m☑[0m 814 

--------------------------------------------------
Iteration 113
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 857-180 T 677  [92m☑[0m 677 
Q 249-24  T 225  [92m☑[0m 225 
Q 906-568 T 338  [92m☑[0m 338 
Q 734-370 T 364  [92m☑[0m 364 
Q 753-141 T 612  [92m☑[0m 612 
Q 861+54  T 915  [92m☑[0m 915 
Q 745+347 T 1092 [92m☑[0m 1092
Q 680-327 T 353  [92m☑[0m 353 
Q 899+15  T 914  [92m☑[0m 914 
Q 872-39  T 833  [92m☑[0m 833 

--------------------------------------------------
Iteration 114
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 895-789 T 106  [92m☑[0m 106 
Q 348+8   T 356  [92m☑[0m 356 
Q 371+789 T 1160 [92m☑

Q 66+61   T 127  [92m☑[0m 127 
Q 848-681 T 167  [92m☑[0m 167 
Q 611-133 T 478  [92m☑[0m 478 
Q 402-94  T 308  [92m☑[0m 308 
Q 270+25  T 295  [92m☑[0m 295 
Q 713-269 T 444  [92m☑[0m 444 
Q 787+892 T 1679 [92m☑[0m 1679
Q 98+973  T 1071 [92m☑[0m 1071
Q 891-844 T 47   [92m☑[0m 47  
Q 988-825 T 163  [92m☑[0m 163 

--------------------------------------------------
Iteration 127
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 307-44  T 263  [92m☑[0m 263 
Q 24+18   T 42   [92m☑[0m 42  
Q 240-42  T 198  [92m☑[0m 198 
Q 757+257 T 1014 [92m☑[0m 1014
Q 63+956  T 1019 [91m☒[0m 1029
Q 816-29  T 787  [91m☒[0m 777 
Q 67+371  T 438  [92m☑[0m 438 
Q 778-34  T 744  [92m☑[0m 744 
Q 966-620 T 346  [92m☑[0m 346 
Q 409-325 T 84   [92m☑[0m 84  

--------------------------------------------------
Iteration 128
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 633-446 T 187  [92m☑[0m 187 
Q 985+67  T 1052 [92m☑[0m 1052
Q 972-177 T 795  [92m☑

Q 650+21  T 671  [92m☑[0m 671 
Q 364-97  T 267  [92m☑[0m 267 
Q 76+408  T 484  [92m☑[0m 484 
Q 590-59  T 531  [92m☑[0m 531 
Q 962+416 T 1378 [92m☑[0m 1378
Q 633-553 T 80   [92m☑[0m 80  
Q 789+25  T 814  [92m☑[0m 814 
Q 133+19  T 152  [92m☑[0m 152 
Q 587-343 T 244  [92m☑[0m 244 
Q 653+58  T 711  [92m☑[0m 711 

--------------------------------------------------
Iteration 141
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 781-50  T 731  [92m☑[0m 731 
Q 253+20  T 273  [92m☑[0m 273 
Q 578+22  T 600  [92m☑[0m 600 
Q 72+733  T 805  [92m☑[0m 805 
Q 76+658  T 734  [92m☑[0m 734 
Q 772+945 T 1717 [92m☑[0m 1717
Q 785-95  T 690  [92m☑[0m 690 
Q 890-25  T 865  [92m☑[0m 865 
Q 523+413 T 936  [92m☑[0m 936 
Q 954-293 T 661  [92m☑[0m 661 

--------------------------------------------------
Iteration 142
Train on 36000 samples, validate on 4000 samples
Epoch 1/1
Q 477-199 T 278  [92m☑[0m 278 
Q 0+39    T 39   [91m☒[0m 49  
Q 953-567 T 386  [92m☑

### Testing

In [31]:
print("MSG : Prediction")
#####################################################
## Try to test and evaluate your model ##############
## ex. test_x = ["555+175", "860+7  ", "340+29 "]
## ex. test_y = ["730 ", "867 ", "369 "] 
right = 0
predictions = model.predict_classes(test_x, verbose = 1) 
for i in range(60000):
    correct = ctable.decode(test_y[i])
    guess = ctable.decode(predictions[i], calc_argmax=False)
    if correct == guess:
        right = right + 1

acc = right/60000
print("Accuracy : ", acc)
#####################################################

MSG : Prediction
Accuracy :  0.92355


Validation Accuracy為0.9804，Test Accuracy為0.92355，可以看出模型的效果不錯，甚至表現得比只有減法的情形還好。

# 乘法

### 最後來看一下同樣的模型在訓練乘法的資料是不是也會一樣效果不錯吧><

### Parameters Config

In [32]:
TRAINING_SIZE = 80000
chars = '0123456789* '
LAYERS = 2

In [33]:
class CharacterTable(object):
    def __init__(self, chars):
        self.chars = sorted(set(chars))
        self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
        self.indices_char = dict((i, c) for i, c in enumerate(self.chars))
    
    def encode(self, C, num_rows):
        x = np.zeros((num_rows, len(self.chars)))
        for i, c in enumerate(C):
            x[i, self.char_indices[c]] = 1
        return x
    
    def decode(self, x, calc_argmax=True):
        if calc_argmax:
            x = x.argmax(axis=-1)
        return "".join(self.indices_char[i] for i in x)

In [34]:
ctable = CharacterTable(chars)

In [35]:
ctable.indices_char

{0: ' ',
 1: '*',
 2: '0',
 3: '1',
 4: '2',
 5: '3',
 6: '4',
 7: '5',
 8: '6',
 9: '7',
 10: '8',
 11: '9'}

### Data Generation

In [36]:
questions = []
expected = []
seen = set()
print('Generating data...')
while len(questions) < TRAINING_SIZE:
    f = lambda: int(''.join(np.random.choice(list('0123456789')) for i in range(np.random.randint(1, DIGITS + 1))))
    a, b = f(), f()
    key = tuple(sorted((a, b)))
    if key in seen:
        continue
    seen.add(key)
    q = '{}*{}'.format(a, b)
    query = q + ' ' * (MAXLEN - len(q))
    ans = str(a * b)
    ans += ' ' * (DIGITS + 1 - len(ans))
    if REVERSE:
        query = query[::-1]
    questions.append(query)
    expected.append(ans)

print('Total multiplication questions:', len(questions))

Generating data...
Total multiplication questions: 80000


In [37]:
print(questions[:5], expected[:5])

['866*43 ', '4*8    ', '4*61   ', '166*956', '17*476 '] ['37238', '32  ', '244 ', '158696', '8092']


### Processing

In [38]:
print('Vectorization...')
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
y = np.zeros((len(expected), DIGITS*2, len(chars)), dtype=np.bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS*2)

Vectorization...


In [39]:
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]

# train_test_split
train_x = x[:20000]
train_y = y[:20000]
test_x = x[20000:]
test_y = y[20000:]

split_at = len(train_x) - len(train_x) // 10
(x_train, x_val) = train_x[:split_at], train_x[split_at:]
(y_train, y_val) = train_y[:split_at], train_y[split_at:]

print('Training Data:')
print(x_train.shape)
print(y_train.shape)

print('Validation Data:')
print(x_val.shape)
print(y_val.shape)

print('Testing Data:')
print(test_x.shape)
print(test_y.shape)

Training Data:
(18000, 7, 12)
(18000, 6, 12)
Validation Data:
(2000, 7, 12)
(2000, 6, 12)
Testing Data:
(60000, 7, 12)
(60000, 6, 12)


In [40]:
print("input: ", x_train[:3], '\n\n', "label: ", y_train[:3])

input:  [[[False False False False  True False False False False False False
   False]
  [False False False False  True False False False False False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False  True False False False False False False
   False]
  [False False False False False False False False False  True False
   False]
  [False False False False False False  True False False False False
   False]
  [ True False False False False False False False False False False
   False]]

 [[False False False False False False False False False  True False
   False]
  [False False False False False False False False False  True False
   False]
  [False False False False False False False False  True False False
   False]
  [False  True False False False False False False False False False
   False]
  [False False False False False False False False False  True False
   False]
  [False False False False False False  Tr

### Build Model

In [41]:
print('Build model...')

############################################
##### Build your own model here ############
model = Sequential()
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
model.add(layers.RepeatVector(DIGITS*2))
for _ in range(LAYERS):
    model.add(RNN(HIDDEN_SIZE, return_sequences=True))
    
model.add(layers.TimeDistributed(layers.Dense(len(chars), activation='softmax')))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

############################################

model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_7 (LSTM)                (None, 128)               72192     
_________________________________________________________________
repeat_vector_4 (RepeatVecto (None, 6, 128)            0         
_________________________________________________________________
lstm_8 (LSTM)                (None, 6, 128)            131584    
_________________________________________________________________
lstm_9 (LSTM)                (None, 6, 128)            131584    
_________________________________________________________________
time_distributed_4 (TimeDist (None, 6, 12)             1548      
Total params: 336,908
Trainable params: 336,908
Non-trainable params: 0
_________________________________________________________________


### Training

In [42]:
for iteration in range(150):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    model.fit(x_train, y_train,
              batch_size=BATCH_SIZE,
              epochs=1,
              validation_data=(x_val, y_val))
    for i in range(10):
        ind = np.random.randint(0, len(x_val))
        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
        preds = model.predict_classes(rowx, verbose=0)
        q = ctable.decode(rowx[0])
        correct = ctable.decode(rowy[0])
        guess = ctable.decode(preds[0], calc_argmax=False)
        print('Q', q[::-1] if REVERSE else q, end=' ')
        print('T', correct, end=' ')
        if correct == guess:
            print(colors.ok + '☑' + colors.close, end=' ')
        else:
            print(colors.fail + '☒' + colors.close, end=' ')
        print(guess)


--------------------------------------------------
Iteration 0
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 941*996 T 937236 [91m☒[0m 144444
Q 643*207 T 133101 [91m☒[0m 144444
Q 410*859 T 352190 [91m☒[0m 135500
Q 42*34   T 1428   [91m☒[0m 148800
Q 88*432  T 38016  [91m☒[0m 144444
Q 841*30  T 25230  [91m☒[0m 148000
Q 463*53  T 24539  [91m☒[0m 135500
Q 631*664 T 418984 [91m☒[0m 144444
Q 78*356  T 27768  [91m☒[0m 135500
Q 9*465   T 4185   [91m☒[0m 140000

--------------------------------------------------
Iteration 1
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 739*47  T 34733  [91m☒[0m 111990
Q 827*680 T 562360 [91m☒[0m 102000
Q 136*74  T 10064  [91m☒[0m 111220
Q 39*657  T 25623  [91m☒[0m 105550
Q 739*56  T 41384  [91m☒[0m 105500
Q 410*859 T 352190 [91m☒[0m 105000
Q 952*25  T 23800  [91m☒[0m 105000
Q 124*31  T 3844   [91m☒[0m 111920
Q 858*61  T 52338  [91m☒[0m 101000
Q 26*97   T 2522   [91m☒[0m 111200

-------------

Q 312*19  T 5928   [91m☒[0m 6552  
Q 685*3   T 2055   [91m☒[0m 2175  
Q 404*42  T 16968  [91m☒[0m 160560
Q 298*505 T 150490 [91m☒[0m 140750
Q 61*923  T 56303  [91m☒[0m 56009 
Q 665*513 T 341145 [91m☒[0m 370075
Q 617*75  T 46275  [91m☒[0m 460055
Q 846*984 T 832464 [91m☒[0m 776644
Q 55*579  T 31845  [91m☒[0m 397755
Q 42*425  T 17850  [91m☒[0m 160500

--------------------------------------------------
Iteration 14
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 548*54  T 29592  [91m☒[0m 297720
Q 703*20  T 14060  [91m☒[0m 136600
Q 205*17  T 3485   [91m☒[0m 3225  
Q 447*28  T 12516  [91m☒[0m 12776 
Q 517*115 T 59455  [91m☒[0m 577255
Q 78*63   T 4914   [91m☒[0m 4676  
Q 425*35  T 14875  [91m☒[0m 157750
Q 336*26  T 8736   [91m☒[0m 90022 
Q 41*573  T 23493  [91m☒[0m 22699 
Q 0*259   T 0      [92m☑[0m 0     

--------------------------------------------------
Iteration 15
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 664*237 T 

Q 402*51  T 20502  [91m☒[0m 20008 
Q 479*594 T 284526 [91m☒[0m 280116
Q 87*991  T 86217  [91m☒[0m 880077
Q 946*679 T 642334 [91m☒[0m 631436
Q 527*67  T 35309  [91m☒[0m 34411 
Q 36*953  T 34308  [91m☒[0m 345144
Q 293*82  T 24026  [91m☒[0m 240122
Q 664*237 T 157368 [91m☒[0m 150116
Q 879*263 T 231177 [91m☒[0m 234111
Q 95*550  T 52250  [91m☒[0m 522500

--------------------------------------------------
Iteration 27
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 14*574  T 8036   [91m☒[0m 7124  
Q 598*94  T 56212  [91m☒[0m 589740
Q 923*105 T 96915  [91m☒[0m 906655
Q 552*26  T 14352  [91m☒[0m 137720
Q 540*840 T 453600 [91m☒[0m 447400
Q 239*161 T 38479  [91m☒[0m 367877
Q 608*883 T 536864 [91m☒[0m 515774
Q 96*295  T 28320  [91m☒[0m 276500
Q 220*76  T 16720  [91m☒[0m 167800
Q 78*624  T 48672  [91m☒[0m 47772 

--------------------------------------------------
Iteration 28
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 570*928 T 

Q 374*4   T 1496   [91m☒[0m 14760 
Q 3*289   T 867    [91m☒[0m 827   
Q 532*219 T 116508 [91m☒[0m 119908
Q 487*44  T 21428  [91m☒[0m 218880
Q 8*261   T 2088   [92m☑[0m 2088  
Q 510*20  T 10200  [91m☒[0m 102000
Q 698*2   T 1396   [92m☑[0m 1396  
Q 293*271 T 79403  [91m☒[0m 707831
Q 318*972 T 309096 [91m☒[0m 306236
Q 658*86  T 56588  [91m☒[0m 568486

--------------------------------------------------
Iteration 41
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 420*649 T 272580 [91m☒[0m 262080
Q 178*3   T 534    [91m☒[0m 594   
Q 94*805  T 75670  [91m☒[0m 756800
Q 954*54  T 51516  [91m☒[0m 508760
Q 482*867 T 417894 [91m☒[0m 417234
Q 21*16   T 336    [92m☑[0m 336   
Q 58*491  T 28478  [91m☒[0m 282784
Q 904*67  T 60568  [91m☒[0m 608880
Q 258*0   T 0      [92m☑[0m 0     
Q 984*193 T 189912 [91m☒[0m 183212

--------------------------------------------------
Iteration 42
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 869*3   T 

Q 672*71  T 47712  [92m☑[0m 47712 
Q 23*530  T 12190  [91m☒[0m 122900
Q 32*13   T 416    [91m☒[0m 456   
Q 52*74   T 3848   [91m☒[0m 3948  
Q 19*977  T 18563  [91m☒[0m 18503 
Q 601*4   T 2404   [92m☑[0m 2404  
Q 243*49  T 11907  [91m☒[0m 123777
Q 5*48    T 240    [92m☑[0m 240   
Q 427*887 T 378749 [91m☒[0m 389719
Q 792*41  T 32472  [91m☒[0m 32972 

--------------------------------------------------
Iteration 54
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 540*63  T 34020  [91m☒[0m 346200
Q 751*622 T 467122 [91m☒[0m 467362
Q 60*51   T 3060   [91m☒[0m 316004
Q 146*876 T 127896 [91m☒[0m 123996
Q 925*91  T 84175  [91m☒[0m 826257
Q 754*406 T 306124 [91m☒[0m 305484
Q 159*104 T 16536  [91m☒[0m 152360
Q 707*847 T 598829 [91m☒[0m 509719
Q 24*499  T 11976  [91m☒[0m 121168
Q 92*758  T 69736  [91m☒[0m 70636 

--------------------------------------------------
Iteration 55
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 655*13  T 

Q 69*54   T 3726   [91m☒[0m 37860 
Q 158*62  T 9796   [91m☒[0m 9636  
Q 77*742  T 57134  [91m☒[0m 578740
Q 58*491  T 28478  [91m☒[0m 28358 
Q 921*421 T 387741 [91m☒[0m 387521
Q 54*526  T 28404  [91m☒[0m 286240
Q 688*216 T 148608 [91m☒[0m 146128
Q 533*88  T 46904  [91m☒[0m 46744 
Q 106*318 T 33708  [91m☒[0m 365286
Q 775*10  T 7750   [91m☒[0m 775000

--------------------------------------------------
Iteration 68
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 1*437   T 437    [91m☒[0m 457   
Q 426*182 T 77532  [91m☒[0m 74332 
Q 257*544 T 139808 [91m☒[0m 138608
Q 923*105 T 96915  [91m☒[0m 960455
Q 858*249 T 213642 [91m☒[0m 214342
Q 38*637  T 24206  [91m☒[0m 242460
Q 311*440 T 136840 [92m☑[0m 136840
Q 835*180 T 150300 [91m☒[0m 148300
Q 55*506  T 27830  [91m☒[0m 277700
Q 447*848 T 379056 [91m☒[0m 383256

--------------------------------------------------
Iteration 69
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 91*40   T 

Q 643*44  T 28292  [91m☒[0m 28232 
Q 220*76  T 16720  [91m☒[0m 165200
Q 59*369  T 21771  [91m☒[0m 218318
Q 495*29  T 14355  [91m☒[0m 141650
Q 411*1   T 411    [91m☒[0m 451   
Q 2*377   T 754    [91m☒[0m 794   
Q 307*656 T 201392 [91m☒[0m 206232
Q 483*296 T 142968 [91m☒[0m 144618
Q 595*739 T 439705 [91m☒[0m 443515
Q 79*937  T 74023  [91m☒[0m 746139

--------------------------------------------------
Iteration 81
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 987*31  T 30597  [91m☒[0m 300477
Q 273*75  T 20475  [91m☒[0m 204255
Q 19*200  T 3800   [91m☒[0m 380000
Q 7*783   T 5481   [91m☒[0m 5411  
Q 623*470 T 292810 [91m☒[0m 298110
Q 86*338  T 29068  [91m☒[0m 293082
Q 37*565  T 20905  [91m☒[0m 214450
Q 910*388 T 353080 [92m☑[0m 353080
Q 213*216 T 46008  [91m☒[0m 445188
Q 291*31  T 9021   [92m☑[0m 9021  

--------------------------------------------------
Iteration 82
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 485*215 T 

Q 547*303 T 165741 [91m☒[0m 160121
Q 73*452  T 32996  [91m☒[0m 320762
Q 848*24  T 20352  [91m☒[0m 197120
Q 20*993  T 19860  [91m☒[0m 196600
Q 619*69  T 42711  [91m☒[0m 42511 
Q 633*2   T 1266   [91m☒[0m 1246  
Q 774*558 T 431892 [91m☒[0m 431532
Q 605*925 T 559625 [91m☒[0m 559175
Q 83*84   T 6972   [91m☒[0m 6032  
Q 36*950  T 34200  [91m☒[0m 348000

--------------------------------------------------
Iteration 95
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 189*62  T 11718  [91m☒[0m 11978 
Q 582*0   T 0      [92m☑[0m 0     
Q 27*729  T 19683  [91m☒[0m 196339
Q 859*267 T 229353 [91m☒[0m 226613
Q 392*76  T 29792  [91m☒[0m 293324
Q 825*71  T 58575  [91m☒[0m 585757
Q 61*968  T 59048  [91m☒[0m 591288
Q 700*996 T 697200 [91m☒[0m 703600
Q 664*950 T 630800 [91m☒[0m 624400
Q 706*356 T 251336 [91m☒[0m 254216

--------------------------------------------------
Iteration 96
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 753*51  T 

Q 615*266 T 163590 [91m☒[0m 160790
Q 259*5   T 1295   [91m☒[0m 1235  
Q 52*475  T 24700  [91m☒[0m 243000
Q 400*0   T 0      [92m☑[0m 0     
Q 906*289 T 261834 [91m☒[0m 268434
Q 168*63  T 10584  [91m☒[0m 10724 
Q 392*89  T 34888  [91m☒[0m 35048 
Q 26*691  T 17966  [91m☒[0m 189862
Q 332*650 T 215800 [91m☒[0m 210600
Q 135*29  T 3915   [91m☒[0m 3855  

--------------------------------------------------
Iteration 108
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 638*721 T 459998 [91m☒[0m 464718
Q 706*923 T 651638 [91m☒[0m 654778
Q 9*772   T 6948   [91m☒[0m 6968  
Q 18*625  T 11250  [91m☒[0m 114500
Q 615*266 T 163590 [91m☒[0m 162590
Q 76*903  T 68628  [91m☒[0m 691888
Q 700*293 T 205100 [91m☒[0m 200100
Q 36*642  T 23112  [91m☒[0m 235922
Q 7*686   T 4802   [91m☒[0m 4842  
Q 683*777 T 530691 [91m☒[0m 533471

--------------------------------------------------
Iteration 109
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 32*126  

Q 463*53  T 24539  [91m☒[0m 246892
Q 47*241  T 11327  [91m☒[0m 11007 
Q 647*488 T 315736 [91m☒[0m 311676
Q 4*945   T 3780   [91m☒[0m 37800 
Q 776*4   T 3104   [91m☒[0m 31440 
Q 555*723 T 401265 [91m☒[0m 402585
Q 24*631  T 15144  [91m☒[0m 149244
Q 28*361  T 10108  [91m☒[0m 1008  
Q 672*16  T 10752  [91m☒[0m 106222
Q 9*620   T 5580   [91m☒[0m 558006

--------------------------------------------------
Iteration 122
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 372*49  T 18228  [91m☒[0m 18268 
Q 62*272  T 16864  [91m☒[0m 165444
Q 917*10  T 9170   [91m☒[0m 933000
Q 28*422  T 11816  [91m☒[0m 117160
Q 41*362  T 14842  [91m☒[0m 14162 
Q 595*225 T 133875 [91m☒[0m 138775
Q 26*333  T 8658   [91m☒[0m 8958  
Q 389*19  T 7391   [91m☒[0m 70017 
Q 972*12  T 11664  [91m☒[0m 119044
Q 478*80  T 38240  [91m☒[0m 370400

--------------------------------------------------
Iteration 123
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 36*176  

Q 671*112 T 75152  [91m☒[0m 777722
Q 618*724 T 447432 [91m☒[0m 440512
Q 14*912  T 12768  [91m☒[0m 127680
Q 145*645 T 93525  [91m☒[0m 938750
Q 251*547 T 137297 [91m☒[0m 139877
Q 226*647 T 146222 [91m☒[0m 144442
Q 886*979 T 867394 [91m☒[0m 869374
Q 531*34  T 18054  [91m☒[0m 18334 
Q 410*859 T 352190 [91m☒[0m 354190
Q 352*841 T 296032 [91m☒[0m 298452

--------------------------------------------------
Iteration 135
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 21*963  T 20223  [91m☒[0m 195233
Q 11*63   T 693    [91m☒[0m 653   
Q 551*1   T 551    [92m☑[0m 551   
Q 43*521  T 22403  [91m☒[0m 222533
Q 697*0   T 0      [92m☑[0m 0     
Q 587*667 T 391529 [91m☒[0m 398349
Q 763*71  T 54173  [91m☒[0m 53283 
Q 355*961 T 341155 [91m☒[0m 337045
Q 565*52  T 29380  [91m☒[0m 29740 
Q 939*3   T 2817   [91m☒[0m 2747  

--------------------------------------------------
Iteration 136
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 764*479 

Q 78*966  T 75348  [91m☒[0m 754086
Q 300*392 T 117600 [92m☑[0m 117600
Q 829*662 T 548798 [91m☒[0m 549718
Q 724*94  T 68056  [91m☒[0m 676360
Q 26*333  T 8658   [91m☒[0m 8758  
Q 42*830  T 34860  [91m☒[0m 342600
Q 23*530  T 12190  [91m☒[0m 129900
Q 9*620   T 5580   [91m☒[0m 558006
Q 46*90   T 4140   [91m☒[0m 41400 
Q 700*156 T 109200 [91m☒[0m 101200

--------------------------------------------------
Iteration 149
Train on 18000 samples, validate on 2000 samples
Epoch 1/1
Q 68*567  T 38556  [91m☒[0m 385560
Q 392*76  T 29792  [91m☒[0m 29392 
Q 81*870  T 70470  [91m☒[0m 70670 
Q 52*805  T 41860  [91m☒[0m 414600
Q 75*310  T 23250  [91m☒[0m 227500
Q 902*797 T 718894 [91m☒[0m 724674
Q 23*716  T 16468  [91m☒[0m 160088
Q 72*871  T 62712  [91m☒[0m 627326
Q 620*731 T 453220 [91m☒[0m 456420
Q 538*8   T 4304   [91m☒[0m 43046 


### Testing

In [43]:
print("MSG : Prediction")
#####################################################
## Try to test and evaluate your model ##############
## ex. test_x = ["555+175", "860+7  ", "340+29 "]
## ex. test_y = ["730 ", "867 ", "369 "] 
right = 0
predictions = model.predict_classes(test_x, verbose = 1) 
for i in range(60000):
    correct = ctable.decode(test_y[i])
    guess = ctable.decode(predictions[i], calc_argmax=False)
    if correct == guess:
        right = right + 1

acc = right/60000
print("Accuracy : ", acc)
#####################################################

MSG : Prediction
Accuracy :  0.049166666666666664



|Layers|TRAINING_SIZE|Iterarions|Validation Accuracy|Test Accuracy|
|---|---|---|---|---|
|1|80000|150|0.5760|0.0425|
|2|80000|150|0.6043|0.0492|

由上表中的Validation Accuracy及Test Accuracy可以看出模型在乘法的效果很不好，而從Test Accuracy小於Validation Accuracy很多也能推測模型可能有over fitting的問題。要改善模型可以從增加樣本數或更改模型的架構去著手。