In [1]:
from keras.models import Sequential
from keras import layers
import numpy as np
import pandas as pd
from six.moves import range

Using TensorFlow backend.


# Parameters Config

In [2]:
class colors:
    """ANSI escape sequences used to colorize text printed to a terminal."""
    # Switch the foreground to bright green (success marker).
    ok = '\033[92m'
    # Switch the foreground to bright red (failure marker).
    fail = '\033[91m'
    # Reset all text attributes back to the terminal default.
    close = '\033[0m'

In [3]:
# --- Dataset -----------------------------------------------------------------
# Number of unique questions to generate; they are later split into
# training, validation and test sets.
TRAINING_SIZE = 70000
# Number of digits in each operand.
DIGITS = 3
# True: questions mix '+' and '-'; False: addition only.
ADD_SUB_MIX = True
# Length of an encoded question: "<DIGITS digits><operator><DIGITS digits>".
MAXLEN = DIGITS + 1 + DIGITS

# --- Model / training --------------------------------------------------------
# Recurrent layer class used for both the encoder and the decoder.
RNN = layers.LSTM
HIDDEN_SIZE = 128
BATCH_SIZE = 128
# Number of stacked decoder RNN layers.
LAYERS = 1

# --- Vocabulary --------------------------------------------------------------
# The space is the padding character and has to be part of the vocabulary in
# BOTH modes: answers are right-padded with spaces up to DIGITS + 1 characters,
# so without it encoding a padded answer raises KeyError in addition-only mode.
chars = '0123456789+- ' if ADD_SUB_MIX else '0123456789+ '
# Operators a question may use.
op_option = list('+-' if ADD_SUB_MIX else '+')

In [4]:
class CharacterTable(object):
    """Bidirectional mapping between characters and one-hot rows.

    The vocabulary is the sorted set of the characters passed to the
    constructor; a character's index is its position in that sorted list.
    """

    def __init__(self, chars):
        """Build the lookup tables from an iterable of characters."""
        vocabulary = sorted(set(chars))
        self.chars = vocabulary
        self.char_indices = {ch: idx for idx, ch in enumerate(vocabulary)}
        self.indices_char = {idx: ch for idx, ch in enumerate(vocabulary)}

    def encode(self, C, num_rows):
        """One-hot encode the string ``C`` into a ``(num_rows, vocab)`` array.

        Rows beyond ``len(C)`` stay all-zero. A character outside the
        vocabulary raises ``KeyError``; a string longer than ``num_rows``
        raises ``IndexError``.
        """
        onehot = np.zeros((num_rows, len(self.chars)))
        lookup = self.char_indices
        for row, ch in enumerate(C):
            onehot[row, lookup[ch]] = 1
        return onehot

    def decode(self, x, calc_argmax=True):
        """Turn encoded rows back into a string.

        With ``calc_argmax`` true, ``x`` holds one score vector per position
        and the highest-scoring index is taken for each; otherwise ``x`` is
        already a sequence of character indices.
        """
        positions = x.argmax(axis=-1) if calc_argmax else x
        return "".join(map(self.indices_char.__getitem__, positions))

In [5]:
# Single encoder/decoder table built over the configured vocabulary `chars`.
ctable = CharacterTable(chars)

In [6]:
# Show the index -> character mapping (indices follow the sorted vocabulary).
ctable.indices_char

{0: ' ',
 1: '+',
 2: '-',
 3: '0',
 4: '1',
 5: '2',
 6: '3',
 7: '4',
 8: '5',
 9: '6',
 10: '7',
 11: '8',
 12: '9'}

# Data Generation

In [7]:
# Generate TRAINING_SIZE unique "<a><op><b>" questions and their answers.
# Both lists hold fixed-width strings: questions are padded to MAXLEN and
# answers to DIGITS + 1 characters, using trailing spaces.
questions = []
expected = []
seen = set()

# Operands are drawn uniformly from the DIGITS-digit integers (no leading
# zeros), i.e. from the half-open range [operand_low, operand_high).
operand_low, operand_high = 10 ** (DIGITS - 1), 10 ** DIGITS

# Each (a, b, op) triple is accepted at most once, so the loop below can only
# terminate when at least TRAINING_SIZE distinct triples exist.
max_unique = (operand_high - operand_low) ** 2 * len(op_option)
if TRAINING_SIZE > max_unique:
    raise ValueError(
        'TRAINING_SIZE={} exceeds the {} distinct questions available for '
        'DIGITS={}'.format(TRAINING_SIZE, max_unique, DIGITS))

print('Generating data...')
while len(questions) < TRAINING_SIZE:
    # Cast to builtin types so formatting and hashing do not depend on NumPy
    # scalar behavior.
    a = int(np.random.randint(operand_low, operand_high))
    b = int(np.random.randint(operand_low, operand_high))
    op = str(np.random.choice(op_option))
    key = a, b, op
    if key in seen:
        continue
    seen.add(key)
    q = '{}{}{}'.format(a, op, b)
    query = q.ljust(MAXLEN)
    ans = str(a + b if op == '+' else a - b).ljust(DIGITS + 1)
    questions.append(query)
    expected.append(ans)
print('Total addition questions:', len(questions))

Generating data...
Total addition questions: 70000


In [8]:
# Peek at the first ten question strings and their space-padded answers.
print(questions[:10], expected[:10])

['929+823', '306-914', '318-311', '472+369', '553-667', '465-737', '937-499', '226+561', '829-276', '168+232'] ['1752', '-608', '7   ', '841 ', '-114', '-272', '438 ', '787 ', '553 ', '400 ']


# Processing

In [9]:
print('Vectorization...')
# One-hot tensors indexed as (sample, character position, vocabulary index).
# The builtin `bool` is used as dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(expected), DIGITS + 1, len(chars)), dtype=bool)
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [10]:
# Shuffle questions and answers with one shared permutation so that each
# question stays aligned with its answer.
indices = np.arange(len(y))
np.random.shuffle(indices)
x = x[indices]
y = y[indices]

# train_test_split
# The last TEST_SIZE samples are held out for testing. Guard against datasets
# that are too small: a non-positive split index would silently leave the
# training portion empty.
TEST_SIZE = 10000
if len(x) <= TEST_SIZE:
    raise ValueError(
        'need more than {} samples to hold out a test set, got {}'.format(
            TEST_SIZE, len(x)))
split_at = len(x) - TEST_SIZE
train_x, test_x = x[:split_at], x[split_at:]
train_y, test_y = y[:split_at], y[split_at:]

# The last tenth of the remaining samples becomes the validation set.
split_at = len(train_x) - len(train_x) // 10
(x_train, x_val) = train_x[:split_at], train_x[split_at:]
(y_train, y_val) = train_y[:split_at], train_y[split_at:]

print('Training Data:')
print(x_train.shape)
print(y_train.shape)

print('Validation Data:')
print(x_val.shape)
print(y_val.shape)

print('Testing Data:')
print(test_x.shape)
print(test_y.shape)

Training Data:
(54000, 7, 13)
(54000, 4, 13)
Validation Data:
(6000, 7, 13)
(6000, 4, 13)
Testing Data:
(10000, 7, 13)
(10000, 4, 13)


# Build Model

In [11]:
print('Build model...')
vocab_size = len(chars)
answer_len = DIGITS + 1

model = Sequential()

# Encoder: read the whole question and keep only the final RNN output.
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, vocab_size)))

# Feed that summary vector to the decoder once per answer character.
model.add(layers.RepeatVector(answer_len))

# Decoder: LAYERS stacked RNNs, each emitting one output per answer position.
for _ in range(LAYERS):
    decoder_layer = RNN(HIDDEN_SIZE, return_sequences=True)
    model.add(decoder_layer)

# Per-position projection onto the vocabulary, followed by a softmax.
per_step_projection = layers.TimeDistributed(layers.Dense(vocab_size))
model.add(per_step_projection)
model.add(layers.Activation('softmax'))

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               72704     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 128)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 128)            131584    
_________________________________________________________________
time_distributed_1 (TimeDist (None, 4, 13)             1677      
_________________________________________________________________
activation_1 (Activation)    (None, 4, 13)             0         
Total params: 205,965
Trainable params: 205,965
Non-trainable params: 0
_________________________________________________________________


# Training

In [None]:
# Train one epoch at a time so that exact-match accuracy on the held-out test
# set can be measured after every epoch.
#   accuracy: per-iteration fraction of test answers predicted entirely right
#   history:  per-iteration History objects returned by model.fit
accuracy = list()
history = list()

# Reference answers as per-position vocabulary indices; they do not change
# between iterations, so compute them once.
test_targets = test_y.argmax(axis=-1)

for iteration in range(100):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    history.append(model.fit(x_train, y_train,
                             batch_size=BATCH_SIZE,
                             epochs=1,
                             validation_data=(x_val, y_val)))

    # Most likely character index per answer position. Taking the argmax of
    # predict() gives what Sequential.predict_classes() returned for a
    # multi-class softmax, and still works on Keras versions where
    # predict_classes() no longer exists.
    preds = model.predict(test_x, verbose=0).argmax(axis=-1)

    # An answer counts as right only when every position matches. Comparing
    # index sequences is equivalent to comparing the decoded strings, because
    # the index -> character mapping is one-to-one.
    right = int((preds == test_targets).all(axis=-1).sum())

    accuracy.append(right / len(preds))
    print("MSG : Accuracy is {}".format(right / len(preds)))


--------------------------------------------------
Iteration 0
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
MSG : Accuracy is 0.0022

--------------------------------------------------
Iteration 1
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
MSG : Accuracy is 0.0045

--------------------------------------------------
Iteration 2
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
MSG : Accuracy is 0.0071

--------------------------------------------------
Iteration 3
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
MSG : Accuracy is 0.0055

--------------------------------------------------
Iteration 4
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
MSG : Accuracy is 0.009

--------------------------------------------------
Iteration 5
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
MSG : Accuracy is 0.0082

--------------------------------------------------
Iteration 6
Train on 54000 samples, validate on 6000 samples


MSG : Accuracy is 0.1123

--------------------------------------------------
Iteration 30
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
MSG : Accuracy is 0.0963

--------------------------------------------------
Iteration 31
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
MSG : Accuracy is 0.1714

--------------------------------------------------
Iteration 32
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
MSG : Accuracy is 0.2081

--------------------------------------------------
Iteration 33
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
MSG : Accuracy is 0.2751

--------------------------------------------------
Iteration 34
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
MSG : Accuracy is 0.3265

--------------------------------------------------
Iteration 35
Train on 54000 samples, validate on 6000 samples
Epoch 1/1
MSG : Accuracy is 0.34

--------------------------------------------------
Iteration 36
Train on 54000 sam

# Output

In [None]:
print('Output...')


def _first_epoch_metric(hist, *names):
    """Return the first-epoch value of the first metric in `names` that `hist` recorded.

    Keras releases before 2.3 log the 'accuracy' metric under the key 'acc';
    later releases log it under 'accuracy'. Accepting several spellings keeps
    this cell working on both.

    Raises:
        KeyError: if none of `names` is present in `hist.history`.
    """
    for name in names:
        if name in hist.history:
            return hist.history[name][0]
    raise KeyError('none of {} found in history keys {}'.format(
        names, sorted(hist.history)))


# One row per training iteration (each fit call ran exactly one epoch):
# training accuracy, validation accuracy, then exact-match test accuracy.
df = pd.DataFrame(
    [[_first_epoch_metric(hist, 'acc', 'accuracy'),
      _first_epoch_metric(hist, 'val_acc', 'val_accuracy')]
     for hist in history],
    columns=['acc', 'val_acc'])
df.insert(2, 'test_acc', accuracy)
# The file name records the training and validation set sizes.
df.to_csv('output_{}_{}.csv'.format(x_train.shape[0], x_val.shape[0]), index=False)