In [1]:
from keras.models import Model
from keras.layers import Input
from keras.layers import LSTM
from keras.layers import Dense
from keras.utils.vis_utils import plot_model

Using TensorFlow backend.


In [2]:
# Size constants for a sequence-to-sequence model. They are not referenced by
# any other cell shown here -- presumably meant as the n_input / n_output /
# n_units arguments of define_models(); TODO confirm before relying on them.
num_encoder_tokens = 71
num_decoder_tokens = 93
latent_dim = 256

In [3]:
def define_models(n_input, n_output, n_units):
    """Build an LSTM encoder-decoder plus its two inference-time sub-models.

    The decoder LSTM and the softmax Dense layer are created once and reused
    by both the training model and the decoder inference model, so the three
    returned models share their layers.

    # Arguments
        n_input: Size of the last axis of the encoder input sequence.
        n_output: Size of the last axis of the decoder input sequence, and
            the number of units of the softmax output layer.
        n_units: Number of units in the encoder and decoder LSTM layers.

    # Returns
        A tuple (model, encoder_model, decoder_model):
        model maps [encoder input, decoder input] to the decoder's softmax
        output sequence; encoder_model maps the encoder input to the
        [h, c] state pair; decoder_model maps [decoder input, h, c] to
        [softmax output, new h, new c].
    """
    # --- training graph: encoder ---
    enc_in = Input(shape=(None, n_input))
    # Only the final hidden/cell states are kept; the encoder output is unused.
    _, enc_h, enc_c = LSTM(n_units, return_state=True)(enc_in)
    enc_state = [enc_h, enc_c]

    # --- training graph: decoder, initialised with the encoder states ---
    dec_in = Input(shape=(None, n_output))
    dec_rnn = LSTM(n_units, return_sequences=True, return_state=True)
    dec_seq = dec_rnn(dec_in, initial_state=enc_state)[0]
    softmax_head = Dense(n_output, activation='softmax')
    model = Model([enc_in, dec_in], softmax_head(dec_seq))

    # --- inference: encoder returns only its states ---
    encoder_model = Model(enc_in, enc_state)

    # --- inference: decoder takes its states as explicit inputs ---
    state_in = [Input(shape=(n_units,)), Input(shape=(n_units,))]
    step_out, step_h, step_c = dec_rnn(dec_in, initial_state=state_in)
    decoder_model = Model([dec_in] + state_in,
                          [softmax_head(step_out), step_h, step_c])

    return model, encoder_model, decoder_model

In [4]:
def predict_sequence(infenc, infdec, source, n_steps, cardinality):
    """Generate a sequence step by step with encoder/decoder inference models.

    # Arguments
        infenc: Encoder inference model; `infenc.predict(source)` must return
            a list of state arrays.
        infdec: Decoder inference model; `infdec.predict([target_seq] + state)`
            must return a `(yhat, h, c)` triple.
        source: Encoded source sequence, passed unchanged to `infenc.predict`.
        n_steps: Number of decoding steps to run.
        cardinality: Size of the last axis of the decoder input.

    # Returns
        A numpy array with one row per decoding step, each row being
        `yhat[0, 0, :]` of that step.
    """
    # Imported locally: the original body called a bare `array(...)` that is
    # never imported in this notebook, which raised NameError on first use.
    import numpy as np

    # encode
    state = infenc.predict(source)
    # start-of-sequence input: an all-zero vector shaped (1, 1, cardinality)
    target_seq = np.zeros((1, 1, cardinality))
    # collect predictions
    output = []
    for _ in range(n_steps):
        # predict next step
        yhat, h, c = infdec.predict([target_seq] + state)
        # store prediction
        output.append(yhat[0, 0, :])
        # carry the decoder state over to the next step
        state = [h, c]
        # the raw prediction is fed back as the next decoder input
        target_seq = yhat
    return np.array(output)

In [5]:
class CharacterTable(object):
    """Bidirectional mapping between characters and one-hot row indices."""

    def __init__(self, chars):
        """Build the lookup tables from the given characters.

        # Arguments
            chars: Characters that can appear in the input. Duplicates are
                dropped and the remaining characters are sorted.
        """
        vocabulary = sorted(set(chars))
        self.chars = vocabulary
        self.char_indices = {ch: idx for idx, ch in enumerate(vocabulary)}
        self.indices_char = {idx: ch for idx, ch in enumerate(vocabulary)}

    def encode(self, C, num_rows):
        """Return the one-hot encoding of string C.

        # Arguments
            C: String to encode; every character must be in the table.
            num_rows: Number of rows in the returned matrix, so that every
                encoded sample has the same height. Rows beyond len(C) stay
                all-zero.
        """
        one_hot = np.zeros((num_rows, len(self.chars)))
        for row, ch in enumerate(C):
            one_hot[row, self.char_indices[ch]] = 1
        return one_hot

    def decode(self, x, calc_argmax=True):
        """Turn a one-hot/probability matrix or a sequence of indices into text.

        # Arguments
            x: Matrix whose last axis spans the characters, or, when
                calc_argmax is False, an iterable of character indices.
            calc_argmax: Whether to take the argmax over the last axis first.
        """
        indices = x.argmax(axis=-1) if calc_argmax else x
        return ''.join(self.indices_char[idx] for idx in indices)

In [6]:
import numpy as np

In [7]:
# Configuration for the synthetic addition dataset.
# Number of question/answer pairs to generate.
TRAINING_SIZE=50000
# Maximum number of digits per operand.
DIGITS=3
# Longest possible question: two operands of up to DIGITS characters plus '+'.
MAXLEN = DIGITS + 1 + DIGITS
# Operand pairs recorded by the data-generation cell.
seen = set()
# Padded question strings and their padded answer strings, filled in later.
questions = []
expected = []
# When True, each padded question string is stored reversed.
REVERSE = True
# Alphabet of the one-hot encoding; ' ' is the padding character.
chars = '0123456789+ '
ctable = CharacterTable(chars)

In [8]:
print('Generating data...')
digit_pool = list('0123456789')


def f():
    """Return an int built from 1 to DIGITS randomly chosen decimal digits."""
    n_digits = np.random.randint(1, DIGITS + 1)
    return int(''.join(np.random.choice(digit_pool) for _ in range(n_digits)))


while len(questions) < TRAINING_SIZE:
    a, b = f(), f()

    # NOTE: duplicate (a, b) pairs are not filtered out -- the order-independent
    # key is recorded in `seen` but never checked, so the same question can
    # appear more than once in the generated data.
    seen.add(tuple(sorted((a, b))))

    # Pad the question to MAXLEN and the answer to DIGITS + 1 with spaces.
    query = '{}+{}'.format(a, b).ljust(MAXLEN)
    ans = str(a + b).ljust(DIGITS + 1)
    if REVERSE:
        query = query[::-1]

    questions.append(query)
    expected.append(ans)

Generating data...


In [9]:
# Sanity check: the generation loop stops once TRAINING_SIZE questions exist.
print('Total addition questions:', len(questions))

Total addition questions: 50000


In [10]:
print('Vectorization...')
# Allocate the one-hot tensors: (samples, time steps, alphabet size).
# The builtin `bool` is used as dtype because the `np.bool` alias was
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=bool)
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=bool)

# Fill each sample with the one-hot encoding of its padded string.
for i, sentence in enumerate(questions):
    x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
    y[i] = ctable.encode(sentence, DIGITS + 1)

Vectorization...


In [11]:
# Expect (number of questions, MAXLEN, len(chars)).
x.shape

(50000, 7, 12)

In [12]:
#섞기
# Shuffle x and y with one shared permutation so the pairs stay aligned.
indices = np.arange(len(y))
np.random.shuffle(indices)
x, y = x[indices], y[indices]

# Split into training and validation sets: the last tenth is held out.
split_at = len(x) - len(x) // 10
x_train, x_val = x[:split_at], x[split_at:]
y_train, y_val = y[:split_at], y[split_at:]

In [13]:
# Confirm the sizes of the training and validation splits.
print(x_train.shape)
print(x_val.shape)

(45000, 7, 12)
(5000, 7, 12)


In [14]:
# Inspect one encoded question: MAXLEN rows, one one-hot row per character.
x_train[0]

array([[ True, False, False, False, False, False, False, False, False,
        False, False, False],
       [ True, False, False, False, False, False, False, False, False,
        False, False, False],
       [ True, False, False, False, False, False, False, False, False,
        False, False, False],
       [ True, False, False, False, False, False, False, False, False,
        False, False, False],
       [False, False, False,  True, False, False, False, False, False,
        False, False, False],
       [False,  True, False, False, False, False, False, False, False,
        False, False, False],
       [False, False, False, False,  True, False, False, False, False,
        False, False, False]])

In [15]:
# Expect (training samples, DIGITS + 1, len(chars)).
y_train.shape

(45000, 4, 12)

In [16]:
from keras.models import Sequential
from keras.layers import LSTM, RepeatVector, Dense, Activation

In [17]:
BATCH_SIZE = 64

print('Build model...')
model = Sequential([
    # Encoder: reads the whole question and emits a single 64-dim vector.
    LSTM(64, input_shape=(MAXLEN, len(chars))),
    # Repeat that vector once per output time step (DIGITS + 1 characters).
    RepeatVector(DIGITS + 1),
    # Decoder: emits one 32-dim vector per output time step.
    LSTM(32, return_sequences=True),
    # Per-step scores over the alphabet, then softmax. Kept as two separate
    # layers so the layer count and indices of `model.layers` are unchanged.
    Dense(len(chars)),
    Activation('softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()
model.fit(
    x_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=50,
    validation_data=(x_val, y_val),
)

Build model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 64)                19712     
_________________________________________________________________
repeat_vector_1 (RepeatVecto (None, 4, 64)             0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 4, 32)             12416     
_________________________________________________________________
dense_1 (Dense)              (None, 4, 12)             396       
_________________________________________________________________
activation_1 (Activation)    (None, 4, 12)             0         
Total params: 32,524
Trainable params: 32,524
Non-trainable params: 0
_________________________________________________________________
Train on 45000 samples, validate on 5000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/

<keras.callbacks.History at 0x1227f16d8>

In [None]:
class colors:
    """ANSI escape sequences used to colour the check marks printed below."""
    # Bright green foreground, wrapped around the "correct" mark.
    ok = '\033[92m'
    # Bright red foreground, wrapped around the "wrong" mark.
    fail = '\033[91m'
    # Reset all attributes; printed after each coloured mark.
    close = '\033[0m'
    
    
# Show ten randomly chosen validation questions with target and prediction.
for i in range(10):
    ind = np.random.randint(0, len(x_val))
    # Slice (rather than index) to keep the leading batch axis of size 1.
    rowx, rowy = x_val[ind:ind + 1], y_val[ind:ind + 1]
    # `Sequential.predict_classes` was removed in TensorFlow 2.6; taking the
    # argmax over the class axis of `predict` yields the same class indices
    # for this softmax output and works on old and new Keras alike.
    preds = model.predict(rowx).argmax(axis=-1)
    q = ctable.decode(rowx[0])
    correct = ctable.decode(rowy[0])
    # `preds` already holds indices, so decode must not argmax again.
    guess = ctable.decode(preds[0], calc_argmax=False)
    print('%5d' % ind, end=': ')
    # Questions were stored reversed when REVERSE is set; undo it for display.
    print('Q', q[::-1] if REVERSE else q, end=' ')
    print('T', correct, end=' ')
    if correct == guess:
        print(colors.ok + '☑' + colors.close, end=' ')
    else:
        print(colors.fail + '☒' + colors.close, end=' ')
    print(guess)

# %% Print final results.

# NOTE(review): `i` is set to a fixed index here but is not used by any code
# visible in this cell -- presumably a sample to inspect; confirm or remove.
i = 2975
# Model predictions for the whole validation set.
output_final = model.predict(x_val)


In [None]:
from keras.models import Model
# Sub-model that stops at layer index 3 of `model` -- the Dense layer, i.e.
# the per-step scores before the softmax Activation is applied.
intermediate_layer_model = Model(inputs=model.input, outputs=model.layers[3].output)
# Output of that layer for every validation sample.
output = intermediate_layer_model.predict(x_val)

import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [None]:
# List the layer objects; their positions are the indices used with model.layers[i].
model.layers

In [None]:
# Plot, for the first validation sample, the output of layers 1 to 4 as a
# heatmap. Layer 0 is skipped -- presumably because its per-sample output is
# one-dimensional and cannot be drawn as a heatmap; TODO confirm.
for i in range(1,5):
    # Sub-model that stops at layer i.
    intermediate_layer_model = Model(inputs=model.input, outputs=model.layers[i].output)
    # Computed for the whole validation set although only output[0] is drawn.
    output = intermediate_layer_model.predict(x_val)
    plt.clf() # reset the current figure
    sns.heatmap(output[0])
    plt.show()

In [None]:
# Loss and accuracy on the validation set; the model was compiled with
# metrics=['accuracy'], so evaluate() returns exactly these two values.
loss, acc = model.evaluate(x_val, y_val, verbose=1)
print('-loss:',loss, '\n-accuracy:', acc)

In [None]:
# NOTE(review): this cell is an exact copy of the previous evaluation cell and
# only repeats the same measurement; consider removing one of the two.
loss, acc = model.evaluate(x_val, y_val, verbose=1)
print('-loss:',loss, '\n-accuracy:', acc)