In [1]:
from keras.models import Model
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Input, LSTM, Dense
from keras.layers.embeddings import Embedding
import numpy as np
import jieba
import re

Using TensorFlow backend.


In [2]:
batch_size = 64
epochs = 100
latent_dim = 256
num_samples = 10000

## Loading data

In [3]:
input_texts, output_texts = [], []
en_vocs, cn_vocs = set(), set()
pattern = re.compile('[\W_]+')
with open('cmn.txt', 'r') as f:
    cnt = 0
    for line in f:
        # source and target are seperated by tab; also lower the letters
        input_text, output_text = line.lower().split('\t')
        # remove English punctuations
        en_words = list(map(lambda x: pattern.sub('', x), input_text.split()))
        en_vocs.update(en_words)
        # remove Chinese punctuations
        output_text = re.sub( "[\s+\.\!\/_,$%^*(+\"\']+|[+——！，。?？、~@#￥%……&*（ ）]+", '',output_text)  
        cn_tokens = ['\t']+list(jieba.cut(output_text))+['\n']
        cn_vocs.update(cn_tokens)
        input_texts.append(en_words)
        output_texts.append(cn_tokens)
        cnt += 1
        if cnt >= num_samples:
            break


Building prefix dict from the default dictionary ...
Loading model from cache /tmp/jieba.cache
Loading model cost 1.217 seconds.
Prefix dict has been built succesfully.


## Generate word-int mapping

In [8]:
en_vocs = sorted(list(en_vocs))
cn_vocs = sorted(list(cn_vocs))
en_to_int, int_to_en = {w: i for i, w in enumerate(en_vocs)}, {i: w for i, w in enumerate(en_vocs)}
cn_to_int, int_to_cn = {w: i for i, w in enumerate(cn_vocs)}, {i: w for i, w in enumerate(cn_vocs)}

num_encoder_tokens = len(en_vocs)
num_decoder_tokens = len(cn_vocs)
max_encoder_seq_length = max(map(len, input_texts))
max_decoder_seq_length = max(map(len, output_texts))

print('Number of samples:', len(input_texts))
print('Number of unique input tokens:', num_encoder_tokens)
print('Number of unique output tokens:', num_decoder_tokens)
print('Max sequence length for inputs:', max_encoder_seq_length)
print('Max sequence length for outputs:', max_decoder_seq_length)

Number of samples: 10000
Number of unique input tokens: 3597
Number of unique output tokens: 7071
Max sequence length for inputs: 9
Max sequence length for outputs: 14


## Vectorization and Padding

In [9]:
int_input_data = [[en_to_int[w] for w in row] for row in input_texts]
int_output_data = [[cn_to_int[w] for w in row] for row in output_texts]
encoder_input_data = pad_sequences(int_input_data, maxlen=max_encoder_seq_length, padding='post')
decoder_input_data = pad_sequences(int_output_data, maxlen=max_decoder_seq_length, padding='post')
#decoder_target_data = pad_sequences(decoder_input_data[:,1:], maxlen=max_decoder_seq_length, padding='post')

#decoder_target_data needs to be one-hot encoded
decoder_target_data = np.zeros((len(input_texts), max_decoder_seq_length, num_decoder_tokens), dtype='float32')

for i, row in enumerate(int_output_data):
    for t, w in enumerate(row):
        if t == 0:
            continue
        decoder_target_data[i, t-1, w] = 1
#print(decoder_target_data.shape)

## Define the model

In [11]:
# Define the input sequence
encoder_inputs = Input(shape=(None,))
_, state_h, state_c = LSTM(latent_dim, return_state=True)(Embedding(num_encoder_tokens, latent_dim)(encoder_inputs))
encoder_states = [state_h, state_c]

# Set up the decoder and use encoder_states as initial state
decoder_inputs = Input(shape=(None,))
y = Embedding(num_decoder_tokens, latent_dim)(decoder_inputs)
x = LSTM(latent_dim, return_sequences=True)(y, initial_state=encoder_states)
decoder_outputs = Dense(num_decoder_tokens, activation='softmax')(x)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
model.fit([encoder_input_data, decoder_input_data], decoder_target_data,
          batch_size=batch_size, epochs=epochs,validation_split=0.2)

Train on 8000 samples, validate on 2000 samples
Epoch 1/100


InternalError: Dst tensor is not initialized.
	 [[Node: training_1/RMSprop/Const = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [3597,256] values: [0 0 0]...>, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]

Caused by op 'training_1/RMSprop/Const', defined at:
  File "/usr/lib64/python3.4/runpy.py", line 170, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib64/python3.4/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/lib/python3.4/dist-packages/ipykernel/__main__.py", line 3, in <module>
    app.launch_new_instance()
  File "/usr/lib/python3.4/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/lib/python3.4/dist-packages/ipykernel/kernelapp.py", line 474, in start
    ioloop.IOLoop.instance().start()
  File "/usr/lib64/python3.4/dist-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/usr/lib64/python3.4/dist-packages/tornado/ioloop.py", line 887, in start
    handler_func(fd_obj, events)
  File "/usr/lib64/python3.4/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/lib64/python3.4/dist-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/usr/lib64/python3.4/dist-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/usr/lib64/python3.4/dist-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/usr/lib64/python3.4/dist-packages/tornado/stack_context.py", line 275, in null_wrapper
    return fn(*args, **kwargs)
  File "/usr/lib/python3.4/dist-packages/ipykernel/kernelbase.py", line 276, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/usr/lib/python3.4/dist-packages/ipykernel/kernelbase.py", line 228, in dispatch_shell
    handler(stream, idents, msg)
  File "/usr/lib/python3.4/dist-packages/ipykernel/kernelbase.py", line 390, in execute_request
    user_expressions, allow_stdin)
  File "/usr/lib/python3.4/dist-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/lib/python3.4/dist-packages/ipykernel/zmqshell.py", line 501, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/lib/python3.4/dist-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/lib/python3.4/dist-packages/IPython/core/interactiveshell.py", line 2827, in run_ast_nodes
    if self.run_code(code, result):
  File "/usr/lib/python3.4/dist-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-11-43789224e7ec>", line 15, in <module>
    batch_size=batch_size, epochs=epochs,validation_split=0.2)
  File "/usr/local/lib64/python3.4/site-packages/keras/engine/training.py", line 1634, in fit
    self._make_train_function()
  File "/usr/local/lib64/python3.4/site-packages/keras/engine/training.py", line 990, in _make_train_function
    loss=self.total_loss)
  File "/usr/local/lib64/python3.4/site-packages/keras/legacy/interfaces.py", line 87, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib64/python3.4/site-packages/keras/optimizers.py", line 226, in get_updates
    accumulators = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  File "/usr/local/lib64/python3.4/site-packages/keras/optimizers.py", line 226, in <listcomp>
    accumulators = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  File "/usr/local/lib64/python3.4/site-packages/keras/backend/tensorflow_backend.py", line 680, in zeros
    return variable(tf.constant_initializer(0., dtype=tf_dtype)(shape),
  File "/usr/lib/python3.4/dist-packages/tensorflow/python/ops/init_ops.py", line 153, in __call__
    return constant_op.constant(self.value, dtype=dtype, shape=shape)
  File "/usr/lib/python3.4/dist-packages/tensorflow/python/framework/constant_op.py", line 169, in constant
    attrs={"value": tensor_value, "dtype": dtype_value}, name=name).outputs[0]
  File "/usr/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py", line 2395, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/lib/python3.4/dist-packages/tensorflow/python/framework/ops.py", line 1264, in __init__
    self._traceback = _extract_stack()

InternalError (see above for traceback): Dst tensor is not initialized.
	 [[Node: training_1/RMSprop/Const = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [3597,256] values: [0 0 0]...>, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]
