In [1]:
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Bidirectional
from keras import callbacks
import numpy as np
import pickle
import os
from keras.callbacks import ModelCheckpoint, EarlyStopping

Using TensorFlow backend.


In [2]:
# Train on the GPU: order CUDA devices by PCI bus id and expose only device "0".
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

In [3]:
# Basic hyper-parameters
batch_size = 64
epochs = 100  # NOTE(review): the model.fit call in this notebook passes its own literal epoch count, not this value
latent_dim = 256  # number of LSTM units

# NOTE(security): pickle.load can execute arbitrary code; only load files you trust.
# Context managers make sure both file handles are closed once loading is done.
with open("../models/word_id_map.pkl", "rb") as vocab_file:
    vocab = pickle.load(vocab_file)
id2word, word2id = vocab[0], vocab[1]

with open("../models/train_test_emotion_data.pkl", "rb") as data_file:
    data_all = pickle.load(data_file)

# The first eight entries of data_all are, in order, the train and test splits.
(train_x, train_y1, train_y2, train_e,
 test_x, test_y1, test_y2, test_e) = (data_all[i] for i in range(8))
num_samples = len(train_x)  # number of training samples

In [4]:
# Display the shape of the training encoder input array.
train_x.shape

(2410, 50, 3125)

## encoder

In [4]:
# Encoder input. The time axis is None so sequences of any length are accepted;
# the feature size is taken from the last axis of train_x.
num_encoder_tokens = train_x.shape[2]
encoder_inputs = Input(shape=(None, num_encoder_tokens), name='encoder_inputs')

# Three stacked LSTM layers sharing one configuration. Each call returns
# [output sequence, state_h, state_c]: the output sequence feeds the next
# layer, and the (state_h, state_c) pair of every layer is kept. The output
# sequence of the last layer is not used; only the states are needed.
encoder_lstm_options = dict(return_sequences=True, return_state=True, dropout=0.2)

encoder_lstm1 = LSTM(latent_dim, name='encoder_lstm1', **encoder_lstm_options)(encoder_inputs)
state_h1, state_c1 = encoder_lstm1[1], encoder_lstm1[2]
encoder_state1 = [state_h1, state_c1]

encoder_lstm2 = LSTM(latent_dim, name='encoder_lstm2', **encoder_lstm_options)(encoder_lstm1[0])
state_h2, state_c2 = encoder_lstm2[1], encoder_lstm2[2]
encoder_state2 = [state_h2, state_c2]

encoder_lstm3 = LSTM(latent_dim, name='encoder_lstm3', **encoder_lstm_options)(encoder_lstm2[0])
state_h3, state_c3 = encoder_lstm3[1], encoder_lstm3[2]
encoder_state3 = [state_h3, state_c3]

## decoder

In [5]:
# Decoder input. The time axis is None so sequences of any length are accepted.
# Each time step carries the answer token vector with the answer's emotion
# vector appended to it.
# The emotion width is read from the LAST axis of train_e so that this cell
# gives the same result whether train_e is still (samples, emotion_dim) or has
# already been expanded to (samples, time, emotion_dim) by a later cell.
num_decoder_tokens = train_x.shape[2]
emotion_dim = train_e.shape[-1]
decoder_inputs = Input(shape=(None, num_decoder_tokens + emotion_dim), name='decoder_inputs')

# Three stacked unidirectional LSTM layers. Each layer starts from the final
# state of the encoder layer at the same depth. The layers also return their
# own states: unused during training, but useful at inference time.
decoder_lstm_options = dict(return_sequences=True, return_state=True, dropout=0.2)
decoder_lstm1 = LSTM(latent_dim, name='decoder_lstm1', **decoder_lstm_options)(decoder_inputs, initial_state=encoder_state1)
decoder_lstm2 = LSTM(latent_dim, name='decoder_lstm2', **decoder_lstm_options)(decoder_lstm1[0], initial_state=encoder_state2)
decoder_lstm3 = LSTM(latent_dim, name='decoder_lstm3', **decoder_lstm_options)(decoder_lstm2[0], initial_state=encoder_state3)

# Keep only the output sequence of the top layer (index 0); indexing instead of
# unpacking into `_` avoids overwriting IPython's last-output variable.
decoder_outputs = decoder_lstm3[0]

# Fully connected softmax layer over the token vocabulary, applied per time step.
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

In [6]:
# Assemble the full training model: (encoder input, decoder input) -> decoder output.
model = Model(inputs=[encoder_inputs, decoder_inputs], outputs=decoder_outputs)
# Compile with the 'rmsprop' optimizer and categorical cross-entropy loss
# (an 'adadelta' variant of this call was left commented out in the original).
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

In [7]:
# Expand the emotion data to shape (num_samples, MaxLen, emotion_dim)
def expand_emotion_dim(e_data, MaxLen):
    """Repeat every sample's emotion vector along a new time axis.

    Args:
        e_data: array-like whose first axis indexes samples, e.g. shape
            (num_samples, emotion_dim).
        MaxLen: number of time steps to produce. Values below 1 are treated
            as 1, which matches the earlier loop-based implementation.

    Returns:
        numpy.ndarray with a new axis of length max(MaxLen, 1) inserted at
        position 1, e.g. shape (num_samples, MaxLen, emotion_dim). The dtype
        of the input is preserved, and an empty batch keeps its trailing
        dimensions instead of collapsing to shape (0,).
    """
    # Insert the time axis, then tile along it in one vectorised call instead
    # of round-tripping through nested Python lists.
    expanded = np.expand_dims(e_data, axis=1)
    return np.repeat(expanded, max(int(MaxLen), 1), axis=1)

In [8]:
# Repeat each answer's emotion vector over the decoder time steps so it can be
# concatenated with train_y1 along the feature axis. The length is taken from
# train_y1 rather than hard-coded, and the ndim guard keeps the cell idempotent:
# re-running it does not expand an already-expanded train_e a second time.
if np.ndim(train_e) == 2:
    train_e = expand_emotion_dim(train_e, np.shape(train_y1)[1])
# Concatenate the ground-truth answer sequence with the answer's emotion.
# Only the decoder input needs this; the decoder target (what the dense softmax
# is trained against) does not get the emotion vector appended.
# y1 is the decoder input (starts with SOS)
concat_y1_emotion = np.concatenate([train_y1, train_e], axis=2)
# y2 is the decoder target (ends with EOS)

**decoder_target_data 与 decoder_input_data 相同，但是有一个时间的偏差。 decoder_target_data[:, t, :] 与decoder_input_data[:, t+1, :]相同**
<br>
decoder_target_data 比 decoder_input_data 多了一个 EOS。<br>
decoder_input_data 比 decoder_target_data 多了一个 SOS。

In [9]:
# Display the shape of the decoder input after the emotion vectors were appended.
concat_y1_emotion.shape

(2410, 50, 3130)

In [12]:
# Save the model to disk only when the validation loss improves.
checkpoint = ModelCheckpoint(
    filepath='../models/seq2seq_model_3layers.hdf5',
    monitor='val_loss',
    mode='auto',
    save_best_only=True,
    verbose=1,
)
# Stop training once the validation loss has not improved for 50 epochs.
early_stopping = EarlyStopping(patience=50, monitor='val_loss')

In [13]:
# Train the model: the encoder receives train_x, the decoder receives the
# answer sequence with emotion appended, and train_y2 is the target.
# 20% of the samples are held out for validation.
fit_inputs = [train_x, concat_y1_emotion]
model.fit(
    x=fit_inputs,
    y=train_y2,
    validation_split=0.2,
    callbacks=[early_stopping, checkpoint],
    epochs=120,
    batch_size=batch_size,
)

Train on 1928 samples, validate on 482 samples
Epoch 1/120

Epoch 00001: val_loss improved from inf to 1.40866, saving model to ../models/seq2seq_model_3layers_1.hdf5
Epoch 2/120

Epoch 00002: val_loss improved from 1.40866 to 1.39566, saving model to ../models/seq2seq_model_3layers_1.hdf5
Epoch 3/120

Epoch 00003: val_loss improved from 1.39566 to 1.39179, saving model to ../models/seq2seq_model_3layers_1.hdf5
Epoch 4/120

Epoch 00004: val_loss did not improve from 1.39179
Epoch 5/120

Epoch 00005: val_loss did not improve from 1.39179
Epoch 6/120

Epoch 00006: val_loss did not improve from 1.39179
Epoch 7/120

Epoch 00007: val_loss did not improve from 1.39179
Epoch 8/120

Epoch 00008: val_loss did not improve from 1.39179
Epoch 9/120

Epoch 00009: val_loss did not improve from 1.39179
Epoch 10/120

Epoch 00010: val_loss improved from 1.39179 to 1.37847, saving model to ../models/seq2seq_model_3layers_1.hdf5
Epoch 11/120

Epoch 00011: val_loss did not improve from 1.37847
Epoch 12/12

<keras.callbacks.callbacks.History at 0x1991ce80>

In [14]:
# Print the layer-by-layer summary (output shapes, parameter counts, connections).
model.summary()

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_inputs (InputLayer)     (None, None, 3125)   0                                            
__________________________________________________________________________________________________
decoder_inputs (InputLayer)     (None, None, 3130)   0                                            
__________________________________________________________________________________________________
encoder_lstm1 (LSTM)            [(None, None, 256),  3463168     encoder_inputs[0][0]             
__________________________________________________________________________________________________
decoder_lstm1 (LSTM)            [(None, None, 256),  3468288     decoder_inputs[0][0]             
                                                                 encoder_lstm1[0][1]        

In [15]:
# Select CUDA device 0 through numba and then call cuda.close() on it.
# NOTE(review): presumably run after training to free the GPU memory held by
# this kernel — confirm; later GPU work in the same kernel may need a restart.
from numba import cuda
cuda.select_device(0)
cuda.close()