In [1]:
import tensorflow as tf
import numpy as np

### 定义rnn解码部分

In [2]:
def rnn_decoder_with_attention(decoder_inputs, initial_state, cell, attention_states, batch_size):
    '''
    rnn解码过程——
      对decoder_inputs中的每个input:
        1.计算attention: context_vector = softmax(query * key) * value
        2.rnn cell运行: output, state = cell(input, state, context_vector)
    输入——
      decoder_inputs: 一个2D-tensor的列表，每个tensor的shape=[batch_size, input_size]
      initial_state: cell的初始状态，此处使用的是由encoder最后一个输出计算的结果，shape=[batch_size, cell.state_size]，
                     此处state_size=hidden_size * 2
      cell: 使用的rnn_cell，此处使用带attention的双向gru cell
      loop_function: 测试的时候使用的，TODO
      attention_states: encoder的outputs，是一个3D-tensor，shape=[batch_size, sentence_len, hidden_size * 2]
    '''
    #定义一些量
    state = initial_state
    attention_states_origin = attention_states #[batch_size, sentence_len, state_size]
    _, sentence_len, state_size = attention_states.get_shape().as_list()
    outputs = []
    for idx in range(len(decoder_inputs)):
        #训练时把上一轮的state作为输入
        #1.计算attention
#         Q = tf.get_variable('Q', shape=[state_size, state_size], initializer=tf.random_normal_initializer(stddev=0.1))
#         K = tf.get_variable('K', shape=[state_size, state_size], initializer=tf.random_normal_initializer(stddev=0.1))
#         V = tf.get_variable('V', shape=[state_size, state_size], initializer=tf.random_normal_initializer(stddev=0.1))
        query = tf.reshape(state, (-1, 1, state_size)) #[batch_size, 1, state_size]
        key = attention_states #[batch_size, state_size, sentence_len]
        attention_logits = [tf.matmul(query[i], tf.transpose(key[i])) for i in range(batch_size)] #batch中的每个query与输入的key计算匹配程度
        attention_logits = tf.random_normal([batch_size, sentence_len])
        attention_logits = tf.reshape(attention_logits, [batch_size, sentence_len]) #[batch_size, sentence_len]
        p_attention = tf.nn.softmax(attention_logits) #计算概率，[batch_size, sentence_len]
        p_attention = tf.reshape(p_attention, [batch_size, sentence_len, 1])
        context_vector = tf.multiply(attention_states_origin, p_attention) #按位乘，在第三维自动扩展，[batch_size, sentence_len, state_size]
        context_vector = tf.reduce_sum(context_vector, axis=1) #[batch_size, state_size]
        #2.运行cell
        output, state = cell(decoder_inputs[idx], state, context_vector)
        outputs.append(output)
    return outputs, state

### 定义seq2seq

In [3]:
class Seq2Seq_with_attention:
    def __init__(self, num_classes, batch_size, sentence_len, 
                 embed_size, vocab_size, hidden_size, learning_rate, 
                 decay_steps, decay_rate, is_training=True, decode_sent_length=5):
        #初始化hyperparameter
        self.num_classes = num_classes
        self.batch_size = batch_size
        self.sentence_len = sentence_len
        self.embed_size = embed_size
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.learning_rate = learning_rate
        self.is_training = is_training
        self.decode_sent_length = decode_sent_length
        self.initializer = tf.random_normal_initializer(stddev=0.1)
        
        #输入
        self.input_x = tf.placeholder(tf.int32, [None, self.sentence_len], 'input_x')
        self.decoder_input = tf.placeholder(tf.int32, [None, self.decode_sent_length], 'decoder_input') #
        self.input_y = tf.placeholder(tf.int32, [None, self.decode_sent_length], 'input_y')
#         self.dropout_keep_prob = tf.placeholder(tf.float32, name='dropout_keep_prob')
        
        #epoch信息
        self.global_step = tf.Variable(0, trainable=False, dtype=tf.int32, name='global_step')
        self.epoch_step = tf.Variable(0, trainable=False, dtype=tf.int32, name='epoch_step')
        self.epoch_increment = tf.assign(self.epoch_step, tf.add(self.epoch_step, tf.constant(1)))
        self.decay_steps, self.decay_rate = decay_steps, decay_rate
        
        self.init_weight()
        self.logits = self.inference()
        
        self.predictions = tf.argmax(self.logits, axis=2,name='prediction')
        self.loss_val = self.loss()
        self.train_op = self.train()
    
    def inference(self):
        '''
        embedding --> encode using gru --> decode using gru with attention --> linear classifier
        '''
        #1. embedding
        self.embedding_sentence = tf.nn.embedding_lookup(self.Embedding, self.input_x) #[batch_size, sentence_len, embed_size]
        #2. encoder with GRU
        h_t_list_forward = self.gru_cell_run(self.embedding_sentence, self.gru_cell) #list,每个元素为[batch_size, hidden_size]
        h_t_list_backward = self.gru_cell_run(self.embedding_sentence, self.gru_cell, forward=False) 
        encoder_outputs_list = [tf.concat((f,b), axis=1) for f, b in zip(h_t_list_forward, h_t_list_backward)]
        #list,每个元素为[batch_size, hidden_size * 2]
        encoder_outputs = tf.stack(encoder_outputs_list, axis=1) #[batch_size, sentence_len, hidden_size * 2]
        #3. decode using GRU with attention
        #a. 初始化decoder的第一个状态
        initial_state = tf.tanh(tf.matmul(h_t_list_backward[0], self.W_init_state) + self.b_init_state) #[batch_size, hidden_size * 2]
        #b. embedding decoder input
        embedding_decoder_input = tf.nn.embedding_lookup(self.Embedding_label, self.decoder_input) #[batch_size, deccode_len, embed_size]
        embedding_decoder_input_split = tf.split(embedding_decoder_input, self.decode_sent_length, axis=1)
        embedding_decoder_input_list = [tf.squeeze(x,axis=1) for x in embedding_decoder_input_split]#list,每个元素为[batch_size, embed_size]
        #c. run cell with attention
        cell = self.gru_cell_decode
        attention_states = encoder_outputs
        loop_function = None
        outputs, state = rnn_decoder_with_attention(embedding_decoder_input_list, initial_state, 
                                                    cell, attention_states, self.batch_size)
        # outputs: list，每个元素为[batch_size, output_size]，这里output_size = hidden_size*2(只用了一个gru，注意维度)
        decoder_output = tf.stack(outputs, axis=1) #[batch_size, decode_len, hidden_size * 2]
        decoder_output = tf.reshape(decoder_output, [-1, self.hidden_size * 2]) #[batch_size * decode_len, hidden_size * 2]
        logits = tf.matmul(decoder_output, self.W_logits) + self.b_logits   #[batch_size * decode_len, num_classes]
        logits = tf.reshape(logits, [self.batch_size, self.decode_sent_length, self.num_classes])# [batch_size, decode_len, num_classes]
        return logits
        
    def loss(self, l2_lambda=0.001):
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.input_y, logits=self.logits)#[batch_size, decode_sen_len]
        loss1 = tf.reduce_mean(tf.reduce_sum(losses, axis=1))
        l2_loss = tf.add_n([tf.nn.l2_loss(t) for t in tf.trainable_variables()]) * l2_lambda
        return loss1 + l2_loss
        
    def train(self):
        learning_rate = tf.train.exponential_decay(self.learning_rate,self.global_step,self.decay_steps,self.decay_rate,True)
        train_op = tf.contrib.layers.optimize_loss(self.loss_val, self.global_step,learning_rate, 'Adam')
        return train_op
    
    def gru_cell(self, X_t, h_t_minus_1):
        #reset gate
        r_t = tf.sigmoid(tf.matmul(h_t_minus_1, self.W_r) + tf.matmul(X_t, self.U_r) + self.b_r) #[batch_size, hidden_size]
        #生成候选h_t
        h_t_cand = tf.tanh(tf.matmul(r_t * h_t_minus_1, self.W_c) + tf.matmul(X_t, self.U_c) + self.b_c) #[batch_size, hidden_size]
        #update gate
        z_t = tf.sigmoid(tf.matmul(h_t_minus_1, self.W_z) + tf.matmul(X_t, self.U_z) + self.b_z) #[batch_size, hidden_size]
        #生成新的h_t
        h_t = (1 - z_t) * h_t_minus_1 + z_t * h_t_cand #[batch_size, hidden_size]
        return h_t
        
    def gru_cell_run(self, embedding_sentence, gru_cell, forward=True):
        '''
        input：embedding_sentence: [batch_size, sentence_len, embed_size]
        output：list of h_t,每个h_t的shape: [batch_size, hidden_size]
        '''
        embedding_sentence_split = tf.split(embedding_sentence, self.sentence_len, axis=1) #a list, each element is [batch_size,1,embed_size]
        embedding_sentence_list = [tf.squeeze(x, axis=1) for x in embedding_sentence_split] #a list, each element is [batch_size,embed_size]
        h_t = tf.ones((self.batch_size, self.hidden_size)) #初始化
        h_t_list = []
        if forward == False:
            embedding_sentence_list.reverse()
        for i in range(self.sentence_len):
            h_t = gru_cell(embedding_sentence_list[i], h_t)
            h_t_list.append(h_t)
        if forward == False:
            h_t_list.reverse()
        return h_t_list
    
    def gru_cell_decode(self, X_t, h_t_minus_1, context_vector):
        #reset gate
        r_t = tf.sigmoid(tf.matmul(h_t_minus_1, self.W_r_decode) + tf.matmul(X_t, self.U_r_decode) + self.b_r_decode) #[batch_size, hidden_size * 2]
        #生成候选h_t
        h_t_cand = tf.tanh(tf.matmul(r_t * h_t_minus_1, self.W_c_decode) + tf.matmul(X_t, self.U_c_decode)+ self.b_c_decode) #[batch_size, hidden_size * 2]
        #update gate
        z_t = tf.sigmoid(tf.matmul(h_t_minus_1, self.W_z_decode) + tf.matmul(X_t, self.U_z_decode) + self.b_z_decode) #[batch_size, hidden_size * 2]
        #生成新的h_t
        h_t = (1 - z_t) * h_t_minus_1 + z_t * h_t_cand #[batch_size, hidden_size * 2]
        #print(h_t.get_shape())
        #print(h_t_minus_1.get_shape())
        return h_t, h_t
        #return h_t_minus_1, h_t_minus_1
        
        
    def init_weight(self):
        '''定义所有weights'''
        #单词的embedding和label的embedding，其中label的embedding负责将decoder_input做embedding
        self.Embedding = tf.get_variable('Embedding', [self.vocab_size, self.embed_size], tf.float32, self.initializer)
        self.Embedding_label = tf.get_variable('Embedding_label', [self.num_classes, self.embed_size], tf.float32, self.initializer)
        #将encoder的第一个output映射为decoder的初始state
        self.W_init_state = tf.get_variable('W_init_state', [self.hidden_size, self.hidden_size * 2], tf.float32, self.initializer)
        self.b_init_state = tf.get_variable('b_init_state', [self.hidden_size * 2], tf.float32, self.initializer)
        #将decoder的outputs映射为logits
        self.W_logits = tf.get_variable('W_logits', [self.hidden_size * 2, self.num_classes], tf.float32, self.initializer)
        self.b_logits = tf.get_variable('b_logits', [self.num_classes], tf.float32, self.initializer)
        #encoder的gru门所需参数
            #reset gate
        self.W_r = tf.get_variable('W_r', [self.hidden_size, self.hidden_size], tf.float32, self.initializer)
        self.U_r = tf.get_variable('U_r', [self.embed_size, self.hidden_size], tf.float32, self.initializer)
        self.b_r = tf.get_variable('b_r', [self.hidden_size], tf.float32, self.initializer)
            #生成候选h_t
        self.W_c = tf.get_variable('W_c', [self.hidden_size, self.hidden_size], tf.float32, self.initializer)
        self.U_c = tf.get_variable('U_c', [self.embed_size, self.hidden_size], tf.float32, self.initializer)
        self.b_c = tf.get_variable('b_c', [self.hidden_size], tf.float32, self.initializer)
            #update gate
        self.W_z = tf.get_variable('W_z', [self.hidden_size, self.hidden_size], tf.float32, self.initializer)
        self.U_z = tf.get_variable('U_z', [self.embed_size, self.hidden_size], tf.float32, self.initializer)
        self.b_z = tf.get_variable('b_z', [self.hidden_size], tf.float32, self.initializer)
        #dncoder的gru门所需参数
            #reset gate
        self.W_r_decode = tf.get_variable('W_r_decode', [self.hidden_size * 2, self.hidden_size * 2], tf.float32, self.initializer)
        self.U_r_decode = tf.get_variable('U_r_decode', [self.embed_size, self.hidden_size * 2], tf.float32, self.initializer)
        self.C_r_decode = tf.get_variable('C_r_decode', [self.hidden_size * 2, self.hidden_size * 2], tf.float32, self.initializer)
        self.b_r_decode = tf.get_variable('b_r_decode', [self.hidden_size * 2], tf.float32, self.initializer)
            #生成候选h_t
        self.W_c_decode = tf.get_variable('W_c_decode', [self.hidden_size * 2, self.hidden_size * 2], tf.float32, self.initializer)
        self.U_c_decode = tf.get_variable('U_c_decode', [self.embed_size, self.hidden_size * 2], tf.float32, self.initializer)
        self.C_c_decode = tf.get_variable('C_c_decode', [self.hidden_size * 2, self.hidden_size * 2], tf.float32, self.initializer)
        self.b_c_decode = tf.get_variable('b_c_decode', [self.hidden_size * 2], tf.float32, self.initializer)
            #update gate
        self.W_z_decode = tf.get_variable('W_z_decode', [self.hidden_size * 2, self.hidden_size * 2], tf.float32, self.initializer)
        self.U_z_decode = tf.get_variable('U_z_decode', [self.embed_size, self.hidden_size * 2], tf.float32, self.initializer)
        self.C_z_decode = tf.get_variable('C_z_decode', [self.hidden_size * 2, self.hidden_size * 2], tf.float32, self.initializer)
        self.b_z_decode = tf.get_variable('b_z_decode', [self.hidden_size * 2], tf.float32, self.initializer)

#### 测试

In [4]:
import copy
def test():
    # below is a function test; if you use this for text classifiction, you need to tranform sentence to indices of vocabulary first. then feed data to the graph.
    num_classes = 9+2 #additional two classes:one is for _GO, another is for _END
    learning_rate = 0.0001
    batch_size = 1
    decay_steps = 1000
    decay_rate = 0.9
    sequence_length = 5
    vocab_size = 300
    embed_size = 100 #100
    hidden_size = 100
    is_training = True
    dropout_keep_prob = 1  # 0.5 #num_sentences
    decoder_sent_length=6
    l2_lambda=0.0001
    model = Seq2Seq_with_attention(num_classes, batch_size, sequence_length, 
                 embed_size, vocab_size, hidden_size, learning_rate, 
                 decay_steps, decay_rate, is_training, decoder_sent_length)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(1500):
            #input_x = np.zeros((batch_size, sequence_length),dtype=np.int32) #[None, self.sequence_length]
            label_list=get_unique_labels()
            input_x = np.array([label_list],dtype=np.int32) #[2,3,4,5,6]
            label_list_original=copy.deepcopy(label_list)
            label_list.reverse()
            decoder_input=np.array([[0]+label_list],dtype=np.int32) #[[0,2,3,4,5,6]]
            input_y_label=np.array([label_list+[1]],dtype=np.int32) #[[2,3,4,5,6,1]]
            loss, predict, _ = sess.run([model.loss_val, model.predictions, model.train_op],
                                        feed_dict={model.input_x:input_x,model.decoder_input:decoder_input, model.input_y: input_y_label})
            print(i,"loss:", loss, "label_list_original as input x:",label_list_original,";input_y_label:", input_y_label, "prediction:", predict)
            
import random
def get_unique_labels():
    x=[2,3,4,5,6]
    random.shuffle(x)
    return x

tf.reset_default_graph()
test()

0 loss: 16.538515 label_list_original as input x: [3, 5, 4, 2, 6] ;input_y_label: [[6 2 4 5 3 1]] prediction: [[1 1 7 5 7 7]]
1 loss: 16.948746 label_list_original as input x: [2, 4, 6, 5, 3] ;input_y_label: [[3 5 6 4 2 1]] prediction: [[1 0 8 5 5 5]]
2 loss: 16.605978 label_list_original as input x: [3, 6, 4, 2, 5] ;input_y_label: [[5 2 4 6 3 1]] prediction: [[1 1 7 5 5 5]]
3 loss: 16.61293 label_list_original as input x: [6, 2, 5, 3, 4] ;input_y_label: [[4 3 5 2 6 1]] prediction: [[0 5 5 7 7 5]]
4 loss: 16.431637 label_list_original as input x: [5, 6, 3, 2, 4] ;input_y_label: [[4 2 3 6 5 1]] prediction: [[3 5 4 5 5 5]]
5 loss: 16.191038 label_list_original as input x: [6, 5, 4, 3, 2] ;input_y_label: [[2 3 4 5 6 1]] prediction: [[0 3 0 5 5 5]]
6 loss: 16.36128 label_list_original as input x: [6, 5, 2, 4, 3] ;input_y_label: [[3 4 2 5 6 1]] prediction: [[3 0 5 4 5 5]]
7 loss: 16.53752 label_list_original as input x: [2, 4, 6, 5, 3] ;input_y_label: [[3 5 6 4 2 1]] prediction: [[1 6 5 5 5

87 loss: 11.359814 label_list_original as input x: [3, 5, 2, 4, 6] ;input_y_label: [[6 4 2 5 3 1]] prediction: [[3 2 5 5 1 1]]
88 loss: 10.831002 label_list_original as input x: [6, 5, 2, 4, 3] ;input_y_label: [[3 4 2 5 6 1]] prediction: [[3 4 6 5 1 1]]
89 loss: 11.501481 label_list_original as input x: [4, 2, 6, 5, 3] ;input_y_label: [[3 5 6 2 4 1]] prediction: [[3 6 2 5 1 1]]
90 loss: 11.744295 label_list_original as input x: [3, 6, 4, 5, 2] ;input_y_label: [[2 5 4 6 3 1]] prediction: [[3 2 2 5 1 1]]
91 loss: 11.188267 label_list_original as input x: [4, 6, 5, 2, 3] ;input_y_label: [[3 2 5 6 4 1]] prediction: [[3 6 6 6 1 1]]
92 loss: 11.262507 label_list_original as input x: [6, 4, 5, 3, 2] ;input_y_label: [[2 3 5 4 6 1]] prediction: [[3 4 6 6 1 1]]
93 loss: 10.540778 label_list_original as input x: [5, 4, 6, 2, 3] ;input_y_label: [[3 2 6 4 5 1]] prediction: [[3 6 6 5 1 1]]
94 loss: 11.307965 label_list_original as input x: [4, 2, 5, 3, 6] ;input_y_label: [[6 3 5 2 4 1]] prediction: 

179 loss: 10.283598 label_list_original as input x: [4, 3, 6, 2, 5] ;input_y_label: [[5 2 6 3 4 1]] prediction: [[6 2 4 5 5 1]]
180 loss: 9.928984 label_list_original as input x: [5, 2, 6, 4, 3] ;input_y_label: [[3 4 6 2 5 1]] prediction: [[3 6 6 5 5 1]]
181 loss: 10.087963 label_list_original as input x: [2, 4, 5, 3, 6] ;input_y_label: [[6 3 5 4 2 1]] prediction: [[3 3 5 4 5 1]]
182 loss: 10.612887 label_list_original as input x: [3, 2, 6, 5, 4] ;input_y_label: [[4 5 6 2 3 1]] prediction: [[6 2 2 5 5 1]]
183 loss: 9.93905 label_list_original as input x: [5, 3, 2, 4, 6] ;input_y_label: [[6 4 2 3 5 1]] prediction: [[6 2 2 5 5 1]]
184 loss: 9.827417 label_list_original as input x: [5, 2, 4, 3, 6] ;input_y_label: [[6 3 4 2 5 1]] prediction: [[3 3 4 5 5 1]]
185 loss: 10.437409 label_list_original as input x: [2, 3, 5, 6, 4] ;input_y_label: [[4 6 5 3 2 1]] prediction: [[3 2 5 2 1 1]]
186 loss: 10.24734 label_list_original as input x: [3, 4, 6, 2, 5] ;input_y_label: [[5 2 6 4 3 1]] predictio

269 loss: 10.272279 label_list_original as input x: [5, 4, 2, 6, 3] ;input_y_label: [[3 6 2 4 5 1]] prediction: [[2 4 4 3 3 1]]
270 loss: 9.166684 label_list_original as input x: [6, 5, 4, 3, 2] ;input_y_label: [[2 3 4 5 6 1]] prediction: [[2 4 4 6 3 1]]
271 loss: 8.925596 label_list_original as input x: [6, 3, 4, 2, 5] ;input_y_label: [[5 2 4 3 6 1]] prediction: [[2 2 4 6 6 1]]
272 loss: 9.863752 label_list_original as input x: [2, 6, 5, 3, 4] ;input_y_label: [[4 3 5 6 2 1]] prediction: [[2 2 6 6 3 1]]
273 loss: 8.531391 label_list_original as input x: [6, 3, 5, 4, 2] ;input_y_label: [[2 4 5 3 6 1]] prediction: [[2 4 5 3 6 1]]
274 loss: 10.436214 label_list_original as input x: [4, 2, 6, 3, 5] ;input_y_label: [[5 3 6 2 4 1]] prediction: [[2 2 4 5 5 1]]
275 loss: 10.23581 label_list_original as input x: [2, 5, 4, 3, 6] ;input_y_label: [[6 3 4 5 2 1]] prediction: [[2 4 4 5 3 1]]
276 loss: 10.288704 label_list_original as input x: [2, 4, 6, 5, 3] ;input_y_label: [[3 5 6 4 2 1]] predictio

358 loss: 9.71888 label_list_original as input x: [6, 4, 3, 2, 5] ;input_y_label: [[5 2 3 4 6 1]] prediction: [[3 2 4 6 5 1]]
359 loss: 9.723031 label_list_original as input x: [3, 6, 4, 5, 2] ;input_y_label: [[2 5 4 6 3 1]] prediction: [[3 4 6 6 5 1]]
360 loss: 10.101971 label_list_original as input x: [2, 3, 4, 5, 6] ;input_y_label: [[6 5 4 3 2 1]] prediction: [[3 3 2 2 5 1]]
361 loss: 10.177777 label_list_original as input x: [6, 2, 3, 4, 5] ;input_y_label: [[5 4 3 2 6 1]] prediction: [[3 2 2 6 5 1]]
362 loss: 9.230056 label_list_original as input x: [2, 5, 3, 6, 4] ;input_y_label: [[4 6 3 5 2 1]] prediction: [[3 2 2 5 2 1]]
363 loss: 9.816792 label_list_original as input x: [6, 2, 3, 5, 4] ;input_y_label: [[4 5 3 2 6 1]] prediction: [[3 2 2 6 5 1]]
364 loss: 8.714923 label_list_original as input x: [5, 2, 6, 4, 3] ;input_y_label: [[3 4 6 2 5 1]] prediction: [[3 6 2 5 5 1]]
365 loss: 9.710245 label_list_original as input x: [3, 2, 4, 5, 6] ;input_y_label: [[6 5 4 2 3 1]] prediction:

446 loss: 8.501342 label_list_original as input x: [2, 5, 6, 3, 4] ;input_y_label: [[4 3 6 5 2 1]] prediction: [[5 3 6 2 2 1]]
447 loss: 9.429948 label_list_original as input x: [6, 2, 5, 4, 3] ;input_y_label: [[3 4 5 2 6 1]] prediction: [[5 6 6 2 3 1]]
448 loss: 9.095272 label_list_original as input x: [4, 6, 3, 5, 2] ;input_y_label: [[2 5 3 6 4 1]] prediction: [[5 4 3 6 4 1]]
449 loss: 8.753942 label_list_original as input x: [5, 3, 4, 6, 2] ;input_y_label: [[2 6 4 3 5 1]] prediction: [[6 4 3 3 5 1]]
450 loss: 8.835999 label_list_original as input x: [5, 3, 2, 6, 4] ;input_y_label: [[4 6 2 3 5 1]] prediction: [[6 3 3 3 5 1]]
451 loss: 8.680359 label_list_original as input x: [3, 6, 4, 2, 5] ;input_y_label: [[5 2 4 6 3 1]] prediction: [[5 6 4 3 3 1]]
452 loss: 8.971254 label_list_original as input x: [3, 5, 4, 6, 2] ;input_y_label: [[2 6 4 5 3 1]] prediction: [[5 6 3 3 3 1]]
453 loss: 8.974264 label_list_original as input x: [6, 5, 3, 4, 2] ;input_y_label: [[2 4 3 5 6 1]] prediction: 

538 loss: 8.955812 label_list_original as input x: [5, 3, 4, 6, 2] ;input_y_label: [[2 6 4 3 5 1]] prediction: [[4 4 3 3 5 1]]
539 loss: 8.795486 label_list_original as input x: [4, 5, 6, 3, 2] ;input_y_label: [[2 3 6 5 4 1]] prediction: [[5 5 6 5 4 1]]
540 loss: 8.607666 label_list_original as input x: [4, 2, 5, 6, 3] ;input_y_label: [[3 6 5 2 4 1]] prediction: [[5 5 2 2 4 1]]
541 loss: 8.916233 label_list_original as input x: [3, 2, 6, 4, 5] ;input_y_label: [[5 4 6 2 3 1]] prediction: [[5 2 2 2 3 1]]
542 loss: 8.879765 label_list_original as input x: [3, 4, 6, 5, 2] ;input_y_label: [[2 5 6 4 3 1]] prediction: [[5 5 3 4 3 1]]
543 loss: 9.018264 label_list_original as input x: [3, 6, 4, 5, 2] ;input_y_label: [[2 5 4 6 3 1]] prediction: [[5 4 3 6 3 1]]
544 loss: 8.493816 label_list_original as input x: [6, 5, 4, 2, 3] ;input_y_label: [[3 2 4 5 6 1]] prediction: [[4 5 4 6 6 1]]
545 loss: 8.157564 label_list_original as input x: [4, 6, 3, 5, 2] ;input_y_label: [[2 5 3 6 4 1]] prediction: 

627 loss: 7.7041793 label_list_original as input x: [4, 2, 5, 3, 6] ;input_y_label: [[6 3 5 2 4 1]] prediction: [[6 3 5 2 4 1]]
628 loss: 7.8359065 label_list_original as input x: [5, 3, 2, 4, 6] ;input_y_label: [[6 4 2 3 5 1]] prediction: [[6 2 2 5 5 1]]
629 loss: 7.997002 label_list_original as input x: [3, 4, 5, 6, 2] ;input_y_label: [[2 6 5 4 3 1]] prediction: [[6 6 5 3 3 1]]
630 loss: 7.6659255 label_list_original as input x: [6, 5, 3, 4, 2] ;input_y_label: [[2 4 3 5 6 1]] prediction: [[2 4 5 5 6 1]]
631 loss: 8.13455 label_list_original as input x: [2, 5, 6, 3, 4] ;input_y_label: [[4 3 6 5 2 1]] prediction: [[3 3 5 5 2 1]]
632 loss: 7.875415 label_list_original as input x: [6, 3, 5, 4, 2] ;input_y_label: [[2 4 5 3 6 1]] prediction: [[2 4 5 3 6 1]]
633 loss: 8.080778 label_list_original as input x: [3, 4, 2, 5, 6] ;input_y_label: [[6 5 2 4 3 1]] prediction: [[6 2 2 4 3 1]]
634 loss: 8.444984 label_list_original as input x: [2, 6, 4, 5, 3] ;input_y_label: [[3 5 4 6 2 1]] prediction

712 loss: 7.6537375 label_list_original as input x: [2, 5, 4, 3, 6] ;input_y_label: [[6 3 4 5 2 1]] prediction: [[4 3 5 2 2 1]]
713 loss: 8.0545845 label_list_original as input x: [3, 2, 4, 5, 6] ;input_y_label: [[6 5 4 2 3 1]] prediction: [[6 5 2 2 3 1]]
714 loss: 7.059158 label_list_original as input x: [2, 5, 6, 3, 4] ;input_y_label: [[4 3 6 5 2 1]] prediction: [[4 3 5 2 2 1]]
715 loss: 7.8753095 label_list_original as input x: [3, 5, 6, 4, 2] ;input_y_label: [[2 4 6 5 3 1]] prediction: [[6 4 5 5 3 1]]
716 loss: 7.801469 label_list_original as input x: [4, 5, 6, 2, 3] ;input_y_label: [[3 2 6 5 4 1]] prediction: [[3 6 6 5 4 1]]
717 loss: 8.002236 label_list_original as input x: [2, 3, 4, 5, 6] ;input_y_label: [[6 5 4 3 2 1]] prediction: [[4 3 3 2 2 1]]
718 loss: 7.4806743 label_list_original as input x: [2, 5, 4, 3, 6] ;input_y_label: [[6 3 4 5 2 1]] prediction: [[3 3 5 5 2 1]]
719 loss: 7.762645 label_list_original as input x: [6, 2, 4, 3, 5] ;input_y_label: [[5 3 4 2 6 1]] predicti

800 loss: 6.2999377 label_list_original as input x: [3, 4, 2, 5, 6] ;input_y_label: [[6 5 2 4 3 1]] prediction: [[6 2 2 3 3 1]]
801 loss: 7.0027122 label_list_original as input x: [4, 2, 6, 3, 5] ;input_y_label: [[5 3 6 2 4 1]] prediction: [[5 2 6 2 4 1]]
802 loss: 7.7156277 label_list_original as input x: [5, 6, 3, 4, 2] ;input_y_label: [[2 4 3 6 5 1]] prediction: [[6 6 6 6 5 1]]
803 loss: 7.4729576 label_list_original as input x: [3, 5, 6, 2, 4] ;input_y_label: [[4 2 6 5 3 1]] prediction: [[6 2 5 3 3 1]]
804 loss: 7.0127525 label_list_original as input x: [5, 4, 3, 6, 2] ;input_y_label: [[2 6 3 4 5 1]] prediction: [[6 6 3 5 5 1]]
805 loss: 6.1408505 label_list_original as input x: [4, 3, 5, 6, 2] ;input_y_label: [[2 6 5 3 4 1]] prediction: [[6 6 5 3 4 1]]
806 loss: 7.3547463 label_list_original as input x: [6, 3, 5, 2, 4] ;input_y_label: [[4 2 5 3 6 1]] prediction: [[5 5 5 3 6 1]]
807 loss: 6.9940023 label_list_original as input x: [2, 4, 3, 6, 5] ;input_y_label: [[5 6 3 4 2 1]] pred

894 loss: 5.7153845 label_list_original as input x: [5, 3, 4, 2, 6] ;input_y_label: [[6 2 4 3 5 1]] prediction: [[6 2 3 3 5 1]]
895 loss: 6.583956 label_list_original as input x: [6, 5, 3, 4, 2] ;input_y_label: [[2 4 3 5 6 1]] prediction: [[2 4 3 5 6 1]]
896 loss: 6.282055 label_list_original as input x: [2, 4, 6, 5, 3] ;input_y_label: [[3 5 6 4 2 1]] prediction: [[5 5 6 4 2 1]]
897 loss: 6.270211 label_list_original as input x: [4, 2, 3, 5, 6] ;input_y_label: [[6 5 3 2 4 1]] prediction: [[6 5 3 4 4 1]]
898 loss: 5.781111 label_list_original as input x: [4, 5, 3, 6, 2] ;input_y_label: [[2 6 3 5 4 1]] prediction: [[6 6 3 5 4 1]]
899 loss: 5.77731 label_list_original as input x: [6, 2, 5, 3, 4] ;input_y_label: [[4 3 5 2 6 1]] prediction: [[3 3 5 2 6 1]]
900 loss: 6.705113 label_list_original as input x: [5, 2, 3, 4, 6] ;input_y_label: [[6 4 3 2 5 1]] prediction: [[6 3 3 5 5 1]]
901 loss: 6.410775 label_list_original as input x: [6, 3, 5, 4, 2] ;input_y_label: [[2 4 5 3 6 1]] prediction: 

987 loss: 5.5013924 label_list_original as input x: [6, 3, 5, 4, 2] ;input_y_label: [[2 4 5 3 6 1]] prediction: [[2 5 5 3 6 1]]
988 loss: 4.798729 label_list_original as input x: [4, 3, 5, 6, 2] ;input_y_label: [[2 6 5 3 4 1]] prediction: [[2 6 5 3 4 1]]
989 loss: 5.4514675 label_list_original as input x: [6, 2, 4, 5, 3] ;input_y_label: [[3 5 4 2 6 1]] prediction: [[3 5 4 6 6 1]]
990 loss: 5.513541 label_list_original as input x: [3, 5, 4, 6, 2] ;input_y_label: [[2 6 4 5 3 1]] prediction: [[2 6 5 5 3 1]]
991 loss: 5.7760506 label_list_original as input x: [6, 4, 3, 2, 5] ;input_y_label: [[5 2 3 4 6 1]] prediction: [[2 2 3 6 6 1]]
992 loss: 5.624931 label_list_original as input x: [4, 5, 2, 3, 6] ;input_y_label: [[6 3 2 5 4 1]] prediction: [[6 2 2 5 4 1]]
993 loss: 6.2131786 label_list_original as input x: [6, 4, 5, 2, 3] ;input_y_label: [[3 2 5 4 6 1]] prediction: [[3 5 5 6 6 1]]
994 loss: 5.909641 label_list_original as input x: [2, 5, 3, 6, 4] ;input_y_label: [[4 6 3 5 2 1]] predicti

1078 loss: 4.607481 label_list_original as input x: [5, 2, 4, 3, 6] ;input_y_label: [[6 3 4 2 5 1]] prediction: [[6 3 4 2 5 1]]
1079 loss: 5.1482377 label_list_original as input x: [3, 2, 4, 5, 6] ;input_y_label: [[6 5 4 2 3 1]] prediction: [[6 5 2 2 3 1]]
1080 loss: 4.9727473 label_list_original as input x: [6, 4, 2, 5, 3] ;input_y_label: [[3 5 2 4 6 1]] prediction: [[3 5 2 4 6 1]]
1081 loss: 5.491071 label_list_original as input x: [5, 6, 4, 3, 2] ;input_y_label: [[2 3 4 6 5 1]] prediction: [[2 4 4 6 5 1]]
1082 loss: 4.906923 label_list_original as input x: [6, 4, 2, 5, 3] ;input_y_label: [[3 5 2 4 6 1]] prediction: [[3 5 2 4 6 1]]
1083 loss: 5.6907363 label_list_original as input x: [3, 5, 6, 4, 2] ;input_y_label: [[2 4 6 5 3 1]] prediction: [[6 6 6 5 3 1]]
1084 loss: 5.279315 label_list_original as input x: [4, 6, 2, 3, 5] ;input_y_label: [[5 3 2 6 4 1]] prediction: [[5 3 2 6 4 1]]
1085 loss: 4.5353427 label_list_original as input x: [2, 4, 5, 3, 6] ;input_y_label: [[6 3 5 4 2 1]] 

1167 loss: 4.2492433 label_list_original as input x: [3, 4, 5, 2, 6] ;input_y_label: [[6 2 5 4 3 1]] prediction: [[6 2 5 4 3 1]]
1168 loss: 4.041007 label_list_original as input x: [6, 2, 4, 5, 3] ;input_y_label: [[3 5 4 2 6 1]] prediction: [[3 5 4 2 6 1]]
1169 loss: 4.682128 label_list_original as input x: [2, 6, 3, 4, 5] ;input_y_label: [[5 4 3 6 2 1]] prediction: [[5 3 3 6 2 1]]
1170 loss: 4.9392586 label_list_original as input x: [5, 3, 6, 2, 4] ;input_y_label: [[4 2 6 3 5 1]] prediction: [[4 2 6 3 5 1]]
1171 loss: 4.7698627 label_list_original as input x: [4, 5, 6, 2, 3] ;input_y_label: [[3 2 6 5 4 1]] prediction: [[3 2 6 5 4 1]]
1172 loss: 4.229416 label_list_original as input x: [4, 2, 5, 3, 6] ;input_y_label: [[6 3 5 2 4 1]] prediction: [[6 3 5 2 4 1]]
1173 loss: 4.323655 label_list_original as input x: [4, 6, 2, 3, 5] ;input_y_label: [[5 3 2 6 4 1]] prediction: [[3 3 2 6 4 1]]
1174 loss: 4.0031376 label_list_original as input x: [5, 6, 2, 4, 3] ;input_y_label: [[3 4 2 6 5 1]] 

1257 loss: 4.210622 label_list_original as input x: [3, 2, 4, 5, 6] ;input_y_label: [[6 5 4 2 3 1]] prediction: [[6 5 4 2 3 1]]
1258 loss: 4.304081 label_list_original as input x: [2, 5, 4, 6, 3] ;input_y_label: [[3 6 4 5 2 1]] prediction: [[4 6 4 5 2 1]]
1259 loss: 3.7663004 label_list_original as input x: [2, 5, 6, 4, 3] ;input_y_label: [[3 4 6 5 2 1]] prediction: [[4 4 6 5 2 1]]
1260 loss: 3.6296244 label_list_original as input x: [5, 6, 3, 4, 2] ;input_y_label: [[2 4 3 6 5 1]] prediction: [[2 4 3 6 5 1]]
1261 loss: 3.7332432 label_list_original as input x: [3, 2, 6, 5, 4] ;input_y_label: [[4 5 6 2 3 1]] prediction: [[4 5 6 2 3 1]]
1262 loss: 4.318981 label_list_original as input x: [4, 3, 2, 5, 6] ;input_y_label: [[6 5 2 3 4 1]] prediction: [[6 2 2 3 4 1]]
1263 loss: 4.097961 label_list_original as input x: [4, 5, 2, 6, 3] ;input_y_label: [[3 6 2 5 4 1]] prediction: [[3 6 2 5 4 1]]
1264 loss: 3.8789186 label_list_original as input x: [4, 5, 2, 3, 6] ;input_y_label: [[6 3 2 5 4 1]] 

1349 loss: 3.609458 label_list_original as input x: [4, 5, 6, 3, 2] ;input_y_label: [[2 3 6 5 4 1]] prediction: [[2 3 6 5 4 1]]
1350 loss: 3.736047 label_list_original as input x: [5, 4, 2, 6, 3] ;input_y_label: [[3 6 2 4 5 1]] prediction: [[3 6 2 4 5 1]]
1351 loss: 3.5202184 label_list_original as input x: [3, 4, 5, 6, 2] ;input_y_label: [[2 6 5 4 3 1]] prediction: [[2 6 5 4 3 1]]
1352 loss: 3.0759587 label_list_original as input x: [6, 4, 3, 5, 2] ;input_y_label: [[2 5 3 4 6 1]] prediction: [[2 5 3 4 6 1]]
1353 loss: 3.306217 label_list_original as input x: [4, 6, 5, 2, 3] ;input_y_label: [[3 2 5 6 4 1]] prediction: [[3 2 5 6 4 1]]
1354 loss: 3.6401665 label_list_original as input x: [3, 2, 5, 4, 6] ;input_y_label: [[6 4 5 2 3 1]] prediction: [[6 4 5 2 3 1]]
1355 loss: 3.428926 label_list_original as input x: [2, 5, 4, 3, 6] ;input_y_label: [[6 3 4 5 2 1]] prediction: [[6 3 4 5 2 1]]
1356 loss: 3.407991 label_list_original as input x: [2, 6, 5, 4, 3] ;input_y_label: [[3 4 5 6 2 1]] p

1439 loss: 2.858265 label_list_original as input x: [5, 3, 4, 2, 6] ;input_y_label: [[6 2 4 3 5 1]] prediction: [[6 2 4 3 5 1]]
1440 loss: 2.9798517 label_list_original as input x: [4, 6, 2, 5, 3] ;input_y_label: [[3 5 2 6 4 1]] prediction: [[3 5 2 6 4 1]]
1441 loss: 3.1240087 label_list_original as input x: [5, 4, 3, 2, 6] ;input_y_label: [[6 2 3 4 5 1]] prediction: [[6 2 3 4 5 1]]
1442 loss: 2.909051 label_list_original as input x: [2, 3, 6, 5, 4] ;input_y_label: [[4 5 6 3 2 1]] prediction: [[4 5 6 3 2 1]]
1443 loss: 3.226471 label_list_original as input x: [6, 3, 4, 2, 5] ;input_y_label: [[5 2 4 3 6 1]] prediction: [[2 2 4 3 6 1]]
1444 loss: 3.140016 label_list_original as input x: [3, 6, 4, 2, 5] ;input_y_label: [[5 2 4 6 3 1]] prediction: [[5 2 4 6 3 1]]
1445 loss: 2.842613 label_list_original as input x: [6, 5, 3, 4, 2] ;input_y_label: [[2 4 3 5 6 1]] prediction: [[2 4 3 5 6 1]]
1446 loss: 3.011074 label_list_original as input x: [6, 3, 4, 5, 2] ;input_y_label: [[2 5 4 3 6 1]] pr

#### 训练

In [5]:
import sys
import tensorflow as tf
import numpy as np
# from tflearn.data_utils import to_categorical, pad_sequences
import os
import pickle
import h5py

In [6]:
FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_integer('batch_size', 256, 'batch_size')
tf.app.flags.DEFINE_integer('num_classes', 1999+3, 'num_classes')
tf.app.flags.DEFINE_integer('sentence_len', 100, 'length of each sentence')
tf.app.flags.DEFINE_integer('embed_size', 100, 'embedding size')
tf.app.flags.DEFINE_integer('hidden_size', 100, 'hidden size')
tf.app.flags.DEFINE_float('learning_rate', 0.01, '')
tf.app.flags.DEFINE_float('decay_rate', 1, '')
tf.app.flags.DEFINE_integer('decay_steps', 10000, 'number of steps before decay learning rate')
tf.app.flags.DEFINE_bool('is_training', True, '')

tf.app.flags.DEFINE_integer('num_epoch', 10, 'number of epoch')

tf.app.flags.DEFINE_string("ckpt_dir","seq2seq_multilabel_checkpoint/","checkpoint location for the model")
tf.app.flags.DEFINE_string("cache_path","seq2seq_multilabel_checkpoint/data_cache.pik","data chche for the model")

In [7]:
import time
def log(str):
    t = time.localtime()
    print("[%4d/%02d/%02d %02d:%02d:%02d]"%(t.tm_year, t.tm_mon, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec), end=' ')
    print(str)

In [8]:
def load_data(h5_file_path, pik_file_path):
    if not os.path.exists(h5_file_path) or not os.path.exists(pik_file_path):
        raise RuntimeError('No such file!!')

    print('cache files exist, going to load in...')
    print('loading h5_file...')
    h5_file = h5py.File(h5_file_path, 'r+')
    print('h5_file.keys:', h5_file.keys())
    train_X, train_Y, train_decoder_Y = h5_file['train_X'], h5_file['train_Y'], h5_file['train_decoder_Y']
    test_X,  test_Y, test_decoder_Y  = h5_file['test_X'],  h5_file['test_Y'], h5_file['test_decoder_Y']
    embedding_final = h5_file['embedding']
    
    print('loading pickle file')
    word2index, label2index = None, None
    with open(pik_file_path, 'rb') as pkl:
        word2index,label2index = pickle.load(pkl)
    print('cache files load successful!')
    return word2index,label2index,train_X, train_Y, train_decoder_Y,test_X,  test_Y, test_decoder_Y, embedding_final

In [43]:
def main(_):
    #1.加载数据
    base_path = '/data/chenhy/data/ieee_zhihu_cup/'
    cache_file_h5py = base_path + 'seq2seq_data.h5'
    cache_file_pickle = base_path + 'seq2seq_w2i_l2i.pkl'
    word2index,label2index,train_X, train_Y, train_decoder_Y,test_X,  test_Y, test_decoder_Y, embedding_final = load_data(cache_file_h5py, cache_file_pickle)
    vocab_size = len(word2index)
    index2word = {index: word for word, index in word2index.items()}
    index2label = {index: label for label, index in label2index.items()}

    print("train_X[0:5]:", train_X[0:5])
    print("train_Y[0:5]:", train_Y[0:5])
    
    #2.创建session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        model = Seq2Seq_with_attention(FLAGS.num_classes, FLAGS.batch_size, FLAGS.sentence_len, FLAGS.embed_size, vocab_size, 
                        FLAGS.hidden_size, FLAGS.learning_rate, FLAGS.decay_steps, FLAGS.decay_rate, FLAGS.is_training)
        saver = tf.train.Saver()
        batch_size = FLAGS.batch_size
        CONTINUE_TRAIN = True
        if os.path.exists(FLAGS.ckpt_dir + 'checkpoint'):
            log("restore from checkpoint")
            saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
            if CONTINUE_TRAIN: log("continue training...")
        else:
            log('init variables')
            sess.run(tf.global_variables_initializer())
            #是否使用embedding
            print('assign pre-trained embedding')
            embedding_assign = tf.assign(model.Embedding, tf.constant(np.array(embedding_final))) #为model.Embedding赋值
            sess.run(embedding_assign)
        if not os.path.exists(FLAGS.ckpt_dir + 'checkpoint') or CONTINUE_TRAIN:
            num_of_data = len(train_Y)
            for _ in range(FLAGS.num_epoch):
                epoch = sess.run(model.epoch_step)
                loss, counter = 0., 0.
                for start, end in zip(range(0, num_of_data, batch_size), range(batch_size, num_of_data, batch_size)):
                    loss_tmp,  pre, _ = sess.run([model.loss_val, model.predictions, model.train_op], 
                                                    feed_dict={model.input_x: train_X[start:end], 
                                                               model.input_y: train_Y[start:end],
                                                               model.decoder_input: train_decoder_Y[start:end]})
                    loss, counter = loss + loss_tmp, counter + 1
                    if counter % 100 == 0:
                        log("Epoch %d\Batch %d\ Train Loss:%.3f"%(epoch, counter, loss/float(counter)))
#                     if counter % 1 == 0:
#                         print('run model on validation data...')
# #                         loss_valid, acc = do_eval(sess, model, test_X, test_Y, test_decoder_Y)
#                         lo,pre = sess.run([model.loss_val, model.predictions], 
#                                                 feed_dict={model.input_x: test_X[start:end], 
#                                                            model.input_y: test_Y[start:end],
#                                                            model.decoder_input: test_decoder_Y[start:end]})
#                         loss_valid, acc = do_eval(sess, model, train_X[:30], train_Y[:30], train_decoder_Y[:30])
#                         log("Epoch %d/ Validation Loss:%.3f/ Accuracy:%.3f"%(epoch, loss_valid, acc))
#                         #save the checkpoint
#                         save_path = FLAGS.ckpt_dir + 'model.ckpt'
#                         saver.save(sess, save_path, global_step=model.epoch_step)
                sess.run(model.epoch_increment)
            
    

In [44]:
def do_eval(sess, model, test_X, test_Y, test_decoder_Y):
    test_X, test_Y, test_decoder_Y = test_X[:3], test_Y[:3], test_decoder_Y[:3]
    num_of_data = 3
    batch_size = 1
    right, wrong = 0., 0.
    for start in range(num_of_data):
        end = start + 1
        lo,pre,_ = sess.run([model.loss_val, model.predictions,model.train_op], 
                        feed_dict={model.input_x: test_X[start:end], 
                                   model.input_y: test_Y[start:end],
                                   model.decoder_input: test_decoder_Y[start:end]})
        loss += lo
        
        label = test_y[start]
#         pre = np.argsort(logits[0])[-5:]
#         label = [i for i in range(len(test_y[start])) if test_y[start][i] > 0]
        if start == 0: print('label:',label, 'predict:', pre)
        right += len([x for x in pre if x in label and x != 0])
        wrong += len([x for x in pre if x not in label and x != 0])
    return loss/num_of_data, right / (right + wrong)

In [None]:
tf.reset_default_graph()
tf.app.run()

cache files exist, going to load in...
loading h5_file...
h5_file.keys: KeysView(<HDF5 file "seq2seq_data.h5" (mode r+)>)
loading pickle file
cache files load successful!
train_X[0:5]: [[  26  121  414   41  115  115  171    4    7  402  560   26   12  139
   141  865  265    9 1939    6  401  644  713  439   10   90  203   13
     4    7   61   27   10    2  517    5  374  100   51   40    8    5
    52   61   27    9 1435 1705   68    5   19  482   51   33    5   15
    51   19    5   52  101  399  283  115   10    5   30   15   15   49
   179   32   32   33   33   33   29  245   30   24   30  100   29   36
    51   84   32  238  100   18   46   15   24   51   77   32   14   43
    38   22]
 [ 104  137    5   25   29   22    5  516 1386  191  661    6  188  909
  1605  503   52   61   27    9   54   58   78 1411  642  262   60  137
   103   10    2  537   90   58   20   67   13   61   27  357  595   10
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    