双端 GRU 模型，对所有的 timestep 采用attention 模型输出。

把 title 和 content 两部分分别放在两个网络中，在最后的全连接输出层之前拼起来。由于两部分的序列长度差得太多，所以跑起来会比较慢。

参考代码： 

https://github.com/yongyehuang/deep-text-classifier/blob/master/HAN_model.py 其中attention最后加权的地方应该写错了

https://github.com/indiejoseph/doc-han-att/blob/master/model.py

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
from gensim.models import KeyedVectors
import pickle
from tqdm import tqdm

# Load the pre-trained embedding matrix.
%time W_embedding = np.load('../data/W_embedding.npy')

# Checkpoint location: create the directory, then append the checkpoint file prefix.
import os
model_save_path = '../ckpt/attention-bigru-title-content/'  # checkpoint directory
if not os.path.exists(model_save_path):
    os.makedirs(model_save_path)
model_save_path = model_save_path + 'bi-gru.ckpt'

# TensorBoard summary location, plus the output paths for the submission file and raw scores.
summary_path = '../summary/attention-bigru-title-content/'
if not os.path.exists(summary_path):
    os.makedirs(summary_path)
result_path = '../result/attention-gru-title-content-256.csv'
predict_scores_path = '../scores/attention-gru-title-content-256.npy' 

# ##################### config ######################
n_step1 = max_len1 = 30           # sequence length of the first input (fed to the title network)
n_step2 = max_len2 = 100          # sequence length of the second input (fed to the content network)
input_size = embedding_size = 256       # embedding vector length
n_class = 1999                    # number of output classes
hidden_size = 256    # number of hidden units per GRU cell
n_layer = 2        # number of stacked bi-GRU layers
l2_lambda = 1e-4     # NOTE(review): not referenced anywhere else in the visible code
max_grad_norm = 10.0  # gradient clipping threshold (global norm)

CPU times: user 36 ms, sys: 656 ms, total: 692 ms
Wall time: 8.25 s


In [7]:
# Re-declares the output paths with the same values as the config cell,
# so they are available without re-running that cell.
result_path = '../result/attention-gru-title-content-256.csv'
predict_scores_path = '../scores/attention-gru-title-content-256.npy' 

In [2]:
import tensorflow as tf
# Create the shared session; allow_growth asks TensorFlow to grow GPU memory
# usage on demand instead of reserving it all at start-up.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
from tensorflow.contrib import rnn
import tensorflow.contrib.layers as layers

'''
双端 GRU，知乎问题多标签分类。
'''

# Scalar placeholders fed on every sess.run call.
lr = tf.placeholder(tf.float32)             # learning rate
keep_prob = tf.placeholder(tf.float32, [])  # dropout keep probability
batch_size = tf.placeholder(tf.int32, [])  # note: the dtype must be tf.int32

def weight_variable(shape, name):
    """Return a `tf.Variable` of the given shape, initialised from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name=name)

def bias_variable(shape, name):
    """Return a `tf.Variable` of the given shape with every entry initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape), name=name)

# Model inputs: two id sequences of fixed length (n_step1 and n_step2) and a
# dense float target of width n_class.
X1_inputs = tf.placeholder(tf.int64, [None, n_step1], name='X1_input')
X2_inputs = tf.placeholder(tf.int64, [None, n_step2], name='X2_input')
y_inputs = tf.placeholder(tf.float32, [None, n_class], name='y_input')    

def gru_cell():
    """Create one GRU cell of `hidden_size` units wrapped with output dropout.

    The dropout keep probability is read from the `keep_prob` placeholder.
    """
    with tf.name_scope('gru_cell'):
        base_cell = rnn.GRUCell(hidden_size, reuse=tf.get_variable_scope().reuse)
    wrapped_cell = rnn.DropoutWrapper(base_cell, output_keep_prob=keep_prob)
    return wrapped_cell

# with tf.device('/cpu:0'):   # (disabled) place the embedding on the CPU
# Embedding matrix initialised from the pre-trained W_embedding and kept
# trainable so it is fine-tuned together with the rest of the model.
embedding = tf.get_variable(name="embedding", shape=W_embedding.shape, 
                        initializer=tf.constant_initializer(W_embedding), trainable=True)   # fine-tune
    
def bi_gru(X_inputs):
    """Build a stacked bidirectional GRU over the embedded input ids.

    Args:
        X_inputs: integer id tensor; each id is looked up in the shared
            `embedding` matrix.

    Returns:
        The `outputs` tensor produced by
        `rnn.stack_bidirectional_dynamic_rnn` (the sequence outputs of the
        stack, which the caller feeds to the attention layer).
    """
    embedded = tf.nn.embedding_lookup(embedding, X_inputs)
    # n_layer cells per direction, each with an all-zero initial state.
    forward_cells = [gru_cell() for _ in range(n_layer)]
    backward_cells = [gru_cell() for _ in range(n_layer)]
    forward_init = [cell.zero_state(batch_size, tf.float32) for cell in forward_cells]
    backward_init = [cell.zero_state(batch_size, tf.float32) for cell in backward_cells]
    outputs, _, _ = rnn.stack_bidirectional_dynamic_rnn(
        forward_cells, backward_cells, embedded,
        initial_states_fw=forward_init,
        initial_states_bw=backward_init,
        dtype=tf.float32)
    return outputs
    

def task_specific_attention(inputs, output_size,
                            initializer=layers.xavier_initializer(),
                            activation_fn=tf.tanh, scope=None):
    """
    Performs task-specific attention reduction, using learned
    attention context vector (constant within task of interest).
    Args:
        inputs: Tensor of shape [batch_size, units, input_size]
            `input_size` must be static (known)
            `units` axis will be attended over (reduced from output)
            `batch_size` will be preserved
        output_size: Size of the projection used to score each unit
            (width of the attention context vector)
        initializer: Initializer for the attention context vector
        activation_fn: Activation applied by the projection layer
        scope: Optional variable scope name; defaults to 'attention'
    Returns:
        outputs: Tensor of shape [batch_size, input_size] -- the attention
            weights are applied to `inputs` itself, not to the projection.
    """
    # Rank must be 3 and the last (feature) dimension must be statically known.
    assert len(inputs.get_shape()) == 3 and inputs.get_shape()[-1].value is not None
    with tf.variable_scope(scope or 'attention') as scope:
        # u_w: the learned attention context vector
        attention_context_vector = tf.get_variable(name='attention_context_vector',
                                                   shape=[output_size],
                                                   initializer=initializer,
                                                   dtype=tf.float32)
        # Fully connected layer mapping h_i to u_i:
        # [batch_size, units, input_size] -> [batch_size, units, output_size]
        input_projection = layers.fully_connected(inputs, output_size,
                                                  activation_fn=activation_fn,
                                                  scope=scope)
        # Dot product of every u_i with u_w; keep_dims=True keeps the reduced
        # axis, so the shape is [batch_size, units, 1]
        vector_attn = tf.reduce_sum(tf.multiply(input_projection, attention_context_vector), axis=2, keep_dims=True)
        attention_weights = tf.nn.softmax(vector_attn, dim=1)   # softmax over the units axis; still [batch_size, units, 1]
        tf.summary.histogram('attention_weigths', attention_weights)
        # Author's note: the reference implementation differed here; per the
        # paper the weights should be applied to h_i (the raw inputs), which is
        # what this line does. The weights broadcast over the feature axis.
        weighted_projection = tf.multiply(inputs, attention_weights)
        # Sum over the units axis -> [batch_size, input_size]
        outputs = tf.reduce_sum(weighted_projection, axis=1)
        return outputs

    
# Two independent encoder + attention towers, one per input, each under its
# own variable scope.
with tf.variable_scope('bi_gru_title'):
    word_encoder_title = bi_gru(X1_inputs)      # encoder output for the title input
    output_title = task_specific_attention(word_encoder_title, hidden_size*2)
with tf.variable_scope('bi_gru_content'):
    word_encoder_content = bi_gru(X2_inputs)    # encoder output for the content input
    output_content = task_specific_attention(word_encoder_content, hidden_size*2)
    
# Concatenate both attention outputs along the feature axis and project them
# to n_class scores with a single linear layer.
output = tf.concat([output_title, output_content], axis=1)
W_out = weight_variable([hidden_size * 4, n_class], name='Weight_out') 
tf.summary.histogram('W_out', W_out)
b_out = bias_variable([n_class], name='bias_out') 
tf.summary.histogram('b_out', b_out)
y_pred = tf.nn.xw_plus_b(output, W_out, b_out, name='scores')  # raw score (logit) for every class


# Loss: mean element-wise sigmoid cross-entropy between the class scores and
# the target matrix.
with tf.name_scope('cost'):
    cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_pred, labels=y_inputs))
    tf.summary.scalar('cost', cost)

# ***** optimisation *******
# Collect all trainable model parameters
tvars = tf.trainable_variables()
# Gradients of the loss w.r.t. every parameter, clipped by global norm
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars), max_grad_norm)
# Optimizer (learning rate is fed through the `lr` placeholder)
optimizer = tf.train.AdamOptimizer(learning_rate=lr)
train_op = optimizer.apply_gradients( zip(grads, tvars),
    global_step=tf.contrib.framework.get_or_create_global_step())

merged = tf.summary.merge_all() # all summaries registered so far, merged into one op
train_writer = tf.summary.FileWriter(summary_path + 'train', sess.graph)
# test_writer = tf.summary.FileWriter( '../summary/attention-bigru-title/test')
print 'Finished creating the bi-gru model.'

Finished creating the bi-gru model.


## 导入数据

In [3]:
import time
import sys
sys.path.append('..')
from data_helpers import BatchGenerator
from data_helpers import to_categorical

save_path = '../data/'
print('loading data...')
time0 = time.time()
X_title = np.load(save_path+'X_tr_title.npy')
X_content = np.load(save_path+'X_tr_content.npy')
# Title and content ids are concatenated column-wise; later cells split them
# apart again at column n_step1.
X = np.hstack([X_title, X_content])
y = np.load(save_path+'y_tr.npy')
print('finished loading data, time cost %g' % (time.time() - time0))
# Hold out a validation set: shuffle with a fixed seed, then take the first
# valid_num rows.
sample_num = X.shape[0]
valid_num = 100000
np.random.seed(13)
new_index = np.random.permutation(sample_num)
X = X[new_index]
y = y[new_index]
X_valid = X[:valid_num]
# NOTE(review): everything below is commented out, so this cell defines only
# X_valid. Later cells reference data_train and data_valid; re-enable these
# lines before running the validation/training cells from a fresh kernel.
# y_valid = y[:valid_num]
# # X_train = X[valid_num:]
# # y_train = y[valid_num:]
# X_train = X[valid_num:]
# y_train = y[valid_num:]
# print('train_num=%d, valid_num=%d' % (X_train.shape[0], X_valid.shape[0]))

# # Build the batch generators
# data_train = BatchGenerator(X_train, y_train, shuffle=True)
# data_valid = BatchGenerator(X_valid, y_valid, shuffle=False)

# print('X_train.shape=', X_train.shape)
# print('X_valid.shape=', X_valid.shape)
# print('y_train.shape=', y_train.shape)
# print('y_valid.shape=', y_valid.shape)

loading data...
finished loading data, time cost 44.5888


In [4]:
import sys
sys.path.append('..')
from evaluator import score_eval

# Ground-truth labels of the validation set and its size; both are read by valid_epoch().
marked_labels_list = data_valid.y.tolist() # all annotated (ground-truth) labels
valid_data_size = data_valid.y.shape[0]
def valid_epoch():
    """Evaluate the model on the validation generator.

    Runs `int(valid_data_size / 2000)` batches of 2000 samples with dropout
    disabled, keeps the indices of the 5 highest-scoring classes for each
    sample and scores them against `marked_labels_list` with `score_eval`.

    Returns:
        Tuple `(mean_cost, precision, recall, f1)`.
    """
    data_valid._index_in_epoch = 0  # rewind the generator to its first sample
    _batch_size = 2000
    batch_num = int(valid_data_size / _batch_size)
    total_cost = 0.0
    top5_per_sample = []  # top-5 class indices for every evaluated sample
    for _ in xrange(batch_num):
        X_batch, y_batch = data_valid.next_batch(_batch_size)
        feed_dict = {
            X1_inputs: X_batch[:, :n_step1],
            X2_inputs: X_batch[:, n_step1:],
            y_inputs: to_categorical(y_batch),
            lr: 1e-5,
            batch_size: _batch_size,
            keep_prob: 1.0,
        }
        batch_cost, batch_scores = sess.run([cost, y_pred], feed_dict)
        total_cost += batch_cost
        # argsort is ascending; the reversed slice picks the 5 largest, best first.
        top5_per_sample.extend([row.argsort()[-1:-6:-1] for row in batch_scores])
    paired = zip(top5_per_sample, marked_labels_list)
    precision, recall, f1 = score_eval(paired)
    return total_cost / batch_num, precision, recall, f1

## 训练模型

In [5]:
decay = 0.90            # multiplicative learning-rate decay
max_epoch = 5           # the decay starts once the 0-based epoch index exceeds this value
max_max_epoch = 16      # total number of epochs to run
tr_batch_size = 256
_lr = 1e-4
tr_batch_num = int(data_train.y.shape[0] / tr_batch_size)  # number of batches per epoch
global_step = 0   # number of training steps so far; ~2,700,000 / 256 is roughly 10000 steps per epoch
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver(max_to_keep=10)           # keep at most 10 checkpoints
fetches = [merged, cost, train_op]               # loop-invariant, so built once
for epoch in xrange(max_max_epoch):
    if epoch > max_epoch:
        _lr = _lr * decay
    print('EPOCH %d， lr=%g， training ...' % (epoch+1, _lr))
    time0 = time.time()
    _costs = 0.0
    for batch in xrange(tr_batch_num):
        global_step += 1
        X_batch, y_batch = data_train.next_batch(tr_batch_size)
        # Columns [0, n_step1) feed the title network, the rest the content network.
        X1_batch = X_batch[:, :n_step1]
        X2_batch = X_batch[:, n_step1:]
        y_batch = to_categorical(y_batch)
        feed_dict = {X1_inputs:X1_batch, X2_inputs:X2_batch, y_inputs:y_batch, lr:_lr, batch_size:tr_batch_size, keep_prob:0.5}
        summary, _cost, _ = sess.run(fetches, feed_dict) # the cost is the mean cost of one batch
        # Write a summary every 100th step. The previous `if global_step % 100:`
        # was inverted and wrote on every step EXCEPT multiples of 100.
        if global_step % 100 == 0:
            train_writer.add_summary(summary, global_step)
        _costs += _cost
    # Validate once per epoch
    valid_cost, precision, recall, f1 = valid_epoch()  # valid
    mean_cost = _costs / tr_batch_num
    print('training cost=%g; valid cost=%g; p=%g, r=%g, f1=%g; speed=%g s/epoch' % (
        mean_cost, valid_cost, precision, recall, f1, time.time()-time0) )
    if (epoch + 1) % 2 == 0:  # save a checkpoint every 2 epochs
        # Stored under its own name so the data directory held in `save_path`
        # is not overwritten by the checkpoint path.
        ckpt_path = saver.save(sess, model_save_path, global_step=(epoch+1))
        print('the save path is ', ckpt_path)

EPOCH 1， lr=0.0001， training ...
training cost=0.00958835; valid cost=0.00536478; p=0.873054, r=0.371612, f1=0.260662; speed=5646.19 s/epoch
EPOCH 2， lr=0.0001， training ...
training cost=0.00489762; valid cost=0.00459141; p=1.1958, r=0.487733, f1=0.346433; speed=5685.07 s/epoch
('the save path is ', '../ckpt/attention-bigru-title-content/bi-gru.ckpt-2')
EPOCH 3， lr=0.0001， training ...
training cost=0.00442403; valid cost=0.00437712; p=1.27545, r=0.517992, f1=0.368382; speed=5676.87 s/epoch
EPOCH 4， lr=0.0001， training ...
training cost=0.0042049; valid cost=0.00426587; p=1.31414, r=0.533722, f1=0.379565; speed=7131.54 s/epoch
('the save path is ', '../ckpt/attention-bigru-title-content/bi-gru.ckpt-4')
EPOCH 5， lr=0.0001， training ...
training cost=0.00406307; valid cost=0.00419162; p=1.33771, r=0.542876, f1=0.386162; speed=6999.3 s/epoch
EPOCH 6， lr=0.0001， training ...
training cost=0.00395787; valid cost=0.00417651; p=1.35181, r=0.549024, f1=0.390448; speed=5609.62 s/epoch
('the sa

KeyboardInterrupt: 

每个 epoch 2899952/256=11327 个batch

- Bi-LSTM 模型<br/>
**batch_size=256，迭代12个epoch，基本收敛。结果： 验证集 f1=0.38618； 提交结果 0.3873186**
- Bi-GRU 模型<br/>
**batch_size=256，迭代15个epoch。大概在 13 个 epoch 就收敛了。结果： 验证集 f1=0.390534； 提交结果 0.39198**
- attention-Bi-GRU 模型<br/>
**batch_size=256，迭代18个epoch。在16个epoch收敛。结果：验证集 f1=0.391734 ；提交结果 0.39310**
- attention-Bi-GRU-title-content 模型<br/>
**batch_size=256，迭代11个epoch收敛。结果，验证集 f1=0.396892；提交结果 0.399367**


## 若没有收敛，继续迭代，改成 sgd 优化器

In [None]:
add_epoch = 8          # number of additional epochs to run
tr_batch_size = 256    # batch size for the extra epochs (same value as the main training run)
_lr = 5e-4
# `sgd_train_op` was referenced but never defined. Build a plain
# gradient-descent update from the already clipped gradients; it creates no
# extra variables, so no re-initialisation is needed.
sgd_optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
sgd_train_op = sgd_optimizer.apply_gradients(
    zip(grads, tvars),
    global_step=tf.contrib.framework.get_or_create_global_step())
fetches = [merged, cost, sgd_train_op]   # loop-invariant, so built once
for epoch in xrange(max_max_epoch, max_max_epoch+add_epoch):
    _lr = _lr * decay
    print('EPOCH %d， lr=%g， training ...' % (epoch+1, _lr))
    time0 = time.time()
    _costs = 0.0
    for batch in xrange(tr_batch_num):
        global_step += 1
        X_batch, y_batch = data_train.next_batch(tr_batch_size)
        # Columns [0, n_step1) feed the title network, the rest the content network.
        X1_batch = X_batch[:, :n_step1]
        X2_batch = X_batch[:, n_step1:]
        y_batch = to_categorical(y_batch)
        feed_dict = {X1_inputs:X1_batch, X2_inputs:X2_batch, y_inputs:y_batch, lr:_lr, batch_size:tr_batch_size, keep_prob:0.5}
        summary, _cost, _ = sess.run(fetches, feed_dict) # the cost is the mean cost of one batch
        # Write a summary every 100th step. The previous `if global_step % 100:`
        # was inverted and wrote on every step EXCEPT multiples of 100.
        if global_step % 100 == 0:
            train_writer.add_summary(summary, global_step)
        _costs += _cost
    # Validate once per epoch
    valid_cost, precision, recall, f1 = valid_epoch()  # valid
    mean_cost = _costs / tr_batch_num
    print('training cost=%g; valid cost=%g; p=%g, r=%g, f1=%g; speed=%g s/epoch' % (
        mean_cost, valid_cost, precision, recall, f1, time.time()-time0) )
    if (epoch + 1) % 2 == 0:  # save a checkpoint every 2 epochs
        # Stored under its own name so the data directory held in `save_path`
        # is not overwritten by the checkpoint path.
        ckpt_path = saver.save(sess, model_save_path, global_step=(epoch+1))
        print('the save path is ', ckpt_path)

## 本地测试

In [5]:
sys.path.append('..')
from evaluator import score_eval

print('begin ...')
# X_valid = np.load('../data/X_valid.npy')
model_name = 'attention-bigru-title-content-256'
# Ground-truth labels saved for the local (validation) test
marked_labels_list = np.load('../data/marked_labels_list.npy')
local_scores_path = '../local_scores/' + model_name + '.npy'

# Restore the saved model
saver = tf.train.Saver()
best_model_path = '../ckpt/attention-bigru-title-content/bi-gru.ckpt-12'  # hard-coded path of the best checkpoint
saver.restore(sess, best_model_path)
print('Finished loading model.')

begin ...
INFO:tensorflow:Restoring parameters from ../ckpt/attention-bigru-title-content/bi-gru.ckpt-12
Finished loading model.


In [7]:
# 导入测试数据
def local_predict(scores_path=local_scores_path):
    """Score the held-out validation set and save the raw class scores.

    Runs the restored model over `X_valid` in batches of 100 with dropout
    disabled, evaluates the top-5 predictions against `marked_labels_list`
    with `score_eval`, prints the metrics, and saves the score matrix.

    Args:
        scores_path: destination `.npy` file for the score matrix of shape
            (n_sample, n_class). Defaults to the value `local_scores_path`
            had when this function was defined.
    """
    print('local predicting ...')
    time0 = time.time()
    X_te = X_valid
    n_sample = X_te.shape[0]  # number of samples to score
    _batch_size = 100
    predict_labels_list = list()  # top-5 class indices for every sample
    predict_scores = list()       # one score array per batch
    for start in xrange(0, n_sample, _batch_size):
        # Slicing clamps at n_sample, so the last batch may be shorter.
        X_batch = X_te[start:start + _batch_size]
        feed_dict = {X1_inputs: X_batch[:, :n_step1],
                     X2_inputs: X_batch[:, n_step1:],
                     lr: 1e-5,
                     batch_size: X_batch.shape[0],
                     keep_prob: 1.0}
        batch_scores = sess.run(y_pred, feed_dict)
        predict_scores.append(batch_scores)
        # argsort is ascending; the reversed slice picks the 5 largest, best first.
        predict_labels_list.extend(
            [row.argsort()[-1:-6:-1] for row in batch_scores])
    # Stack the batches into one 2-D matrix, as predict() does. np.asarray on
    # the batch list gave a 3-D array (or a ragged one if the last batch was short).
    predict_scores = np.vstack(predict_scores)
    predict_label_and_marked_label_list = zip(predict_labels_list, marked_labels_list)
    precision, recall, f1 = score_eval(predict_label_and_marked_label_list)
    print('local valid p=%g, r=%g, f1=%g; speed=%g s/epoch' % ( precision, recall, f1, time.time()-time0) )
    # Honour the `scores_path` argument; it used to be ignored in favour of the global.
    np.save(scores_path, predict_scores)
    print('Writed the scores into %s, time %g s' % (scores_path, time.time()-time0))

local_predict()

local predicting ...
local valid p=1.37269, r=0.558328, f1=0.396895; speed=170.177 s/epoch
Writed the scores into ../local_scores/attention-bigru-title-content-256.npy, time 170.583 s


## 对测试数据进行预测

In [3]:
# Restore the saved model.
saver = tf.train.Saver()
# The training loop only writes checkpoints after even-numbered epochs, so
# "-11" never exists on disk; the recorded output of this cell (and the local
# test cell) restores checkpoint 12.
best_model_path = model_save_path + '-' + str(12)
saver.restore(sess, best_model_path)

INFO:tensorflow:Restoring parameters from ../ckpt/attention-bigru-title-content/bi-gru.ckpt-12


In [16]:
# 导入测试数据
def predict():
    """Run the restored model over the test set.

    Loads the title and content id matrices, concatenates them column-wise
    and scores them in batches of 1000 with dropout disabled.

    Returns:
        Tuple `(predict_labels_list, predict_scores)`: a list with the
        indices of the 5 highest-scoring classes per sample (best first),
        and the stacked score matrix of shape (n_sample, n_class).
    """
    print('Predicting ...')
    X1_te = np.load('../data/X_te_title.npy')
    X2_te = np.load('../data/X_te_content.npy')
    X_te = np.hstack([X1_te, X2_te])
    n_sample = X_te.shape[0]  # number of test samples
    _batch_size = 1000
    predict_labels_list = list()  # top-5 class indices for every sample
    predict_scores = list()       # one score array per batch
    for start in xrange(0, n_sample, _batch_size):
        # Slicing clamps at n_sample, so the last batch may be shorter.
        X_batch = X_te[start:start + _batch_size]
        feed_dict = {X1_inputs: X_batch[:, :n_step1],
                     X2_inputs: X_batch[:, n_step1:],
                     lr: 1e-5,
                     batch_size: X_batch.shape[0],
                     keep_prob: 1.0}
        batch_scores = sess.run(y_pred, feed_dict)
        predict_scores.append(batch_scores)
        # argsort is ascending; the reversed slice picks the 5 largest, best first.
        predict_labels_list.extend(
            [row.argsort()[-1:-6:-1] for row in batch_scores])
    # The batches can differ in length (short last batch), so stack them
    # directly. The former np.asarray() on the ragged list was redundant and
    # is an error on recent NumPy versions.
    return predict_labels_list, np.vstack(predict_scores)

def write_result(predict_labels_list, result_path):
    """Map predicted class ids to topic ids and write the submission CSV.

    Args:
        predict_labels_list: per-sample sequences of 5 predicted class ids.
        result_path: destination CSV path (written without index or header).

    Returns:
        The DataFrame that was written, with columns `question`,
        `tid0` .. `tid4`.
    """
    question_ids = np.load('../data/eval_question.npy')
    with open('../data/sr_topic2id.pkl', 'rb') as inp:
        # The pickle file holds two objects; the first (topic -> id) is read
        # only to reach the second (id -> topic).
        pickle.load(inp)
        sr_id2topic = pickle.load(inp)
    flat_ids = np.asarray(predict_labels_list).reshape([-1])
    topic_matrix = sr_id2topic[flat_ids].values.reshape([-1, 5])   # one row of 5 topics per question
    columns = {'question': question_ids}
    for rank in range(5):
        columns['tid%d' % rank] = topic_matrix[:, rank]
    df_result = pd.DataFrame(columns)
    df_result.to_csv(result_path, index=False, header=False)
    print('Finished writing the result')
    return df_result

In [17]:
# Predict on the test set, write the submission file, then save the raw scores.
print('Predicting the result, writing into %s' % result_path)
%time predict_labels_list,predict_scores = predict()
df_result = write_result(predict_labels_list, result_path=result_path) 
print(len(df_result))  # expected: 217360 rows
print('Saving the predict_scores into %s' % predict_scores_path)
print(predict_scores.shape)
%time np.save(predict_scores_path, predict_scores)
print('Finished!')

Predicting the result, writing into ../result/attention-gru-title-content-256.csv
Predicting ...
CPU times: user 1min 33s, sys: 10.5 s, total: 1min 44s
Wall time: 1min 30s
Finished writing the result
217360
Saving the predict_scores into ../scores/attention-gru-title-content-256.npy
(217360, 1999)
CPU times: user 0 ns, sys: 844 ms, total: 844 ms
Wall time: 799 ms
Finished!


In [18]:
# Sanity check: overall score-matrix shape, then the first sample's score vector.
print(np.vstack(predict_scores).shape)
print(predict_scores[0].shape)
print(predict_scores[0])

(217360, 1999)
(1999,)
[ -6.42315722  -5.73911715  -7.2089467  ..., -11.32595444 -15.35256863
  -8.03899288]


In [10]:
# Preview the first five rows of the submission frame.
df_result.head(5)

Unnamed: 0,question,tid0,tid1,tid2,tid3,tid4
0,6215603645409872328,-7506384235581390893,4610596224687453206,-6839713564940654454,2919247920214845195,-5932391056759866388
1,6649324930261961840,-240041917918953337,2858911571784840089,3418451812342379591,2382911985227044227,-7483543763655495143
2,-4251899610700378615,2919247920214845195,-7358589937244777363,2816249700493135244,-7270992690764838239,-3315241959305847628
3,6213817087034420233,-8655945395761165989,-4966205278807386328,5804619920623030604,7476760589625268543,-6655927395515165363
4,-8930652370334418373,3972493657017129406,-8963554618409314978,-1115593437686158905,6018641953300645757,-7790543634407640064


In [11]:
# Reference result (kept for comparison with the preview above)
df_result.head(5)

Unnamed: 0,question,tid0,tid1,tid2,tid3,tid4
0,6215603645409872328,4610596224687453206,-6839713564940654454,5818382893362053755,-7506384235581390893,-5932391056759866388
1,6649324930261961840,3418451812342379591,2858911571784840089,-240041917918953337,2382911985227044227,-7483543763655495143
2,-4251899610700378615,2919247920214845195,-7358589937244777363,2816249700493135244,-7270992690764838239,-3315241959305847628
3,6213817087034420233,5804619920623030604,-4966205278807386328,-8655945395761165989,244937959911721367,7476760589625268543
4,-8930652370334418373,3972493657017129406,-8963554618409314978,-1115593437686158905,-6925670792665757873,6018641953300645757


## 在全部预测正确的情况下，理论值为：f1=0.713933
precision=2.50273, recall=0.998873, f1=0.713933

In [26]:
# 假设全部正确，f1 值最高能到多少
def padding_label(labels):
    """Truncate or pad `labels` to exactly 5 entries.

    Sequences with 5 or more entries are cut to their first 5 (returned as
    a slice of the input); shorter ones are returned as a NumPy array
    right-padded with -1.
    """
    missing = 5 - len(labels)
    if missing > 0:
        filler = np.zeros(missing, dtype=int) - 1  # `missing` copies of -1
        return np.hstack([labels, filler])
    return labels[:5]
    

# Upper bound of the metric: use the ground-truth labels themselves (cut or
# padded to 5 entries) as the predictions and score them.
marked_labels_list = data_valid.y.tolist() # all annotated (ground-truth) labels
predict_labels_list = map(padding_label, marked_labels_list)
predict_label_and_marked_label_list = zip(predict_labels_list, marked_labels_list)
precision, recall, f1 = score_eval(predict_label_and_marked_label_list)
print '在全部预测正确的情况下，理论值为：'
print 'precision=%g, recall=%g, f1=%g' % (precision, recall, f1)

在全部预测正确的情况下，理论值为：
precision=2.50273, recall=0.998873, f1=0.713933
