In [2]:
import pandas as pd
import numpy as np
from collections import Counter
import tensorflow as tf
import math
from sklearn.metrics import roc_auc_score
import jieba
from tkinter import _flatten

In [3]:
train = pd.read_csv('../senta_data/new_train.tsv', sep='\t')
val = pd.read_csv('../senta_data/dev.tsv', sep='\t')
test = pd.read_csv('../senta_data/test.tsv', sep='\t')
output_folder = './run_text_rnn'
model_path = './model/'

# 1、构造词典

In [4]:
train['text_a'] = train['text_a'].apply(lambda x:x.split(' '))

In [5]:
train['text_a'] = train['text_a'].apply(lambda x:['，' if i.strip() == ',' else i.strip() for i in x])
train['text_a'] = train['text_a'].apply(lambda x:['。' if i.strip() == '.' else i.strip() for i in x])
train['text_a'] = train['text_a'].apply(lambda x:['！' if i.strip() == '!' else i.strip() for i in x])
train['text_a'] = train['text_a'].apply(lambda x:['？' if i.strip() == '?' else i.strip() for i in x])
train['text_a'] = train['text_a'].apply(lambda x:['：' if i.strip() == ':' else i.strip() for i in x])
train['text_a'] = train['text_a'].apply(lambda x:['；' if i.strip() == ';' else i.strip() for i in x])

In [6]:
sentences = [sentence for sentence in train.text_a.values]

In [8]:
tem = _flatten(sentences)

In [12]:
word_counter = Counter(tem)
word_counter['UNK'] = 10000000
# char_counter['PAD'] = 10000001

In [13]:
word_counter_list = sorted(word_counter.items(), key=lambda x:x[1], reverse=True)

# 2、构建id2word和word2id

In [14]:
id2word = {i: j[0] for i, j in enumerate(word_counter_list)}
word2id = {j[0]: i for i, j in enumerate(word_counter_list)}

# 3、语料转换成id向量

In [15]:
def generate_ids_features(sentence):
    ids_feature = [word2id.get(word,word2id['UNK']) for word in sentence]
    return ids_feature

# 4、添加位置特征

In [16]:
punctuation = {'，': 0, 
              '。': 1,
              '？': 2,
              '！': 3,
              '：': 4,
              '；': 5}

In [17]:
def generate_position_features(sentence):
    position_feature = []
    i = 5
    for word in sentence:
        i += 1
        if word not in punctuation:
            position_feature.append(i)
        if word in punctuation:
            position_feature.append(punctuation[word])
            i = 5
    return position_feature

In [18]:
def get_data(df):
    data = []
    for index, sentence in enumerate(df.text_a.values):
        ids_feature = generate_ids_features(sentence)
        position_feature = generate_position_features(sentence)
        label = df.label.values[index]
        data.append([ids_feature, position_feature, label])
    return data

In [19]:
train_data = get_data(train)
val_data = get_data(val)
test_data = get_data(test)

# 5、数据集封装

In [21]:
def get_default_params():
    return tf.contrib.training.HParams(
        char_embedding_size = 100,
        position_embedding_size = 20,
        num_timesteps = 100,
        num_fc_nodes = 32,
        batch_size = 128,
        clip_lstm_grads = 5.0,
        learning_rate = 0.0003,
        num_word_threshold = 5,
    )

hps = get_default_params()

In [23]:
class TextDataSet:
    def __init__(self, data, num_timesteps):
        self._data = data
        self._num_timesteps = num_timesteps
        # matrix
        self._inputs = []
        # vector
        self._outputs = []
        self._indicator = 0
        self._parse_data()
    
    def _parse_data(self):
        tf.logging.info('Loading data')
        for line in self._data:
            label, id_feature, position_feature = line[2], line[0], line[1]
            id_feature = id_feature[0: self._num_timesteps]
            position_feature = position_feature[0: self._num_timesteps]
            padding_num = self._num_timesteps - len(id_feature)
            id_feature = id_feature + [word2id['UNK'] for i in range(padding_num)]
            position_feature = position_feature + [word2id['UNK'] for i in range(padding_num)]
            self._inputs.append([id_feature, position_feature])
            self._outputs.append(label)
        self._inputs = np.asarray(self._inputs, dtype = np.int32)
        self._outputs = np.asarray(self._outputs, dtype = np.int32)
        self._random_shuffle()
    
    def _random_shuffle(self):
        p = np.random.permutation(len(self._inputs))
        self._inputs = self._inputs[p]
        self._outputs = self._outputs[p]
    
    def next_batch(self, batch_size):
        end_indicator = self._indicator + batch_size
        if end_indicator > len(self._inputs):
            self._random_shuffle()
            self._indicator = 0
            end_indicator = batch_size
        if end_indicator > len(self._inputs):
            print("batch_size: %d is too large" % batch_size)
        
        batch_inputs = self._inputs[self._indicator: end_indicator]
        batch_outputs = self._outputs[self._indicator: end_indicator]
        self._indicator = end_indicator
        return batch_inputs, batch_outputs
            
train_dataset = TextDataSet(
    train_data, hps.num_timesteps) 
val_dataset = TextDataSet(
    val_data, hps.num_timesteps) 
test_dataset = TextDataSet(
    test_data, hps.num_timesteps) 

INFO:tensorflow:Loading data
INFO:tensorflow:Loading data
INFO:tensorflow:Loading data


In [24]:
def conv_wrapper(inputs, dilation_rate, name, output_channel=128, kernel_size=(1,3), activation=tf.nn.relu):
    conv = tf.layers.conv2d(inputs=inputs,
                            filters=output_channel,
                            kernel_size=kernel_size,
                            dilation_rate=dilation_rate,
                            padding='same',
                            activation=activation,
                            name=name)
#     bn = tf.layers.batch_normalization(conv, training=is_training)
    return conv

In [25]:
def conv_weight(name, shape):
    conv_weight = tf.get_variable(name, shape=shape, initializer=tf.contrib.layers.xavier_initializer())
    return conv_weight

In [27]:
def create_model(hps, vocab_size, num_classes):
    num_timesteps = hps.num_timesteps
    
    batch_size = tf.placeholder(tf.int32, [], name='batch_size')
    char_inputs = tf.placeholder(tf.int32, (None, num_timesteps), name='char_inputs')
    position_inputs = tf.placeholder(tf.int32, (None, num_timesteps), name='position_inputs')
    outputs = tf.placeholder(tf.int32, (None, ), name='outputs')
    keep_prob = tf.placeholder(tf.float32, name = 'keep_prob')
    
    global_step = tf.Variable(
        tf.zeros([], tf.int64), name = 'global_step', trainable=False)
    
    with tf.variable_scope('embedding', reuse=tf.AUTO_REUSE):
        char_embedding = tf.get_variable('char_embedding', 
                                         shape=[vocab_size, hps.char_embedding_size], 
                                         initializer=tf.contrib.layers.xavier_initializer())
        position_embedding = tf.get_variable('position_embedding', 
                                         shape=[284, hps.position_embedding_size], 
                                         initializer=tf.contrib.layers.xavier_initializer())
        char_embedding_matrix = tf.nn.embedding_lookup(char_embedding, char_inputs)
        position_embedding_matrix = tf.nn.embedding_lookup(position_embedding, position_inputs)
        embed_inputs = tf.concat([char_embedding_matrix, position_embedding_matrix], axis=-1)
    
    model_inputs = tf.expand_dims(embed_inputs, 1)
    with tf.variable_scope("conv", reuse=tf.AUTO_REUSE):
        conv1 = conv_wrapper(model_inputs, 1, 'conv1')
        conv2_1 = conv_wrapper(conv1, 1, 'conv2_1')
        conv2_2 = conv_wrapper(conv2_1, 1, 'conv2_2')
        conv2_3 = conv_wrapper(conv2_2, 2, 'conv2_3')
        conv3_1 = conv_wrapper(conv2_3, 1, 'conv3_1')
        conv3_2 = conv_wrapper(conv3_1, 1, 'conv3_2')
        conv3_3 = conv_wrapper(conv3_2, 2, 'conv3_3')
#         conv4_1 = conv_wrapper(conv3_3, 1, 'conv4_1')
#         conv4_2 = conv_wrapper(conv4_1, 1, 'conv4_2')
#         conv4_3 = conv_wrapper(conv4_2, 2, 'conv4_3')
#     with tf.variable_scope('conv_weight', reuse=tf.AUTO_REUSE):
#         conv2_1_weight = conv_weight('conv2_1_weight', shape=conv2_1.get_shape()[1:])
#         conv2_2_weight = conv_weight('conv2_2_weight', shape=conv2_2.get_shape()[1:])
#         conv2_3_weight = conv_weight('conv2_3_weight', shape=conv2_3.get_shape()[1:])
#         conv3_1_weight = conv_weight('conv3_1_weight', shape=conv3_1.get_shape()[1:])
#         conv3_2_weight = conv_weight('conv3_2_weight', shape=conv3_2.get_shape()[1:])
#         conv3_3_weight = conv_weight('conv3_3_weight', shape=conv3_3.get_shape()[1:])
#         conv4_3_weight = conv_weight('conv4_3_weight', shape=conv4_3.get_shape()[1:])
#     conv2_1 = tf.multiply(conv2_1, conv2_1_weight)
#     conv2_2 = tf.multiply(conv2_2, conv2_2_weight)
#     conv2_3 = tf.multiply(conv2_3, conv2_3_weight)
#     conv3_1 = tf.multiply(conv3_1, conv3_1_weight)
#     conv3_2 = tf.multiply(conv3_2, conv3_2_weight)
#     conv3_3 = tf.multiply(conv3_3, conv3_3_weight)
#     conv4_3 = tf.multiply(conv4_3, conv4_3_weight)
#     last = tf.add(conv2_1, conv2_2)
#     last = tf.add(last, conv2_3)
#     last = tf.add(last, conv3_1)
#     last = tf.add(last, conv3_2)
#     last = tf.add(conv2_3, conv3_3)
#     last = tf.add(tmp, conv4_3)
    last = tf.concat([conv2_3, conv3_3], axis=3)
    with tf.variable_scope('flatten', reuse=tf.AUTO_REUSE):
        flatten = tf.layers.flatten(last)
    fc_init = tf.uniform_unit_scaling_initializer(factor=1.0)
    with tf.variable_scope('fc', initializer = fc_init, reuse=tf.AUTO_REUSE):
        fc1 = tf.layers.dense(flatten, 
                              hps.num_fc_nodes,
                              activation = tf.nn.relu,
                              name = 'fc1')
        fc1_dropout = tf.contrib.layers.dropout(fc1, keep_prob)
        logits = tf.layers.dense(fc1_dropout,
                                 num_classes,
                                 name = 'fc2')
    
    with tf.name_scope('metrics'):
        softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits = logits, labels = outputs)
        loss = tf.reduce_mean(softmax_loss)
#         tf.nn.l2_loss(conv2_3_weight) + \
#         tf.nn.l2_loss(conv3_3_weight)
#         tf.nn.l2_loss(conv2_3_weight) + \
#         tf.nn.l2_loss(conv3_1_weight) + \
#         tf.nn.l2_loss(conv3_2_weight) + \
#         tf.nn.l2_loss(conv3_3_weight)
        # [0, 1, 5, 4, 2] -> argmax: 2
        y_pred = tf.argmax(tf.nn.softmax(logits),
                           1, 
                           output_type = tf.int32)
        correct_pred = tf.equal(outputs, y_pred)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    
    with tf.variable_scope('train_op', reuse=tf.AUTO_REUSE):
        tvars = tf.trainable_variables()
        for var in tvars:
            tf.logging.info('variable name: %s' % (var.name))
        grads, _ = tf.clip_by_global_norm(
            tf.gradients(loss, tvars), hps.clip_lstm_grads)
        optimizer = tf.train.AdamOptimizer(hps.learning_rate)
        train_op = optimizer.apply_gradients(
            zip(grads, tvars), global_step = global_step)
    
    return ((char_inputs, position_inputs, outputs, keep_prob, batch_size),
            (loss, y_pred, accuracy),
            (train_op, global_step))

placeholders, metrics, others = create_model(hps, len(word2id), 2)

char_inputs, position_inputs, outputs, keep_prob, batch_size = placeholders
loss, y_pred, accuracy = metrics
train_op, global_step = others
saver = tf.train.Saver()

Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
INFO:tensorflow:variable name: embedding/char_embedding:0
INFO:tensorflow:variable name: embedding/position_embedding:0
INFO:tensorflow:variable name: conv/conv1/kernel:0
INFO:tensorflow:variable name: conv/conv1/bias:0
INFO:tensorflow:variable name: conv/conv2_1/kernel:0
INFO:tensorflow:variable name: conv/conv2_1/bias:0
INFO:tensorflow:variable name: conv/conv2_2/kernel:0
INFO:tensorflow:variable name: conv/conv2_2/bias:0
INFO:tensorflow:variable name: conv/conv2_3/kernel:0
INFO:tensorflow:variable name: conv/conv2_3/bias:0
INFO:tensorflow:variable name: conv/conv3_1/kernel:0
INFO:tensorflow:variable name: conv/conv3_1/bias:0
INFO:tensorflow:variable name: conv/conv3_2/kernel:0
INFO:tensorflow:variable name: conv/conv3_2/bias:0
INFO:tensorflow:variable name: conv/conv3_3/kernel:0
INFO:tensorflow:variable name: conv/conv3_3/bias:0
INFO:tensorflow:variable name:

In [28]:
init_op = tf.global_variables_initializer()
train_keep_prob_value = 0.8
test_keep_prob_value = 1.0

num_train_steps = 10000

with tf.Session() as sess:
    sess.run(init_op)
#     if os.path.exists('../model/checkpoint'):
# #         saver.restore(sess,FLAGS.model_dir)
#         saver.restore(sess,'../model')
#     else:
    val_data, val_labels = val_dataset.next_batch(1200)
    test_data, test_labels = test_dataset.next_batch(1200)
    for i in range(num_train_steps):
        batch_inputs, batch_labels = train_dataset.next_batch(
                hps.batch_size)
        outputs_val = sess.run([loss, accuracy, train_op, global_step, outputs, y_pred],
                                   feed_dict = {
                                       char_inputs: batch_inputs[:, 0],
                                       position_inputs: batch_inputs[:, 1],
                                       outputs: batch_labels,
                                       keep_prob: train_keep_prob_value,
                                       batch_size: hps.batch_size
                                   })
        loss_val, accuracy_val, _, global_step_val, outputs_val, y_pred_val = outputs_val
        train_auc = roc_auc_score(outputs_val, y_pred_val)
        if (global_step_val + 1) % 100 == 0:
            outputs_ = sess.run([loss, accuracy, global_step, outputs, y_pred],
                                   feed_dict = {
                                       char_inputs: val_data[:, 0],
                                       position_inputs: val_data[:, 1],
                                       outputs: val_labels,
                                       keep_prob: test_keep_prob_value,
                                       batch_size: len(val_data)
                                   })
            loss_, accuracy_, global_step_, outputs_, y_pred_ = outputs_
            val_auc = roc_auc_score(outputs_, y_pred_)
            tf.logging.info("[Val] Step: %5d, loss: %3.3f, accuracy: %3.3f, auc: %3.3f"
                                % (global_step_, loss_, accuracy_, val_auc))
            outputs_1 = sess.run([loss, accuracy, global_step, outputs, y_pred],
                                   feed_dict = {
                                       char_inputs: test_data[:, 0],
                                       position_inputs: test_data[:, 1],
                                       outputs: test_labels,
                                       keep_prob: test_keep_prob_value,
                                       batch_size: len(test_data)
                                   })
            loss_1, accuracy_1, global_step_1, outputs_1, y_pred_1 = outputs_1
            test_auc = roc_auc_score(outputs_1, y_pred_1)
            tf.logging.info("[Test] Step: %5d, loss: %3.3f, accuracy: %3.3f, auc: %3.3f"
                                % (global_step_1, loss_1, accuracy_1, test_auc))
            saver.save(sess,model_path)
        if global_step_val % 20 == 0:
            tf.logging.info("Step: %5d, loss: %3.3f, accuracy: %3.3f, auc: %3.3f"
                                % (global_step_val, loss_val, accuracy_val, train_auc))

UnknownError: Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[node conv/conv1/Conv2D (defined at <ipython-input-24-740f3e0169b0>:8)  = Conv2D[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](train_op/gradients/conv/conv1/Conv2D_grad/Conv2DBackpropFilter-0-TransposeNHWCToNCHW-LayoutOptimizer, conv/conv1/kernel/read)]]
	 [[{{node metrics/Mean_1/_35}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_901_metrics/Mean_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'conv/conv1/Conv2D', defined at:
  File "D:\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "D:\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "D:\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "D:\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "D:\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 486, in start
    self.io_loop.start()
  File "D:\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "D:\Anaconda3\lib\asyncio\base_events.py", line 422, in run_forever
    self._run_once()
  File "D:\Anaconda3\lib\asyncio\base_events.py", line 1432, in _run_once
    handle._run()
  File "D:\Anaconda3\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "D:\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "D:\Anaconda3\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "D:\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "D:\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "D:\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "D:\Anaconda3\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "D:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "D:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "D:\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "D:\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "D:\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "D:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "D:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "D:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "D:\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-27-26734c49a98c>", line 102, in <module>
    placeholders, metrics, others = create_model(hps, len(word2id), 2)
  File "<ipython-input-27-26734c49a98c>", line 26, in create_model
    conv1 = conv_wrapper(model_inputs, 1, 'conv1')
  File "<ipython-input-24-740f3e0169b0>", line 8, in conv_wrapper
    name=name)
  File "D:\Anaconda3\lib\site-packages\tensorflow\python\layers\convolutional.py", line 417, in conv2d
    return layer.apply(inputs)
  File "D:\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 817, in apply
    return self.__call__(inputs, *args, **kwargs)
  File "D:\Anaconda3\lib\site-packages\tensorflow\python\layers\base.py", line 374, in __call__
    outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
  File "D:\Anaconda3\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 757, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "D:\Anaconda3\lib\site-packages\tensorflow\python\keras\layers\convolutional.py", line 194, in call
    outputs = self._convolution_op(inputs, self.kernel)
  File "D:\Anaconda3\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 868, in __call__
    return self.conv_op(inp, filter)
  File "D:\Anaconda3\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 520, in __call__
    return self.call(inp, filter)
  File "D:\Anaconda3\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 204, in __call__
    name=self.name)
  File "D:\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_nn_ops.py", line 957, in conv2d
    data_format=data_format, dilations=dilations, name=name)
  File "D:\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "D:\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "D:\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3274, in create_op
    op_def=op_def)
  File "D:\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

UnknownError (see above for traceback): Failed to get convolution algorithm. This is probably because cuDNN failed to initialize, so try looking to see if a warning log message was printed above.
	 [[node conv/conv1/Conv2D (defined at <ipython-input-24-740f3e0169b0>:8)  = Conv2D[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](train_op/gradients/conv/conv1/Conv2D_grad/Conv2DBackpropFilter-0-TransposeNHWCToNCHW-LayoutOptimizer, conv/conv1/kernel/read)]]
	 [[{{node metrics/Mean_1/_35}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_901_metrics/Mean_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
