# 试跑EmotionX

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from transformers import BertTokenizer, TFBertModel, BertConfig
from tools import *
from random import shuffle

import argparse
import os

# os.environ['CUDA_VISIBLE_DEVICES'] = '0'

tokenizer_path = r"/home/ning/bert_conf"
data_path = r'/home/ning/bert_code/chatbot_data'
bert_path = r'bert/home/yuwang/post_train/model_tf'
bert_path = r"/home/ning/"

预训练模型

In [2]:

class PostTrainedBert(layers.Layer):
    def __init__(self):
        super(PostTrainedBert, self).__init__()
        self.bert = TFBertModel.from_pretrained(os.path.join(bert_path,'tf_model.h5'),\
                                                    config = os.path.join(bert_path,'config.json'))
        self.tokenizer = BertTokenizer.from_pretrained(os.path.join(tokenizer_path,'vocab_idea.txt'))

    def call(self, inputs, token_type_ids=None):
        return self.bert(inputs, token_type_ids=token_type_ids)


In [3]:
class PersonaEmotionIDEAModel(keras.Model):
    def __init__(self, dropout_rate):
        super(PersonaEmotionIDEAModel, self).__init__()
        self.bert = PostTrainedBert()
        self.classifier_part1 = keras.models.Sequential()
        self.classifier_part1.add(keras.Input(shape=(1536,)))
        self.classifier_part1.add(layers.Dense(384, activation='selu'))
        
        self.dropout = layers.Dropout(dropout_rate)
        
        self.classifier_part2 = keras.models.Sequential()
        self.classifier_part2.add(keras.Input(shape=(384,)))
        self.classifier_part2.add(layers.Dense(5, activation='softmax'))
    
    def attn(self, sentence_embed, persona_embed):
        weight = tf.nn.softmax(tf.matmul(sentence_embed, tf.transpose(persona_embed)))
        weight = tf.transpose(tf.stack([weight[0]]*persona_embed.get_shape()[1]))
        result = weight * persona_embed
        return tf.reduce_sum(result, axis=0)
    
    def call(self, inputs, sep_positions, yps, yp_sep_place, pps, pp_sep_place, training=False):
        embeddings = self.bert(inputs, token_type_ids=None)[0]
        yp_embeddings = self.bert(yps, token_type_ids=None)[0]
        pp_embeddings = self.bert(pps, token_type_ids=None)[0]
        embedding = embeddings[0]
        yp_embedding = yp_embeddings[0]
        pp_embedding = pp_embeddings[0]
        sentence_embed = []
        attended_persona_embeds = []
        if self.mode == "emotion_classification":
            for i, sep_position in enumerate(sep_positions):
                # 改这里
                k = embedding[sep_position[0]: sep_position[1]]
                k1 = k[np.newaxis, :]
                k2 = k1[:,0,:]
                
                k3 = k2[0]
                sentence_embed.append(k3)
                
                
                if i%2 == 0:
                    attended_embed = self.attn(k2, pp_embedding)
                else:
                    attended_embed = self.attn(k2, yp_embedding)
                attended_persona_embeds.append(attended_embed)
                
            sentence_embed = tf.stack(sentence_embed, axis=0)
            attended_persona_embeds = tf.stack(attended_persona_embeds, axis=0)
            final_embed = tf.concat([sentence_embed, attended_persona_embeds], axis=1)                
            classified_result = self.classifier_part1(final_embed)
            classified_result = self.dropout(classified_result, training=training)
            classified_result = self.classifier_part2(classified_result)
            return classified_result                
        else:
            k = embedding[sep_positions[-1][0]: sep_positions[-1][1]]
            k = k[np.newaxis, :]
            k = self.pooling(k)[0]
            return k

# k: (12, 768)
# k2: (1, 768)
# k3: (768,)   

class PersonaEmotionXModel(keras.Model):
    def __init__(self, dropout_rate, pooling_way, mode):
        super(PersonaEmotionXModel, self).__init__()
        self.bert = PostTrainedBert()
        if pooling_way == "max":
            self.pooling = layers.GlobalMaxPooling1D()
        else:
            self.pooling = layers.GlobalAveragePooling1D()
        self.classifier_part1 = keras.models.Sequential()
        self.classifier_part1.add(keras.Input(shape=(1536,)))
        self.classifier_part1.add(layers.Dense(384, activation='selu'))
        self.dropout = layers.Dropout(dropout_rate)
        self.classifier_part2 = keras.models.Sequential()
        self.classifier_part2.add(keras.Input(shape=(384,)))
        self.classifier_part2.add(layers.Dense(5, activation='softmax'))
        self.mode = mode

    def attn(self, sentence_embed, persona_embed):
        weight = tf.nn.softmax(tf.matmul(sentence_embed, tf.transpose(persona_embed)))
        weight = tf.transpose(tf.stack([weight[0]]*persona_embed.get_shape()[1]))
        result = weight * persona_embed
        return tf.reduce_sum(result, axis=0)

    def call(self, inputs, sep_positions, yps, yp_sep_place, pps, pp_sep_place, training=False):
        #pos = self.get_pos(inputs, sep_positions)
        embeddings = self.bert(inputs, token_type_ids=None)[0]
        yp_embeddings = self.bert(yps, token_type_ids=None)[0]
        pp_embeddings = self.bert(pps, token_type_ids=None)[0]
        
        embedding = embeddings[0]
        yp_embedding = yp_embeddings[0]
        pp_embedding = pp_embeddings[0]
        sentence_embed = []
        attended_persona_embeds = []
        if self.mode == "emotion_classification":
            for i, sep_position in enumerate(sep_positions):
                k = embedding[sep_position[0]: sep_position[1]]
#                 print('k:',k.shape)
                k1 = k[np.newaxis, :]
                k2 = self.pooling(k1)
#                 print('k2:',k2.shape)
                k3 = k2[0]
#                 print('k3:',k3.shape)
                sentence_embed.append(k3)
                if i%2 == 0:
                    attended_embed = self.attn(k2, pp_embedding)
                else:
                    attended_embed = self.attn(k2, yp_embedding)
                attended_persona_embeds.append(attended_embed)
            sentence_embed = tf.stack(sentence_embed, axis=0)
            attended_persona_embeds = tf.stack(attended_persona_embeds, axis=0)
            final_embed = tf.concat([sentence_embed, attended_persona_embeds], axis=1)

            # sentence_embed = sentence_embed[np.newaxis, :]
            # pooled_embed = self.pooling(sentence_embed)
            classified_result = self.classifier_part1(final_embed)
            classified_result = self.dropout(classified_result, training=training)
            classified_result = self.classifier_part2(classified_result)
            return classified_result
        else:
            k = embedding[sep_positions[-1][0]: sep_positions[-1][1]]
            k = k[np.newaxis, :]
            k = self.pooling(k)[0]
            return k

In [4]:
epoch = 1
save_path = ''
def n_loss(result, label, class_frequency):
    LCE = tf.reduce_mean(-(label * tf.math.log(tf.clip_by_value(result, 1e-8, 1.0)) + (1 - label) * tf.math.log(tf.clip_by_value(1-result, 1e-8, 1.0))) * class_frequency) / len(
        class_frequency)
    return LCE
def train():
#     log_file = open("log.txt", "w+", encoding="utf8")
    #loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    #bert = PostTrainedBert(args.load_post_trained_bert, args.post_trained_bert_file)
    
#     model = PersonaEmotionXModel(0.1, 'max', "emotion_classification")
    model = PersonaEmotionIDEAModel(0.1, 'max', "emotion_classification")

    
    tokenizer = model.bert.tokenizer
    label_seq = load_label_seq("./dialogue_data/dialogue_label_seq.txt")

    data, frequency, tr_step, tr_nums = persona_label_data("./dialogue_data/train_contexts.txt", "./dialogue_data/train_emotions.txt", "./dialogue_data/train_your_personas.txt", "./dialogue_data/train_partner_personas.txt", tokenizer, label_seq)
    eval_data, _, eval_step, _ = persona_label_data("./dialogue_data/test_contexts.txt", "./dialogue_data/test_emotions.txt", "./dialogue_data/test_your_personas.txt", "./dialogue_data/test_partner_personas.txt", tokenizer, label_seq)


    optimizer = keras.optimizers.Adam(2e-5, epsilon=1e-7, clipvalue=5)
    for i in range(epoch):
        total_loss = 0.
        print("Training for epoch {}".format(i))
        shuffle(data)
        train_true_num = 0.
        step = 0
        label_num = 0
        for d in data:
            x = tf.convert_to_tensor(np.array(d[0])[np.newaxis, :], dtype=np.int32)
            yp = tf.convert_to_tensor(np.array(d[2])[np.newaxis, :], dtype=np.int32)
            pp = tf.convert_to_tensor(np.array(d[4])[np.newaxis, :], dtype=np.int32)
            step += 1
            labels = []
            poss = []
            for y, s in d[1]:
                labels.append(y)
                poss.append(s)
            labels = tf.stack(labels, axis=0)
            with tf.GradientTape() as tape:
                predictions = model(x, poss, yp, d[3], pp, d[5], True)
                #loss_value = focal_loss(predictions, labels, 0.25, 2)
                loss_value = n_loss(predictions, labels, frequency)
            gradients = tape.gradient(loss_value, model.trainable_weights)
            optimizer.apply_gradients(zip(gradients, model.trainable_weights))
            a = tf.argmax(predictions[:, :-1], 1)
            b = tf.argmax(labels, 1)
            for q in range(b.get_shape()[0]):
                if b[q] != 4:
                    if a[q] == b[q]:
                        train_true_num += 1
                    label_num += 1
            total_loss += loss_value
            if step % 1000 == 0:
                print("epoch: {} step: {} training loss: {}".format(i, step, total_loss / step))
        print("Train acc for epoch {} is {}".format(i, train_true_num / label_num))
        true_num = 0.
        compare_result = []
        test_step = 0
        for d in eval_data:
            x = tf.convert_to_tensor(np.array(d[0])[np.newaxis, :], dtype=np.int32)
            yp = tf.convert_to_tensor(np.array(d[2])[np.newaxis, :], dtype=np.int32)
            pp = tf.convert_to_tensor(np.array(d[4])[np.newaxis, :], dtype=np.int32)
            labels = []
            poss = []
            for y, s in d[1]:
                labels.append(y)
                poss.append(s)
            labels = tf.stack(labels, axis=0)
            result = model(x, poss, yp, d[3], pp, d[5], False)
            a = tf.argmax(result[:, :-1], 1)
            b = tf.argmax(labels, 1)
            for q in range(b.get_shape()[0]):
                if b[q] != 4:
                    if a[q] == b[q]:
                        true_num += 1
                    test_step += 1
                    k = [a[q], b[q]]
                    compare_result.append(k)
        print("Eval acc for epoch {} is {}".format(i, true_num/test_step))
        micro_f1, f1s = cal_score(compare_result, label_seq)
        print("micro_f1: {}".format(micro_f1))
        print(label_seq)
        print("F1s: {}".format(f1s))
#         log_file.write("F1s: {}".format(f1s)+"\n")
        print("------------------------")
        os.makedirs(save_path+"checkpoint_{}/".format(i))
        model.save_weights(save_path+"checkpoint_{}/".format(i)+"checkpoint.h5")


In [5]:
save_path = ''

def cal_score(results, label_seq):
    TPs = np.zeros([len(label_seq)-1])
    FPs = np.zeros([len(label_seq)-1])
    FNs = np.zeros([len(label_seq)-1])
    for b, a in results:
        '''a = tf.argmax(y)
        b = tf.argmax(result)'''
        if b == a:
            TPs[a] += 1
        else:
            FPs[a] += 1
            FNs[b] += 1
    '''TPs = TPs[0:-1]
    FPs = FPs[0:-1]
    FNs = FNs[0:-1]'''
    precisions = TPs/(TPs+FPs)
    recalls = TPs/(TPs+FNs)
    F1s = 2*precisions*recalls/(precisions+recalls)
    micro_p = np.sum(TPs)/np.sum(TPs+FPs)
    micro_r = np.sum(TPs)/np.sum(TPs+FNs)
    micro_f1 = 2*micro_p*micro_r/(micro_p+micro_r)
    return micro_f1, F1s

class PosEmotionXModel(keras.Model):
    def __init__(self, dropout_rate):
        super(PosEmotionXModel, self).__init__()
        self.bert = PostTrainedBert()
        if pooling_way == "max":
            self.pooling = layers.GlobalMaxPooling1D()
        else:
            self.pooling = layers.GlobalAveragePooling1D()
        self.classifier_part1 = keras.models.Sequential()
        self.classifier_part1.add(keras.Input(shape=(768,)))
        self.classifier_part1.add(layers.Dense(384, activation='selu'))
        self.dropout = layers.Dropout(dropout_rate)
        self.classifier_part2 = keras.models.Sequential()
        self.classifier_part2.add(keras.Input(shape=(384,)))
        self.classifier_part2.add(layers.Dense(5, activation='softmax'))
        self.mode = mode

    def get_pos(self, inputs, sep_positions):
        pos = [0]*(sep_positions[0]+1)+[1]*(sep_positions[1]-sep_positions[0]-1)+[0]*(int(inputs.get_shape()[1])-sep_positions[1])
        pos = tf.convert_to_tensor(np.array(pos)[np.newaxis, :], dtype=np.int32)
        return pos

    def call(self, inputs, sep_positions, training=False):
        #pos = self.get_pos(inputs, sep_positions)
        embeddings = self.bert(inputs, token_type_ids=None)[0]
        embedding = embeddings[0]
        sentence_embed = []
        if self.mode == "emotion_classification":
            for sep_position in sep_positions:
                k = embedding[sep_position[0]: sep_position[1]]
                k = k[np.newaxis, :]
                k = self.pooling(k)[0]
                sentence_embed.append(k)
            sentence_embed = tf.stack(sentence_embed, axis=0)
            # sentence_embed = sentence_embed[np.newaxis, :]
            # pooled_embed = self.pooling(sentence_embed)
            classified_result = self.classifier_part1(sentence_embed)
            classified_result = self.dropout(classified_result, training=training)
            classified_result = self.classifier_part2(classified_result)
            return classified_result
        else:
            k = embedding[sep_positions[-1][0]: sep_positions[-1][1]]
            k = k[np.newaxis, :]
            k = self.pooling(k)[0]
            return k




class PosEmotionXModel(keras.Model):
    def __init__(self, dropout_rate, pooling_way, mode):
        super(PosEmotionXModel, self).__init__()
        self.bert = PostTrainedBert()
        if pooling_way == "max":
            self.pooling = layers.GlobalMaxPooling1D()
        else:
            self.pooling = layers.GlobalAveragePooling1D()
        self.classifier_part1 = keras.models.Sequential()
        self.classifier_part1.add(keras.Input(shape=(768,)))
        self.classifier_part1.add(layers.Dense(384, activation='selu'))
        self.dropout = layers.Dropout(dropout_rate)
        self.classifier_part2 = keras.models.Sequential()
        self.classifier_part2.add(keras.Input(shape=(384,)))
        self.classifier_part2.add(layers.Dense(5, activation='softmax'))
        self.mode = mode

    def get_pos(self, inputs, sep_positions):
        pos = [0]*(sep_positions[0]+1)+[1]*(sep_positions[1]-sep_positions[0]-1)+[0]*(int(inputs.get_shape()[1])-sep_positions[1])
        pos = tf.convert_to_tensor(np.array(pos)[np.newaxis, :], dtype=np.int32)
        return pos

    def call(self, inputs, sep_positions, training=False):
        #pos = self.get_pos(inputs, sep_positions)
        embeddings = self.bert(inputs, token_type_ids=None)[0]
        embedding = embeddings[0]
        sentence_embed = []
        if self.mode == "emotion_classification":
            for sep_position in sep_positions:
                k = embedding[sep_position[0]: sep_position[1]]
                k = k[np.newaxis, :]
                k = self.pooling(k)[0]
                sentence_embed.append(k)
            sentence_embed = tf.stack(sentence_embed, axis=0)
            # sentence_embed = sentence_embed[np.newaxis, :]
            # pooled_embed = self.pooling(sentence_embed)
            classified_result = self.classifier_part1(sentence_embed)
            classified_result = self.dropout(classified_result, training=training)
            classified_result = self.classifier_part2(classified_result)
            return classified_result
        else:
            k = embedding[sep_positions[-1][0]: sep_positions[-1][1]]
            k = k[np.newaxis, :]
            k = self.pooling(k)[0]
            return k
        

def test():
    #bert = PostTrainedBert(args.load_post_trained_bert, args.post_trained_bert_file)
    model = PosEmotionXModel(0.1, 'max', "emotion_classification")
    model.load_weights("/home/ning/bert_code/checkpoint_0")
    tokenizer = model.bert.tokenizer
    label_seq = load_label_seq("./dialogue_data/dialogue_label_seq.txt")
    test_data, _, _, _ = load_label_data("./dialogue_data/test_contexts.txt", "./dialogue_data/test_emotions.txt", tokenizer, label_seq)
    true_num = 0.
    compare_result = []
    test_step = 0
    for d in test_data:
        x = tf.convert_to_tensor(np.array(d[0])[np.newaxis, :], dtype=np.int32)
        labels = []
        poss = []
        for y, s in d[1]:
            labels.append(y)
            poss.append(s)
        labels = tf.stack(labels, axis=0)
        result = model(x, poss, True)
        a = tf.argmax(result[:, :-1], 1)
        b = tf.argmax(labels, 1)
        for q in range(b.get_shape()[0]):
            if b[q] != 4:
                if a[q] == b[q]:
                    true_num += 1
                test_step += 1
                k = [a[q], b[q]]
                compare_result.append(k)
    print("Test acc is {}".format(true_num / test_step))
    micro_f1, f1s = cal_score(compare_result, label_seq)
    print("micro_f1: {}".format(micro_f1))
    print(label_seq)
    print("F1s: {}".format(f1s))
    print("------------------------")


In [6]:
train()

TypeError: __init__() takes 2 positional arguments but 4 were given

In [6]:
test()

ResourceExhaustedError: OOM when allocating tensor with shape[30522,768] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:TruncatedNormal]