In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from transformers import BertTokenizer, TFBertModel, BertConfig
from tool2 import *
from random import shuffle

import argparse
import os

# os.environ['CUDA_VISIBLE_DEVICES'] = '0'

tokenizer_path = r"/home/ning/bert_conf"
data_path = r'/home/ning/bert_code/chatbot_data'
bert_path = r'bert/home/yuwang/post_train/model_tf'
bert_path = r"/home/ning/"

In [2]:
class PostTrainedBert(layers.Layer):
    def __init__(self):
        super(PostTrainedBert, self).__init__()
        self.bert = TFBertModel.from_pretrained(os.path.join(bert_path,'tf_model.h5'),\
                                                    config = os.path.join(bert_path,'config.json'))
        self.tokenizer = BertTokenizer.from_pretrained(os.path.join(tokenizer_path,'vocab_idea.txt'))

    def call(self, inputs, token_type_ids=None):
        return self.bert(inputs, token_type_ids=token_type_ids)
    
class PersonaEmotionIDEAModel(keras.Model):
    def __init__(self, dropout_rate,mode):
        super(PersonaEmotionIDEAModel, self).__init__()
        self.bert = PostTrainedBert()
        self.classifier_part1 = keras.models.Sequential()
        self.classifier_part1.add(keras.Input(shape=(1536,)))
        self.classifier_part1.add(layers.Dense(384, activation='selu'))
        
        self.dropout = layers.Dropout(dropout_rate)
        
        self.classifier_part2 = keras.models.Sequential()
        self.classifier_part2.add(keras.Input(shape=(384,)))
        self.classifier_part2.add(layers.Dense(5, activation='softmax'))
        self.mode = mode
        
        
    def attn(self, sentence_embed, persona_embed):
        weight = tf.nn.softmax(tf.matmul(sentence_embed, tf.transpose(persona_embed)))
        weight = tf.transpose(tf.stack([weight[0]]*persona_embed.get_shape()[1]))
        result = weight * persona_embed
        return tf.reduce_sum(result, axis=0)
    
    def call(self, inputs, sep_positions, yps, yp_sep_place, pps, pp_sep_place, training=False):
        embeddings = self.bert(inputs, token_type_ids=None)[0]
        yp_embeddings = self.bert(yps, token_type_ids=None)[0]
        pp_embeddings = self.bert(pps, token_type_ids=None)[0]
        embedding = embeddings[0]
        yp_embedding = yp_embeddings[0]
        pp_embedding = pp_embeddings[0]
        sentence_embed = []
        attended_persona_embeds = []
        if self.mode == "emotion_classification":
            for i, sep_position in enumerate(sep_positions):
                # 改这里
                k = embedding[sep_position[0]: sep_position[1]]
                k1 = k[np.newaxis, :]
                k2 = k1[:,0,:]
                
                k3 = k2[0]
                sentence_embed.append(k3)
                
                
                if i%2 == 0:
                    attended_embed = self.attn(k2, pp_embedding)
                else:
                    attended_embed = self.attn(k2, yp_embedding)
                attended_persona_embeds.append(attended_embed)
                
            sentence_embed = tf.stack(sentence_embed, axis=0)
            attended_persona_embeds = tf.stack(attended_persona_embeds, axis=0)
            final_embed = tf.concat([sentence_embed, attended_persona_embeds], axis=1)                
            classified_result = self.classifier_part1(final_embed)
            classified_result = self.dropout(classified_result, training=training)
            classified_result = self.classifier_part2(classified_result)
            return classified_result                
        else:
            k = embedding[sep_positions[-1][0]: sep_positions[-1][1]]
            k = k[np.newaxis, :]
            k = self.pooling(k)[0]
            return k

In [3]:
epoch = 10
save_path = ''
def n_loss(result, label, class_frequency):
    LCE = tf.reduce_mean(-(label * tf.math.log(tf.clip_by_value(result, 1e-8, 1.0)) + (1 - label) * tf.math.log(tf.clip_by_value(1-result, 1e-8, 1.0))) * class_frequency) / len(
        class_frequency)
    return LCE
def train():

    save_path = './saved_model/'
    model = PersonaEmotionIDEAModel(0.1,"emotion_classification")

    
    tokenizer = model.bert.tokenizer
    label_seq = load_label_seq("./dialogue_data/dialogue_label_seq.txt")

    data, frequency, tr_step, tr_nums = persona_label_data("./dialogue_data/train_contexts.txt", "./dialogue_data/train_emotions.txt", "./dialogue_data/train_your_personas.txt", "./dialogue_data/train_partner_personas.txt", tokenizer, label_seq)
    eval_data, _, eval_step, _ = persona_label_data("./dialogue_data/test_contexts.txt", "./dialogue_data/test_emotions.txt", "./dialogue_data/test_your_personas.txt", "./dialogue_data/test_partner_personas.txt", tokenizer, label_seq)


    optimizer = keras.optimizers.Adam(2e-5, epsilon=1e-7, clipvalue=5)
    for i in range(epoch):
        total_loss = 0.
        print("Training for epoch {}".format(i))
        shuffle(data)
        train_true_num = 0.
        step = 0
        label_num = 0
        for d in data:
            x = tf.convert_to_tensor(np.array(d[0])[np.newaxis, :], dtype=np.int32)
            yp = tf.convert_to_tensor(np.array(d[2])[np.newaxis, :], dtype=np.int32)
            pp = tf.convert_to_tensor(np.array(d[4])[np.newaxis, :], dtype=np.int32)
            step += 1
            labels = []
            poss = []
            for y, s in d[1]:
                labels.append(y)
                poss.append(s)
            labels = tf.stack(labels, axis=0)
            with tf.GradientTape() as tape:
                predictions = model(x, poss, yp, d[3], pp, d[5], True)
                #loss_value = focal_loss(predictions, labels, 0.25, 2)
                loss_value = n_loss(predictions, labels, frequency)
            gradients = tape.gradient(loss_value, model.trainable_weights)
            optimizer.apply_gradients(zip(gradients, model.trainable_weights))
            a = tf.argmax(predictions[:, :-1], 1)
            b = tf.argmax(labels, 1)
            for q in range(b.get_shape()[0]):
                if b[q] != 4:
                    if a[q] == b[q]:
                        train_true_num += 1
                    label_num += 1
            total_loss += loss_value
            if step % 1000 == 0:
                print("epoch: {} step: {} training loss: {}".format(i, step, total_loss / step))
        print("Train acc for epoch {} is {}".format(i, train_true_num / label_num))
        true_num = 0.
        compare_result = []
        test_step = 0
        for d in eval_data:
            x = tf.convert_to_tensor(np.array(d[0])[np.newaxis, :], dtype=np.int32)
            yp = tf.convert_to_tensor(np.array(d[2])[np.newaxis, :], dtype=np.int32)
            pp = tf.convert_to_tensor(np.array(d[4])[np.newaxis, :], dtype=np.int32)
            labels = []
            poss = []
            for y, s in d[1]:
                labels.append(y)
                poss.append(s)
            labels = tf.stack(labels, axis=0)
            result = model(x, poss, yp, d[3], pp, d[5], False)
            a = tf.argmax(result[:, :-1], 1)
            b = tf.argmax(labels, 1)
            for q in range(b.get_shape()[0]):
                if b[q] != 4:
                    if a[q] == b[q]:
                        true_num += 1
                    test_step += 1
                    k = [a[q], b[q]]
                    compare_result.append(k)
        print("Eval acc for epoch {} is {}".format(i, true_num/test_step))
        micro_f1, f1s = cal_score(compare_result, label_seq)
        print("micro_f1: {}".format(micro_f1))
        print(label_seq)
        print("F1s: {}".format(f1s))
#         log_file.write("F1s: {}".format(f1s)+"\n")
        print("------------------------")
#         os.makedirs(save_path+"checkpoint_{}/".format(i))
#         model.save_weights(save_path+"checkpoint_{}/".format(i)+"checkpoint.h5")
#         model.save(save_path+"checkpoint_{}/".format(i)+"checkpoint.h5")


In [4]:
saved_path = './saved_model/'

def cal_score(results, label_seq):
    TPs = np.zeros([len(label_seq)-1])
    FPs = np.zeros([len(label_seq)-1])
    FNs = np.zeros([len(label_seq)-1])
    for b, a in results:
        '''a = tf.argmax(y)
        b = tf.argmax(result)'''
        if b == a:
            TPs[a] += 1
        else:
            FPs[a] += 1
            FNs[b] += 1
    '''TPs = TPs[0:-1]
    FPs = FPs[0:-1]
    FNs = FNs[0:-1]'''
    precisions = TPs/(TPs+FPs)
    recalls = TPs/(TPs+FNs)
    F1s = 2*precisions*recalls/(precisions+recalls)
    micro_p = np.sum(TPs)/np.sum(TPs+FPs)
    micro_r = np.sum(TPs)/np.sum(TPs+FNs)
    micro_f1 = 2*micro_p*micro_r/(micro_p+micro_r)
    return micro_f1, F1s

In [5]:


def cal_score(results, label_seq):
    TPs = np.zeros([len(label_seq)-1])
    FPs = np.zeros([len(label_seq)-1])
    FNs = np.zeros([len(label_seq)-1])
    for b, a in results:
        '''a = tf.argmax(y)
        b = tf.argmax(result)'''
        if b == a:
            TPs[a] += 1
        else:
            FPs[a] += 1
            FNs[b] += 1
    '''TPs = TPs[0:-1]
    FPs = FPs[0:-1]
    FNs = FNs[0:-1]'''
    precisions = TPs/(TPs+FPs)
    recalls = TPs/(TPs+FNs)
    F1s = 2*precisions*recalls/(precisions+recalls)
    micro_p = np.sum(TPs)/np.sum(TPs+FPs)
    micro_r = np.sum(TPs)/np.sum(TPs+FNs)
    micro_f1 = 2*micro_p*micro_r/(micro_p+micro_r)
    return micro_f1, F1s




In [None]:
train()

Some weights of the model checkpoint at /home/ning/tf_model.h5 were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the model checkpoint at /home/ning/tf_model.h5.
If your task is similar to the task the model of the ckeckpoint was trained on, you can already use TFBertModel for predictions without further training.
Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated


Data num: 300
Max len: 446
4404
Data num: 50
Max len: 270
786
Training for epoch 0












Train acc for epoch 0 is 0.5455810716771051
Eval acc for epoch 0 is 0.5412371134020618
micro_f1: 0.5412371134020618
['n', 'j', 'sa', 'su', 'out-of-domain']
F1s: [0.5821727 0.5672043       nan       nan]
------------------------
Training for epoch 1
















Train acc for epoch 1 is 0.651356993736952
Eval acc for epoch 1 is 0.6301546391752577
micro_f1: 0.6301546391752577
['n', 'j', 'sa', 'su', 'out-of-domain']
F1s: [0.70588235 0.59823009 0.17777778        nan]
------------------------
Training for epoch 2












Train acc for epoch 2 is 0.7385757364880539
Eval acc for epoch 2 is 0.6262886597938144
micro_f1: 0.6262886597938144
['n', 'j', 'sa', 'su', 'out-of-domain']
F1s: [0.67352185 0.625      0.44444444        nan]
------------------------
Training for epoch 3












Train acc for epoch 3 is 0.8000463929482718
Eval acc for epoch 3 is 0.6237113402061856
micro_f1: 0.6237113402061856
['n', 'j', 'sa', 'su', 'out-of-domain']
F1s: [0.69575758 0.6039783  0.36477987 0.13333333]
------------------------
Training for epoch 4












Train acc for epoch 4 is 0.8603572257016934
Eval acc for epoch 4 is 0.6623711340206185
micro_f1: 0.6623711340206185
['n', 'j', 'sa', 'su', 'out-of-domain']
F1s: [0.72617612 0.64814815 0.44311377 0.125     ]
------------------------
Training for epoch 5












Train acc for epoch 5 is 0.9118533982834609
Eval acc for epoch 5 is 0.6765463917525774
micro_f1: 0.6765463917525774
['n', 'j', 'sa', 'su', 'out-of-domain']
F1s: [0.74829932 0.63969171 0.41176471 0.13333333]
------------------------
Training for epoch 6












Train acc for epoch 6 is 0.9315704012990026
Eval acc for epoch 6 is 0.6765463917525774
micro_f1: 0.6765463917525774
['n', 'j', 'sa', 'su', 'out-of-domain']
F1s: [0.71673254 0.68902439 0.42975207 0.125     ]
------------------------
Training for epoch 7














Train acc for epoch 7 is 0.9471120389700766
Eval acc for epoch 7 is 0.6688144329896907
micro_f1: 0.6688144329896907
['n', 'j', 'sa', 'su', 'out-of-domain']
F1s: [0.72078721 0.66437177 0.44604317 0.21052632]
------------------------
Training for epoch 8












Train acc for epoch 8 is 0.9589422407794015
Eval acc for epoch 8 is 0.6649484536082474
micro_f1: 0.6649484536082474
['n', 'j', 'sa', 'su', 'out-of-domain']
F1s: [0.72235872 0.65849387 0.41891892 0.31578947]
------------------------
Training for epoch 9
