In [1]:
import tensorflow as tf
from tensorflow import keras

from transformers import TFBertModel,BertTokenizer,TFBertForSequenceClassification
import os
import json

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
emotionpush_data_path = r'/home/ning/dataset'
bert_path = r"/home/ning/bert_conf"
cache_dir=r'/home/ning/bert_conf/bert-base-uncased-cache'

ckpt_path = r'/home/ning/models.ckpt'

train_df = pd.read_pickle(os.path.join(emotionpush_data_path,'emotionpush_train.pkl'))
test_df = pd.read_pickle(os.path.join(emotionpush_data_path,'emotionpush_test.pkl'))

train_df = train_df[train_df['emotion'].isin([ 'neutral', 'joy', 'sadness', 'anger'])]
test_df = test_df[test_df['emotion'].isin([ 'neutral', 'joy', 'sadness', 'anger'])]

tokenizer = BertTokenizer.from_pretrained(os.path.join(bert_path,'vocab_idea.txt'))
# model = TFBertForSequenceClassification.from_pretrained(os.path.join(bert_path,'tf_model.h5'),config = os.path.join(bert_path,'config.json'),num_labels=4)
# model = TFBertModel.from_pretrained(os.path.join(bert_path,'tf_model.h5'),config = os.path.join(bert_path,'config.json'))


Calling BertTokenizer.from_pretrained() with the path to a single file or url is deprecated


In [3]:
def encode_sentence_with_speaker(speaker,utterance,tokenizer,sos):
    if sos:
        tokens = ['[CLS]']
    else:
        tokens = []

    if utterance in [';)',':)','<3',':(','LMAO','LOL','OMG','','lol','lmao','omg']:
        return tokenizer.encode([utterance.upper()])
        
    
    if speaker not in ['other','None']:
        spk_token = '['+speaker+']'
        tokens.append(spk_token)
        tokens.append('[says]')
        tokens.extend(list(tokenizer.tokenize(utterance)))
        tokens.append('[SEP]')
    
    else:
        tokens.extend(list(tokenizer.tokenize(utterance)))
    
    return tokenizer.convert_tokens_to_ids(tokens)
    
    


def bert_encode(dataframe, tokenizer,single=False):
    
    if not single:
        num_examples = len(dataframe.index)
        sentence1 = tf.ragged.constant([encode_sentence_with_speaker(s[0],s[1],tokenizer,True) for s in dataframe.values])

        sentence2 = tf.ragged.constant([encode_sentence_with_speaker(s[2],s[3],tokenizer,False) for s in dataframe.values])

        input_word_ids = tf.concat([sentence1, sentence2], axis=-1)

        input_mask = tf.ones_like(input_word_ids).to_tensor()

        type_s1 = tf.zeros_like(sentence1)
        type_s2 = tf.ones_like(sentence2)
        input_type_ids = tf.concat([type_s1, type_s2], axis=-1).to_tensor()


        inputs = {
            'input_ids': input_word_ids.to_tensor(),
            'attention_mask': input_mask,
            'token_type_ids': input_type_ids}
    
    else:
        num_examples = len(dataframe.index)
        input_word_ids = tf.ragged.constant([encode_sentence_with_speaker(s[0],s[1],tokenizer,True) for s in dataframe.values])
        
        input_mask = tf.ones_like(input_word_ids).to_tensor()
        
        input_type_ids = tf.zeros_like(input_word_ids).to_tensor()
        
        inputs = {
            'input_ids': input_word_ids.to_tensor(),
            'attention_mask': input_mask,
            'token_type_ids': input_type_ids}
        
        
    return inputs

In [4]:
train_target = train_df['emotion']
test_target = test_df['emotion']


train_features = bert_encode(train_df, tokenizer,single = False)
test_features = bert_encode(test_df, tokenizer,single = False)

ems = train_target.unique()
def convert(emotion):
    return np.where(ems == emotion)[0][0]

train_labels = np.array(list(map(convert,train_target)))
test_labels = np.array(list(map(convert,test_target)))

In [5]:
train_f_batch = {'input_ids': train_features['input_ids'][0:5],
        'attention_mask': train_features['attention_mask'][0:5],
        'token_type_ids': train_features['token_type_ids'][0:5]}
train_l_batch = train_labels[0:5]

In [22]:
class PretrainedBert(keras.layers.Layer):
    def __init__(self,bert_file,bert_config):
        super(PretrainedBert,self).__init__()
        self.bert = TFBertModel.from_pretrained(bert_file,config = bert_config)
        
    def call(self,inputs):
        return self.bert(inputs)

class ChatEmotion(keras.Model):
    def __init__(self,bert_file,bert_config):
        super(ChatEmotion, self).__init__()
        self.bert = PretrainedBert(bert_file,bert_config)        
        
        self.dense = keras.layers.Dense(4, activation='softmax')
    
    def call(self,inputs,embedding = True):
        cls_embeddings = self.bert(inputs)[0][:,0,:]
        
        if embedding:
            return cls_embeddings
        else:
            cls = self.dense(cls_embeddings)
            return cls

In [23]:
model = ChatEmotion(os.path.join(bert_path,'tf_model.h5'),bert_config = os.path.join(bert_path,'config.json'))

Some weights of the model checkpoint at /home/ning/bert_conf/tf_model.h5 were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the model checkpoint at /home/ning/bert_conf/tf_model.h5.
If your task is similar to the task the model of the ckeckpoint was trained on, you can already use TFBertModel for predictions without further training.


In [44]:
model(train_f_batch,0)[1]

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0.32542852, 0.16106938, 0.3464705 , 0.1670316 ], dtype=float32)>

In [55]:
loss_object = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = keras.optimizers.Adam(learning_rate=2.5e-7)
with tf.GradientTape() as tape:
    prediction = model(train_f_batch,False)
    print(prediction)
    pred_label = np.argmax(prediction.numpy(),axis=1)
    print(pred_label)
    loss_value  = loss_object(y_true=train_l_batch, y_pred=prediction)
    print(loss_value)

grads = tape.gradient(loss_value, model.trainable_weights)
optimizer.apply_gradients(zip(grads, model.trainable_variables))

tf.Tensor(
[[0.31598407 0.34923843 0.23809561 0.09668195]
 [0.41697124 0.14555547 0.27276433 0.16470896]
 [0.287455   0.18570375 0.12795821 0.398883  ]
 [0.26985553 0.1652068  0.2148574  0.35008034]
 [0.47818822 0.18381248 0.10415643 0.2338429 ]], shape=(5, 4), dtype=float32)
[1 0 3 3 0]
tf.Tensor(1.2882814, shape=(), dtype=float32)


<tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>

In [5]:


class ChatEmotion(keras.Model):
    def __init__(self,bert_file,bert_config):
        super(ChatEmotion, self).__init__()
        self.bert = TFBertModel.from_pretrained(bert_file,config = bert_config)        
        self.dense = keras.layers.Dense(4, activation='softmax')
    
    def call(self,inputs,embedding = True):
        cls_embeddings = self.bert(inputs)[0][:,0,:]
        
        if embedding:
            return cls_embeddings
        else:
            cls = self.dense(cls_embeddings)
            return cls

In [6]:
model = ChatEmotion(os.path.join(bert_path,'tf_model.h5'),bert_config = os.path.join(bert_path,'config.json'))

Some weights of the model checkpoint at /home/ning/bert_conf/tf_model.h5 were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the model checkpoint at /home/ning/bert_conf/tf_model.h5.
If your task is similar to the task the model of the ckeckpoint was trained on, you can already use TFBertModel for predictions without further training.


In [28]:
# for _ in range(5):
loss_object = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = keras.optimizers.Adam(learning_rate=2.5e-7)
with tf.GradientTape() as tape:
    prediction = model(train_f_batch,False)
    print(prediction)
    pred_label = np.argmax(prediction.numpy(),axis=1)
    print(pred_label)
    loss_value  = loss_object(y_true=train_l_batch, y_pred=prediction)
    print(loss_value)
    print(train_l_batch)

grads = tape.gradient(loss_value, model.trainable_weights)
optimizer.apply_gradients(zip(grads, model.trainable_variables))

tf.Tensor(
[[0.16600533 0.11135951 0.24714075 0.4754944 ]
 [0.0879107  0.15858485 0.13441016 0.61909425]
 [0.1681401  0.11430804 0.22191021 0.49564165]
 [0.14224131 0.09006726 0.16430868 0.6033827 ]
 [0.09710054 0.13526304 0.1330557  0.63458073]], shape=(5, 4), dtype=float32)
[3 3 3 3 3]
tf.Tensor(1.5230981, shape=(), dtype=float32)
[0 0 0 0 0]


<tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>

In [None]:
class PretrainedBert(keras.layers.Layer):
    def __init__(self,bert_file,bert_config):
        super(PretrainedBert,self).__init__()
        self.bert = TFBertModel.from_pretrained(bert_file,config = bert_config)
        
    def call(self,inputs):
        return self.bert(inputs)

class ChatEmotion(keras.Model):
    def __init__(self,bert_file,bert_config):
        super(ChatEmotion, self).__init__()
        self.bert = PretrainedBert(bert_file,bert_config)        
        
        self.dense = keras.layers.Dense(4, activation='softmax')
    
    def call(self,inputs,embedding = True):
        cls_embeddings = self.bert(inputs)[0][:,0,:]
        
        if embedding:
            return cls_embeddings
        else:
            cls = self.dense(cls_embeddings)
            return cls


class ChatEmotion(keras.Model):
    def __init__(self,bert_file,bert_config):
        super(ChatEmotion, self).__init__()
        self.bert = TFBertModel.from_pretrained(bert_file,config = bert_config)        
        self.dense = keras.layers.Dense(4, activation='softmax')
    
    def call(self,inputs,embedding = True):
        cls_embeddings = self.bert(inputs)[0][:,0,:]
        
        if embedding:
            return cls_embeddings
        else:
            cls = self.dense(cls_embeddings)
            return cls

In [5]:
cls = TFBertForSequenceClassification.from_pretrained(os.path.join(bert_path,'tf_model.h5'),config = os.path.join(bert_path,'config.json'),num_labels=4)

Some weights of the model checkpoint at /home/ning/bert_conf/tf_model.h5 were not used when initializing TFBertForSequenceClassification: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of TFBertForSequenceClassification were not initialized from the model checkpoint at /home/ning/bert_conf/tf_model.h5 and are newly initialized: ['dropout_37', 'classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
cls.layers[0](train_f_batch)[0][:,0,:]

<tf.Tensor: shape=(5, 768), dtype=float32, numpy=
array([[-0.23915076, -0.25951898,  0.03390481, ..., -0.11725874,
         0.21749967,  0.6405008 ],
       [-0.4463973 , -0.03729638,  0.35437933, ..., -0.33169132,
         0.2951671 ,  0.41419148],
       [-0.03577711, -0.19770059,  0.51548743, ..., -0.5142569 ,
         0.17428611,  0.31457123],
       [-0.02149344, -0.40951985,  0.4850874 , ..., -0.5744982 ,
        -0.05005437,  0.6261888 ],
       [ 0.31917137, -0.03037216,  0.8768659 , ..., -0.03596294,
        -0.06128835, -0.20560619]], dtype=float32)>

In [34]:
loss_object = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = keras.optimizers.Adam(learning_rate=2.5e-7)
with tf.GradientTape() as tape:
    prediction = cls.layers[0](train_f_batch)[0][:,0,:]
    pred_label = np.argmax(prediction.numpy(),axis=1)
    loss_value  = loss_object(y_true=train_l_batch, y_pred=prediction)
    print(loss_value)
    print(train_l_batch)

grads = tape.gradient(loss_value, model.trainable_weights)
optimizer.apply_gradients(zip(grads, model.trainable_variables))

<transformers.modeling_tf_bert.TFBertForSequenceClassification at 0x7f9f44112890>

In [None]:
cls.layers[0].get_weights()[0]

In [10]:
cls.layers[0](train_f_batch)[0][0,0,:5]

<tf.Tensor: shape=(5,), dtype=float32, numpy=
array([-0.23915076, -0.25951898,  0.03390481, -0.15077853, -0.57236046],
      dtype=float32)>

In [12]:
cls.layers

[<transformers.modeling_tf_bert.TFBertMainLayer at 0x7fca6d7518d0>,
 <tensorflow.python.keras.layers.core.Dropout at 0x7fca6b284d90>,
 <tensorflow.python.keras.layers.core.Dense at 0x7fcacd7ba8d0>]

# 直接套TFBertForSequenceClassification 

In [6]:
from transformers import TFBertPreTrainedModel

class ChatEmotion(keras.Model):
    def __init__(self, bert_file, config, num_labels):
        super(ChatEmotion,self).__init__()
        self.bert = TFBertForSequenceClassification.from_pretrained(os.path.join(bert_path,'tf_model.h5'),config = os.path.join(bert_path,'config.json'),num_labels=num_labels)
        self.bert_main_layer = self.bert.layers[0]
        
        
    def call(self,inputs,embedding = True):
    
        if embedding:
            return self.bert_main_layer(inputs)[1]
        else:
            return self.bert(inputs)

In [32]:
model = ChatEmotion(os.path.join(bert_path,'tf_model.h5'),config = os.path.join(bert_path,'config.json'),num_labels = 4)

Some weights of the model checkpoint at /home/ning/bert_conf/tf_model.h5 were not used when initializing TFBertForSequenceClassification: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of TFBertForSequenceClassification were not initialized from the model checkpoint at /home/ning/bert_conf/tf_model.h5 and are newly initialized: ['dropout_75', 'classifier']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
model(train_f_batch,1).numpy().shape

(5, 768)

In [40]:
loss_object = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = keras.optimizers.Adam(learning_rate=2.5e-5)
with tf.GradientTape() as tape:
    prediction = model(train_f_batch,0)[0]
    pred_label = np.argmax(prediction.numpy(),axis=1)
    loss_value  = loss_object(y_true=train_l_batch, y_pred=prediction)
    print(loss_value)
    print(train_l_batch)

grads = tape.gradient(loss_value, model.trainable_weights)
optimizer.apply_gradients(zip(grads, model.trainable_variables))

tf.Tensor(0.7025588, shape=(), dtype=float32)
[0 0 0 0 0]


<tf.Variable 'UnreadVariable' shape=() dtype=int64, numpy=1>

In [41]:
model.layers[1].get_weights()[1][0,:5]

array([ 0.01750542, -0.02568091, -0.0366416 , -0.02528609,  0.007971  ],
      dtype=float32)

In [49]:
model.layers[1].weights[0] is model.layers[0].layers[0].weights[0]

True

In [9]:
model.layers

[<transformers.modeling_tf_bert.TFBertForSequenceClassification at 0x7ff8fc2004f0>,
 <transformers.modeling_tf_bert.TFBertMainLayer at 0x7ff8fc20a190>]

In [48]:
model.layers[1].weights[0].shape

TensorShape([30522, 768])

# 继承TFBertPreTrainedModel

In [None]:
from transformers import TFBertPreTrainedModel

class BertQAModel(TFBertPreTrainedModel):
    
    DROPOUT_RATE = 0.1
    NUM_HIDDEN_STATES = 2
    
    def __init__(self, config, *inputs, **kwargs):
        super().__init__(config, *inputs, **kwargs)
        
        self.bert = TFBertMainLayer(config, name="bert")
        self.concat = L.Concatenate()
        self.dropout = L.Dropout(self.DROPOUT_RATE)
        self.qa_outputs = L.Dense(
            config.num_labels, 
            kernel_initializer=TruncatedNormal(stddev=config.initializer_range),
            dtype='float32',
            name="qa_outputs")
        
    @tf.function
    def call(self, inputs, **kwargs):
        # outputs: Tuple[sequence, pooled, hidden_states]
        _, _, hidden_states = self.bert(inputs, **kwargs)
        
        hidden_states = self.concat([
            hidden_states[-i] for i in range(1, self.NUM_HIDDEN_STATES+1)
        ])
        
        hidden_states = self.dropout(hidden_states, training=kwargs.get("training", False))
        logits = self.qa_outputs(hidden_states)
        start_logits, end_logits = tf.split(logits, 2, axis=-1)
        start_logits = tf.squeeze(start_logits, axis=-1)
        end_logits = tf.squeeze(end_logits, axis=-1)
        
        return start_logits, end_logits

