In [1]:
# Purpose: The following is a program to 1) run the fine-tuned BERT transformer model to a new quarter of data and  
#          2) do analysis on how the predicted score from the fine-tuned BERT transformer model is correlated to ChatGPT sentiment label and customer satifaction score category
#          nps_rawgrade_category is promoters-neutrals-passives category created from customer satisfaction score, which is optional. 
#          It is included to allow for assocation analysis between model output sentiment and satisfaction store. 
#          sentiment is the sentiment label created from ChatGPT batch process, which is optional. 
#          It is included to allow for assocation analysis between model output sentiment and ChatGPT output sentiment.  
#          This is an example program used for my Casualty Actuarial Society 2023 Annual Meeting presentation   
# By:      Frank Zhang - Oct 2023


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.metrics import classification_report,confusion_matrix
import tensorflow as tf
from transformers import TFBertModel,  BertConfig, BertTokenizerFast
from tensorflow.keras.layers import Input, Dropout, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.utils import to_categorical
 
model_name = 'bert-base-uncased'

# Max length of tokens
max_length = 50

In [2]:
# Load transformers config and set output_hidden_states to False
config = BertConfig.from_pretrained(model_name)
config.output_hidden_states = False
# Load BERT tokenizer
tokenizer = BertTokenizerFast.from_pretrained(pretrained_model_name_or_path = model_name, config = config)
# Load the Transformers BERT model
transformer_bert_model = TFBertModel.from_pretrained(model_name, config = config)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

In [3]:
### ------- Build the model ------- ###
# Load the MainLayer
bert = transformer_bert_model.layers[0]

# Build your model input
input_ids = Input(shape=(max_length,), name='input_ids', dtype='int32')
inputs = {'input_ids': input_ids}

# Load the Transformers BERT model as a layer in a Keras model
bert_model = bert(inputs)[1]
dropout = Dropout(config.hidden_dropout_prob, name='pooled_output')
pooled_output = dropout(bert_model, training=False)


In [4]:
# Then build your model output
Sentiments = Dense(units=5, kernel_initializer=TruncatedNormal(stddev=config.initializer_range), name='Sentiment')(pooled_output)
outputs = {'Sentiment': Sentiments}

# And combine it all in a model object
model_bert_multiclass = Model(inputs=inputs, outputs=outputs, name='BERT_MultiClass')

# Take a look at the model
model_bert_multiclass.summary()

Model: "BERT_MultiClass"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_ids (InputLayer)      [(None, 45)]              0         
                                                                 
 bert (TFBertMainLayer)      TFBaseModelOutputWithPo   109482240 
                             olingAndCrossAttentions             
                             (last_hidden_state=(Non             
                             e, 45, 768),                        
                              pooler_output=(None, 7             
                             68),                                
                              past_key_values=None,              
                             hidden_states=None, att             
                             entions=None, cross_att             
                             entions=None)                       
                                                   

In [12]:
data_all = pd.read_csv('./df_comment_with_gpt_sentiment_2023q3.csv')
data_all["sentiment_gpt_clean"] = np.where(data_all.sentiment.apply(lambda x: x.lower().find("very negative"))>0,"1:Very Negative", \
                                                 np.where(data_all.sentiment.apply(lambda x: x.lower().find("negative"))>0,"2:Negative", \
                                                          np.where(data_all.sentiment.apply(lambda x: x.lower().find("very positive"))>0,"5:Very Postive", \
                                                                   np.where(data_all.sentiment.apply(lambda x: x.lower().find("positive"))>0,"4:Postive","3:Neutral"))))
data_all['Sentiment_num']=data_all['sentiment_gpt_clean'].str[:1].astype('int')-1

data_all.head()

Unnamed: 0.1,Unnamed: 0,repond_id,nps_rawgrade,comments,nps_rawgrade_category,sentiment,topic,sentiment_gpt_clean,Sentiment_num
0,0,203950,9.0,Great job all around.,Promoters,You have all worked hard and it shows.\n\nVer...,"We hit our sales targets, drove more traffic ...",5:Very Postive,4
1,1,203949,10.0,Let me know what was done with the girl that h...,Promoters,\n\nNeutral,\n\n1. Girl hitting someone\n2. What was done ...,3:Neutral,2
2,2,203945,10.0,Mail check directly to lender (Regions Mortgag...,Promoters,\n\nNeutral,\n\n1. Sending a check directly to lender \n2....,3:Neutral,2
3,3,203944,10.0,No need to improve - they did a good job. Than...,Promoters,\n\nPositive,\n\n1. Appreciation \n2. Positive feedback \n3...,4:Postive,3
4,4,203943,10.0,Overall we were very happy! But - I don't unde...,Promoters,\n\nNegative,\n\n1. Satisfaction with service\n2. Payment t...,2:Negative,1


In [13]:
y_all = to_categorical(data_all['Sentiment_num'])# Load BERT tokenizer

x_all = tokenizer(
          text=data_all['comments'].to_list(),
          add_special_tokens=True,
          max_length=max_length,
          truncation=True,
          padding=True, 
          return_tensors='tf',
          return_token_type_ids = False,
          return_attention_mask = True,
          verbose = True)
# Restore the weights
model_bert_multiclass.load_weights('./finedtuned_model/fine_tuned_bert_multiclass_claim_sentiment_fz')
y_all_predicted = model_bert_multiclass.predict(
    x={'input_ids': x_all['input_ids']},
)



In [14]:
y_all_predicted

{'Sentiment': array([[-1.358789  , -1.9501685 , -2.3008769 , -0.48266894,  5.777367  ],
        [-1.1841376 , -0.09151569,  5.1973743 , -0.713212  , -1.8988496 ],
        [-1.8998606 ,  0.23065065,  4.598942  ,  0.20221479, -1.9760107 ],
        ...,
        [-0.766754  ,  2.5558662 , -0.48745203,  1.7485728 , -2.9076397 ],
        [ 1.0083941 ,  4.325148  , -1.4121637 , -1.33708   , -2.2267375 ],
        [-0.8103707 , -1.5029817 , -2.1543064 , -0.960685  ,  5.4283257 ]],
       dtype=float32)}

In [15]:
from sklearn.metrics import classification_report

y_all_pred_max=[np.argmax(i) for i in y_all_predicted['Sentiment']]
y_all_actual_max=[np.argmax(i) for i in y_all]


report = classification_report(y_all_pred_max, y_all_actual_max)

print(report) # 74% of accuracy b/w actual vs predicted

              precision    recall  f1-score   support

           0       0.80      0.67      0.73         6
           1       0.78      0.67      0.72       189
           2       0.58      0.76      0.66       195
           3       0.41      0.47      0.44       170
           4       0.91      0.82      0.86       611

    accuracy                           0.74      1171
   macro avg       0.70      0.68      0.68      1171
weighted avg       0.76      0.74      0.74      1171



In [16]:

lkup = {0:'1:Very Negative', 1:'2:Negative', 2:'3:Neutral',3:'4:Positive', 4:'5:Very Positive'} 
data_all["Sentiment_transfomer_pred"]= [lkup[item] for item in y_all_pred_max] 
 
pd.crosstab(data_all["sentiment_gpt_clean"], data_all["Sentiment_transfomer_pred"])

Sentiment_pred,1:Very Negative,2:Negative,3:Neutral,4:Positive,5:Very Positive
sentiment_gpt_clean,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1:Very Negative,4,1,0,0,0
2:Negative,2,126,21,11,1
3:Neutral,0,48,148,43,15
4:Postive,0,8,17,80,91
5:Very Postive,0,6,9,36,504


In [17]:
# cross-tabulation of the predicted sentiment from fine-tuned BERT transformer model and ChatGPT sentiment class
df_crosstab_gpt_clean_finetuned_pred = pd.crosstab(data_all["sentiment_gpt_clean"], data_all["Sentiment_pred"])
# cross-tabulation of the customer satisfaction score category and ChatGPT sentiment class
df_crosstab_nps_cat_gpt_clean = pd.crosstab(data_all["nps_rawgrade_category"], data_all["sentiment_gpt_clean"])
# cross-tabulation of the customer satisfaction score category and the predicted sentiment from fine-tuned BERT transformer model 
df_crosstab_nps_cat_finetuned_pred = pd.crosstab(data_all["nps_rawgrade_category"], data_all["Sentiment_pred"])

In [18]:
# Reference: https://www.geo.fu-berlin.de/en/v/soga-py/Basics-of-statistics/Descriptive-Statistics/Measures-of-Relation-Between-Variables/Contingency-Coeficient/index.html

def contigency_coff(in_df_tab):
    # in_df_tab : pd.crosstab
    from scipy.stats.contingency import expected_freq
    expected = pd.DataFrame(expected_freq(in_df_tab), columns=in_df_tab.columns, index = in_df_tab.index)
    chisqVal = np.sum((in_df_tab.to_numpy()-expected_freq(in_df_tab))**2/expected_freq(in_df_tab))
    C_star = np.sqrt(chisqVal/(np.sum(in_df_tab.to_numpy()) + chisqVal))
    # Or, more concisely
    r, c = in_df_tab.shape

    k = min(r, c)
    C_star_max = np.sqrt((k - 1) / k)
    C = C_star / C_star_max
    return(C)

In [19]:
contigency_coff(df_crosstab_gpt_clean_finetuned_pred)

0.881979032732939

In [20]:
print(df_crosstab_gpt_clean_finetuned_pred)
print(contigency_coff(df_crosstab_gpt_clean_finetuned_pred)) 
# 88% contingency coeff on new quarter of data between the predicted sentiment from fine-tuned BERT transformer model and ChatGPT sentiment class

Sentiment_pred       1:Very Negative  2:Negative  3:Neutral  4:Positive  \
sentiment_gpt_clean                                                       
1:Very Negative                    4           1          0           0   
2:Negative                         2         126         21          11   
3:Neutral                          0          48        148          43   
4:Postive                          0           8         17          80   
5:Very Postive                     0           6          9          36   

Sentiment_pred       5:Very Positive  
sentiment_gpt_clean                   
1:Very Negative                    0  
2:Negative                         1  
3:Neutral                         15  
4:Postive                         91  
5:Very Postive                   504  
0.881979032732939


In [21]:
contigency_coff(df_crosstab_nps_cat_gpt_clean) # contingency score of 60.8% b/w customer satisfaction score category and ChatGPT sentiment class

0.6080631833192912

In [22]:
contigency_coff(df_crosstab_nps_cat_finetuned_pred) # # contingency score of 60.0% b/w customer satisfaction score category and the predicted sentiment from fine-tuned BERT transformer model 

0.6003578894873848

In [23]:
data_all.to_excel("./df_comment_out_gpt_finetunemod_score_2023q3.xlsx", index=False) 