In [4]:
import pandas as pd
import numpy as np
import seaborn as sns
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text

In [5]:
# Read the labeled posts from a tab-separated file and preview the first rows.
LABELED_POSTS_PATH = 'FB_posts_labeled.txt'
df = pd.read_csv(LABELED_POSTS_PATH, delimiter='\t')
df.head(5)

Unnamed: 0,postId,message,Appreciation,Complaint,Feedback
0,126016648090_10150802142013091,Great ! ;),1,0,0
1,108381603303_10151136215833304,YUM! YUM!,1,0,0
2,108381603303_3913438087739,Yummm :)),1,0,0
3,110455108974424_343049739048292,sweet,1,0,0
4,110455108974424_350358541650745,nice,1,0,0


In [6]:
from sklearn.model_selection import train_test_split

# Fixed seed so the train/test partition -- and therefore every metric
# computed on it -- is identical on each Restart & Run All.
SPLIT_SEED = 42

# Features: the raw post text (kept as a one-column DataFrame).
x = df[['message']]
# Targets: one indicator column per class, in this order.
y = df[['Appreciation','Complaint', 'Feedback']]

# Hold out 25% of the labeled posts for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=SPLIT_SEED
)

In [7]:
# TF Hub handles for the BERT text preprocessor and the encoder it feeds.
BERT_PREPROCESS_HANDLE = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
BERT_ENCODER_HANDLE = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"

# Wrap both hub models as Keras layers so they can be used in a functional model.
bert_preprocess = hub.KerasLayer(BERT_PREPROCESS_HANDLE)
bert_encoder = hub.KerasLayer(BERT_ENCODER_HANDLE)

In [8]:
# --- BERT front end -------------------------------------------------------
# Each example is a single scalar string; the hub preprocessor turns it into
# the dict of tensors the encoder consumes.
text_input = tf.keras.layers.Input(name='text', dtype=tf.string, shape=())
preprocessed_text = bert_preprocess(text_input)
outputs = bert_encoder(preprocessed_text)

# --- Classification head --------------------------------------------------
# Light dropout on the pooled sentence embedding, then a 3-way softmax.
dropout_layer = tf.keras.layers.Dropout(rate=0.1, name="dropout")
output_layer = tf.keras.layers.Dense(units=3, activation='softmax', name="output")
l = output_layer(dropout_layer(outputs['pooled_output']))

# --- Assemble the end-to-end model (raw text in, class probabilities out) --
model = tf.keras.Model(outputs=[l], inputs=[text_input])

In [9]:
# Print the layer-by-layer overview of the model (output shapes, parameter
# counts and layer connectivity) as a sanity check before compiling.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 text (InputLayer)              [(None,)]            0           []                               
                                                                                                  
 keras_layer (KerasLayer)       {'input_mask': (Non  0           ['text[0][0]']                   
                                e, 128),                                                          
                                 'input_type_ids':                                                
                                (None, 128),                                                      
                                 'input_word_ids':                                                
                                (None, 128)}                                                  

In [10]:
import tensorflow_addons as tfa

 The versions of TensorFlow you are currently using is 2.12.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [11]:
# Macro-averaged F1 across the three classes, reported alongside the loss.
macro_f1 = tfa.metrics.F1Score(num_classes=3, average='macro')

# Adam optimizer with categorical cross-entropy against the one-hot label columns.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=[macro_f1],
)

In [None]:
# Train on the training split for ten epochs.
model.fit(x=X_train, y=y_train, epochs=10)

Epoch 1/10
  9/187 [>.............................] - ETA: 51:45 - loss: 1.0932 - f1_score: 0.3239

In [None]:
# Report loss and the compiled metrics on the held-out test split.
model.evaluate(x=X_test, y=y_test)

In [None]:
# Read the unlabeled posts (tab-separated) that the model will classify,
# and preview the first rows.
UNLABELED_POSTS_PATH = 'FB_posts_unlabeled.txt'
df_unl = pd.read_csv(UNLABELED_POSTS_PATH, delimiter='\t')
df_unl.head(5)

In [None]:
# Score every unlabeled message with the trained model; the softmax head
# yields one row of three class scores per post.
unlabeled_messages = df_unl['message']
predictions = model.predict(x=unlabeled_messages)
print(predictions)

In [None]:
# For each row of scores keep the index of the largest one.
# Index order follows the label columns: 0 = Appreciation, 1 = Complaint, 2 = Feedback.
classes = predictions.argmax(axis=1)
print(classes)

In [None]:
# Store the predicted class index next to each unlabeled post.
# Note: this adds (or overwrites) the 'class' column on df_unl in place.
df_unl['class'] = classes

In [None]:
def _class_label(class_index):
    """Translate a predicted class index into its output column label.

    0 maps to 'Appreciation_pred' and 1 to 'Complaint_pred'; every other
    value falls through to 'Feedback_pred'.
    """
    known_labels = {0: 'Appreciation_pred', 1: 'Complaint_pred'}
    return known_labels.get(class_index, 'Feedback_pred')


# Human-readable label for each predicted class index.
df_unl['class_new'] = df_unl['class'].map(_class_label)

In [None]:
# Fixed output schema: one indicator column per class, in label order
# (Appreciation, Complaint, Feedback).
PRED_COLUMNS = ['Appreciation_pred', 'Complaint_pred', 'Feedback_pred']

# One-hot encode the predicted label of every post.
# get_dummies only creates columns for labels that actually occur, so a class
# the model never predicts would silently vanish from results.csv. Reindexing
# to the full column list (absent classes filled with 0) keeps the file schema
# stable, and dtype=int writes 0/1 rather than the pandas-version-dependent
# uint8/bool default.
one_hot = (
    pd.get_dummies(df_unl['class_new'], dtype=int)
    .reindex(columns=PRED_COLUMNS, fill_value=0)
)

# Pair each post id with its indicator columns and write the result to disk.
df_merged = pd.concat([df_unl[['postId']], one_hot], axis=1)
df_merged.to_csv('results.csv', index=False)