In [3]:
import pandas as pd
import numpy as np
import seaborn as sns
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text

In [4]:
# Load the tab-separated file of labeled posts and preview the first rows.
labeled_posts_path = 'FB_posts_labeled.txt'
df = pd.read_csv(labeled_posts_path, sep='\t')
df.head()

Unnamed: 0,postId,message,Appreciation,Complaint,Feedback
0,126016648090_10150802142013091,Great ! ;),1,0,0
1,108381603303_10151136215833304,YUM! YUM!,1,0,0
2,108381603303_3913438087739,Yummm :)),1,0,0
3,110455108974424_343049739048292,sweet,1,0,0
4,110455108974424_350358541650745,nice,1,0,0


In [5]:
from sklearn.model_selection import train_test_split

# Features: the raw post text, kept as a one-column DataFrame.
# Targets: the three label indicator columns (Appreciation, Complaint, Feedback).
x = df[['message']]
y = df[['Appreciation','Complaint', 'Feedback']]

# Hold out 25% of the labeled posts for evaluation. The fixed random_state makes
# the split -- and therefore the reported test metrics -- reproducible across
# kernel restarts; without it every run evaluates on a different subset.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42
)

In [6]:
# TF Hub handles for the uncased English BERT preprocessing model and its
# matching L-12 / H-768 / A-12 encoder.
BERT_PREPROCESS_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
BERT_ENCODER_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"

bert_preprocess = hub.KerasLayer(BERT_PREPROCESS_URL)
bert_encoder = hub.KerasLayer(BERT_ENCODER_URL)

In [7]:
# BERT front end: raw strings -> preprocessing layer -> encoder outputs.
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
encoder_inputs = bert_preprocess(text_input)
outputs = bert_encoder(encoder_inputs)

# Classification head on the pooled output: dropout, then a 3-way softmax
# (one unit per label column).
pooled = outputs['pooled_output']
regularized = tf.keras.layers.Dropout(0.1, name="dropout")(pooled)
class_probs = tf.keras.layers.Dense(3, activation='softmax', name="output")(regularized)

# Wire the text input to the softmax output to form the final model.
model = tf.keras.Model(inputs=[text_input], outputs=[class_probs])

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [8]:
# Print the layer-by-layer summary (output shapes, parameter counts, connections).
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 text (InputLayer)              [(None,)]            0           []                               
                                                                                                  
 keras_layer (KerasLayer)       {'input_mask': (Non  0           ['text[0][0]']                   
                                e, 128),                                                          
                                 'input_type_ids':                                                
                                (None, 128),                                                      
                                 'input_word_ids':                                                
                                (None, 128)}                                                  

In [12]:
!pip install tensorflow_addons

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting tensorflow_addons
  Downloading tensorflow_addons-0.19.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m52.0 MB/s[0m eta [36m0:00:00[0m
Collecting typeguard>=2.7
  Downloading typeguard-3.0.1-py3-none-any.whl (30 kB)
Installing collected packages: typeguard, tensorflow_addons
Successfully installed tensorflow_addons-0.19.0 typeguard-3.0.1


In [13]:
import tensorflow_addons as tfa

In [14]:
# Track macro-averaged F1 over the three classes while training and evaluating.
macro_f1 = tfa.metrics.F1Score(num_classes=3, average='macro')

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=[macro_f1],
)

In [15]:
# Train on the training split for five passes over the data.
model.fit(x=X_train, y=y_train, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f846de7eeb0>

In [16]:
# Score the held-out split; the result is [loss, macro F1] as configured in compile().
model.evaluate(x=X_test, y=y_test)



[0.6123871803283691, 0.7424693703651428]

In [17]:
# Load the tab-separated file of unlabeled posts and preview the first rows.
unlabeled_posts_path = 'FB_posts_unlabeled.txt'
df_unl = pd.read_csv(unlabeled_posts_path, sep='\t')
df_unl.head()

Unnamed: 0,postId,message
0,108381603303_10151119973393304,Love. It. To
1,115568331790246_371841206162956,NICE
2,115568331790246_515044031842672,Congrats
3,147285781446_10151010892176447,Awesome!
4,159616034235_10150639103634236,Award


In [18]:
# Generate predictions for samples
predictions = model.predict(df_unl['message'])
print(predictions)

[[0.54589945 0.36868277 0.08541775]
 [0.80114126 0.08920414 0.10965455]
 [0.8800548  0.05770996 0.0622353 ]
 ...
 [0.09309734 0.13232656 0.77457607]
 [0.44879675 0.3519066  0.19929668]
 [0.07580331 0.8014149  0.12278177]]


In [19]:
# Pick the highest-scoring class per row. Index order follows the label columns
# used for training: 0 = Appreciation, 1 = Complaint, 2 = Feedback.
classes = predictions.argmax(axis=1)
print(classes)

[0 0 0 ... 2 0 1]


In [20]:
# Store the predicted class index (0, 1 or 2) alongside each unlabeled post.
df_unl['class'] = classes

In [29]:
def _class_label(class_index):
    """Translate a predicted class index into its output column name.

    0 maps to 'Appreciation_pred', 1 to 'Complaint_pred', and any other
    value to 'Feedback_pred'.
    """
    if class_index == 0:
        return 'Appreciation_pred'
    if class_index == 1:
        return 'Complaint_pred'
    return 'Feedback_pred'


df_unl['class_new'] = df_unl['class'].map(_class_label)

In [34]:
# One-hot encode the predicted label. The column list is fixed up front so that
# results.csv always contains all three prediction columns, even when the model
# never predicts one of the classes for this batch: get_dummies on its own only
# emits columns for values that actually occur.
PRED_COLUMNS = ['Appreciation_pred', 'Complaint_pred', 'Feedback_pred']
one_hot = (
    # dtype=int keeps the indicators as 0/1 (like the labeled file) on every
    # pandas version; newer releases would otherwise write True/False.
    pd.get_dummies(df_unl['class_new'], dtype=int)
    .reindex(columns=PRED_COLUMNS, fill_value=0)
)

# Attach the indicators to the post ids (rows align on the shared index) and save.
df_merged = pd.concat([df_unl[['postId']], one_hot], axis=1)
df_merged.to_csv('results.csv')