In [1]:
#!pip install -q transformers
#!pip install tensorflow_datasets
#!pip install torch
from transformers import DistilBertTokenizerFast
from transformers import TFDistilBertForSequenceClassification
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.metrics import CategoricalAccuracy
from tensorflow.keras.layers import Input, Dense
import pandas as pd
import numpy as np
import keras

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
if torch.cuda.is_available():
    print('Default GPU Device: {}'.format(torch.device("cuda:0")))
else:
    print("Please install GPU version of TF")
print("keras version: {0}".format(keras.__version__))
print("Num GPUs Available: ", torch.cuda.device_count())

Please install GPU version of TF
keras version: 2.6.0
Num GPUs Available:  0


In [4]:
# can be up to 512 for BERT
max_length = 256
batch_size = 18
# recommended learning rate for Adam 5e-5, 3e-5, 2e-5
learning_rate = 3e-5
# we will do just 2 epoch, though multiple epochs might be better as long as we will not overfit the model
number_of_epochs = 2

#Assign tokenizer object to the tokenizer class
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
model = TFDistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased',num_labels=3)
#Assign tokenizer object to the tokenizer class
model.summary()


Some layers from the model checkpoint at distilbert-base-uncased were not used when initializing TFDistilBertForSequenceClassification: ['vocab_transform', 'activation_13', 'vocab_projector', 'vocab_layer_norm']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier', 'dropout_19', 'classifier']
You should probably TRAIN this model on a down-stream task to be able to use i

Model: "tf_distil_bert_for_sequence_classification"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
distilbert (TFDistilBertMain multiple                  66362880  
_________________________________________________________________
pre_classifier (Dense)       multiple                  590592    
_________________________________________________________________
classifier (Dense)           multiple                  2307      
_________________________________________________________________
dropout_19 (Dropout)         multiple                  0         
Total params: 66,955,779
Trainable params: 66,955,779
Non-trainable params: 0
_________________________________________________________________


In [5]:
"""
DATASETS

"""
test_csv = pd.read_csv('test_data.csv') 
train_csv = pd.read_csv('train_data.csv') #sentiment
train_csv = train_csv.sort_values('sentiment')
train_csv = train_csv.drop(train_csv[train_csv.sentiment == 'positive'].index[int(train_csv.count()['sentiment']/5.5):])
ds_train = train_csv[['review_content','sentiment']]
ds_test = test_csv[['review_content','Annotator_1']]
ds_test = ds_test.rename(columns={'Annotator_1':'sentiment'})
ds_train = ds_train.dropna()
print(ds_train.isnull().any())
print(ds_test.isnull().any())

review_content    False
sentiment         False
dtype: bool
review_content    False
sentiment         False
dtype: bool


In [6]:
possible_labels = ds_train.sentiment.unique()

label_dict = {}
for index, possible_label in enumerate(possible_labels):
    label_dict[possible_label] = index
print(label_dict)

ds_train['label'] = ds_train.sentiment.replace(label_dict)
ds_test['label'] = ds_test.sentiment.replace(label_dict)
y_train = to_categorical(ds_train.label)
y_test = to_categorical(ds_test.label)

{'negative': 0, 'neutral': 1, 'positive': 2}


In [7]:
# Tokenize the input (takes some time) 
# here tokenizer using from bert-base-uncased
x_train = tokenizer(
    text=ds_train.review_content.tolist(),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True, 
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = True,
    verbose = True)
x_test = tokenizer(
    text=ds_test.review_content.tolist(),
    add_special_tokens=True,
    max_length=max_length,
    truncation=True,
    padding=True, 
    return_tensors='tf',
    return_token_type_ids = False,
    return_attention_mask = True,
    verbose = True)

In [8]:
input_ids = x_train['input_ids']
attention_mask = x_train['attention_mask']

In [10]:
input_ids = Input(shape=(max_length,), dtype=tf.int32, name="input_ids")
input_mask = Input(shape=(max_length,), dtype=tf.int32, name="attention_mask")
embeddings = model(input_ids,attention_mask = input_mask)[0] 
out = Dense(128, activation='relu')(embeddings)
out = tf.keras.layers.Dropout(0.2)(out)
out = Dense(32,activation = 'relu')(out)
y = Dense(3,activation = 'sigmoid')(out)
model = tf.keras.Model(inputs=[input_ids, input_mask], outputs=y)
model.layers[2].trainable = True

In [11]:
# choosing Adam optimizer
optimizer = Adam(
    learning_rate=learning_rate, # this learning rate is for bert model , taken from huggingface website 
    epsilon=1e-08, decay=0.01, clipnorm=1.0)
# Set loss and metrics
loss =CategoricalCrossentropy(from_logits = True)
metric = CategoricalAccuracy('balanced_accuracy'),

model.compile(optimizer=optimizer, loss=loss, metrics=metric)


In [13]:
bert_search = model.fit(
    x ={'input_ids':x_train['input_ids'],'attention_mask':x_train['attention_mask']} ,
    y = y_train,
    validation_data = (
    {'input_ids':x_test['input_ids'],'attention_mask':x_test['attention_mask']}, y_test
    ),
  epochs=number_of_epochs,
    batch_size=14
)

Epoch 1/2
Epoch 2/2


In [23]:
# model.save('BERTmodel')
model.save_pretrained("BERT")
print("Saved model to disk")

AttributeError: 'Functional' object has no attribute 'save_pretrained'

In [21]:
model = keras.models.load_model('BERTmodel')

ValueError: The two structures don't have the same nested structure.

First structure: type=tuple str=(({'input_ids': TensorSpec(shape=(None, 5), dtype=tf.int32, name='input_ids/input_ids')}, None, None, None, None, None, None, None, False), {})

Second structure: type=tuple str=((TensorSpec(shape=(None, 100), dtype=tf.int32, name='input_ids'), TensorSpec(shape=(None, 100), dtype=tf.int32, name='attention_mask'), None, None, None, None, None, None, False), {})

More specifically: Substructure "type=dict str={'input_ids': TensorSpec(shape=(None, 5), dtype=tf.int32, name='input_ids/input_ids')}" is a sequence, while substructure "type=TensorSpec str=TensorSpec(shape=(None, 100), dtype=tf.int32, name='input_ids')" is not
Entire first structure:
(({'input_ids': .}, ., ., ., ., ., ., ., .), {})
Entire second structure:
((., ., ., ., ., ., ., ., .), {})

In [14]:
predicted_raw = model.predict({'input_ids':x_test['input_ids'],'attention_mask':x_test['attention_mask']})
predicted_raw[0]

array([0.3111919 , 0.13600692, 0.8138756 ], dtype=float32)

In [15]:
y_predicted = np.argmax(predicted_raw, axis = 1)
y_true = ds_test.label
from sklearn.metrics import classification_report
print(classification_report(y_true, y_predicted))

              precision    recall  f1-score   support

           0       0.62      0.98      0.75       748
           1       0.50      0.00      0.01       346
           2       0.97      0.93      0.95      2582

    accuracy                           0.85      3676
   macro avg       0.69      0.64      0.57      3676
weighted avg       0.85      0.85      0.82      3676

