In [1]:
import tensorflow as tf
from transformers import TFAutoModel, AutoTokenizer
from datasets import load_dataset

In [2]:
model = TFAutoModel.from_pretrained("bert-base-uncased")

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/511M [00:00<?, ?B/s]

Some layers from the model checkpoint at bert-base-uncased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [3]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
inputs = tokenizer(['I like pizza', 'but my friend likes burger'], padding=True, truncation=True,
                  return_tensors='tf')
inputs

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/455k [00:00<?, ?B/s]

{'input_ids': <tf.Tensor: shape=(2, 7), dtype=int32, numpy=
array([[  101,  1045,  2066, 10733,   102,     0,     0],
       [  101,  2021,  2026,  2767,  7777, 15890,   102]], dtype=int32)>, 'token_type_ids': <tf.Tensor: shape=(2, 7), dtype=int32, numpy=
array([[0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(2, 7), dtype=int32, numpy=
array([[1, 1, 1, 1, 1, 0, 0],
       [1, 1, 1, 1, 1, 1, 1]], dtype=int32)>}

In [4]:
output = model(inputs)
output

TFBaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=<tf.Tensor: shape=(2, 7, 768), dtype=float32, numpy=
array([[[ 0.03161393,  0.21924111, -0.06189651, ..., -0.01806979,
          0.29032907,  0.26182804],
        [ 0.33707508,  0.11439962, -0.46938097, ...,  0.20413513,
          0.82430315,  0.27784604],
        [ 0.29033974,  0.40582636,  0.9369134 , ...,  0.40464547,
          0.3492163 ,  0.1465727 ],
        ...,
        [ 0.7844129 ,  0.13871926, -0.14925267, ...,  0.28906852,
         -0.61085105, -0.30621636],
        [ 0.22610539, -0.01241209,  0.04190814, ...,  0.2928391 ,
          0.27107716,  0.02607327],
        [ 0.19631523, -0.16395801,  0.04214695, ...,  0.3881919 ,
          0.21493885, -0.10372332]],

       [[ 0.08502091,  0.10061197, -0.00596355, ..., -0.00249572,
          0.25800818,  0.20576611],
        [ 0.48356184, -0.08624651,  0.11501959, ..., -0.25489786,
          0.5577764 ,  0.71446025],
        [ 0.1349858 , -0.13354455,  0.36880377, ...

In [5]:
emotions = load_dataset('SetFit/emotion')

Downloading and preparing dataset json/SetFit--emotion to /root/.cache/huggingface/datasets/json/SetFit--emotion-ddb0f0516f8def09/0.0.0/ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b...


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/2.23M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/279k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/276k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Dataset json downloaded and prepared to /root/.cache/huggingface/datasets/json/SetFit--emotion-ddb0f0516f8def09/0.0.0/ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [6]:
emotions

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'label_text'],
        num_rows: 16000
    })
    test: Dataset({
        features: ['text', 'label', 'label_text'],
        num_rows: 2000
    })
    validation: Dataset({
        features: ['text', 'label', 'label_text'],
        num_rows: 2000
    })
})

In [7]:
def tokenize(batch):
    return tokenizer(batch["text"], padding=True, truncation=True)

In [8]:
emotions_encoded = emotions.map(tokenize, batched=True, batch_size=None)
emotions_encoded

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

DatasetDict({
    train: Dataset({
        features: ['text', 'label', 'label_text', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 16000
    })
    test: Dataset({
        features: ['text', 'label', 'label_text', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 2000
    })
    validation: Dataset({
        features: ['text', 'label', 'label_text', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 2000
    })
})

In [9]:
emotions_encoded.set_format('tf', columns=['input_ids', 'attention_mask', 'token_type_ids', 'label'])
batch_size = 64
def order(input):
    data = list(input.values())
    return {
        'input_ids': data[1],
        'attention_mask': data[2],
        'token_type_ids': data[3]
    }, data[0]
train_dataset = tf.data.Dataset.from_tensor_slices(emotions_encoded['train'][:])
train_dataset = train_dataset.batch(batch_size).shuffle(1000)
train_dataset = train_dataset.map(order, num_parallel_calls=tf.data.AUTOTUNE)
test_dataset = tf.data.Dataset.from_tensor_slices(emotions_encoded['test'][:])
test_dataset = test_dataset.batch(batch_size)
test_dataset = test_dataset.map(order, num_parallel_calls=tf.data.AUTOTUNE)

In [10]:
inp, out = next(iter(train_dataset))
print(inp, '\n\n', out)

{'input_ids': <tf.Tensor: shape=(64, 87), dtype=int64, numpy=
array([[  101, 10047,  3110, ...,     0,     0,     0],
       [  101,  1045,  6135, ...,     0,     0,     0],
       [  101,  1045,  5223, ...,     0,     0,     0],
       ...,
       [  101,  1045,  2514, ...,     0,     0,     0],
       [  101,  1045,  2572, ...,     0,     0,     0],
       [  101,  1045,  2514, ...,     0,     0,     0]])>, 'attention_mask': <tf.Tensor: shape=(64, 87), dtype=int64, numpy=
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])>, 'token_type_ids': <tf.Tensor: shape=(64, 87), dtype=int64, numpy=
array([[1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       ...,
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 0, 0]])>} 

 tf.Tensor(
[4 1 0 1 0 0 0 0 1 3 2 3 1 

In [11]:
class BERTForTextClassification(tf.keras.Model):    
    def __init__(self, bert_model, num_classes):
        super().__init__()
        self.bert = bert_model
        self.fc = tf.keras.layers.Dense(num_classes, activation='softmax')        
    def call(self, inputs):
        x = self.bert(inputs)[1]
        return self.fc(x)

In [12]:
classifier = BERTForTextClassification(model, num_classes=6)
classifier.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),loss=tf.keras.losses.SparseCategoricalCrossentropy(),metrics=['accuracy'])

In [13]:
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint

lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=1e-6)
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_loss')
history = classifier.fit(train_dataset,epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [14]:
classifier.evaluate(test_dataset)



[0.15439268946647644, 0.9254999756813049]