In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
from transformers import BertTokenizer
from transformers import TFBertModel

gpus = tf.config.experimental.list_physical_devices("GPU")
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

D:\anaconda\envs\tf-gpu-2.10.0-py-3.10\lib\site-packages\numpy\.libs\libopenblas.FB5AE2TYXYH2IJRDKGDGQ3XBKLKTF43H.gfortran-win_amd64.dll
D:\anaconda\envs\tf-gpu-2.10.0-py-3.10\lib\site-packages\numpy\.libs\libopenblas64__v0.3.23-246-g3d31191b-gcc_10_3_0.dll
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
max_length = 128
batch_size = 32
train_ds, test_ds = tfds.load("imdb_reviews", split=['train', 'test'], as_supervised=True)
tokenizer = BertTokenizer.from_pretrained("D:/bert-base-uncased")
texts = []
labels = []
for t,l in train_ds:
    texts.append(t.numpy().decode('utf-8'))
    labels.append(l.numpy())
for t,l in test_ds:
    texts.append(t.numpy().decode('utf-8'))
    labels.append(l.numpy())
print(texts[:2])
print(labels[:2])
print(len(texts))
print(len(labels))

["This was an absolutely terrible movie. Don't be lured in by Christopher Walken or Michael Ironside. Both are great actors, but this must simply be their worst role in history. Even their great acting could not redeem this movie's ridiculous storyline. This movie is an early nineties US propaganda piece. The most pathetic scenes were those when the Columbian rebels were making their cases for revolutions. Maria Conchita Alonso appeared phony, and her pseudo-love affair with Walken was nothing but a pathetic emotional plug in a movie that was devoid of any real meaning. I am disappointed that there are movies like this, ruining actor's like Christopher Walken's good name. I could barely sit through it.", 'I have been known to fall asleep during films, but this is usually due to a combination of things including, really tired, being warm and comfortable on the sette and having just eaten a lot. However on this occasion I fell asleep because the film was rubbish. The plot development was

In [22]:
encoded = tokenizer.batch_encode_plus(texts, 
                                 add_special_tokens=True,
                                 max_length=max_length,
                                 padding='max_length',
                                 truncation=True,
                                 return_attention_mask=True,
                                 return_tensors='tf')
input_ids = encoded['input_ids']
attention_mask = encoded['attention_mask']
labels = tf.convert_to_tensor(labels)
print("input_ids.shape", input_ids.shape)
print("attention_mask.shape",attention_mask.shape)
print("labels.shape",labels.shape)

dataset = tf.data.Dataset.from_tensor_slices(((input_ids, attention_mask), labels)).shuffle(len(texts)).batch(batch_size)
data_size = len(dataset)
train_size = int(0.8*data_size)
train_dataset = dataset.take(train_size)
test_dataset = dataset.skip(train_size)

input_ids.shape (50000, 128)
attention_mask.shape (50000, 128)
labels.shape (50000,)


In [23]:
for (a,b),c in train_dataset.take(1):
    print(a[:2])
    print()
    print(b[:2])
    print()
    print(c[:2])

tf.Tensor(
[[  101  2298  1010  1045  1005  2310  8134  2439  2035  3246  1999 20814
   2044  3666  2037 14751  1000  2718  1010  1000  1996  6248  3428  2316
   2265  1010  1998  1000 24582  2906  2135  1000  2003  2053  6453   999
   2065  2017  4033  1005  1056  4384  1010 24582  2906  2135  2003  2085
   1996  1001  1015  2718  1056 28394  2078 13130  2006  2547  2157  2085
    999  2044  4994  2023  1010  1045  2787  2000  3422  1037  2261  4178
   2870  2000  2156  2054  1996  1044 18863  2001  2055   999  1045  2031
   2028  2773  2000  6235  2023  2265  1999  2236  1012  1012  1012  1000
   3947  3238   999   999   999  1000  1045  3685  2903  2008  4907 15159
   2052  2175  2023  2659  1998  2191  2242  2023 10231  7685   999   999
    999  2009  1005  1055  9202   999   999   102]
 [  101  1045  3427  2023  6823  1010  3202  2128 12155  8630  2009  1010
   3427  2009  2153  1998  4191  3807  2004  2524  1012  1045  6118 16755
   2023  6823  2005  2216  2040  2024  2025  5223 

In [26]:
bert_model = TFBertModel.from_pretrained("D:/bert-base-uncased")
for layer in bert_model.layers:
    layer.trainable = True
input_ids = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='input_ids')
attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype=tf.int32, name='attention_mask')
bert_output = bert_model(input_ids, attention_mask=attention_mask)
cls_token = bert_output.last_hidden_state[:,0,:]
output = tf.keras.layers.Dense(1, activation='sigmoid')(cls_token)
model = tf.keras.models.Model(inputs=[input_ids, attention_mask], outputs=output)

Some layers from the model checkpoint at D:/bert-base-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at D:/bert-base-uncased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [27]:
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=3e-5),
             loss=tf.keras.losses.BinaryCrossentropy(),
             metrics=[tf.keras.metrics.BinaryAccuracy()])
model.fit(train_dataset, epochs=3, validation_data=test_dataset)

Epoch 1/3








Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x1e4b0708a00>

In [29]:
model.predict(["This was an absolutely terrible movie."])

ValueError: in user code:

    File "D:\anaconda\envs\tf-gpu-2.10.0-py-3.10\lib\site-packages\keras\engine\training.py", line 2041, in predict_function  *
        return step_function(self, iterator)
    File "D:\anaconda\envs\tf-gpu-2.10.0-py-3.10\lib\site-packages\keras\engine\training.py", line 2027, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "D:\anaconda\envs\tf-gpu-2.10.0-py-3.10\lib\site-packages\keras\engine\training.py", line 2015, in run_step  **
        outputs = model.predict_step(data)
    File "D:\anaconda\envs\tf-gpu-2.10.0-py-3.10\lib\site-packages\keras\engine\training.py", line 1983, in predict_step
        return self(x, training=False)
    File "D:\anaconda\envs\tf-gpu-2.10.0-py-3.10\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "D:\anaconda\envs\tf-gpu-2.10.0-py-3.10\lib\site-packages\keras\engine\input_spec.py", line 216, in assert_input_compatibility
        raise ValueError(

    ValueError: Layer "model_6" expects 2 input(s), but it received 1 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(None,) dtype=string>]
