In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds

from official.nlp import optimization as nlp_opt
from official.nlp.bert import tokenization as bert_token

from berts.berts import BertClassificationModel
from berts.utils import get_bert_inputs

In [2]:
# Pre-trained uncased BERT encoder (the L-12_H-768_A-12 variant, version 2) on TF Hub.
bert_url = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2"

# SST-2 labels are negative/positive; the second argument of the factory is
# presumably the number of target classes -- confirm against berts.berts.
num_classes = 2

# The factory hands back the Keras model and the hub layer; the layer is kept
# because its resolved object carries the vocabulary assets read by the tokenizer cell.
model, bert_layer = BertClassificationModel(bert_url, num_classes)
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_words_seq (InputLayer)    [(None, None)]       0                                            
__________________________________________________________________________________________________
input_attention_mask (InputLaye [(None, None)]       0                                            
__________________________________________________________________________________________________
input_segment_mask (InputLayer) [(None, None)]       0                                            
__________________________________________________________________________________________________
keras_layer (KerasLayer)        [(None, 768), (None, 109482241   input_words_seq[0][0]            
                                                                 input_attention_mask[0

In [3]:
# The tokenizer must use the same vocabulary and casing rule as the pre-trained
# BERT checkpoint, so both are read from the assets bundled with the hub layer
# instead of being supplied by hand.
bert_assets = bert_layer.resolved_object
vocab_file = bert_assets.vocab_file.asset_path.numpy()
to_lower_case = bert_assets.do_lower_case.numpy()

bert_tokenizer = bert_token.FullTokenizer(vocab_file=vocab_file, do_lower_case=to_lower_case)
print('vocabulary size:', len(bert_tokenizer.vocab))

vocabulary size: 30522


In [4]:
# GLUE benchmark, SST-2 task (Stanford Sentiment Treebank).
# batch_size=-1 asks TFDS for every split in full as one batch of tensors, so
# the splits can be indexed like dictionaries of arrays in the cells that follow;
# with_info=True additionally returns the dataset metadata.
glue, info = tfds.load(
    name='glue/sst2',
    batch_size=-1,
    with_info=True,
)

# Show which splits are available and what each example contains.
print(glue.keys())
print(info.features)

dict_keys(['test', 'train', 'validation'])
FeaturesDict({
    'idx': Tensor(shape=(), dtype=tf.int32),
    'label': ClassLabel(shape=(), dtype=tf.int64, num_classes=2),
    'sentence': Text(shape=(), dtype=tf.string),
})


In [5]:
# Human-readable class names of the 'label' feature, taken from the dataset metadata.
info.features['label'].names

['negative', 'positive']

In [6]:
# --- training split ---------------------------------------------------------
# get_bert_inputs yields three aligned arrays per split (named here as word ids,
# attention mask and segment mask); their exact contents are defined in
# berts.utils -- presumably the standard BERT input triple, confirm there.
train_labels = glue['train']['label']
train_input_words, train_input_mask, train_input_seg = get_bert_inputs(
    bert_tokenizer, glue['train']['sentence'])
print('training data shapes:',
      *(tensor.shape for tensor in (train_input_words, train_input_mask, train_input_seg, train_labels)))

# --- validation split -------------------------------------------------------
# Encoded independently of the training split, so its sequence width may differ.
valid_labels = glue['validation']['label']
valid_input_words, valid_input_mask, valid_input_seg = get_bert_inputs(
    bert_tokenizer, glue['validation']['sentence'])
print('validation data shapes:',
      *(tensor.shape for tensor in (valid_input_words, valid_input_mask, valid_input_seg, valid_labels)))

training data shapes: (67349, 66) (67349, 66) (67349, 66) (67349,)
validation data shapes: (872, 55) (872, 55) (872, 55) (872,)


In [7]:
# Training hyper-parameters.
batch_size = 32
epochs = 3
train_data_size = len(train_labels)

# model.fit also runs the final, smaller batch of each epoch, so the number of
# optimizer steps per epoch is the ceiling of size / batch_size. Rounding down
# makes the learning-rate schedule finish a few steps before training does.
steps_per_epoch = -(-train_data_size // batch_size)  # ceiling division without importing math
num_train_steps = steps_per_epoch * epochs

# Warm up over the first 10% of all optimizer steps, derived from the same
# total so the two step counts cannot drift apart.
warmup_steps = num_train_steps // 10

# creates an optimizer with learning rate schedule
# (2e-5 is the initial learning rate handed to the schedule)
optimizer = nlp_opt.create_optimizer(2e-5, num_train_steps=num_train_steps, num_warmup_steps=warmup_steps)

In [8]:
# Attach loss, optimizer and metrics to the model.
# `metrics` is passed as a list, the form documented for Model.compile; a bare
# string is not accepted by every Keras version.
# NOTE(review): 'binary_crossentropy' fits a single-probability output compared
# against 0/1 labels. The classifier head is not visible in this notebook --
# confirm its output layer matches this loss (a 2-unit softmax head would call
# for 'sparse_categorical_crossentropy' instead).
model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['acc'])

In [9]:
# Fine-tune the classifier. The three input arrays are passed in the same order
# as the model's input layers (words, attention mask, segment mask); the
# validation split is evaluated at the end of every epoch and the per-epoch
# results are collected in `history`.
history = model.fit(
    x=[train_input_words, train_input_mask, train_input_seg],
    y=train_labels,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(
        [valid_input_words, valid_input_mask, valid_input_seg],
        valid_labels,
    ),
)

Epoch 1/3
Epoch 2/3
Epoch 3/3
