In [21]:
from transformers import BertModel, BertTokenizer
import torch

# Generating BERT embedding

In [22]:
# Load the pretrained 'bert-base-uncased' weights into a plain BertModel.
# The "weights ... were not used" warning printed below lists only `cls.*`
# parameters; the warning text itself states this is expected when the
# checkpoint comes from a model trained with another architecture.
model = BertModel.from_pretrained('bert-base-uncased')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [23]:
# Load the tokenizer for the 'bert-base-uncased' checkpoint, i.e. the same
# checkpoint name the model is loaded from.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Preprocessing the input

In [24]:
# Example sentence that the following cells tokenize and encode.
sentence = 'I love Paris'

In [25]:
# Split the sentence into tokens. For this input the printed result is the
# lowercased list ['i', 'love', 'paris'].
tokens = tokenizer.tokenize(sentence)

In [26]:
# Show the raw tokens before any special tokens are added.
print(tokens)

['i', 'love', 'paris']


In [27]:
# Wrap the sentence tokens with the '[CLS]' marker in front and the '[SEP]'
# marker at the end.
tokens = ['[CLS]', *tokens, '[SEP]']

In [28]:
# Show the tokens with '[CLS]' at the start and '[SEP]' at the end.
print(tokens)

['[CLS]', 'i', 'love', 'paris', '[SEP]']


In [29]:
# Pad the token list with '[PAD]' up to a fixed sequence length.
# For the 5-token example this appends two '[PAD]' tokens (length 7).
# Padding to a target length, rather than always appending two tokens,
# means re-running this cell does not keep growing the list.
# A list that is already MAX_LENGTH or longer is left unchanged, because
# multiplying a list by a non-positive count yields an empty list.
MAX_LENGTH = 7
tokens = tokens + ['[PAD]'] * (MAX_LENGTH - len(tokens))

In [30]:
# Show the padded sequence (7 tokens, the last two being '[PAD]').
print(tokens)

['[CLS]', 'i', 'love', 'paris', '[SEP]', '[PAD]', '[PAD]']


In [31]:
# Build the attention mask: 1 for every real token, 0 for each '[PAD]'.
# int(True) == 1 and int(False) == 0, so the comparison maps directly.
attention_mask = [int(token != '[PAD]') for token in tokens]

In [32]:
# Show the mask: 1 for every non-'[PAD]' token, 0 for each '[PAD]'.
print(attention_mask)

[1, 1, 1, 1, 1, 0, 0]


In [33]:
# Convert each token string to its integer id. In the printed result,
# '[CLS]' maps to 101, '[SEP]' to 102 and '[PAD]' to 0.
token_ids = tokenizer.convert_tokens_to_ids(tokens)

In [34]:
# Show the ids produced by the tokenizer for the padded token list.
print(token_ids)

[101, 1045, 2293, 3000, 102, 0, 0]


# Getting the embedding

In [35]:
# Convert the id list and the mask to tensors with a leading batch
# dimension of size 1.
# `as_tensor(...).reshape(1, -1)` gives the same (1, seq_len) result as
# `torch.tensor(...).unsqueeze(0)` on the first run. Unlike that form, it
# stays (1, seq_len) if this cell is executed again, instead of stacking
# another dimension onto an already-batched tensor.
token_ids = torch.as_tensor(token_ids).reshape(1, -1)
attention_mask = torch.as_tensor(attention_mask).reshape(1, -1)

In [44]:
# Confirm the batch layout: one sequence of seven token ids.
token_ids.size()

torch.Size([1, 7])

In [47]:
# Run the forward pass. This is inference only, so autograd is disabled
# to avoid building a graph.
# NOTE(review): drop the no_grad block if a later cell needs gradients.
with torch.no_grad():
    outputs = model(token_ids, attention_mask=attention_mask)

# Index the result by position instead of tuple-unpacking it. Recent
# transformers releases return a dict-like ModelOutput by default, and
# unpacking that yields the field *names* (strings) rather than tensors.
# Integer indexing works for both the plain tuple and the ModelOutput.
# Per the BertModel documentation, position 0 is the last-layer hidden
# state for every token and position 1 is the pooled output.
hidden_rep, cls_head = outputs[0], outputs[1]