# BERT
Bidirectional encoder representations from transformers(BERT) is a word embedding technique that uses the transformer architecture.\
It is a truely bidirectional model and takes in context from both the left and right of the word to embed to get the correct meaning of the word and solve the ambiguity.\
It uses 2 pretraining stratergies to make it bidirectional
1. Masked Language Model (MLM) - Where it hides 15% of the words randomly
2. Next Sentence Prediction (NSP) - Where it predicts the probabilty of a sentence following another one

In [None]:
from transformers import BertTokenizer, BertModel
import torch

# Load tokenizer and model
tokenizer = BertTokenizer.from_pretrained("bert-base-cased")
model = BertModel.from_pretrained("bert-base-cased")

text = "ChatGPT is a language model developed by OpenAI, based on the GPT (Generative Pre-trained Transformer) architecture."

# Tokenize and encode the text
encoding = tokenizer(text, return_tensors="pt")
input_ids = encoding["input_ids"]
attention_mask = encoding["attention_mask"]

tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
print("Tokens:", tokens)

# Get embeddings from BERT
with torch.no_grad():
    outputs = model(input_ids, attention_mask=attention_mask)
embeddings = outputs.last_hidden_state  # shape: (1, seq_len, hidden_size)

In [4]:
word_tokens = ["Cha", "##t", "##GP", "##T"]
word_indices = [i for i, t in enumerate(tokens) if t in word_tokens]

chatgpt_embedding = embeddings[0, word_indices, :].mean(dim=0)
print("Embedding vector for 'ChatGPT':", chatgpt_embedding.numpy()[:50])
print("Shape:", chatgpt_embedding.shape)

Embedding vector for 'ChatGPT': [ 0.18248466  0.06531088 -0.01304259 -0.10080975 -0.41819397  0.12474632
  0.01837833  0.40949067 -0.05134635 -0.5909524  -0.2686349   0.46541452
 -0.6358447   0.11812568 -0.587759   -0.13893628  0.07554506 -0.13657925
 -0.33382833 -0.05356705 -0.03662148 -0.31187272  0.18614851 -0.3344482
 -0.23796754 -0.2935622   0.13455424  0.20183997 -0.48891345  0.03939801
  0.24702975 -0.09962103  0.1313757  -0.05996384 -0.00407322  0.08232902
 -0.0035517   0.06007571  0.17209187  0.48715696  0.43790358  0.38488567
  0.3372156  -0.11275385  0.11603636 -0.08879892 -0.02702014  0.4166429
 -0.23614068  0.19281545]
Shape: torch.Size([768])
