## Usage of Transformers models in PyTorch

## GPT2 (Generative)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Demo cell: load GPT-2, tokenize a prompt, sample a continuation and print it.

print(">Getting model gpt2")
gpt2 = AutoModelForCausalLM.from_pretrained("gpt2")

print(">Getting tokenizer for gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2") # Will yield a normal warning

prompt = "Large Langage Models are sometimes biased models."
print(">Defined the prompt:", prompt)

print(">Defining tokenizer")
# Tokenize the prompt into PyTorch tensors; the result holds both the token
# ids and the matching attention mask.
tokenizer_res = tokenizer(prompt, return_tensors="pt")
input_ids = tokenizer_res.input_ids

print(">Generating new tokens")
gen_tokens = gpt2.generate(
    input_ids,
    # Pass the mask explicitly instead of letting generate() guess it.
    attention_mask=tokenizer_res.attention_mask,
    # GPT-2 defines no padding token; naming EOS here makes the choice
    # explicit rather than relying on generate()'s warned fallback.
    pad_token_id=tokenizer.eos_token_id,
    do_sample=True,
    temperature=0.9,
    max_length=100,  # upper bound on total length (prompt + generated tokens)
)

print(">Converting generated tokens to text")
# generate() returns a batch of sequences; only one prompt was given, so take
# the first decoded string.
gen_text = tokenizer.batch_decode(gen_tokens)[0]

print(">Text generated:\n", gen_text)

print(">END")

## BERT (Descriptive)

In [None]:
from transformers import BertModel, BertTokenizer
import torch

# Demo cell: encode three sentences with BERT and compare their last-layer
# hidden states through the norm of their element-wise differences.
bert = BertModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Example input text
input_text1 = "Hello, how are you doing today?"
input_text2 = "Hello, you're doing well today"
input_text3 = "LLMs are sometimes very biased models."

# The norms printed below subtract hidden states element-wise, which requires
# every sequence to contain the same number of tokens. Pad each text to the
# longest tokenized length so the comparison does not depend on the example
# sentences happening to line up. When the texts already tokenize to the same
# length, no padding is added.
# NOTE: if padding is added, the hidden states at padded positions are part of
# the compared tensors and therefore contribute to the norms.
max_len = max(
    len(tokenizer(text).input_ids)
    for text in (input_text1, input_text2, input_text3)
)

# Tokenize the input text
inputs1 = tokenizer(input_text1, return_tensors='pt', padding='max_length', max_length=max_len)
inputs2 = tokenizer(input_text2, return_tensors='pt', padding='max_length', max_length=max_len)
inputs3 = tokenizer(input_text3, return_tensors='pt', padding='max_length', max_length=max_len)

# Forward pass through the model. This is inference only, so gradient tracking
# is disabled: it saves memory and keeps the printed norms free of grad_fn.
with torch.no_grad():
    outputs1 = bert(**inputs1)
    outputs2 = bert(**inputs2)
    outputs3 = bert(**inputs3)

# Extract activations from the last hidden state
last_hidden_state1 = outputs1.last_hidden_state
last_hidden_state2 = outputs2.last_hidden_state
last_hidden_state3 = outputs3.last_hidden_state

# Print the shape of the activations (batch_size, sequence_length, hidden_size)
print(f"Text 1: {input_text1}")
print(f"Text 2: {input_text2}")
print(f"Text 3: {input_text3}")
print("")
print(f"Tokens for 1: {inputs1.input_ids}")
print(f"Tokens for 2: {inputs2.input_ids}")
print(f"Tokens for 3: {inputs3.input_ids}")
print("")
print(f"Shape of bert output: {last_hidden_state1.shape}")
print(f"Shape of output: {last_hidden_state2.shape}")
print(f"Shape of output: {last_hidden_state3.shape}")
print("")
print(f"Norm between example 1 and 2: {torch.norm(last_hidden_state1 - last_hidden_state2)}")
print(f"Norm between example 1 and 3: {torch.norm(last_hidden_state1 - last_hidden_state3)}")
print(f"Norm between example 2 and 3: {torch.norm(last_hidden_state2 - last_hidden_state3)}")

# # Optionally, print the activations (this could be a large tensor)
# print(last_hidden_state1)
# print(last_hidden_state2)
# print(last_hidden_state3)


**As a classifier with a linear head (dropout + a single `nn.Linear` layer)**

In [None]:
import transformers
from torch import nn

class BERTClassification(nn.Module):
    """BERT encoder followed by dropout and a single linear output layer.

    Args:
        model_name: Name or path of the pretrained BERT checkpoint to load.
        dropout: Dropout probability applied to the pooled BERT output.
        num_outputs: Number of values produced per input sequence.
    """

    def __init__(self, model_name='bert-base-cased', dropout=0.4, num_outputs=1):
        super().__init__()
        self.bert = transformers.BertModel.from_pretrained(model_name)
        self.bert_drop = nn.Dropout(dropout)
        # Size the head from the loaded checkpoint instead of hard-coding 768,
        # so checkpoints with a different hidden size also work.
        self.out = nn.Linear(self.bert.config.hidden_size, num_outputs)

    def forward(self, ids, mask, token_type_ids):
        """Return the linear head's output for a batch of tokenized inputs.

        Args:
            ids: Token ids, passed to BERT as its first positional argument.
            mask: Passed to BERT as ``attention_mask``.
            token_type_ids: Passed to BERT as ``token_type_ids``.

        Returns:
            The output of ``self.out`` applied to the dropped-out pooled
            BERT output.
        """
        encoder_outputs = self.bert(ids, attention_mask=mask,
                                    token_type_ids=token_type_ids)
        # Index instead of tuple-unpacking: recent Transformers versions
        # return a dict-like output whose iteration yields key names, so
        # `_, pooled = ...` would bind strings rather than tensors.
        # Position 1 is the pooled output for both return styles.
        pooled_out = encoder_outputs[1]
        dropped = self.bert_drop(pooled_out)
        output = self.out(dropped)

        return output