## Usage of Transformers models in PyTorch

## GPT2 (Generative)

In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer

print(">Getting model gpt2")
gpt2 = AutoModelForCausalLM.from_pretrained("gpt2")

print(">Getting tokenizer for gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2") # Will yield a normal warning

prompt = "Large Langage Models are sometimes biased models."
print(">Defined the prompt:", prompt)

print(">Defining tokenizer")
tokenizer_res = tokenizer(prompt, return_tensors="pt")
input_ids = tokenizer_res.input_ids

print(">Generating new tokens")
gen_tokens = gpt2.generate(
    input_ids,
    do_sample=True,
    temperature=0.9,
    max_length=100,
)

print(">Converting generated tokens to text")
gen_text = tokenizer.batch_decode(gen_tokens)[0]

print(">Text generated:\n", gen_text)

print(">END")

>Getting model gpt2
>Getting tokenizer for gpt2
>Defined the prompt: Large Langage Models are sometimes biased models.
>Defining tokenizer
>Generating new tokens


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


>Converting generated tokens to text
>Text generated:
 Large Langage Models are sometimes biased models. Instead of creating a general linear model of the natural behavior, some models are more general than others. For example, some models are more general than others (the best examples are the linear and nonlinear models for which the most general features are known).

References<|endoftext|>
>END


## ROUGE score

In [3]:
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer

# Step 2: Prepare dataset (example prompts and references)
prompts = ["The capital of France is", "The largest mammal is"]
references = ["The capital of France is Paris.", "The largest mammal is the blue whale."]

# Step 3: Generate outputs
generated_texts = []
for prompt in prompts:
    inputs = tokenizer.encode(prompt, return_tensors="pt")
    outputs = gpt2.generate(inputs, max_length=30, num_return_sequences=1)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    generated_texts.append(generated_text)

# Step 4: Compute BLEU and ROUGE scores
bleu_scores = []
rouge_scores = []

rouge = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

for ref, gen in zip(references, generated_texts):
    # BLEU Score
    reference_tokens = ref.split()
    generated_tokens = gen.split()
    bleu = sentence_bleu([reference_tokens], generated_tokens)
    bleu_scores.append(bleu)

    # ROUGE Score
    rouge_score = rouge.score(ref, gen)
    rouge_scores.append(rouge_score)

# Step 5: Print evaluation results
for i, (bleu, rouge) in enumerate(zip(bleu_scores, rouge_scores)):
    print(f"Example {i + 1}:")
    print(f"Prompt: {prompts[i]}")
    print(f"Reference: {references[i]}")
    print(f"Generated: {generated_texts[i]}")
    print(f"BLEU Score: {bleu:.4f}")
    print(f"ROUGE Score: {rouge}")
    print("-" * 30)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Example 1:
Prompt: The capital of France is
Reference: The capital of France is Paris.
Generated: The capital of France is the capital of the French Republic, and the capital of the French Republic is the capital of the French Republic.

The
BLEU Score: 0.1352
ROUGE Score: {'rouge1': Score(precision=0.19230769230769232, recall=0.8333333333333334, fmeasure=0.3125), 'rouge2': Score(precision=0.16, recall=0.8, fmeasure=0.26666666666666666), 'rougeL': Score(precision=0.19230769230769232, recall=0.8333333333333334, fmeasure=0.3125)}
------------------------------
Example 2:
Prompt: The largest mammal is
Reference: The largest mammal is the blue whale.
Generated: The largest mammal is the elephant, which is about the size of a human. It is the largest mammal in the world. It is the largest mammal
BLEU Score: 0.1299
ROUGE Score: {'rouge1': Score(precision=0.18518518518518517, recall=0.7142857142857143, fmeasure=0.2941176470588235), 'rouge2': Score(precision=0.15384615384615385, recall=0.66666

In [None]:
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer

# Step 1: Load pre-trained model and tokenizer
print("> Getting model gpt2")
gpt2 = AutoModelForCausalLM.from_pretrained("gpt2")

print("> Getting tokenizer for gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Step 2: Load the LOTR dataset
file_path = "lotr_dataset.csv"  # Path to your dataset
dataset = pd.read_csv(file_path)

prompts = dataset['prompt'].tolist()
references = dataset['reference'].tolist()

# Step 3: Generate outputs using the model
print("> Generating texts...")
generated_texts = []
i = 0
for prompt in prompts:
    print(f"Generating text {i + 1}/{len(prompts)}")
    i += 1
    inputs = tokenizer.encode(prompt, return_tensors="pt")
    outputs = gpt2.generate(inputs, max_new_tokens=30, num_return_sequences=1)
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    generated_texts.append(generated_text)

# Step 4: Compute BLEU and ROUGE scores
bleu_scores = []
rouge_scores = []

rouge = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

print("> Evaluating...")
for ref, gen in zip(references, generated_texts):
    # BLEU Score
    reference_tokens = ref.split()
    generated_tokens = gen.split()
    bleu = sentence_bleu([reference_tokens], generated_tokens)
    bleu_scores.append(bleu)

    # ROUGE Score
    rouge_score = rouge.score(ref, gen)
    rouge_scores.append(rouge_score)

# Step 5: Print evaluation results
print("> Results:")
for i, (bleu, rouge) in enumerate(zip(bleu_scores, rouge_scores)):
    print(f"Example {i + 1}/{len(prompts)}")
    print(f"Prompt: {prompts[i]}")
    print(f"Reference: {references[i]}")
    print(f"Generated: {generated_texts[i]}")
    # print(f"BLEU Score: {bleu:.4f}")
    # print(f"ROUGE Score: {rouge}")
    print("-" * 30)

# Step 6: Save results to CSV
results = pd.DataFrame({
    "prompt": prompts,
    "reference": references,
    "generated": generated_texts,
    "bleu_score": bleu_scores,
    "rouge1_f1": [score['rouge1'].fmeasure for score in rouge_scores],
    "rouge2_f1": [score['rouge2'].fmeasure for score in rouge_scores],
    "rougeL_f1": [score['rougeL'].fmeasure for score in rouge_scores],
})

# output_path = "lotr_evaluation_results.csv"
# results.to_csv(output_path, index=False)
# print(f"> Results saved to {output_path}")

# Average BLEU and ROUGE scores
avg_bleu = sum(bleu_scores) / len(bleu_scores)
avg_rouge1 = sum(score['rouge1'].fmeasure for score in rouge_scores) / len(rouge_scores)
avg_rouge2 = sum(score['rouge2'].fmeasure for score in rouge_scores) / len(rouge_scores)
avg_rougeL = sum(score['rougeL'].fmeasure for score in rouge_scores) / len(rouge_scores)

print(f"> Average BLEU Score: {avg_bleu:.4f}")
print(f"> Average ROUGE-1 F1 Score: {avg_rouge1:.4f}")
print(f"> Average ROUGE-2 F1 Score: {avg_rouge2:.4f}")
print(f"> Average ROUGE-L F1 Score: {avg_rougeL:.4f}")
# > Average BLEU Score: 0.0774
# > Average ROUGE-1 F1 Score: 0.3271
# > Average ROUGE-2 F1 Score: 0.1292
# > Average ROUGE-L F1 Score: 0.2074

> Getting model gpt2
> Getting tokenizer for gpt2


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


> Generating texts...
Generating text 1/355
AAAA


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


BBBBB
Generating text 2/355
AAAA


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


BBBBB
Generating text 3/355
AAAA


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


BBBBB
Generating text 4/355
AAAA


KeyboardInterrupt: 

## BERT (Descriptive)

In [None]:
from transformers import BertModel, BertTokenizer
import torch

bert = BertModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Example input text
input_text1 = "Hello, how are you doing today?"
input_text2 = "Hello, you're doing well today"
input_text3 = "LLMs are sometimes very biased models."

# Tokenize the input text
inputs1 = tokenizer(input_text1, return_tensors='pt') #, padding='max_length', truncation=True, max_length=10)
inputs2 = tokenizer(input_text2, return_tensors='pt') #, padding='max_length', truncation=True, max_length=10)
inputs3 = tokenizer(input_text3, return_tensors='pt') #, padding='max_length', truncation=True, max_length=10)

# Forward pass through the model
outputs1 = bert(**inputs1)
outputs2 = bert(**inputs2)
outputs3 = bert(**inputs3)

# Extract activations from the last hidden state
last_hidden_state1 = outputs1.last_hidden_state
last_hidden_state2 = outputs2.last_hidden_state
last_hidden_state3 = outputs3.last_hidden_state

# Print the shape of the activations (batch_size, sequence_length, hidden_size)
print(f"Text 1: {input_text1}")
print(f"Text 2: {input_text2}")
print(f"Text 3: {input_text3}")
print("")
print(f"Tokens for 1: {inputs1.input_ids}")
print(f"Tokens for 2: {inputs2.input_ids}")
print(f"Tokens for 3: {inputs3.input_ids}")
print("")
print(f"Shape of bert output: {last_hidden_state1.shape}")
print(f"Shape of output: {last_hidden_state2.shape}")
print(f"Shape of output: {last_hidden_state3.shape}")
print("")
print(f"Norm between example 1 and 2: {torch.norm(last_hidden_state1 - last_hidden_state2)}")
print(f"Norm between example 1 and 3: {torch.norm(last_hidden_state1 - last_hidden_state3)}")
print(f"Norm between example 2 and 3: {torch.norm(last_hidden_state2 - last_hidden_state3)}")

# # Optionally, print the activations (this could be a large tensor)
# print(last_hidden_state1)
# print(last_hidden_state2)
# print(last_hidden_state3)


**As a classifier with an MLP**

In [None]:
import transformers
from torch import nn

class BERTClassification(nn.Module):
    def __init__ (self):
        super(BERTClassification, self).__init__()
        self.bert = transformers.BertModel.from_pretrained('bert-base-cased')
        self.bert_drop = nn.Dropout(0.4)
        self.out = nn.Linear(768, 1)

    def forward(self, ids, mask, token_type_ids):
        _, pooledOut = self.bert(ids, attention_mask = mask,
                                token_type_ids=token_type_ids)
        bertOut = self.bert_drop(pooledOut)
        output = self.out(bertOut)

        return output