In [1]:
import torch
import torch.nn as nn
from transformers import GPT2LMHeadModel, GPT2Tokenizer, AdamW, get_linear_schedule_with_warmup

## Loading the Model

In [2]:
# Loading the model
model_name = 'gpt2'
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


## Freezing Model Parameters

In [3]:
# freezing model parameters
for param in model.parameters():
    param.requires_grad = False

for param in model.parameters():
    param.data = param.data.to(torch.float32)

# Enable Gradient Checkpointing

In [4]:
# enable gradient checkpointing
model.gradient_checkpointing_enable()

model.transformer.wte.weight.requires_grad = True
model.transformer.wpe.weight.requires_grad = True

# Custom Output Casting

In [5]:
class CustomLMHead(nn.Module):
    def __init__(self, original_lm_head):
        super(CustomLMHead, self).__init__()
        self.original_lm_head = original_lm_head

    def forward(self, *args, **kwargs):
        output = self.original_lm_head(*args, **kwargs)
        return output.to(torch.float32)

# Replace the model’s lm head with an instance of this custom class
model.lm_head = CustomLMHead(model.lm_head)

# Helper Function

In [6]:
# Helper Function to Print Trainable Parameters
def print_trainable_parameters(model):
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"Number of trainable parameters: {trainable_params}")

print_trainable_parameters(model)

Number of trainable parameters: 39383808


# Prompt Creation

In [7]:
# function to format context, question, and answer into a prompt template
def format_prompt(context, question, answer=None):
    if answer:
        return f"Context: {context}\nQuestion: {question}\nAnswer: {answer}"
    else:
        return f"Context: {context}\nQuestion: {question}\nAnswer:"

# Mock QA dataset
qa_dataset = [
    {"context": "The sky is blue.", "question": "What color is the sky?", "answer": "Blue"},
    {"context": "The cat is on the roof.", "question": "Where is the cat?", "answer": "On the roof"},
    {"context": "The car is red.", "question": "What color is the car?", "answer": "Red"},
    {"context": "The dog is brown.", "question": "What color is the dog?", "answer": "Brown"},
]

# Map the QA dataset to this prompt format using the tokenizer.
def map_qa_to_prompt(qa_dataset, tokenizer):
    prompts = []
    for qa in qa_dataset:
        prompt = format_prompt(qa['context'], qa['question'], qa['answer'])
        inputs = tokenizer(prompt, return_tensors='pt')
        prompts.append(inputs)
    return prompts

# Tokenize the QA dataset
tokenized_prompts = map_qa_to_prompt(qa_dataset, tokenizer)
for prompt in tokenized_prompts:
    print(prompt)

{'input_ids': tensor([[21947,    25,   383,  6766,   318,  4171,    13,   198, 24361,    25,
          1867,  3124,   318,   262,  6766,    30,   198, 33706,    25,  4518]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}
{'input_ids': tensor([[21947,    25,   383,  3797,   318,   319,   262,  9753,    13,   198,
         24361,    25,  6350,   318,   262,  3797,    30,   198, 33706,    25,
          1550,   262,  9753]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}
{'input_ids': tensor([[21947,    25,   383,  1097,   318,  2266,    13,   198, 24361,    25,
          1867,  3124,   318,   262,  1097,    30,   198, 33706,    25,  2297]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}
{'input_ids': tensor([[21947,    25,   383,  3290,   318,  7586,    13,   198, 24361,    25,
          1867,  3124,   318,   262,  3290,    30,   198, 33706,    25,  

# Training the Model

In [8]:
# Train the model
epochs = 3
learning_rate = 5e-5

optimizer = AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate)
total_steps = len(tokenized_prompts) * epochs
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

model.train()
for epoch in range(epochs):
    for batch in tokenized_prompts:
        inputs = batch['input_ids']
        labels = batch['input_ids']
        outputs = model(inputs, labels=labels)
        loss = outputs.loss

        loss.backward()
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

        print(f"Epoch {epoch+1}, Loss: {loss.item()}")

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Epoch 1, Loss: 2.798947811126709
Epoch 1, Loss: 2.9669504165649414
Epoch 1, Loss: 3.3174118995666504
Epoch 1, Loss: 3.0745296478271484
Epoch 2, Loss: 2.761045455932617
Epoch 2, Loss: 2.8329732418060303
Epoch 2, Loss: 3.0522823333740234
Epoch 2, Loss: 3.178790807723999
Epoch 3, Loss: 2.59621262550354
Epoch 3, Loss: 2.6281633377075195
Epoch 3, Loss: 3.1056389808654785
Epoch 3, Loss: 3.6082780361175537


# Loading LoRA Model

In [9]:
!pip install peft



In [10]:
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=16,
    lora_alpha=64,
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, config)

No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'


# Inference Function

In [11]:
# inference function to generate answers based on a given context and question
def generate_answer(model, tokenizer, context, question):
    prompt = format_prompt(context, question)
    inputs = tokenizer(prompt, return_tensors='pt')
    with torch.no_grad():
        outputs = model.generate(inputs['input_ids'], max_length=50)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer

# Test Inference

In [12]:
# Test the inference function with sample contexts and questions.
sample_context = "The sky is blue."
sample_question = "What color is the sky?"
answer = generate_answer(model, tokenizer, sample_context, sample_question)
print(f"Context: {sample_context}\nQuestion: {sample_question}\nAnswer: {answer}")

sample_context = "The cat is on the roof."
sample_question = "Where is the cat?"
answer = generate_answer(model, tokenizer, sample_context, sample_question)
print(f"Context: {sample_context}\nQuestion: {sample_question}\nAnswer: {answer}")

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Context: The sky is blue.
Question: What color is the sky?
Answer: Context: The sky is blue.
Question: What color is the sky?
Answer: The sky is blue.
Question: What color is the sky?
Question: The sky is blue.
Question: What color is the sky?
Context: The cat is on the roof.
Question: Where is the cat?
Answer: Context: The cat is on the roof.
Question: Where is the cat?
Answer: The cat is on the roof.
Question: What is the cat?Answer: The cat is on the roof.
Question: What is the
