In [None]:
# Downloading required libraries
!pip install -q transformers datasets sentencepiece

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m39.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m468.7/468.7 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m32.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m34.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m132.9/132.9 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Deleting the cache content of the transformer namespace
!rm -rf $HOME/.cache/huggingface/
!mkdir $HOME/.cache/huggingface/

In [None]:
# Load the required libraries
# from transformers import T5ForConditionalGeneration, T5Tokenizer
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
import os

In [None]:
# Fine-tuning function
def fine_tune_t5(model_dir, num_iterations=5):
    """Interactively fine-tune a T5 model on user-supplied (context, question) pairs.

    Each iteration prompts for one context and one question on stdin, performs
    a single Adam update (lr=1e-4) on that example, prints the loss, and saves
    the model and tokenizer to
    ``<dirname(model_dir)>/fine_tuned_models/iteration_<k>`` (k starts at 1).

    Args:
        model_dir: Path or identifier passed to ``from_pretrained`` for both
            the tokenizer and the model.
        num_iterations: Number of (context, question) pairs to train on; one
            optimizer step and one checkpoint per pair.

    Returns:
        None. The results are the checkpoints written to disk.
    """
    # Load tokenizer and model
    tokenizer = T5Tokenizer.from_pretrained(model_dir)
    model = T5ForConditionalGeneration.from_pretrained(model_dir)

    # from_pretrained() returns the model in evaluation mode (dropout
    # disabled); switch to training mode before updating the weights.
    model.train()

    # Set up optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    # Checkpoints are written next to model_dir; the root does not change
    # between iterations, so compute it once.
    checkpoint_root = os.path.join(os.path.dirname(model_dir), "fine_tuned_models")

    # Fine-tune model for num_iterations
    for i in range(num_iterations):
        # Get context and question from user
        context = input("Enter context: ")
        question = input("Enter question: ")

        # Prompt template: inference must wrap the context the same way.
        input_text = f'generate question: "{context}"'
        output_text = f'{question}'

        # Tokenize inputs
        input_ids = tokenizer.encode(input_text, return_tensors="pt", padding=True, truncation=True)
        target_ids = tokenizer.encode(output_text, return_tensors="pt", padding=True, truncation=True)

        # Forward pass. With `labels` supplied the model returns the
        # token-level cross-entropy loss itself, so no separate criterion
        # is needed.
        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, labels=target_ids)
        loss = outputs.loss

        # Backward pass and optimization step
        loss.backward()
        optimizer.step()

        # Print loss after each iteration
        print(f"Iteration {i+1}: Loss={loss.item()}")

        # Save model and tokenizer
        output_dir = os.path.join(checkpoint_root, f"iteration_{i+1}")
        os.makedirs(output_dir, exist_ok=True)
        model.save_pretrained(output_dir)
        tokenizer.save_pretrained(output_dir)
        print(f"Model and tokenizer saved after iteration {i+1}")


In [None]:
# Run a single interactive fine-tuning step on the model stored in
# /content/fine_tuned_model.
fine_tune_t5("/content/fine_tuned_model", num_iterations=1)

Enter context: Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyoncé's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles "Crazy in Love" and "Baby Boy".
Enter question: When did Beyonce start becoming popular?
Iteration 1: Loss=4.588083744049072
Model and tokenizer saved after iteration 1


In [None]:
# Testing Model - Local
# Reload the checkpoint written by fine_tune_t5("/content/fine_tuned_model", 1).
# The model and tokenizer created inside fine_tune_t5 are local to that
# function and are not available at notebook scope, so they must be loaded
# here for this cell to run on a fresh kernel.
checkpoint_dir = "/content/fine_tuned_models/iteration_1"
tokenizer = T5Tokenizer.from_pretrained(checkpoint_dir)
model = T5ForConditionalGeneration.from_pretrained(checkpoint_dir)

# Set model to eval mode for inference
model.eval()
with torch.no_grad():
    # Get context from user
    context = input("Enter context: ")

    # Wrap the context in the same prompt template used during fine-tuning,
    # otherwise the model sees a different input format than it was trained on.
    input_text = f'generate question: "{context}"'
    input_ids = tokenizer.encode(input_text, return_tensors="pt", padding=True, truncation=True)

    # Generate question (beam search, at most 50 tokens)
    output_ids = model.generate(input_ids, max_length=50, num_beams=5, early_stopping=True)
    question = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # Print generated question
    print(f"Generated question: {question}")

Enter context: Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ bee-YON-say) (born September 4, 1981) is an American singer, songwriter, record producer and actress. Born and raised in Houston, Texas, she performed in various singing and dancing competitions as a child, and rose to fame in the late 1990s as lead singer of R&B girl-group Destiny's Child. Managed by her father, Mathew Knowles, the group became one of the world's best-selling girl groups of all time. Their hiatus saw the release of Beyoncé's debut album, Dangerously in Love (2003), which established her as a solo artist worldwide, earned five Grammy Awards and featured the Billboard Hot 100 number-one singles "Crazy in Love" and "Baby Boy".
Generated question: Who became popular?
