In [None]:
# Load the required libraries
from transformers import T5ForConditionalGeneration, T5Tokenizer
from datasets import load_dataset
import torch

# Pull the tokenizer and the seq2seq model from one shared checkpoint name so
# the two can never drift apart.
CHECKPOINT = 't5-base'
tokenizer = T5Tokenizer.from_pretrained(CHECKPOINT)
model = T5ForConditionalGeneration.from_pretrained(CHECKPOINT)

# Load the SQuAD v2 dataset
dataset = load_dataset('squad_v2')

# Build aligned training examples in a single pass over the training split.
#
# Each row's 'answers' field is a dict of the form
# {'text': [...], 'answer_start': [...]}, so the answer string has to be taken
# per row; indexing the whole column with ['text'] does not give one string
# per example. SQuAD v2 also contains unanswerable questions whose answer list
# is empty -- those cannot be used for answer-conditioned question generation
# and are skipped.
#
# After the loop, `passages`, `answers`, `inputs` and `outputs` all have the
# same length and share indices (only answerable examples are kept).
passages = []
answers = []
inputs = []
outputs = []
for example in dataset['train']:
    answer_texts = example['answers']['text']
    if not answer_texts:
        # Unanswerable question: there is no answer span to condition on.
        continue

    passage = example['context']
    answer = answer_texts[0]

    passages.append(passage)
    answers.append(answer)
    # Prompt format must match the one used at generation time.
    inputs.append(f'generate questions: "{passage}" answer: "{answer}"')
    outputs.append(example['question'])

# Fine-tune the T5 model on the prepared (prompt, question) pairs.
#
# Tokenization is done per mini-batch rather than for the whole dataset at
# once: padding every example to the longest sequence in the entire training
# set wastes a large amount of memory and compute. The tokenizer returns a
# BatchEncoding, whose tensors are reached by key ('input_ids',
# 'attention_mask'), not by integer row index.
BATCH_SIZE = 8           # examples per optimizer step
MAX_INPUT_LENGTH = 512   # prompt tokens kept after truncation
MAX_TARGET_LENGTH = 64   # question tokens kept after truncation

model.train()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
for epoch in range(3):
    for start in range(0, len(inputs), BATCH_SIZE):
        stop = start + BATCH_SIZE

        # Tokenize the input and output using the T5 tokenizer
        input_ids = tokenizer(
            inputs[start:stop],
            padding=True,
            truncation=True,
            max_length=MAX_INPUT_LENGTH,
            return_tensors='pt',
        )
        output_ids = tokenizer(
            outputs[start:stop],
            padding=True,
            truncation=True,
            max_length=MAX_TARGET_LENGTH,
            return_tensors='pt',
        )

        # Replace padding positions in the labels with -100 so that the
        # cross-entropy loss ignores them instead of training the model to
        # emit pad tokens.
        labels = output_ids['input_ids'].masked_fill(
            output_ids['attention_mask'] == 0, -100
        )

        optimizer.zero_grad()
        loss = model(
            input_ids=input_ids['input_ids'],
            # Padded inputs need the mask so the encoder does not attend to
            # pad tokens.
            attention_mask=input_ids['attention_mask'],
            labels=labels,
        ).loss
        loss.backward()
        optimizer.step()

# Generate questions based on a given passage
sample_passage = 'This is a sample passage.'
sample_answer = 'Sample answer.'
# Same prompt format as the one used to build the training inputs.
input_text = f'generate questions: "{sample_passage}" answer: "{sample_answer}"'

# Switch to inference mode: the model was left in train() mode by fine-tuning,
# which keeps dropout active and makes generation noisy.
model.eval()

# Truncate so that an over-long passage cannot exceed the model's input limit.
input_ids = tokenizer.encode(
    input_text, truncation=True, max_length=512, return_tensors='pt'
)
output = model.generate(
    input_ids=input_ids,
    max_length=64,
    num_beams=4,
    no_repeat_ngram_size=2,
    early_stopping=True,
)
# Beam search returns the best hypothesis first.
output_text = tokenizer.decode(output[0], skip_special_tokens=True)

# Print the generated questions
print(output_text)
