## Install the dependencies

In [None]:
!pip install datasets
!pip install transformers
!pip install accelerate -U
!pip install evaluate
!pip install torch torchvision torchaudio
!pip install scikit-learn -U
!pip install sentencepiece

## Load the dataset

In [None]:
from datasets import load_dataset

# Fetch the "squad_v2" dataset; the returned object is indexed by split name
# ("train", "validation") further down in the notebook.
dataset = load_dataset(path="squad_v2")


## Load the model and the tokenizer

In [None]:
from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments

# `model_max_length` is a tokenizer option (the inference cell also passes it to
# the tokenizer). The model's `from_pretrained` forwards keyword arguments it does
# not recognise to the config / model constructor, which has no such parameter,
# so the option belongs here and not on the model.
tokenizer = T5Tokenizer.from_pretrained("t5-small", model_max_length=512)
model = T5ForConditionalGeneration.from_pretrained("t5-small")

## Run the tokenizer

In [None]:
def tokenize_example(example):
    """Tokenize one question-answering example for T5 fine-tuning.

    Args:
        example: Mapping with ``question`` and ``context`` strings and an
            ``answers`` mapping whose ``text`` entry holds the answer(s).
            ``text`` is assumed to be a list of strings (possibly empty for
            unanswerable questions), as in SQuAD v2; a bare string is also
            accepted.

    Returns:
        The tokenizer output for the prompt
        ``"question: <question> context: <context>"`` (padded/truncated to
        512 tokens) with an extra ``labels`` entry: the token ids of the first
        answer (padded/truncated to 128 tokens), with padding positions
        replaced by -100.
    """
    model_input = tokenizer(
        f"question: {example['question']} context: {example['context']}",
        padding="max_length",
        truncation=True,
        max_length=512,
    )

    # Use the answer string itself as the target. Formatting the whole list
    # (e.g. "['Denver Broncos']" or "[]") would teach the model to emit
    # brackets and quotes.
    answer_texts = example["answers"]["text"]
    if isinstance(answer_texts, str):
        answer = answer_texts
    else:
        answer = answer_texts[0] if answer_texts else ""

    # `text_target=` replaces the deprecated `as_target_tokenizer()` context manager.
    label = tokenizer(
        text_target=answer,
        padding="max_length",
        truncation=True,
        max_length=128,
    )

    # -100 is the label value ignored by the cross-entropy loss, so padded
    # positions do not contribute to training.
    pad_id = tokenizer.pad_token_id
    model_input["labels"] = [
        token_id if token_id != pad_id else -100 for token_id in label["input_ids"]
    ]
    return model_input

# Keep only the first 5000 examples of each split for a quick trial run.
train_dataset = dataset["train"].select(range(5000))
eval_dataset = dataset["validation"].select(range(5000))

# Tokenize every selected example; each result is a plain Python list.
tokenized_train_dataset = list(map(tokenize_example, train_dataset))
tokenized_eval_dataset = list(map(tokenize_example, eval_dataset))


## Set the training args and the transformer trainer

In [None]:
import torch
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="test_trainer",
    # Evaluate at the end of every epoch. This argument was called
    # `evaluation_strategy` in older Transformers releases; the unpinned install
    # at the top of the notebook pulls a release that uses `eval_strategy`.
    eval_strategy="epoch",
    # Mixed-precision (fp16) training needs a CUDA device; fall back to full
    # precision when none is available instead of failing at construction time.
    fp16=torch.cuda.is_available(),
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_eval_dataset,
)

## Train the model

In [None]:
# Start training with the Trainer configured above; as the last expression in the
# cell, the value returned by `train()` is shown as the cell output.
trainer.train()

## Save the model to disk

In [None]:
# Save the fine-tuned weights and config, and store the tokenizer files next to
# them so the directory can be reloaded on its own.
model.save_pretrained("./t5-qa_v1")
tokenizer.save_pretrained("./t5-qa_v1")


## Run inference

In [None]:
context = "Rick and Morty is an American adult animated science fiction sitcom created by Justin Roiland and Dan Harmon for Cartoon Network's nighttime programming block Adult Swim. The series follows the misadventures of Rick Sanchez, a cynical mad scientist, and his good-hearted but fretful grandson Morty Smith, who split their time between domestic life and interdimensional adventures that take place across an infinite number of realities, often traveling to other planets and dimensions through portals and on Rick's flying saucer."
question = "Who created Rick and Morty?"

# Reload the fine-tuned weights from disk, and use the tokenizer of the checkpoint
# the model was fine-tuned from ("t5-small").
model = T5ForConditionalGeneration.from_pretrained("./t5-qa_v1")
tokenizer = T5Tokenizer.from_pretrained("t5-small", model_max_length=512)

# The prompt must match the training format exactly: "question: ... context: ..."
# (no space before the colon).
input_ids = tokenizer(f"question: {question} context: {context}", return_tensors="pt").input_ids
outputs = model.generate(input_ids)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
