In [None]:
from transformers import T5ForConditionalGeneration, T5Tokenizer, Trainer, TrainingArguments
from datasets import load_dataset
import torch
import evaluate  




In [None]:
# Checkpoint identifier shared by the model weights and the tokenizer.
model_name = "t5-small"

# First 1% of the training split of CNN/DailyMail, config "3.0.0".
dataset = load_dataset(
    path="cnn_dailymail",
    name="3.0.0",
    split="train[:1%]",
)

# Load the model and tokenizer from the same checkpoint so that the
# vocabulary used for tokenization matches the model's embeddings.
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [None]:

def preprocess_data(examples):
    """Tokenize a batch of articles and their reference summaries.

    Args:
        examples: A batch exposing an ``'article'`` and a ``'highlights'``
            column, each an iterable of strings.

    Returns:
        The tokenizer output for the articles (truncated/padded to 512
        tokens) with an extra ``'labels'`` entry holding the summary token
        ids (truncated/padded to 150 tokens). Padding positions in the labels
        are replaced by -100, the index PyTorch's cross-entropy loss ignores
        by default.
    """
    articles = list(examples['article'])
    summaries = list(examples['highlights'])

    encoded = tokenizer(
        articles,
        max_length=512,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    )
    summary_ids = tokenizer(
        summaries,
        max_length=150,
        truncation=True,
        padding="max_length",
        return_tensors="pt",
    ).input_ids

    # Swap every pad token id in the targets for the -100 sentinel.
    is_padding = summary_ids == tokenizer.pad_token_id
    encoded['labels'] = summary_ids.masked_fill(is_padding, -100)
    return encoded


# Apply the preprocessing batch-wise over the whole dataset.
train_data = dataset.map(preprocess_data, batched=True)

In [None]:
metric = evaluate.load("rouge")


def compute_metrics(eval_pred):
    """Compute ROUGE between decoded predictions and decoded reference labels.

    Args:
        eval_pred: A ``(predictions, labels)`` pair as handed over by the
            ``Trainer``. ``predictions`` may be
            * a tuple whose first element is the logits (encoder-decoder
              models such as T5 also return encoder states),
            * a 3-D array/tensor of logits, or
            * a 2-D array/tensor of already-selected token ids.
            ``labels`` is a 2-D array/tensor of token ids in which ignored
            positions are marked with -100.

    Returns:
        The dictionary produced by ``metric.compute`` for the decoded
        prediction and reference strings.
    """
    predictions, labels = eval_pred

    # Encoder-decoder models return several outputs; the logits come first.
    if isinstance(predictions, tuple):
        predictions = predictions[0]

    # The Trainer supplies NumPy arrays, which torch ops reject; normalise
    # to tensors so the rest of the function works for either input type.
    predictions = torch.as_tensor(predictions)
    labels = torch.as_tensor(labels)

    # Raw logits carry a trailing vocabulary axis; collapse it greedily.
    # 2-D input is treated as token ids and left untouched.
    if predictions.dim() == 3:
        predictions = predictions.argmax(dim=-1)

    # -100 (loss-ignore marker / Trainer padding value) is not a valid token
    # id and cannot be decoded, so map it back to the pad token, which
    # skip_special_tokens=True then removes from the decoded text.
    pad_id = tokenizer.pad_token_id
    predictions = predictions.masked_fill(predictions < 0, pad_id)
    labels = labels.masked_fill(labels < 0, pad_id)

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    return metric.compute(predictions=decoded_preds, references=decoded_labels)

In [None]:
# Training configuration, grouped by concern. Keyword order is irrelevant to
# TrainingArguments, so the values below are exactly the original ones.
training_args = TrainingArguments(
    # --- where artefacts go ---
    output_dir="./results",
    logging_dir="./logs",
    # --- optimisation ---
    num_train_epochs=3,
    learning_rate=5e-5,
    weight_decay=0.01,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    # --- cadence: log every 10 steps, evaluate and checkpoint once per epoch ---
    logging_steps=10,
    evaluation_strategy="epoch",
    save_strategy="epoch",
)



In [None]:
def _logits_to_token_ids(logits, labels):
    """Reduce one evaluation batch of model outputs to greedy token ids.

    Passed to the ``Trainer`` as ``preprocess_logits_for_metrics`` so that
    only the argmax ids are cached between evaluation steps, instead of the
    full per-token vocabulary distribution (and, for encoder-decoder models,
    the encoder hidden states). Caching the raw outputs for the whole
    evaluation set can exhaust memory.

    Args:
        logits: The model outputs for the batch; either the logits tensor or
            a tuple whose first element is the logits tensor.
        labels: The batch labels (unused; required by the hook signature).

    Returns:
        A tensor of token ids obtained by taking the argmax over the last
        axis of the logits.
    """
    if isinstance(logits, tuple):
        logits = logits[0]
    return logits.argmax(dim=-1)


# NOTE(review): eval_dataset is the same object as train_dataset, so the
# reported metrics measure fit to the training data, not generalisation.
# Consider a held-out split if the scores are meant to be meaningful.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=train_data,
    compute_metrics=compute_metrics,
    # With this hook, compute_metrics receives token ids in place of logits.
    preprocess_logits_for_metrics=_logits_to_token_ids,
)


In [None]:
trainer.train()


  0%|          | 0/2154 [00:00<?, ?it/s]

Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


{'loss': 2.681, 'grad_norm': 4.6257853507995605, 'learning_rate': 4.976787372330548e-05, 'epoch': 0.01}
{'loss': 2.5356, 'grad_norm': 3.1615967750549316, 'learning_rate': 4.953574744661096e-05, 'epoch': 0.03}
{'loss': 2.4958, 'grad_norm': 3.9571962356567383, 'learning_rate': 4.930362116991643e-05, 'epoch': 0.04}
{'loss': 2.3863, 'grad_norm': 3.3057658672332764, 'learning_rate': 4.9071494893221914e-05, 'epoch': 0.06}
{'loss': 2.2961, 'grad_norm': 2.82592511177063, 'learning_rate': 4.883936861652739e-05, 'epoch': 0.07}
{'loss': 2.2758, 'grad_norm': 3.300835132598877, 'learning_rate': 4.860724233983287e-05, 'epoch': 0.08}
{'loss': 2.3751, 'grad_norm': 2.6961417198181152, 'learning_rate': 4.8375116063138345e-05, 'epoch': 0.1}
{'loss': 2.144, 'grad_norm': 3.0563158988952637, 'learning_rate': 4.8142989786443826e-05, 'epoch': 0.11}
{'loss': 2.3914, 'grad_norm': 3.7386505603790283, 'learning_rate': 4.79108635097493e-05, 'epoch': 0.13}
{'loss': 2.3225, 'grad_norm': 3.1724650859832764, 'learning

  0%|          | 0/359 [00:00<?, ?it/s]