In [None]:
from transformers import T5ForConditionalGeneration, T5Tokenizer, Seq2SeqTrainer, Seq2SeqTrainingArguments, DataCollatorForSeq2Seq
from datasets import load_dataset, Dataset
import pandas as pd

# === Load Enhanced Dataset ===
# Read the fine-tuning CSV with pandas, then wrap the frame as a Hugging Face Dataset.
csv_path = "/content/finetune_t5_medical_reports_cleaned_symbol.csv"
df = pd.read_csv(csv_path)
dataset = Dataset.from_pandas(df)

# === Tokenizer and Model Setup ===
# Fine-tuning starts from the public "t5-base" checkpoint.
# Disabled alternative: continue from an earlier fine-tuned checkpoint instead.
#model_path = "/content/T5_medical_finetuned"
#model = T5ForConditionalGeneration.from_pretrained(model_path)
#tokenizer = T5Tokenizer.from_pretrained(model_path)
base_checkpoint = "t5-base"
tokenizer = T5Tokenizer.from_pretrained(base_checkpoint)
model = T5ForConditionalGeneration.from_pretrained(base_checkpoint)

# === Preprocess Function ===
def preprocess(example):
    """Tokenize one dataset row into encoder inputs and decoder labels.

    Args:
        example: Mapping with ``input_text`` and ``target_text`` entries. Both
            values are coerced with ``str()`` before tokenization.

    Returns:
        The tokenizer output for the input text (truncated/padded to 512
        tokens) with an added ``labels`` entry: the target token ids
        (truncated/padded to 200), where every padding position is replaced
        by -100.
    """
    input_text = str(example['input_text'])  # ensure string
    target_text = str(example['target_text'])

    model_input = tokenizer(
        text=input_text,
        truncation=True,
        padding="max_length",
        max_length=512
    )

    # `text_target=` is the supported replacement for the deprecated
    # `as_target_tokenizer()` context manager.
    labels = tokenizer(
        text_target=target_text,
        truncation=True,
        padding="max_length",
        max_length=200
    )

    # Padding positions must not contribute to the loss: -100 is the label
    # value the loss ignores. Leaving the pad id in place trains the model to
    # predict padding and makes the reported training loss look artificially low.
    pad_id = tokenizer.pad_token_id
    model_input["labels"] = [
        token_id if token_id != pad_id else -100
        for token_id in labels["input_ids"]
    ]
    return model_input


# === Tokenize Dataset ===
# Run preprocess() over every row and drop the original columns so only the
# tokenizer outputs and labels remain.
tokenized_dataset = dataset.map(
    preprocess,
    remove_columns=dataset.column_names,
)

# === Define Training Arguments ===
# Collected in one mapping so the hyper-parameters can be read at a glance.
training_config = dict(
    output_dir="t5_medical_finetuned_v2",
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    num_train_epochs=5,
    weight_decay=0.01,
    save_total_limit=2,
    logging_dir="logs",
    save_strategy="epoch",
    report_to="none",
)
training_args = Seq2SeqTrainingArguments(**training_config)

# === Trainer Setup ===
# The seq2seq collator batches the tokenized rows; the trainer ties together
# the model, arguments, training data, tokenizer and collator.
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    processing_class=tokenizer,
    data_collator=data_collator,
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Map:   0%|          | 0/853 [00:00<?, ? examples/s]



In [None]:
# === Start Training ===
# Runs fine-tuning with the `trainer` built in the setup cell. The call is the
# last expression of the cell, so its return value (a TrainOutput) is displayed.
trainer.train()

Step,Training Loss
500,0.7885
1000,0.0747


TrainOutput(global_step=1070, training_loss=0.40785695325548404, metrics={'train_runtime': 1059.757, 'train_samples_per_second': 4.025, 'train_steps_per_second': 1.01, 'total_flos': 2597205403238400.0, 'train_loss': 0.40785695325548404, 'epoch': 5.0})

In [None]:
# === Save the Updated Model ===
# Write the fine-tuned model and the tokenizer files into the same directory so
# both can later be reloaded from a single path. The tokenizer call stays last
# so the cell still displays the tuple of written tokenizer files.
save_dir = "t5_medical_finetuned_v2"
trainer.model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

('t5_medical_finetuned_v2/tokenizer_config.json',
 't5_medical_finetuned_v2/special_tokens_map.json',
 't5_medical_finetuned_v2/spiece.model',
 't5_medical_finetuned_v2/added_tokens.json')

In [None]:
# Mount Google Drive at /content/drive (Colab-only API) so files stored under
# MyDrive can be read from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Unpack a saved model archive from Drive into the current working directory.
# NOTE(review): although the archive is named "..._v3.zip", it extracts into a
# folder called t5_medical_finetuned_v2/ — the same directory name the training
# cells save to. Confirm which model version later cells are meant to load.
!unzip /content/drive/MyDrive/t5_medical_finetuned_v3.zip

Archive:  /content/drive/MyDrive/t5_medical_finetuned_v3.zip
   creating: t5_medical_finetuned_v2/
  inflating: t5_medical_finetuned_v2/added_tokens.json  
  inflating: t5_medical_finetuned_v2/config.json  
  inflating: t5_medical_finetuned_v2/generation_config.json  
  inflating: t5_medical_finetuned_v2/model.safetensors  
  inflating: t5_medical_finetuned_v2/special_tokens_map.json  
  inflating: t5_medical_finetuned_v2/spiece.model  
  inflating: t5_medical_finetuned_v2/tokenizer_config.json  


In [None]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.5-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.5-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.5


In [None]:
pip install rouge-score

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=af594428a57146ff14f920e00fb5652f7685d2ebbb4c1902558f685e5f855354
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [None]:
# Fetch NLTK's 'punkt' tokenizer data; the evaluation cell tokenizes references
# and predictions with nltk.word_tokenize.
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
# Also fetch the 'punkt_tab' resource.
# NOTE(review): presumably needed by nltk.word_tokenize on the installed NLTK
# version — confirm, and drop whichever of punkt / punkt_tab is unused.
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
import pandas as pd
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
from tqdm import tqdm
import nltk
nltk.download('punkt')

# === Load fine-tuned model and tokenizer ===
model_path = "/content/t5_medical_finetuned_v2"  # your folder
# Fall back to CPU so the cell still runs on a runtime without a GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path).to(device)
model.eval()

# === Evaluation settings ===
eval_samples = 100        # upper bound on the number of rows scored (fast evaluation)
batch_size = 8
max_input_length = 512    # same encoder limit the training preprocess() uses
max_target_length = 200   # same label limit the training preprocess() uses

# === Load your CSV dataset ===
# NOTE(review): this is the same CSV the training cell reads, so the scores below
# are measured on rows the model may have been trained on. Consider a held-out split.
df = pd.read_csv("/content/finetune_t5_medical_reports_cleaned_symbol.csv")
# Cap the sample size at the number of available rows; sample() raises otherwise.
df = df.sample(n=min(eval_samples, len(df)), random_state=42).reset_index(drop=True)

# === Handle non-string values in 'input_text' and 'target_text' ===
# fillna alone leaves numeric cells as numbers, which would break the string
# concatenation and the tokenizers below; coerce everything to str.
df['input_text'] = df['input_text'].fillna('').astype(str)
df['target_text'] = df['target_text'].fillna('').astype(str)

# NOTE(review): the training preprocess() in this notebook tokenizes input_text
# without the "generate report: " prefix added here. Confirm which form the
# loaded checkpoint was trained with.
prompts = ["generate report: " + t for t in df['input_text'].tolist()]

# === Generate predictions in batch ===
predictions = []

for i in tqdm(range(0, len(prompts), batch_size), desc="Generating reports"):
    # Tokenize per batch so padding is only to the longest prompt in the batch
    # and only one batch at a time lives on the device. An explicit max_length
    # is required: truncation=True without it performs no truncation.
    inputs = tokenizer(
        prompts[i:i+batch_size],
        padding=True,
        truncation=True,
        max_length=max_input_length,
        return_tensors="pt"
    )
    batch_ids = inputs['input_ids'].to(device)
    batch_mask = inputs['attention_mask'].to(device)

    with torch.no_grad():
        output_ids = model.generate(
            input_ids=batch_ids,
            attention_mask=batch_mask,
            # Allow outputs as long as the training targets; a shorter cap cuts
            # reports off mid-text and depresses BLEU/ROUGE.
            max_new_tokens=max_target_length,
            num_beams=2,
            early_stopping=True
        )
    decoded_preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    predictions.extend(decoded_preds)

df['generated'] = predictions

# === Evaluation ===
smoothie = SmoothingFunction().method4
rouge = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

bleu_scores, rouge1_list, rouge2_list, rougel_list = [], [], [], []

for ref, pred in zip(df['target_text'], df['generated']):
    ref_tokens = nltk.word_tokenize(ref.lower())
    pred_tokens = nltk.word_tokenize(pred.lower())

    # BLEU (sentence level, smoothed so short outputs do not score exactly zero)
    bleu = sentence_bleu([ref_tokens], pred_tokens, smoothing_function=smoothie)
    bleu_scores.append(bleu)

    # ROUGE (F-measure of each variant)
    scores = rouge.score(ref, pred)
    rouge1_list.append(scores["rouge1"].fmeasure)
    rouge2_list.append(scores["rouge2"].fmeasure)
    rougel_list.append(scores["rougeL"].fmeasure)

# === Print Results ===
# Guard the averages against an empty sample (e.g. an empty CSV).
n_scored = max(len(bleu_scores), 1)
print(f"\n🧪 Evaluation on {len(df)} samples:")
print(f"Average BLEU Score  : {sum(bleu_scores)/n_scored:.4f}")
print(f"Average ROUGE-1     : {sum(rouge1_list)/n_scored:.4f}")
print(f"Average ROUGE-2     : {sum(rouge2_list)/n_scored:.4f}")
print(f"Average ROUGE-L     : {sum(rougel_list)/n_scored:.4f}")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Generating reports: 100%|██████████| 13/13 [00:36<00:00,  2.77s/it]



🧪 Evaluation on 100 samples:
Average BLEU Score  : 0.0520
Average ROUGE-1     : 0.2782
Average ROUGE-2     : 0.2699
Average ROUGE-L     : 0.2782
