In [1]:
import pandas as pd
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
from tqdm import tqdm

In [2]:
# Load model + tokenizer
model_path = "./models/t5_task_a_model"
tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

T5ForConditionalGeneration(
  (shared): Embedding(32128, 768)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 768)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=768, out_features=768, bias=False)
              (k): Linear(in_features=768, out_features=768, bias=False)
              (v): Linear(in_features=768, out_features=768, bias=False)
              (o): Linear(in_features=768, out_features=768, bias=False)
              (relative_attention_bias): Embedding(32, 12)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=768, out_features=3072, bias=False)
              (wo): Linear(in_features=3072, out_features=768, bias=False)
              (dropout): Dro

In [3]:
# Load test set (you can also test testset-2 later)
test_df = pd.read_csv("../dataset/task_a/MTS-Dialog-TestSet-1-MEDIQA-Chat-2023.csv")
test_df["input_text"] = "Dialogue: " + test_df["dialogue"].str.replace(r"\s+", " ", regex=True).str.strip()

In [4]:
# Generate predictions
section_headers = []
summaries = []

In [5]:
model.eval()
for text in tqdm(test_df["input_text"], desc="Generating..."):
    input_ids = tokenizer(text, return_tensors="pt", truncation=True, max_length=512).input_ids.to(device)
    output = model.generate(input_ids, max_length=128, num_beams=4)
    decoded = tokenizer.decode(output[0], skip_special_tokens=True)

    # Extract Section + Summary
    if decoded.lower().startswith("section:"):
        try:
            section = decoded.split("Summary:")[0].replace("Section:", "").strip()
            summary = decoded.split("Summary:")[1].strip()
        except:
            section, summary = "UNKNOWN", decoded.strip()
    else:
        section, summary = "UNKNOWN", decoded.strip()

    section_headers.append(section)
    summaries.append(summary)

Generating...: 100%|██████████| 200/200 [06:18<00:00,  1.89s/it]


In [6]:
# Final submission format
submission_df = pd.DataFrame({
    "TestID": test_df["ID"],
    "SystemOutput1": section_headers,
    "SystemOutput2": summaries
})

In [11]:
# Save output
output_path = "../outputs/taskA_run1.csv"
submission_df.to_csv(output_path, index=False)
print("Saved:", output_path)

Saved: ../outputs/taskA_run1.csv
