💻 Check CUDA availability

In [1]:
import torch

In [2]:
# Report whether PyTorch can see a CUDA GPU. The f-string branch is only
# evaluated when CUDA is available, so get_device_name(0) is never called
# on a CPU-only machine.
print(
    f"✅ CUDA is available! GPU: {torch.cuda.get_device_name(0)}"
    if torch.cuda.is_available()
    else "❌ CUDA is NOT available. Using CPU instead."
)

❌ CUDA is NOT available. Using CPU instead.


### Load the dataset (CNN/DailyMail)

In [3]:
from datasets import load_dataset

In [4]:
# Load the "3.0.0" configuration of CNN/DailyMail; the result is a DatasetDict
# keyed by split name.
raw_datasets = load_dataset("cnn_dailymail", "3.0.0")   # train/validation/test splits
# Bare expression so the notebook displays each split's features and row count.
raw_datasets

DatasetDict({
    train: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 287113
    })
    validation: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 13368
    })
    test: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 11490
    })
})

In [5]:
# Work on a small slice of each split: the first 100 training samples and
# the first 20 samples of the validation and test splits.
small_train_dataset = raw_datasets["train"].select(range(100))
small_val_dataset = raw_datasets["validation"].select(range(20))
small_test_dataset = raw_datasets["test"].select(range(20))

# Show the first training record (fields: 'article', 'highlights', 'id').
print(small_train_dataset[0])

{'article': 'LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won\'t cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. "I don\'t plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar," he told an Australian interviewer earlier this month. "I don\'t think I\'ll be particularly extravagant. "The things I like buying are things that cost about 10 pounds -- books and CDs and DVDs." At 18, Radcliffe will be able to gamble in a casino, buy a drink in a pub or see the horror film "Hostel: Part II," currently six places below his number one movie on the UK box office char

### Load T5-small

In [6]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

In [7]:
# Load the tokenizer and the seq2seq model from the same checkpoint name so
# both refer to the same pretrained T5 variant.
model_checkpoint = "t5-small"  # light and CPU-friendly
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

### Tokenize Dataset

In [8]:
# Token budgets read by preprocess_function: articles are truncated to
# max_input_length tokens and highlights to max_target_length tokens.
max_input_length = 512   # T5-small limit for inputs
max_target_length = 128  # reasonable summary length

In [9]:
def preprocess_function(examples):
    """Tokenize a batch of articles and their reference summaries.

    Intended for ``Dataset.map(..., batched=True)``. Reads the notebook-level
    ``tokenizer``, ``max_input_length`` and ``max_target_length``.

    Args:
        examples: Mapping with an "article" list (source texts) and a
            "highlights" list (reference summaries) of equal length.

    Returns:
        The tokenizer output for the prefixed articles, with an added
        "labels" entry holding the token ids of the highlights.
    """
    # T5 picks the task from a text prefix, so every article is prefixed.
    inputs = ["summarize: " + doc for doc in examples["article"]]
    model_inputs = tokenizer(
        inputs, max_length=max_input_length, truncation=True
    )

    # Tokenize targets (highlights). `text_target=` replaces the deprecated
    # `tokenizer.as_target_tokenizer()` context manager. It is a separate call
    # so the targets are truncated to their own, shorter max_length.
    labels = tokenizer(
        text_target=examples["highlights"],
        max_length=max_target_length,
        truncation=True,
    )

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [10]:
# Apply preprocess_function to the train and validation subsets; batched=True
# hands the function a batch of examples at a time. The test subset is not
# tokenized here.
tokenized_train = small_train_dataset.map(preprocess_function, batched=True)
tokenized_val = small_val_dataset.map(preprocess_function, batched=True)

### Run Inference (Generate Summaries)

In [12]:
# from transformers import pipeline

In [13]:
# summarizer = pipeline(
#     "summarization",
#     model=model,
#     tokenizer=tokenizer
# )

In [14]:
# for i in range(3):
#     article_text = small_val_dataset[i]["article"]
#     summary = summarizer(
#         "summarize: " + article_text,
#         max_length=128,
#         min_length=30,
#         do_sample=False
#     )[0]["summary_text"]
    
#     print(f"\n🔹 Original:\n{article_text[:300]}...\n")
#     print(f"✂️ Summary:\n{summary}\n")

In [15]:
def summarize_text(text, max_input_length=512, max_new_tokens=64):
    """Generate a summary of ``text`` with the notebook-level model.

    Reads the notebook-level ``tokenizer`` and ``model``.

    Args:
        text: The document to summarize.
        max_input_length: Maximum number of input tokens; longer inputs are
            truncated.
        max_new_tokens: Maximum number of tokens to generate.

    Returns:
        The decoded summary string with special tokens removed.
    """
    # prepend T5 prompt
    input_str = "summarize: " + text
    # tokenize & truncate to model max, then move the tensors to wherever the
    # model lives so generation also works if the model is placed on a GPU
    inputs = tokenizer(
        input_str,
        return_tensors="pt",
        max_length=max_input_length,
        truncation=True,
        padding="longest"
    ).to(model.device)

    # generate with beam search; no gradients are needed for inference
    with torch.no_grad():
        generated_ids = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,
            num_beams=4,
            no_repeat_ngram_size=3,
            length_penalty=2.0,
            early_stopping=True
        )

    # generated_ids holds one sequence per input; a single input was given
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)


In [16]:
# Summarize the first three validation articles, printing a header before
# each generated summary.
for sample_idx in range(3):
    print("----- SAMPLE", sample_idx, "-----\n")
    article = small_val_dataset[sample_idx]["article"]
    generated_summary = summarize_text(
        article, max_input_length=512, max_new_tokens=64
    )
    print(generated_summary)
    print("\n")

----- SAMPLE 0 -----

the power that multiplied Zully Broussard's gift was data processing of genetic profiles from donor-recipient pairs. it is taking five surgeons, a covey of physician assistants, nurses and anesthesiologists to perform surgeries on 12 people. the


----- SAMPLE 1 -----

a new domestic television and media rights deal with MLS is worth $700 million over eight years. a further four are set to be added by 2020. the new season is the first of a domestic tv and media deal with FOX, ESPN and Univision.


----- SAMPLE 2 -----

bafetimbi Gomis is now "feeling well" after collapsing during Swansea's 3-2 defeat at white hart lane. the 29-year-old has a history of fainting. he spent the night in hospital as a precaution.




In [11]:
# Inspect the first tokenized training example. This needs tokenized_train,
# so it must run after the cell that calls .map(preprocess_function, ...).
print(tokenized_train[0])

{'article': 'LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won\'t cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. "I don\'t plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar," he told an Australian interviewer earlier this month. "I don\'t think I\'ll be particularly extravagant. "The things I like buying are things that cost about 10 pounds -- books and CDs and DVDs." At 18, Radcliffe will be able to gamble in a casino, buy a drink in a pub or see the horror film "Hostel: Part II," currently six places below his number one movie on the UK box office char