In [None]:
import torch
import random
import evaluate
import datasets
import numpy as np
import pandas as pd
import transformers
from datasets import load_dataset
from IPython.display import display, HTML
from transformers import AutoTokenizer, DataCollatorForSeq2Seq, AutoModelForSeq2SeqLM, Seq2SeqTrainingArguments, Seq2SeqTrainer

In [3]:
# Load the CNN/DailyMail summarization corpus.
# The config name is passed explicitly: the dataset is published in several
# configs ("1.0.0", "2.0.0", "3.0.0") and load_dataset may raise
# "Config name is missing" when none is given. "3.0.0" is the version
# normally used for summarization and pins the data for reproducibility.
dataset = load_dataset("cnn_dailymail", "3.0.0")

In [4]:
# Inspect the available splits with their columns and row counts
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'article', 'highlights'],
        num_rows: 287113
    })
    validation: Dataset({
        features: ['id', 'article', 'highlights'],
        num_rows: 13368
    })
    test: Dataset({
        features: ['id', 'article', 'highlights'],
        num_rows: 11490
    })
})

In [5]:
# Show some examples picked randomly in the dataset
def show_random_elements(dataset, num_examples=5):
    """Display a random sample of distinct rows of ``dataset`` as an HTML table.

    Args:
        dataset: Object supporting ``len()``, indexing with a list of row
            indices (``dataset[[i, j, ...]]``) and a ``features`` mapping of
            column name to feature type.
        num_examples: Number of distinct rows to display (default 5).

    Raises:
        AssertionError: If ``num_examples`` is larger than ``len(dataset)``.
    """
    # Ensure that num_examples is not greater than the number of examples in the dataset
    assert num_examples <= len(dataset), "Can't pick more elements than there are in the dataset."

    # Draw the row indices without replacement, so they are guaranteed to be
    # distinct without a manual retry loop
    picks = random.sample(range(len(dataset)), num_examples)

    # Create a DataFrame containing the randomly picked examples from the dataset
    df = pd.DataFrame(dataset[picks])

    # Transform class label indices into their corresponding names (if applicable)
    for column, typ in dataset.features.items():
        if isinstance(typ, datasets.ClassLabel):
            # Bind the names as a default argument so the lambda does not
            # depend on the loop variable
            df[column] = df[column].transform(lambda i, names=typ.names: names[i])

    # Display the DataFrame containing the random examples
    display(HTML(df.to_html()))

In [6]:
# Preview random training examples (the function's default sample size is 5)
show_random_elements(dataset["train"])

Unnamed: 0,id,article,highlights
0,1e69daf17060e6fcfc5d084199651cabfcc40596,"By . Daily Mail Reporters . Android users beware: Heartbleed has been causing panic across users of popular internet sites and now millions of Android device users have been revealed to be at risk . Millions of Android smartphone and tablet users are vulnerable to the Heartbleed security flaw despite Google's insistence to the contrary, researchers reveal. The tech giant assured panicked users last week that their mobile platform was safe save for a 'limited exception.' It's now been revealed that the exception refers to millions of devices running a 2012 version of the operating system. Security experts say the vulnerable software can be found on popular phone and tablet brands including Samsung, HTC and others, reports Bloomberg. More than 900 million Android devices have been purchased worldwide. Of those, Bloomberg reports that 34 percent run some version of the outdated software. While a fix for computers and websites was readily available after news of Heartbleed sent internet users into a panic last week, the same cannot be said for Android. While a patch is available for devices with the flaw, wireless carriers and handset makers must now provide updates and the process is long. 'One of the major issues with Android is the update cycle is really long,' Michael Shaulov, CEO of mobile security company Lacoon Security Ltd. told Bloomberg. 'The device manufacturers and the carriers need to do something with the patch, and that’s usually a really long process.' Panic was triggered last week when it emerged that tens of millions of websites around the world did not have the security promised. The popular blogging website Tumblr, which is owned by Yahoo!, urged its users to change all their passwords, especially those protecting sensitive data like email and bank accounts, immediately. 
Internet security companies have painted a doomsday scenario of customer web accounts being hijacked by criminals or even government spies. Global web giants were given advanced warning of the need to improve security, which allowed them to make sure their sites safe. Hacked? Internet security companies had warned of accounts being hacked by criminals or government spies and despite Google's assurances to the contrary, it appears many millions of Android users are vulnerable . However, millions of other firms have not had the time – or expertise - to ensure their sites are protected, leaving consumers in the dark about whether they are safe to use. These firms will have no choice but to bring in outside security consultants at great expense to protect their websites and customers. The Heartbleed bug bypasses the encryption – called OpenSSL - that normally protects data as it is sent between computers and servers, leaving personal and sensitive data vulnerable. The problem has existed for at least two years, however details were only made public this week by Google and a small company from Finland, called Codenomicon. The fact the loophole has existed for so long is a huge blow to the credibility of websites and consumer trust in the internet. However, there is no evidence to date that any malicious hackers have taken advantage of it to grab information. In a blog post, Google said it has applied a security patch to defeat the flaw on its search engine, Gmail, YouTube, Wallet and Play store for mobile apps and other digital content. A spokesman said: ‘It’s good practice to use strong passwords that are unique to each of your accounts, but we don’t think our users need to change their Google Account passwords because of this bug.’ Amazon said simply: ‘Amazon.com is not affected.’ Facebook, along with a number of big name websites, was given advance warning that details of the security flaw were about to be made public. 
It appears its systems have been vulnerable for some time, however it has also now installed a security patch. A spokesman said: ‘We added protections for Facebook’s implementations of OpenSSL before this issue was publicly disclosed, and we haven’t detected any signs of suspicious activity on people’s accounts. 'We’re continuing to monitor the situation closely.’ The banks also tried to reassure customers. The British Bankers Association said: ‘If customers are worried about the security of their online account they should contact their bank to discuss those concerns directly. ‘If they are proven to have been the victim of a fraudulent online transaction then their bank will make sure they are granted a full refund of any money that has been stolen.’ By contrast, the British Retail Consortium was unable to offer any comment on the safety of online stores. Paypal, which is part of eBay, and responsible for handling millions of electronic payments every day insisted its system remains secure. Its chief technical officer, John Barrese, issued a statement saying customer account details were not exposed and there is no need for people to change their password. However, the problem is not resolved by simply improving site security. Experts at the American security and training company, the SANS Institute, suggested that the software that runs smartphones, tablets and laptops could have the same flaw. Spokesman Jake Williams said a malicious server could easily send a message to vulnerable software on phones, laptops, PCs, home routers and other devices, and retrieve a 64KB block of sensitive data from the targeted system. That would gather keystrokes which could, in theory, provide log-in and password details for internet banks and other sites. Mr Williams criticised companies with websites that are vulnerable to the Heartbeat bug for failing to admit the problem to consumers. ‘Too many vendors not communicating with their customers,’ he said. 
He suggested it could take the industry until 2020 and beyond to eliminate the problem.","A 2012 version of Google's mobile operating systems is susceptible to the glitch, which allows hackers to steal passwords .\nThe flaw was revealed last week but Google said all Android devices were immune save for a 'limited exception'\nResearchers now say the 'exception' covers millions of devices that still run the old software and a solution won't come easily ."
1,e824e75c97ef075a3223aba03d1aacfc34364116,"During a trip to the mostly Muslim nation of Albania, Pope Francis rebuked militants who act in the name of religion, saying no one can act as the ""armor of God."" The Pope lauded Albanians during a visit to the capital of Tirana, calling the Balkan nation one that proves ""a peaceful and fruitful coexistence between persons and communities of believers of different religions is not only desirable, but possible and realistic."" The pontiff didn't refer to any militant group by name. He denounced those who have perverted religious spirit and who are engaged in violence that prevents harmony between people of different faiths. Last month, he denounced ISIS, the Islamist militant group seeking to establish a caliphate in the Middle East. He called ISIS an ""unjust aggressor"" On Sunday, he again spoke harshly against religious warriors. ""Let no one consider themselves to be the 'armor' of God while planning and carrying out acts of violence and oppression,"" Pope Francis told diplomats at the presidential palace. ""May no one use religion as a pretext for actions against human dignity and against the fundamental rights of every man and woman, above all, the right to life and the right of everyone to religious freedom."" The pontiff pointed to the religious peace in Albania, a country relatively new to the concept of religious freedom. The population is 56% Muslim, 10% Catholic and 7% Orthodox, according to the CIA Factbook. The climate of respect and trust between those groups is a ""precious gift,"" the pontiff said. It has been two decades since a Pope last visited Albania, which is a short distance from Italy, just across the Adriatic Sea. The day-long visit is the first for Francis to another European country since he became Pope. On his way to the palace, he rode through the streets past multitudes of cheering fans and onlookers in an open Pope mobile. Later the Pope celebrated Mass on Mother Teresa Square. 
Many see Pope Francis' visit as support for the country's efforts to grow closer to the West and its wish to join the European Union. Albania was a Soviet satellite state before the fall of European communism at the end of the Cold War. It has been a democratic republic for 24 years.","No one can act as the ""armor of God,"" Pope Francis says .\nPontiff says Albania is an example of how people of different religions can live together .\nFrancis doesn't mention any militant group by name .\nReligious freedom is relatively new in the country with a large Muslim population ."
2,11087ed3a162e37653b9673566429004bfcebd6f,"(CNN) -- From Texas to Tennessee, there's not much relief in sight for people smacked by a frigid winter storm that killed at least four, shut down schools, backed up interstates and left hundreds of thousands without power. The National Weather Service predicted more sleet and subfreezing temperatures in Dallas and Memphis until Sunday and in Little Rock, Arkansas, until Monday. Other parts of the country braced for bad weather. The storm moved east Friday night, with snow expected to accumulate across the central Appalachians through central New England into early Saturday morning. Snow or sleet was expected to hit Washington, D.C., on Sunday. Dallas/Fort Worth took the hardest hit. It was colder in the Big D (26 degrees) than in Anchorage, Alaska (34 degrees), prompting the cancellation of the Dallas Marathon and spurring Dallas/Fort Worth International Airport to cancel almost 700 flights, about 80% of those scheduled. About 200,000 customers lost electricity, and a local official urged people to offer shelter to those without power. Southbound Interstate 35 was shut down and traffic backed up for six miles, CNN affiliate WFAA in Dallas reported. The temperature swings were startling. Hot Springs, Arkansas, experienced a record high of 75 on Wednesday. By Friday morning, the city was in the middle of an ice storm. The storm caught a lot of Texans by surprise. Debra Knight, a CNN iReporter, said she ran her defroster 20 minutes but couldn't get rid of the thick layer of ice on top of her car in Dallas. ""This is Texas. We don't have snow tools!"" said her husband, Bobby. Storm deaths . In some cases, the weather has been deadly. In Texas, a passenger was killed Thursday in Hockley County when a vehicle lost control and crashed into another car, Sgt. Bryan Witt of the Texas Department of Public Safety said. 
An Arkansas man was killed late Thursday when a tree fell on his camper in Pope County, Tommy Jackson, an Arkansas Department of Emergency Management spokesman, said Friday. Jackson said the death was related to the weather but couldn't provide details about the weather at the time. Rain and freezing rain were reported in the area Thursday night. Highway Patrol officials in Oklahoma blamed at least one death, in Muskogee, on the weather. The agency initially said weather had caused another death, that of a 24-year-old man in Owasso, but later said this fatality ""is still under investigation and may not be winter storm related."" No snow had fallen in New Mexico, but drivers dealt with snow and ice. One person died in a semi crash near Clines Corners. About 116 storm-related injuries have been reported in Oklahoma, including 48 falls, the state health department said. When not causing harm, the storm struck fear in residents. In Dallas, iReporter Earl Wallace IV said he when he heard a loud crack and a rush of wind and ran out to find a large tree had fallen on two nearby homes. ""One of the homeowners emerged crying and talking on the phone,"" said the 32-year-old creative director. ""The tree had crashed down into her dining room. I felt nervous, shocked and scared for the families inside."" Two homes were damaged -- one with a giant hole in the roof -- but no one was injured. WFAA in Dallas reported that a portion of the Pier 121 Marina in Lewisville collapsed on top of boats. Two people were taken to hospitals when a carport collapsed in Fort Worth. Getting ready . In Tennessee, Memphis Light, Gas and Water has 426,000 customers and is preparing for the worst. As of early Friday afternoon, about 5,000 customers were without power. ""MLGW employees have been monitoring this winter storm situation, and we have all of our resources in place should the winter weather hit Memphis and Shelby County,"" said Jerry Collins Jr., the company's president and CEO. 
""If indeed a significant storm blankets our city, we are ready to respond."" Memphis called off the annual St. Jude Memphis Marathon, which was scheduled for Saturday. The ice also makes travel messy. Road crews in Memphis are ready to throw down 4,000 tons of sand to give drivers traction, CNN affiliate WMC reported. The governors of Tennessee and Arkansas declared states of emergency ahead of the worst of the storm. ""The most unsettling aspect about Arkansas' weather for most of us is its looming uncertainty,"" Gov. Mike Beebe said. ""During severe weather season, we know when conditions are ripe for tornadoes but never exactly where and when they could strike. In winter, that uncertainty takes a different form but can still create widespread anxiety,"" he said. ""Often, only a few degrees above or below the freezing mark can make the difference between a cold rain, a blanket of snow, an ice storm or a mixture of all of the above."" In the Dallas-Fort Worth area, roads were passable overnight, but it was a fine line as temperatures slipped below freezing. The slushy mess slowly turned into crunchy, bumpy ice. Police urged caution for anyone on the road and reported 21 major car accidents and 32 minor ones Friday. According to energy provider Oncor, more than 200,000 customers were without power in the Dallas-Fort Worth metro area as of Friday morning. This is the largest concentration of outages, with scattered outages elsewhere in the state. Nearly 30,000 were without power in Arkansas, energy companies reported. The weather brought traffic at Dallas/Forth Worth International Airport almost to a standstill. Hundreds of flights were canceled or delayed, prompting Southwest Airlines and American Airlines to urge travelers to consider rebooking their flights. A dark cloud over Dallas events . The National Weather Service predicts a wintry mix of precipitation through Sunday. 
That forecast prompted the cancellation of a downtown Dallas holiday parade scheduled for Saturday for the first time in 26 years. The city also called off its annual marathon slated for Sunday and the accompanying Health & Fitness Expo scheduled for Friday and Saturday. ""We regret that the race will not go on as planned, but are confident this decision is in the best interest of our runners, volunteers, spectators and the general public,"" A Dallas Marathon statement said. While the focus Friday was on the ice storm stretching from Dallas to Memphis, Mother Nature looked ready to throw another punch. A wintry mix was forecast for Washington beginning about noon Sunday. The National Weather Service said it has the potential to be an ice storm from late Sunday afternoon through the evening hours, but temperatures are expected to rise above freezing by rush hour Monday. A new storm entered the West, bringing snow to the Pacific Coast on Friday. The storm brought significant snowfall to Portland, Oregon, and there is a chance for rare snow in Las Vegas on Saturday. This storm will bring another round of wintry weather to the East by late in the weekend. CNN's Jason Morris, Ralph Ellis and Dave Hennen contributed to this report.","Subfreezing temperatures predicted Saturday in Dallas, Memphis, Little Rock .\nAbout 200,000 customers lose power in Dallas/Fort Worth, 30,000 in Arkansas .\nDallas and Memphis marathons called off .\nWeather-related deaths reported in Texas, Arkansas and Oklahoma ."
3,3902cf8135037480ec4705cfb44be2332833ee05,"London (CNN) -- When Andy Murray won the Brisbane International, a warmup event for January's Australian Open, few were surprised. But what followed was largely out of character for a man who is perceived as one of the more dour characters in the world of sport. After winning the final, Murray turned towards the television cameras and showed a side of himself that had so rarely been seen. ""I'd like to dedicate this victory to one of my best friends,"" the British tennis star told the crowd. ""He's back home watching and you're going to get through."" Thousands of miles away in London, Murray's former roommate Ross Hutchins sat facing the prospect of six months of grueling chemotherapy after being diagnosed with Hodgkin's lymphoma -- a cancer of the lymph node immune system. Friends since their early years and former doubles partners, the two were inseparable on and off the court, with both taking time to tease one another about their receding hairlines. But not even Hutchins, who has seen a side of Murray that few others have caught a glimpse of, expected such a gesture. ""I didn't expect the speech, that's for sure,"" the Englishman told CNN's Open Court. ""I just expected him to, well I was hoping he would win the title ... we had been very close that week as we always are. ""So I was watching the speech and was thinking how pleased I was he had won, and then he came and dedicated his trophy, which meant the world to me. ""It's something which lifted me up and it meant a lot because it was a big stage leading into a grand slam. ""My fiancee cried and she doesn't cry that much. It was a very special moment for us, it was something we shared together and it was something we'll never forget."" It was a rare moment of emotion from Murray, who has often been derided for his downbeat personality. 
The tears which followed his defeat by Roger Federer in last year's Wimbledon final finally allowed the public a glimpse of what lay behind a perceived deadpan exterior. His victory on the same court at the London Olympics was then followed by his first grand slam triumph at the U.S. Open last September -- a day that Hutchins will never forget. Having been two sets ahead of world No.1 Novak Djokovic in the final at Flushing Meadows, Murray allowed his rival to fight back and move level before triumphing in a pulsating final set. ""He was so determined to win that match, I don't think we can ever appreciate what was going through his head,"" said Hutchins. ""Everyone in the world that had watched him over the last final finals was thinking, 'Oh it's going to happen again.' ""But he wouldn't let it and it was a joy to watch. It was an incredible moment when he picked up the trophy."" A friend in need . While Murray had triumphed and defeated his demons, Hutchins was just beginning his own personal battle that same month. Searing back pains prevented him from sleeping for more than two hours, and left him in absolute agony. So severe was his sleep deprivation that Hutchins tried everything from laying out on the wooden floor in his bathroom, to sleeping on a foam roller covered with tennis balls. It was only after speaking to a coach at a training camp in La Manga, Spain, that Hutchins began to realize the severity of the situation. After initial tests showed pneumonia in his left lung, Hutchins sought further medical advice about an enlarged lymph node in his chest, which turned out to be cancerous. Hodgkin's lymphoma is a cancer of the white blood cell found in the lymphatic system, which is a network of vessels and glands spread throughout the body. Its most common symptom is a painless swelling in a lymph node, normally in the neck, armpit or groin. The day his diagnosis was confirmed -- December 27, 2012 -- is etched into Hutchins' memory. 
""I called Andy and spoke to him about it,"" said Hutchins, who got engaged to his longtime girlfriend Lindsay Wood in February. ""Andy was saying, 'You're going to be better after this, you're going to be stronger, you're going to be a far tougher person and tennis player.' ""Obviously my fiancee was upset. I'd actually warned her that it was going to be this. ""She was amazing, she'd been through cancer in her family with one of her parents so she knew what it was all about, so she was like a rock for me. ""I love her to bits and I can't thank her enough. She was great and she was supportive, but that was initially a tough moment."" Support . Hutchins has been overwhelmed with messages of support from well wishers from across the world since he announced his condition at the end of last year. Far from being a household name in the world of tennis, the 28-year-old, who grew up a stone's throw away from Wimbledon, can barely believe the public's reaction. ""It's something that I can't really express in words,"" said Hutchins. ""I've still got all the letters and all the cards in my living room and they mean so much to me. ""People who have taken time from three-year-old kids who have written or drawn pictures to me, to people who have done montages, it's been incredible. A lot of them will remain in my heart forever, because it's something you don't expect."" Murray, in particular, has been a rock for Hutchins -- showing a more sensitive side than their usual joking banter. ""We don't normally have too many serious conversations,"" he added. ""I've needed a different side of Andy but we're closer than ever."" Recovery . Hutchins completed his chemotherapy sessions last week and was courtside as Murray won the Aegon Championships at Queen's Club on Sunday. The two appeared on court, after the world No. 2's final win over Marin Cilic, at the Rally Against Cancer charity match alongside Tim Henman, Murray's coach Ivan Lendl, Tomas Berdych and a host of celebrities. 
Murray donated his $115,000 winner's prize to the charity, which supports the Royal Marsden hospital where Hutchins was treated. ""Ross has handled it all so well,"" Murray told CNN in his typical understated manner. ""He has a great family and he's been so positive. Hopefully he'll go for his scan and it will all be OK."" Hutchins will be tested in mid-July to see if the cancer has been defeated, while a final verdict will come in late September or October following another scan. Best man . After that, there is the small matter of his wedding at the end of November, at which Murray will be best man. Murray is taking charge of planning the bachelor party along with Hutchins' brother -- a task he was expecting, according to his close friend. ""I think Andy kind of knew it was coming,"" said Hutchins. ""When I got engaged I think he knew it was going to be him and then I said to him, 'You know it would mean a lot to me,' knowing how supportive he has been over the years. ""He kind of jumped to it, he was like 'Great, it means a lot to me, let's do it and let me know if you need me for anything else.' ""And he probably says that a lot at the moment and I am using him a lot."" 'Superstar' Hutchins believes his friend has all the attributes to become ""a great champion"" as he begins his bid next week to become Britain's first Wimbledon men's winner since Fred Perry in 1936. ""You dream about being able to time the ball that sweetly and to have the precision that he has,"" says Hutchins. ""Mentally he's so strong, he's able to rise to occasions and serve aces on break points, he's able to capitalize when he senses his opponent is struggling with a certain tactic. 
""He is a superstar, he's a master tactician, and he's as feisty and competitive as they come.""","Ross Hutchins gives a rare insight into the life of Andy Murray .\nMurray dedicated win at Brisbane to Hutchins, who is recovering from cancer .\nScot will be best man at Hutchins' wedding in November .\nHutchins has finished his treatment and is helping raise money for cancer charity ."
4,47a5093bd01e7a3e72034564ee94d75cb1a1e036,"Australians and New Zealanders tend to get annoyed when their accents get mistaken for one another, but there's one thing they do share. Both countries can lay claim to hosting the world's friendliest city, with Melbourne and Auckland jointly taking top honors in a new survey. Another southern hemisphere destination -- South Africa's Johannesburg -- didn't fare quite as well, being named as the most unfriendly. Conde Nast Traveler magazine's annual Readers' Choice Survey says it was ""no surprise readers adore Melbourne,"" calling it Australia's ""capital of cool."" Aucklanders were praised by readers for their ""humor"" and a view on life that's ""something to aspire to attain."" Australia scored a friendly city double, with Sydney also making the top 10 alongside a global scattering that includes Cambodia's ""awe-inspiring"" temple gateway Siem Reap and ""vibrant"" Dublin. The United States fielded two cities onto the friendly list, with Savannah, Georgia, and Charleston, South Carolina, each winning praise. 'City of crime and contrasts' CN Traveler says its ""subjective"" survey asked readers to rate cities on a number of factors, including ""friendliness."" It said the results didn't always reflect the local residents. Such is the case with Beijing, which made number six on the unfriendly list after being dissed for its ""terrible pollution"" and ""dirty streets and hideous traffic."" The Chinese capital was joined by the French port of Marseille (""Stay away! Threatening""), ""overcrowded and ostentatious"" Monaco, ""rude"" Frankfurt, ""forgettable"" Cannes and -- to compound French ignominy -- ""cold and aloof"" Paris. Johannesburg was named most unfriendly because, despite being ""one of the most beautiful"" cities, ""safety still remains a serious concern"" with one reader calling it ""a city of crime and contrasts."" South Africa didn't come home empty handed, however. 
Its coastal city of Cape Town placed eighth on the friendly list, with one reader describing it as ""the most amazing place on Earth"" thanks to its mix of nature, beaches and mountains. World's friendliest cities according to Conde Nast Traveler . 11. Budapest, Hungary . 9 (tie). Seville, Spain . 9 (tie). Savannah, Georgia . 8. Cape Town, South Africa . 7. Siem Reap, Cambodia . 5 (tie). Sydney . 5 (tie). Dublin, Ireland . 4. Charleston, South Carolina . 3. Victoria, BC, Canada . 1 (tie). Melbourne . 1 (tie). Auckland, New Zealand . World's unfriendliest cities . 10. Nassau, Bahamas . 9. Monte Carlo, Monaco . 8. Milan, Italy . 7. Frankfurt, Germany . 6. Beijing . 5. Marseille, France . 4. Paris . 3. Moscow . 2. Cannes, France . 1. Johannesburg, South Africa .","Annual Conde Nast Traveler survey names Melbourne and Auckland as world's friendliest cities .\nAucklanders praised for their humor, while Melbournians are described as ""a friendly bunch""\nSouth Africa's Johannesburg topped the unfriendly city list with readers criticizing crime levels ."


In [23]:
# Model and tokenizer setup.
# NOTE: relies on `torch`, `AutoTokenizer` and `AutoModelForSeq2SeqLM` from the
# imports cell at the top of the notebook; run that cell first.
model_checkpoint = "facebook/bart-base"

# Use the GPU when PyTorch can see one, otherwise stay on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Load the pretrained weights, then move the model to the selected device
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
model = model.to(device)

NameError: name 'torch' is not defined

In [8]:
# BART is not trained with task prefixes (a "summarize: " prompt is a T5
# convention), so the input prefix is left empty and no tokens of the input
# budget are spent on it. The variable is kept so that any code building
# inputs as `prefix + text` keeps working.
prefix = ""

def preprocess_function(examples):
    """Tokenize a batch of articles and their reference summaries.

    Args:
        examples: Batch mapping with an "article" list (source documents) and
            a "highlights" list (reference summaries).

    Returns:
        The tokenizer output for the articles, with the token ids of the
        tokenized highlights added under the "labels" key.
    """
    # Prepend the (possibly empty) prefix to each document in the "article" column
    inputs = [prefix + doc for doc in examples["article"]]

    # Tokenize the documents, truncating to at most 1024 tokens
    model_inputs = tokenizer(inputs, max_length=1024, truncation=True)

    # Tokenize the "highlights" column as targets, truncating to at most 128 tokens
    labels = tokenizer(text_target=examples["highlights"], max_length=128, truncation=True)

    # The target token ids are the labels for the sequence-to-sequence
    # (summary generation) training objective
    model_inputs["labels"] = labels["input_ids"]

    return model_inputs

In [9]:
# Tokenize every split (train, validation and test) with a single call;
# batched=True hands preprocess_function batches of examples instead of single rows
tokenized_dataset = dataset.map(preprocess_function, batched=True)

Map:   0%|          | 0/287113 [00:00<?, ? examples/s]

Map:   0%|          | 0/13368 [00:00<?, ? examples/s]

Map:   0%|          | 0/11490 [00:00<?, ? examples/s]

In [10]:
# Pad inputs and labels dynamically to the longest sequence in each batch
# during collation, instead of padding the whole dataset to the maximum length.
# `model` must be the model instance (not the checkpoint name string): the
# collator uses it to prepare decoder_input_ids from the labels.
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

In [1]:
# Load the ROUGE evaluation metric with the Evaluate library
rouge = evaluate.load("rouge")

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

In [13]:
def compute_metrics(eval_pred):
    """Compute ROUGE scores and the mean generated length for an evaluation run.

    Args:
        eval_pred: Pair ``(predictions, labels)`` of token-id arrays.
            ``predictions`` may also be a tuple whose first item holds the
            generated token ids. Positions equal to -100 are treated as padding.

    Returns:
        dict: The values returned by ``rouge.compute`` plus ``"gen_len"`` (mean
        number of non-padding tokens per prediction), rounded to 4 decimals.
    """
    # Unpack the tuple containing predictions and labels
    predictions, labels = eval_pred

    # Keep only the generated ids when extra outputs come along with them
    if isinstance(predictions, tuple):
        predictions = predictions[0]

    pad_id = tokenizer.pad_token_id

    # -100 marks ignored/padded positions and is not a valid token id, so it
    # is replaced by the pad token id before decoding (in predictions as well
    # as in labels)
    predictions = np.asarray(predictions)
    predictions = np.where(predictions != -100, predictions, pad_id)
    labels = np.asarray(labels)
    labels = np.where(labels != -100, labels, pad_id)

    # Decode both sides, skipping special tokens
    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Compute ROUGE scores between predictions and labels
    result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

    # Average number of non-padding tokens in the generated predictions
    prediction_lens = [np.count_nonzero(pred != pad_id) for pred in predictions]
    result["gen_len"] = np.mean(prediction_lens)

    # Round the metrics to 4 decimal places and return as a dictionary
    return {k: round(v, 4) for k, v in result.items()}

In [19]:
batch_size = 8
num_train_epochs = 8

# Show the training loss roughly once per epoch: number of training examples
# divided by the per-device batch size (assumes one device and no gradient
# accumulation - TODO confirm for multi-GPU runs). Clamped to at least 1 so a
# dataset smaller than one batch does not yield logging_steps=0.
logging_steps = max(1, len(tokenized_dataset["train"]) // batch_size)

# Drop the organisation prefix ("facebook/") so the output directory is a
# single folder rather than an accidental nested "facebook/..." path
model_name = model_checkpoint.split("/")[-1]

args = Seq2SeqTrainingArguments(
    output_dir=f"{model_name}-finetuned-CNN",
    evaluation_strategy="epoch",  # run evaluation at the end of every epoch
    learning_rate=5.6e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    weight_decay=0.01,
    save_total_limit=3,  # keep at most three checkpoints on disk
    num_train_epochs=num_train_epochs,
    predict_with_generate=True,  # evaluate on generated summaries (needed for ROUGE)
    logging_steps=logging_steps
)

In [20]:
# Build the trainer. Evaluation during training uses the validation split;
# the test split is kept unseen so it can serve for the final, unbiased
# evaluation once training is finished.
# NOTE: requires `model`, `args`, `tokenized_dataset`, `data_collator`,
# `tokenizer` and `compute_metrics` from the cells above to have been run.
trainer = Seq2SeqTrainer(
    model,
    args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

NameError: name 'model' is not defined

In [None]:
# Run the fine-tuning loop
trainer.train()