In [None]:
!pip install transformers[torch]
!pip install SentencePiece

Collecting accelerate>=0.21.0 (from transformers[torch])
  Downloading accelerate-0.29.2-py3-none-any.whl (297 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.4/297.4 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->transformers[torch])
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->transformers[torch])
  Using cached nvidia_cublas_cu

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

df = pd.read_excel('/content/drive/MyDrive/news_summary_trainingv2.xlsx')

In [None]:
df = df.rename(columns={'article_text': 'text', 'Summary': 'ctext'})
df.ctext = 'summarize: ' + df.ctext # add prefix "summarize: " to input indicating the task
print(df.head())

                                                text  \
0  Share on email (opens in new window)\n\nShare ...   
1  BYD, known for cars that are priced around 100...   
2  TESLA beat estimates for fourth-quarter delive...   
3  HONG KONG – Chinese automaker BYD sold a recor...   
4  Tesla (TSLA) shares ticked up 0.5% in early tr...   

                                               ctext  
0  summarize: BYD has surpassed Tesla as the worl...  
1  summarize: In 2022, BYD became the world's lar...  
2  summarize: Tesla exceeded expectations by deli...  
3  summarize: BYD became the top seller of fully ...  
4  summarize: Tesla's stock rose slightly after r...  


In [None]:
# Importing required libraries
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler

# Importing the T5 modules from huggingface/transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration

In [None]:
# # Setting up the device for GPU usage
from torch import cuda
device = 'cuda' if cuda.is_available() else 'cpu'

In [None]:
# Creating a custom dataset for reading the dataframe and loading it into the dataloader to pass it to the neural network at a later stage for finetuning the model and to prepare it for predictions

class CustomDataset(Dataset):

    def __init__(self, dataframe, tokenizer, source_len, summ_len):
        self.tokenizer = tokenizer
        self.data = dataframe
        self.source_len = source_len
        self.summ_len = summ_len
        self.text = self.data.text
        self.ctext = self.data.ctext

    def __len__(self):
        return len(self.text)

    def __getitem__(self, index):
        ctext = str(self.ctext[index])
        ctext = ' '.join(ctext.split())

        text = str(self.text[index])
        text = ' '.join(text.split())

        source = self.tokenizer.batch_encode_plus([ctext], max_length= self.source_len, pad_to_max_length=True,return_tensors='pt')
        target = self.tokenizer.batch_encode_plus([text], max_length= self.summ_len, pad_to_max_length=True,return_tensors='pt')

        source_ids = source['input_ids'].squeeze()
        source_mask = source['attention_mask'].squeeze()
        target_ids = target['input_ids'].squeeze()
        target_mask = target['attention_mask'].squeeze()

        return {
            'source_ids': source_ids.to(dtype=torch.long),
            'source_mask': source_mask.to(dtype=torch.long),
            'target_ids': target_ids.to(dtype=torch.long),
            'target_ids_y': target_ids.to(dtype=torch.long)
        }

In [None]:
# Creating the training function. This will be called in the main process. It is run depending on the epoch value.
# The model is put into train mode and then we enumerate over the training loader and passed to the defined network

def train(epoch, tokenizer, model, device, loader, optimizer):
    model.train()
    for _,data in enumerate(loader, 0):
        y = data['target_ids'].to(device, dtype = torch.long)
        y_ids = y[:, :-1].contiguous()
        lm_labels = y[:, 1:].clone().detach()
        lm_labels[y[:, 1:] == tokenizer.pad_token_id] = -100
        ids = data['source_ids'].to(device, dtype = torch.long)
        mask = data['source_mask'].to(device, dtype = torch.long)

        outputs = model(input_ids = ids, attention_mask = mask, decoder_input_ids=y_ids, labels=lm_labels)
        loss = outputs[0]

        if _%500==0:
            print(f'Epoch: {epoch}, Loss:  {loss.item()}')

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


In [None]:
# Creating the training function. This will be called in the main process. It is run depending on the epoch value.
# The model is put into train mode and then we enumerate over the training loader and passed to the defined network

def train(epoch, tokenizer, model, device, loader, optimizer):
    model.train()
    for _,data in enumerate(loader, 0):
        y = data['target_ids'].to(device, dtype = torch.long)
        y_ids = y[:, :-1].contiguous()
        lm_labels = y[:, 1:].clone().detach()
        lm_labels[y[:, 1:] == tokenizer.pad_token_id] = -100
        ids = data['source_ids'].to(device, dtype = torch.long)
        mask = data['source_mask'].to(device, dtype = torch.long)

        outputs = model(input_ids = ids, attention_mask = mask, decoder_input_ids=y_ids, labels=lm_labels)
        loss = outputs[0]

        if _%500==0:
            print(f'Epoch: {epoch}, Loss:  {loss.item()}')

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()


In [None]:
def validate(epoch, tokenizer, model, device, loader):
    model.eval()
    predictions = []
    actuals = []
    with torch.no_grad():
        for _, data in enumerate(loader, 0):
            y = data['target_ids'].to(device, dtype = torch.long)
            ids = data['source_ids'].to(device, dtype = torch.long)
            mask = data['source_mask'].to(device, dtype = torch.long)

            generated_ids = model.generate(
                input_ids = ids,
                attention_mask = mask,
                max_length=150,
                num_beams=2,
                repetition_penalty=2.5,
                length_penalty=1.0,
                early_stopping=True
                )
            preds = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=True) for g in generated_ids]
            target = [tokenizer.decode(t, skip_special_tokens=True, clean_up_tokenization_spaces=True)for t in y]
            if _%100==0:
                print(f'Completed {_}')

            predictions.extend(preds)
            actuals.extend(target)
    return predictions, actuals

In [None]:
# Defining some key variables that will be used later on in the training
TRAIN_BATCH_SIZE = 2    # input batch size for training (default: 64)
VALID_BATCH_SIZE = 2    # input batch size for testing (default: 1000)
TRAIN_EPOCHS = 8        # number of epochs to train (default: 10)
VAL_EPOCHS = 1
LEARNING_RATE = 1e-4    # learning rate (default: 0.01)
SEED = 42               # random seed (default: 42)
MAX_LEN = 512
SUMMARY_LEN = 150

In [None]:
# Set random seeds and deterministic pytorch for reproducibility
torch.manual_seed(SEED) # pytorch random seed
np.random.seed(SEED) # numpy random seed
torch.backends.cudnn.deterministic = True

# tokenzier for encoding the text
tokenizer = T5Tokenizer.from_pretrained("t5-base")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [None]:
# Creation of Dataset and Dataloader
# Defining the train size. So 80% of the data will be used for training and the rest will be used for validation.
train_size = 0.8
train_dataset=df.sample(frac=train_size,random_state = SEED)
val_dataset=df.drop(train_dataset.index).reset_index(drop=True)
train_dataset = train_dataset.reset_index(drop=True)

print("FULL Dataset: {}".format(df.shape))
print("TRAIN Dataset: {}".format(train_dataset.shape))
print("TEST Dataset: {}".format(val_dataset.shape))

FULL Dataset: (1317, 2)
TRAIN Dataset: (1054, 2)
TEST Dataset: (263, 2)


In [None]:
# Creating the Training and Validation dataset for further creation of Dataloader
training_set = CustomDataset(train_dataset, tokenizer, MAX_LEN, SUMMARY_LEN)
val_set = CustomDataset(val_dataset, tokenizer, MAX_LEN, SUMMARY_LEN)

In [None]:
# Creation of Dataloaders for testing and validation. This will be used down for training and validation stage for the model.
training_loader = DataLoader(training_set, batch_size=TRAIN_BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, batch_size=VALID_BATCH_SIZE, shuffle=False)

In [None]:
# Defining the model. We are using t5-base model and added a Language model layer on top for generation of Summary.
# Further this model is sent to device (GPU/TPU) for using the hardware.
model = T5ForConditionalGeneration.from_pretrained("t5-base")
model = model.to(device)

# Defining the optimizer that will be used to tune the weights of the network in the training session.
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)


model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [None]:
# Training loop
print('Initiating Fine-Tuning for the model on our dataset')

for epoch in range(TRAIN_EPOCHS):
    train(epoch, tokenizer, model, device, training_loader, optimizer)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


Initiating Fine-Tuning for the model on our dataset




Epoch: 0, Loss:  9.476309776306152
Epoch: 0, Loss:  2.8925087451934814
Epoch: 1, Loss:  2.4105594158172607
Epoch: 1, Loss:  2.845522403717041
Epoch: 2, Loss:  2.4294040203094482
Epoch: 2, Loss:  2.5400145053863525
Epoch: 3, Loss:  2.4133875370025635
Epoch: 3, Loss:  2.2197725772857666
Epoch: 4, Loss:  2.1742939949035645
Epoch: 4, Loss:  2.2245967388153076
Epoch: 5, Loss:  2.1858670711517334
Epoch: 5, Loss:  1.494624376296997
Epoch: 6, Loss:  2.4332966804504395
Epoch: 6, Loss:  1.7058039903640747
Epoch: 7, Loss:  1.543834924697876
Epoch: 7, Loss:  1.9410474300384521


In [None]:
# Validation loop and saving the resulting file with predictions and actuals in a dataframe.
# Saving the dataframe as news_summaries.xlsx
print('Now generating summaries on our fine tuned model for the validation dataset and saving it in a dataframe')
for epoch in range(VAL_EPOCHS):
    predictions, actuals = validate(epoch, tokenizer, model, device, val_loader)
    final_df = pd.DataFrame({'Generated Text':predictions,'Actual Text':actuals})
    final_df.to_excel('./news_summaries2.xlsx')
    print('Output Files generated for review')

Now generating summaries on our fine tuned model for the validation dataset and saving it in a dataframe
Completed 0
Completed 100
Output Files generated for review


In [None]:
# Display generated summaries for 10 randomly selected reports
pd.set_option('display.max_colwidth', None)
final_df.head(10)

Unnamed: 0,Generated Text,Actual Text
0,"In, BYD became the world’s largest EV maker, surpassing Tesla. The Chinese automaker delivered 1.86 million EVs in 2022, including 917,118 pure electric vehicles. This was ahead of the US’s Tesla sales of 1,31 million units last year. China’s high EV demand and its domestic discounts helped boost BYD’s sales, according to data provided by UBS Global Market Intelligence. In the fourth quarter, BYD delivered 416,242 plug-in hybrid cars and","BYD, known for cars that are priced around 100,000 yuan (US$14,013), dethroned Tesla as the world’s largest EV maker in 2022. The company, controlled by billionaire Wang Chuanfu, delivered a total 1.86 million EVs that year, most of them in mainland China, beating Tesla’s 1.31 million units. But Tesla makes only pure electric cars, so it shipped 43 per cent more of those than BYD’s 917,118. “BYD’s sales benefited from a rising demand for EVs in China and the discounts it offered to domestic car buyers,” said Gao Shen,"
1,"vehicle sales reached a record 484,507 in the fourth quarter, according to data provided by Bloomberg Finance. The company was ahead of BYD as the largest electric vehicle manufacturer in the world. Tesla’s growth rate fell short of its 50 per cent target and demand for EVs is falling across all brands. Shares of the automaker have recovered half of their 2022 losses but remain below their 2022 peak.","New York CNN — Tesla reported record quarterly sales in the final three months of last year, but it wasn’t enough for it to hang onto its title as the world’s largest maker of electric vehicles. The company reported global sales of 484,507 vehicles in the fourth quarter, a bit better than expected and up nearly 20% from what it sold in the same period of 2022. It was less than the 526,409 EVs sold by Chinese automaker BYD, but Tesla stayed ahead of BYD in total full-year sales. BYD vehicles are sold in Asia and Europe but are not yet available in North America. Tesla is facing greater competition from not just BYD,"
2,"Tesla delivered 484,507 vehicles in the fourth quarter, beating analysts’ estimates. The company delivered 461,538 Model 3 and Model Y vehicles during the quarter, topping a record high. The stock gained 1% in premarket trading on Wednesday, according to data provided by Bloomberg. The carmaker said it delivered 421,538 vehicles in the fourth quarter, including 461,538 Model 3 and Model Y. The increase was driven by increased deliveries of the Model 3 electric sedan before it lost federal tax credits under the Inflation Reduction Act.","Adds shares in paragraph 2, details on deliveries throughout Jan 2 (Reuters) - Tesla TSLA.O on Tuesday reported fourth-quarter deliveries ahead of analysts' estimates after a push to deliver more Model 3 electric cars before some variants of the compact sedan lose federal tax credits in the new year under the Inflation Reduction Act (IRA). The company's shares rose about 1% in premarket trading. Tesla, the world's most valuable automaker, delivered a record number of vehicles in the fourth quarter, helping the company hit its 2023 target of 1.8 million. The company handed 484,507 vehicles in the last three months of the year, compared with estimates of 473,253"
3,"TSLA delivered record-breaking vehicle deliveries in the fourth quarter, beating Wall Street’s consensus target of 1.81 million units. The company delivered more than 480,000 vehicles in the fourth quarter, exceeding its full-year target of 1.81 million units. China’s BYD battery electric vehicles outpaced Tesla in the second quarter, surpassing the stock’s 180,000 deliveries. Tesla stock fell slightly after the report, but remains within a buy zone and outperforms the broader market.","Tesla (TSLA) deliveries in the fourth quarter exceeded Wall Street predictions, as the global EV giant sold a record-setting number of vehicles in Q4 and hit full-year expectations, according to data released by Tesla early Tuesday. TSLA edged down a fraction. X Elon Musk's Tesla reported Tuesday that it produced 494,989 units and delivered 484,507 vehicles during the fourth quarter. For the full year, vehicle deliveries grew 38% to 1.81 million while production increased 35% to 1.85 million units. The company delivered 461,538 Model 3/Y vehicles in Q4 and 22,969 ""other models."" Tesla currently produces the Model 3, Model Y, Model"
4,"footage shows a vast fleet of Teslas at Shanghai South Port, the drone operator said. The fleet primarily consists of right-hand drive variants, suggesting a likely European destination. According to drone operator Wu Wa, the Teslas are expected to depart from the port on January 8, signifying the start of their worldwide distribution.","In an impressive showcase of China’s manufacturing prowess, drone footage captured by longtime Tesla watcher Wu Wa reveals an enormous fleet of Teslas at Shanghai South Port, ready to embark on a global journey. The sheer magnitude of the fleet, referred to colloquially as a ‘gazillion,’ underscores the robust growth in Tesla’s car exports from China. The fleet stationed at the port primarily comprises right-hand drive variants, ruling out destinations like Australia, New Zealand, or the United Kingdom. The logical conclusion is that these Teslas are set to make their way to Europe. Wu Wa, the operator of the drone, predicts their departure from the"
5,"grew its position in shares of Schlumberger by 27.3% during the 4th quarter, according to Avitas Wealth Management. The institutional investor owned 100,019 shares of the oilfield services company's stock after selling 38,570 shares during the period. Schlumberger accounts for approximately 8.5% of Avitas Wealth Management's holdings, making it the 28th largest position in the company. Avitas Wealth Management's holdings in Schlumberger were worth $5.2 million at the end of the most recent quarter. Get Schlumberger alerts: Sign Up Several other large investors have also bought and sold shares of Schlumberger","Avitas Wealth Management LLC grew its holdings in Schlumberger Limited (NYSE:SLB - Free Report) by 27.3% during the 4th quarter, according to the company in its most recent disclosure with the Securities & Exchange Commission. The firm owned 100,019 shares of the oil and gas company's stock after purchasing an additional 21,474 shares during the period. Schlumberger makes up about 0.9% of Avitas Wealth Management LLC's holdings, making the stock its 28th biggest position. Avitas Wealth Management LLC's holdings in Schlumberger were worth $5,205,000 as of its most recent SEC filing. Get Schlumberger alerts: Sign Up"
6,"statements made in this document are not guarantees of future results and involve risks and uncertainties that could cause actual results to differ materially from those expressed or implied. Forward-looking statements made in this document are not guarantees of future results, and forward-looking statements are subject to certain limitations. SLB and ChampionX have filed joint filings with the Securities and Exchange Commission (SEC) for the following reasons: uncertainty regarding the proposed merger transaction between SLB and ChampionX; the ability to integrate the businesses and achieve synergies; changes in market demand; changes in economic conditions; government approvals; cyber-attacks","should, estimates, intends, plans, seeks, targets, may, can, believe, predict, potential, projected, projections, precursor, forecast, ambition, goal, scheduled, think, could, would, will, see, likely, and other similar expressions or"
7,"The Coca-Cola Foundation has awarded the Global Water Center (GWC) $499,439 in grants from The Coca-Cola Foundation to address global water challenges. The funds will support two initiatives: 1. Provides training on solar-powered water systems in India, equipping engineers to supply safe water to millions of people around the world. 2. Provides funding for 149,500 Grants: Enhances GWC’s online courses and develops an SPWS maintenance guide. The funds will also provide technical assistance to partners implementing rural","CHARLESTON, S.C., April 04, 2024 (GLOBE NEWSWIRE) — The Global Water Center has received two grants worth a total of $499,439 from The Coca-Cola Foundation to support multiple initiatives that will utilize training, technology, and relationships to create sustainable, local solutions for water problems worldwide. “The Coca-Cola Foundation’s generosity has significantly accelerated the Global Water Center’s ability, and that of our partners, to help more people have access to safe and reliable drinking water,” said Thomas Johnston, Chief Executive Officer of the Global Water Center. “This grant will allow the Global Water Center to more effectively share knowledge and resources with individuals, organizations"
8,(NYSE: JNJ) today announced an unsolicited mini-tender offer from TRC Capital Investment Corporation to purchase 1 million shares of JNJ stock at a price of $151.23 per share. The offer is 4.12% below the current market price and conditions. The offer is subject to approval by the SEC. Please consult our Securities and Exchange Commission guidance before making any such offer.,"NEW BRUNSWICK, N.J., April 04, 2024--(BUSINESS WIRE)--Johnson & Johnson (NYSE: JNJ) today announced that it has received notice of an unsolicited mini-tender offer by TRC Capital Investment Corporation of Ontario, Canada to purchase up to 1 million shares of Johnson & Johnson common stock at a price of $151.23 per share in cash. TRC Capital Investment’s offer price of $151.23 per share is approximately 4.12% lower than the $157.73 closing share price of Johnson & Johnson’s common stock on April 2, 2024, the business day prior to the date of the offer. The offer is"
9,"launch Salesforce+, a business-focused streaming service, at its Dreamforce event in September. The company said it will offer original content and live experiences at its Dreamforce event in September. “Salesforce+ is going to be different than other streaming services like Disney+ and Netflix,” Salesforce said in a statement. “It’s going to be a great place for businesses to connect with their audiences,” Salesforce said in a statement.","Software company Salesforce announced on Tuesday, August 10, that it will launch Salesforce+, a streaming service with original business-focused content. Salesforce’s in-house studio has developed and produced the content for Salesforce+, which the company plans to unveil at its annual Dreamforce event in September. The service, which is meant to be a business media platform, will include live experiences, original serie,s and podcasts, unlike streaming options offered by Walt Disney and Netflix. The Salesforce+ content includes “Connections,” a series that features marketers from companies including IBM, Levi’s, and GoFundMe, and “The Inflection Point,” a collection of interviews with chief executive officers of brands including Coca-"


In [None]:
#to evaluate the generated text using metrics like "bleu" and "rouge"
!pip install evaluate
import evaluate
import nltk
nltk.download('punkt')
from nltk.tokenize import word_tokenize

Collecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets>=2.0.0 (from evaluate)
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
Collecting dill (from evaluate)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from evaluate)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from evaluate)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
metric = evaluate.load("bleu")
references = [ [a] for a in actuals ]
results = metric.compute(predictions=predictions, references=references, tokenizer=word_tokenize)
results

Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

{'bleu': 0.12044687173294896,
 'precisions': [0.47593712212817413,
  0.1897176844420907,
  0.10154834458902981,
  0.0603222180591982],
 'brevity_penalty': 0.7854049579111254,
 'length_ratio': 0.8054410284712528,
 'translation_length': 24810,
 'reference_length': 30803}

In [None]:
!pip install rouge_score
metric = evaluate.load('rouge')
results = metric.compute(predictions=predictions, references=references, tokenizer=word_tokenize)
results

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=67b2618086f771abea63aa6c81341c0d2d5f0078f42a36976d4e758483a83d4e
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2


Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

{'rouge1': 0.4213692852995651,
 'rouge2': 0.16868892844491362,
 'rougeL': 0.26252092774909325,
 'rougeLsum': 0.26201676763947385}

In [None]:
model.save_pretrained('t5_base_epochs_8')
tokenizer.save_pretrained('t5_base_tokenizer_epochs_8')

('t5_base_tokenizer_epochs_8/tokenizer_config.json',
 't5_base_tokenizer_epochs_8/special_tokens_map.json',
 't5_base_tokenizer_epochs_8/spiece.model',
 't5_base_tokenizer_epochs_8/added_tokens.json')

In [None]:
repo_name = 'T5-base-news-summarization'

In [None]:
from transformers import Trainer, TrainingArguments
hf_token = "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"
args = TrainingArguments(
    output_dir='./results',  # where to save model checkpoints
    hub_model_id=f'yatharth97/{repo_name}',  # your HF model repository
    push_to_hub=True,  # enables pushing to hub after training, if you train within this script
    hub_token=hf_token,  # your Hugging Face API token
)

In [None]:
trainer = Trainer(model=model, tokenizer=tokenizer, args=args)

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


In [None]:
trainer.push_to_hub(
    commit_message="Commit message describing the changes made",
    blocking=True
)

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/4.98k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/yatharth97/T5-base-news-summarization/commit/3b078adfd042765ba1c5c61913d2101acb79ba1b', commit_message='Commit message describing the changes made', commit_description='', oid='3b078adfd042765ba1c5c61913d2101acb79ba1b', pr_url=None, pr_revision=None, pr_num=None)