 # Extractive Summarization - BERT

## 1.0 Install Libraries/Packages

In [None]:
%pip install -U datasets
%pip install transformers torch

In [1]:
import pandas as pd
import torch
from datasets import load_dataset
from datasets import load_metric
from transformers import BertTokenizer, BertModel
from typing import Dict, Any
import nltk
nltk.download('punkt')

  torch.utils._pytree._register_pytree_node(
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\edmun\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

## 2.0 Load Dataset

In [2]:
# Load the test split from the Hugging Face Hub.
# This dataset is loaded through a script hosted with the dataset, so opt in
# explicitly with trust_remote_code=True: the library warns that this argument
# will be mandatory from its next major release.
dataset = load_dataset('ccdv/pubmed-summarization', split="test", trust_remote_code=True)

# Take only 125 records; the fixed shuffle seed makes the sample reproducible
test_data = dataset.shuffle(seed=42).select(range(125))
test_df = pd.DataFrame(test_data)


You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


## 3.0 Transformer Based Extractive Summarization

In [4]:
# Initialize the tokenizer and model.
# Both are loaded from the same 'bert-base-uncased' pretrained checkpoint and are
# passed to `summarize` as arguments when the summaries are generated.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

def summarize(text, model, tokenizer, num_sentences=5):
    """Build an extractive summary by ranking sentences with BERT [CLS] embeddings.

    The text is lower-cased, split into sentences with NLTK, and every sentence
    is encoded by `model` in a single batch. Each sentence is scored by the L2
    norm of its first-token ([CLS]) embedding, and the highest-scoring sentences
    are joined with single spaces.

    Args:
        text: Document to summarize.
        model: Encoder whose output exposes `last_hidden_state` and which has a
            `device` attribute (e.g. a `BertModel`).
        tokenizer: Tokenizer matching `model`; called on the list of sentences.
        num_sentences: Maximum number of sentences to keep. `None` keeps every
            sentence; zero or a negative value yields an empty summary.

    Returns:
        The selected sentences (lower-cased) joined by spaces, ordered from
        highest to lowest score rather than by position in the document.
        Returns '' when there is nothing to select.
    """
    # Nothing to rank: bail out before touching the tokenizer or the model.
    # An empty sentence list cannot be encoded, and a negative count would
    # otherwise slice from the end of the ranking instead of limiting it.
    if (num_sentences is not None and num_sentences <= 0) or not text.strip():
        return ''

    # Lower the text and tokenize into sentences
    sentences = nltk.sent_tokenize(text.lower())
    if not sentences:
        return ''

    # Encode all sentences as one padded batch; sentences longer than
    # 128 tokens are truncated.
    encoded = tokenizer(
        sentences,
        return_tensors='pt',
        padding=True,
        truncation=True,
        max_length=128,
        add_special_tokens=True,
    )

    # Inference only: no gradients needed. Inputs follow the model's device.
    with torch.no_grad():
        model_inputs = {key: encoded[key].to(model.device) for key in encoded}
        outputs = model(**model_inputs)
        cls_embeddings = outputs.last_hidden_state[:, 0, :]  # first token is [CLS]

    # Score each sentence by embedding norm and rank from highest to lowest
    scores = torch.norm(cls_embeddings, dim=1)
    ranked = torch.argsort(scores, descending=True)

    # Convert to plain ints so list indexing does not depend on tensor objects
    top_indices = ranked[:num_sentences].tolist()
    return ' '.join(sentences[i] for i in top_indices)

# Summarize every article; the shared model and tokenizer are forwarded to
# `summarize` as extra positional arguments after each article text.
test_df['generated_summary'] = test_df['article'].apply(summarize, args=(model, tokenizer))


## 4.0 Evaluation

In [5]:
# Row-aligned plain lists: generated summaries and their reference abstracts.
predictions = test_df['generated_summary'].tolist()
references = test_df['abstract'].tolist()


In [6]:
# The ROUGE metric is loaded through a remote script; opt in explicitly with
# trust_remote_code=True, which the library warns will be mandatory from its
# next major release.
rouge = load_metric("rouge", trust_remote_code=True)
# Score the generated summaries against the reference abstracts.
rouge_scores = rouge.compute(predictions=predictions, references=references)
print(rouge_scores)

def simplify_rouge_scores(rouge_scores: Dict[str, Any]) -> str:
    """Render aggregate ROUGE scores as one human-readable line per metric.

    Args:
        rouge_scores: Mapping from metric name (e.g. 'rouge1') to an aggregate
            score exposing `low` and `high` attributes, each of which has
            `precision`, `recall` and `fmeasure` attributes.

    Returns:
        One line per metric giving the low-to-high range of precision, recall
        and F1 as percentages with two decimals. Every line, including the
        last, ends with a newline; an empty mapping yields ''.
    """
    # Only the low and high bounds are reported, so the mid estimate is not read.
    lines = [
        f"{name}: Precision ranges from {agg.low.precision:.2%} to {agg.high.precision:.2%}, "
        f"Recall ranges from {agg.low.recall:.2%} to {agg.high.recall:.2%}, "
        f"F1 Score ranges from {agg.low.fmeasure:.2%} to {agg.high.fmeasure:.2%}.\n"
        for name, agg in rouge_scores.items()
    ]
    return "".join(lines)

# Print the per-metric low-to-high ranges in readable form.
print(simplify_rouge_scores(rouge_scores))


  rouge = load_metric("rouge")
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this metric from the next major release of `datasets`.


{'rouge1': AggregateScore(low=Score(precision=0.30750846788825115, recall=0.275241859700722, fmeasure=0.27130564440688104), mid=Score(precision=0.32912532624236135, recall=0.29860603549467823, fmeasure=0.2875972299071088), high=Score(precision=0.3516718735854453, recall=0.3213135107474994, fmeasure=0.30355767600590294)), 'rouge2': AggregateScore(low=Score(precision=0.07273991749532677, recall=0.06599572691519101, fmeasure=0.06481791224325473), mid=Score(precision=0.08660828906178913, recall=0.07746268327689801, fmeasure=0.07429792990260478), high=Score(precision=0.10424350162825315, recall=0.09227525471415907, fmeasure=0.08545589611269577)), 'rougeL': AggregateScore(low=Score(precision=0.16040256451945498, recall=0.14517032640371555, fmeasure=0.1421760137620293), mid=Score(precision=0.1752389767907766, recall=0.15716670142515324, fmeasure=0.15089707331228103), high=Score(precision=0.1903922134676424, recall=0.17094601441078375, fmeasure=0.15961185813742546)), 'rougeLsum': AggregateScor

In [7]:
# Raw ROUGE result dict; this is the same value already printed by the previous cell.
print(rouge_scores)

{'rouge1': AggregateScore(low=Score(precision=0.30750846788825115, recall=0.275241859700722, fmeasure=0.27130564440688104), mid=Score(precision=0.32912532624236135, recall=0.29860603549467823, fmeasure=0.2875972299071088), high=Score(precision=0.3516718735854453, recall=0.3213135107474994, fmeasure=0.30355767600590294)), 'rouge2': AggregateScore(low=Score(precision=0.07273991749532677, recall=0.06599572691519101, fmeasure=0.06481791224325473), mid=Score(precision=0.08660828906178913, recall=0.07746268327689801, fmeasure=0.07429792990260478), high=Score(precision=0.10424350162825315, recall=0.09227525471415907, fmeasure=0.08545589611269577)), 'rougeL': AggregateScore(low=Score(precision=0.16040256451945498, recall=0.14517032640371555, fmeasure=0.1421760137620293), mid=Score(precision=0.1752389767907766, recall=0.15716670142515324, fmeasure=0.15089707331228103), high=Score(precision=0.1903922134676424, recall=0.17094601441078375, fmeasure=0.15961185813742546)), 'rougeLsum': AggregateScor

In [11]:
import nltk
from nltk.translate.meteor_score import meteor_score

# Ensure required NLTK resources are downloaded: the 'wordnet' and 'omw-1.4' packages.
# NOTE(review): presumably these back the synonym matching in meteor_score — confirm.
nltk.download('wordnet')
nltk.download('omw-1.4')

def evaluate_summaries_meteor(df, summary_col, reference_col):
    """Average METEOR score of generated summaries against their references.

    Each row contributes one score: the text in `reference_col` is used as the
    single reference and the text in `summary_col` as the hypothesis. Both are
    word-tokenized with NLTK before being passed to `meteor_score`.

    Args:
        df: DataFrame with one summary/reference pair per row.
        summary_col: Name of the column holding the generated summaries.
        reference_col: Name of the column holding the reference texts.

    Returns:
        The arithmetic mean of the per-row METEOR scores, or NaN when `df` has
        no rows (an average over zero rows is undefined).
    """
    # Guard the empty case explicitly instead of dividing by zero below.
    if len(df) == 0:
        return float('nan')

    # Walk the two columns in lockstep; only these two values are needed per row.
    scores = [
        meteor_score([nltk.word_tokenize(reference)], nltk.word_tokenize(summary))
        for reference, summary in zip(df[reference_col], df[summary_col])
    ]
    return sum(scores) / len(scores)  # Calculate the average METEOR score

# 'test_df' holds the generated text in 'generated_summary' and the reference text in 'abstract'
meteor_average_score = evaluate_summaries_meteor(test_df, 'generated_summary', 'abstract')
print("Average METEOR Score:", meteor_average_score)



[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\edmun\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\edmun\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Average METEOR Score: 0.2125544343018366
