In [None]:
!pip install spacy rouge-score datasets tqdm scikit-learn bert-score

In [None]:
!python -m spacy download en_core_web_lg

In [None]:
import spacy
from heapq import nlargest
from string import punctuation
from spacy.lang.en.stop_words import STOP_WORDS
from rouge_score import rouge_scorer
from tqdm import tqdm
from bert_score import score

In [None]:
from datasets import load_dataset

# Test split of each summarization benchmark used in this notebook.

# ds1: XSum
ds1 = load_dataset(path="EdinburghNLP/xsum", split="test")

# ds2: GovReport
ds2 = load_dataset(path="ccdv/govreport-summarization", split="test")

# ds3: CNN/DailyMail, configuration "1.0.0"
ds3 = load_dataset(path="abisee/cnn_dailymail", name="1.0.0", split="test")

# ds4: PubMed, configuration "document"
ds4 = load_dataset(path="ccdv/pubmed-summarization", name="document", split="test")


# **Summarization function**

In [None]:
nlp = spacy.load("en_core_web_lg")

def summarize_text(text, compression_ratio=0.3):
    """Build an extractive summary from the highest-scoring sentences of ``text``.

    Each non-stopword, non-punctuation, non-whitespace token gets a weight equal
    to its (lower-cased) frequency divided by the highest frequency in the
    document. A sentence's score is the sum of the weights of its tokens, and
    the top-scoring sentences are joined with single spaces.

    Args:
        text: The document to summarize.
        compression_ratio: Fraction of the document's sentences to keep. The
            count is truncated to an integer, but at least one sentence is kept
            whenever the document has sentences and the ratio is positive.

    Returns:
        The selected sentences joined by spaces, in descending score order
        (not document order). If any exception is raised while summarizing,
        the error is printed and the original ``text`` is returned unchanged.
    """
    try:
        doc = nlp(text)

        # Count content words only. STOP_WORDS is queried directly instead of
        # being copied into a list on every call.
        word_frequencies = {}
        for token in doc:
            # Whitespace tokens (e.g. "\n") and multi-character punctuation
            # (e.g. "!!") are not words and must not influence the scores.
            if token.is_space or token.is_punct:
                continue
            lowered = token.text.lower()
            if lowered in STOP_WORDS or lowered in punctuation:
                continue
            word_frequencies[lowered] = word_frequencies.get(lowered, 0) + 1

        # default=1 keeps the normalisation safe when no content word was found.
        max_frequency = max(word_frequencies.values(), default=1)
        word_frequencies = {word: freq / max_frequency for word, freq in word_frequencies.items()}

        # Materialise the sentences once; they are needed for scoring, counting
        # and final selection.
        sentences = list(doc.sents)

        # Scores are keyed by sentence index; sentences without any weighted
        # token get no entry and therefore can never be selected.
        sentence_scores = {}
        for index, sentence in enumerate(sentences):
            for token in sentence:
                weight = word_frequencies.get(token.text.lower())
                if weight is not None:
                    sentence_scores[index] = sentence_scores.get(index, 0) + weight

        select_length = int(len(sentences) * compression_ratio)
        # Short documents would otherwise truncate to zero sentences and
        # silently yield an empty summary.
        if select_length < 1 and sentences and compression_ratio > 0:
            select_length = 1

        top_indices = nlargest(select_length, sentence_scores, key=sentence_scores.get)
        summary = ' '.join(sentences[index].text for index in top_indices)
        return summary
    except Exception as e:
        # Best-effort fallback: report the problem and hand back the input.
        print(f"Error summarizing text: {e}")
        return text


# **ROUGE**

In [None]:
scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)

num_samples = 100

# Documents and references must come from the SAME dataset so that every
# article is scored against its own abstract.
# NOTE(review): the "article"/"abstract" column names match the PubMed dataset
# loaded as ds4 - confirm against the dataset card.
documents = ds4["article"][:num_samples]
reference_summaries = ds4["abstract"][:num_samples]

rouge_scores = {"rouge1": [], "rouge2": [], "rougeL": []}

# total uses the real number of documents, which may be below num_samples.
for doc, ref_summary in tqdm(zip(documents, reference_summaries), total=len(documents)):
    predicted_summary = summarize_text(doc)
    # RougeScorer.score takes (target, prediction): reference first.
    scores = scorer.score(ref_summary, predicted_summary)

    for metric in rouge_scores:
        rouge_scores[metric].append(scores[metric].fmeasure)

# Average over the pairs that were actually scored; 0.0 if there were none.
avg_rouge = {
    key: (sum(values) / len(values) if values else 0.0)
    for key, values in rouge_scores.items()
}

print("Average ROUGE Scores:")
for metric, value in avg_rouge.items():
    print(f"{metric.upper()}: {value:.4f}")

# **BERTScore**

In [None]:
def evaluate_bertscore_on_summaries(dataset, summarize_text, num_samples=100,
                                    document_column="document", summary_column="summary"):
    """Summarize the first rows of ``dataset`` and return the mean BERTScore F1.

    Args:
        dataset: Object indexable by column name, where each column supports
            slicing (e.g. a loaded Hugging Face dataset split).
        summarize_text: Callable mapping a document string to its summary.
        num_samples: Maximum number of leading rows to evaluate.
        document_column: Name of the column holding the source documents.
        summary_column: Name of the column holding the reference summaries.

    Returns:
        The mean of the F1 values returned by ``score`` for the generated
        summaries against the references, as a Python float.
    """
    # Read from the dataset that was passed in, not from a notebook global.
    documents = dataset[document_column][:num_samples]
    reference_summaries = list(dataset[summary_column][:num_samples])

    # total uses the real number of documents, which may be below num_samples.
    generated_summaries = [
        summarize_text(doc) for doc in tqdm(documents, total=len(documents))
    ]

    # Only F1 is reported; precision and recall are discarded.
    _, _, F1 = score(generated_summaries, reference_summaries, lang="en", verbose=True)

    avg_bertscore = F1.mean().item()

    return avg_bertscore

# Score the summarizer on ds1 with the function's default sample size.
avg_bertscore = evaluate_bertscore_on_summaries(
    dataset=ds1,
    summarize_text=summarize_text,
)

print(f"Average BERTScore: {avg_bertscore:.4f}")