# 📚 Summarization Benchmarking

Compare extractive vs abstractive summarization methods using traditional NLP and LLMs.

## 📄 Load Sample Article

In [None]:
# Read the whole sample article into memory. The encoding is pinned so the
# text decodes the same way regardless of the platform's locale default.
with open("data/sample_article.txt", encoding="utf-8") as f:
    article = f.read()

print(article)


## ✂️ Extractive Summarization (TextRank via spaCy + pytextrank)

In [None]:
import spacy
import pytextrank  # noqa: F401  (never referenced by name; imported so the "textrank" pipe can be added below)

# Load the small English spaCy model and append the TextRank component.
nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("textrank")

doc = nlp(article)

# summary() yields sentences lazily. Materialize them into a list so that
# printing here does not exhaust the iterator before a later cell reads
# `extractive_summary` again (the ROUGE cell joins these sentences).
extractive_summary = list(
    doc._.textrank.summary(limit_phrases=10, limit_sentences=3)
)
for sent in extractive_summary:
    print(sent)


## 🧠 Abstractive Summarization (FLAN-T5 via Transformers)

In [None]:
from transformers import pipeline

# Build a summarization pipeline whose model and tokenizer are both loaded
# from the same FLAN-T5 checkpoint.
FLAN_T5_CHECKPOINT = "google/flan-t5-base"
abstractive = pipeline(
    "summarization",
    model=FLAN_T5_CHECKPOINT,
    tokenizer=FLAN_T5_CHECKPOINT,
)

# Run the article through the pipeline with sampling disabled, then keep the
# "summary_text" field of the first result.
generated = abstractive(article, max_length=100, min_length=30, do_sample=False)
ab_summary = generated[0]["summary_text"]
print(ab_summary)


## 📏 ROUGE Evaluation

In [None]:
from rouge import Rouge

# Join the extractive sentences into one space-separated string; it is passed
# as the second argument (the reference) and the abstractive summary as the
# first (the hypothesis).
reference_text = " ".join(str(sentence) for sentence in extractive_summary)

rouge = Rouge()
scores = rouge.get_scores(ab_summary, reference_text)
print(scores)
