# Text Summarization

In [3]:
# pip install openai tiktoken python-dotenv evaluate rouge-score absl-py nltk

In [14]:
import os
from dotenv import load_dotenv
import tiktoken
from openai import OpenAI
import evaluate

# Populate the process environment from a .env file in the working directory.
load_dotenv()

# Fail fast when the key is absent or set to an empty string, instead of
# handing an unusable value to the OpenAI client constructor.
api_key = os.environ.get("OPENAI_API_KEY")
if api_key is None or api_key == "":
    raise ValueError("Missing OPENAI_API_KEY in environment")

# Shared client used by the summarization helpers defined later on.
client = OpenAI(api_key=api_key)

In [15]:
def chunk_text(text, max_tokens=2000, model="gpt-4"):
    """Yield consecutive pieces of ``text`` holding at most ``max_tokens`` tokens.

    The text is tokenized with the tiktoken encoding registered for
    ``model``; the token list is cut into back-to-back windows of
    ``max_tokens`` tokens and each window is decoded back into a string.

    Args:
        text: The text to split.
        max_tokens: Upper bound on tokens per chunk. Must be positive.
        model: Model name used to look up the tiktoken encoding.

    Yields:
        The decoded chunks, in document order. Nothing is yielded for
        empty ``text``.

    Raises:
        ValueError: If ``max_tokens`` is zero or negative. Because this is
            a generator, the error surfaces on the first iteration rather
            than at call time.
    """
    # A zero step makes range() fail with an unrelated-looking message and a
    # negative step silently produces no chunks at all, so reject both here.
    if max_tokens <= 0:
        raise ValueError(
            f"max_tokens must be a positive integer, got {max_tokens!r}"
        )
    # Empty input has no tokens; skip the encoding lookup entirely.
    if not text:
        return

    enc = tiktoken.encoding_for_model(model)
    tokens = enc.encode(text)
    # NOTE(review): windows are cut on token boundaries, not sentence or
    # character boundaries; presumably a multi-byte character could be split
    # across two chunks - confirm against tiktoken's decode behaviour.
    for start in range(0, len(tokens), max_tokens):
        yield enc.decode(tokens[start : start + max_tokens])

In [16]:
def summarize_chunk(chunk, model="gpt-4"):
    """Ask the chat model for a summary of ``chunk`` and return its text.

    Sends a two-message conversation (a system prompt plus a user message
    containing ``chunk``) through the module-level ``client`` with
    ``temperature=0.3`` and a 1024-token completion limit.

    Args:
        chunk: The text to summarize.
        model: Chat model name passed to the API. Defaults to ``"gpt-4"``.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped.

    Raises:
        RuntimeError: If the first choice carries no text content.
    """
    resp = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant that summarizes text."},
            {"role": "user", "content": f"Summarize this:\n\n{chunk}"}
        ],
        temperature=0.3,
        max_tokens=1024,
    )
    choice = resp.choices[0]
    content = choice.message.content
    # The API declares message content as nullable; calling .strip() on None
    # would fail with an opaque AttributeError, so report it explicitly.
    if content is None:
        raise RuntimeError(
            "Model returned no text content "
            f"(finish_reason={choice.finish_reason!r})"
        )
    return content.strip()

In [17]:
def summarize_long_text(text):
    """Summarize ``text`` of arbitrary length by chunking and merging.

    The text is split with ``chunk_text`` (using its default limits) and
    each chunk is summarized with ``summarize_chunk``. When more than one
    chunk summary results, the summaries are joined with blank lines and
    summarized once more to produce the final answer.

    Args:
        text: The document to summarize.

    Returns:
        The summary string, or ``""`` when ``text`` is empty or contains
        only whitespace.
    """
    # Nothing to summarize: avoid indexing into an empty summary list (and
    # avoid spending an API call on blank input).
    if not text or not text.strip():
        return ""

    summaries = [summarize_chunk(piece) for piece in chunk_text(text)]
    if not summaries:
        return ""
    if len(summaries) == 1:
        return summaries[0]

    # NOTE: the joined summaries are sent in a single request and are not
    # re-chunked, so their combined length is not bounded here.
    return summarize_chunk("\n\n".join(summaries))

In [18]:
# Load the three evaluation metrics once, in the order ROUGE, METEOR, BLEU,
# so the reporting code below can reuse them.
rouge, meteor, bleu = [
    evaluate.load(metric_name) for metric_name in ("rouge", "meteor", "bleu")
]

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/tomtaulli/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /Users/tomtaulli/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/tomtaulli/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [21]:
if __name__ == "__main__":

    # Document to summarize. The encoding is stated explicitly so the result
    # does not depend on the platform's default text encoding.
    with open("long_doc.txt", "r", encoding="utf-8") as f:
        text = f.read()

    # Optional reference summary used for scoring. Opening the file directly
    # (instead of checking for existence first) avoids a check-then-open race,
    # and the context manager closes the handle promptly.
    ref_summary = ""
    try:
        with open("ref_summary.txt", "r", encoding="utf-8") as f:
            # Strip so a blank or whitespace-only file counts as "no reference".
            ref_summary = f.read().strip()
    except FileNotFoundError:
        print("Warning: No ref_summary.txt found—benchmarks will be skipped.")

    print("🔍 Summarizing...")
    summary = summarize_long_text(text)
    print("\n📄 Summary:\n", summary)

    # Score the generated summary against the reference when one is available.
    if ref_summary:
        rouge_scores = rouge.compute(predictions=[summary], references=[ref_summary])
        meteor_scores = meteor.compute(predictions=[summary], references=[ref_summary])
        bleu_scores = bleu.compute(predictions=[summary], references=[ref_summary])

        print("\n📊 Evaluation Metrics:")
        print(f"ROUGE‑1: {rouge_scores['rouge1']:.3f}, ROUGE‑2: {rouge_scores['rouge2']:.3f}, ROUGE‑L: {rouge_scores['rougeL']:.3f}")
        print(f"METEOR: {meteor_scores['meteor']:.3f}")
        print(f"BLEU: {bleu_scores['bleu']:.3f}")
    else:
        print("🔹 No reference summary provided—metrics skipped.")

🔍 Summarizing...

📄 Summary:
 "The Yellow Wallpaper" by Charlotte Perkins Gilman is a story about a woman suffering from postpartum depression. Her physician husband, John, prescribes her rest and isolation in a colonial mansion. She is confined to a room with yellow wallpaper that she finds disturbing and becomes obsessed with. Despite her belief that work and social interaction would help her recovery, she is discouraged from expressing her thoughts. She begins to see figures in the wallpaper, including a woman she believes is trapped. Despite her pleas, her husband dismisses her concerns. She grows increasingly paranoid, suspecting her husband and his sister, Jennie, are against her. In her obsession, she peels off the wallpaper, believing she has freed the trapped woman, causing John to faint.

📊 Evaluation Metrics:
ROUGE‑1: 0.466, ROUGE‑2: 0.147, ROUGE‑L: 0.290
METEOR: 0.430
BLEU: 0.060
