# Text Summarization

In [3]:
# pip install openai tiktoken python-dotenv evaluate rouge-score absl-py nltk

In [8]:
import os
from dotenv import load_dotenv
import tiktoken
from openai import OpenAI
import evaluate

# Pull settings from a .env file in the working directory into the environment.
load_dotenv()

# The OpenAI client cannot be built without a key, so fail fast when the
# variable is unset or empty.
api_key = os.environ.get("OPENAI_API_KEY")
if api_key is None or api_key == "":
    raise ValueError("Missing OPENAI_API_KEY in environment")

# Module-level client used for the chat-completion requests in this file.
client = OpenAI(api_key=api_key)

In [9]:
def chunk_text(text, max_tokens=2000, model="gpt-4"):
    """Yield successive pieces of ``text`` of at most ``max_tokens`` tokens each.

    The text is tokenized with the tiktoken encoding looked up for ``model``,
    the token list is cut into consecutive windows of ``max_tokens`` tokens,
    and each window is decoded back to a string.

    Args:
        text: The text to split.
        max_tokens: Maximum number of tokens per chunk; must be at least 1.
        model: Model name used to select the tiktoken encoding.

    Yields:
        The decoded text of each token window, in document order. Nothing is
        yielded when ``text`` encodes to zero tokens.

    Raises:
        ValueError: If ``max_tokens`` is less than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    if max_tokens < 1:
        # A negative step would make range() empty and silently drop the whole
        # text; a zero step would fail with an unrelated range() error.
        raise ValueError(
            f"max_tokens must be a positive integer, got {max_tokens!r}"
        )

    enc = tiktoken.encoding_for_model(model)
    tokens = enc.encode(text)
    for start in range(0, len(tokens), max_tokens):
        # NOTE(review): windows are cut purely by token count, so a boundary
        # may fall mid-word or mid-character -- confirm that is acceptable.
        yield enc.decode(tokens[start : start + max_tokens])

In [10]:
def summarize_chunk(chunk, model="gpt-4", temperature=0.3, max_tokens=1024):
    """Ask the chat model for a summary of ``chunk`` and return it as text.

    Args:
        chunk: The text to summarize.
        model: Chat model name sent with the request.
        temperature: Sampling temperature sent with the request.
        max_tokens: Upper bound on the number of tokens in the reply.

    Returns:
        The content of the first returned choice with surrounding whitespace
        removed, or an empty string when the reply carries no text content.
    """
    resp = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant that summarizes text."},
            {"role": "user", "content": f"Summarize this:\n\n{chunk}"},
        ],
        temperature=temperature,
        max_tokens=max_tokens,
    )
    # The message content is optional in the API response; guard against None
    # so a content-less reply does not fail on .strip().
    content = resp.choices[0].message.content
    return (content or "").strip()

In [11]:
def summarize_long_text(text):
    """Summarize ``text`` of arbitrary length via per-chunk summaries.

    The text is split with ``chunk_text`` and every chunk is summarized with
    ``summarize_chunk``. When there is more than one chunk, the partial
    summaries are joined with blank lines and summarized once more to produce
    the final result.

    Args:
        text: The document to summarize.

    Returns:
        The summary string.

    Raises:
        ValueError: If ``text`` produces no chunks (for example, it is empty).
    """
    summaries = [summarize_chunk(piece) for piece in chunk_text(text)]

    if not summaries:
        # Previously this fell through to summaries[0] and died with a bare
        # IndexError; report the real problem instead.
        raise ValueError("Cannot summarize empty text: no chunks were produced")

    if len(summaries) == 1:
        return summaries[0]

    # NOTE(review): the joined partial summaries are sent in a single request
    # without re-chunking -- confirm this fits the model's context window for
    # very long inputs.
    return summarize_chunk("\n\n".join(summaries))

In [12]:
# Load the three evaluation metrics once at module level, in the order
# ROUGE, METEOR, BLEU, so the scoring code can reuse them.
rouge, meteor, bleu = (
    evaluate.load(metric_name) for metric_name in ("rouge", "meteor", "bleu")
)

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/tomtaulli/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /Users/tomtaulli/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/tomtaulli/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [None]:
if __name__ == "__main__":
    # Read the document to summarize; a missing file is left to raise.
    with open("long_doc.txt", "r", encoding="utf-8") as f:
        text = f.read()

    # The reference summary is optional; without it the metrics are skipped.
    ref_summary = ""
    if os.path.exists("ref_summary.txt"):
        # Use a context manager so the file handle is closed deterministically.
        with open("ref_summary.txt", "r", encoding="utf-8") as ref_file:
            ref_summary = ref_file.read()
    else:
        print("Warning: No ref_summary.txt found—benchmarks will be skipped.")

    print("🔍 Summarizing...")
    summary = summarize_long_text(text)
    print("\n📄 Summary:\n", summary)

    if ref_summary:
        rouge_scores = rouge.compute(predictions=[summary], references=[ref_summary])
        meteor_scores = meteor.compute(predictions=[summary], references=[ref_summary])
        bleu_scores = bleu.compute(predictions=[summary], references=[ref_summary])

        print("\n📊 Evaluation Metrics:")
        print(
            f"ROUGE-1: {rouge_scores['rouge1']:.3f}, "
            f"ROUGE-2: {rouge_scores['rouge2']:.3f}, "
            f"ROUGE-L: {rouge_scores['rougeL']:.3f}"
        )
        # The METEOR metric reports its result under the "meteor" key.
        print(f"METEOR: {meteor_scores['meteor']:.3f}")
        print(f"BLEU: {bleu_scores['bleu']:.3f}")
    else:
        print("🔹 No reference summary provided—metrics skipped.")