# Text Summarization

In [3]:
# pip install rouge-score absl-py nltk

In [12]:
import os
from dotenv import load_dotenv
import tiktoken
import openai
import evaluate

# Set the EVALUATE_DISABLE_WIDGETS flag for the `evaluate` library.
os.environ["EVALUATE_DISABLE_WIDGETS"] = "True"

load_dotenv()  # loads .env from cwd (or specify path)

# Fail fast with a clear message when no key is configured.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise ValueError("Missing OPENAI_API_KEY in environment")

# `openai` was imported before load_dotenv() ran, and legacy (pre-1.0)
# releases read OPENAI_API_KEY once at import time.  Assign the key
# explicitly so a key that lives only in .env is actually used.
openai.api_key = api_key

In [7]:
def chunk_text(text, max_tokens=2000, model="gpt-4"):
    """Yield consecutive pieces of *text*, each at most *max_tokens* tokens long.

    The text is tokenized with the tiktoken encoding registered for *model*,
    the token list is cut into fixed-size windows, and each window is decoded
    back to a string.

    Args:
        text: The string to split.
        max_tokens: Maximum number of tokens per yielded piece; must be > 0.
        model: Model name used to look up the tiktoken encoding.

    Yields:
        Decoded text pieces in their original order.  Nothing is yielded when
        *text* encodes to zero tokens.

    Raises:
        ValueError: If *max_tokens* is not positive.  Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently drop the whole
    # text; a zero step fails with an unrelated range() message.  Reject both.
    if max_tokens <= 0:
        raise ValueError(
            f"max_tokens must be a positive integer, got {max_tokens!r}"
        )
    enc = tiktoken.encoding_for_model(model)
    tokens = enc.encode(text)
    # NOTE: windows are cut purely by token count, so a boundary can fall in
    # the middle of a word or sentence.
    for start in range(0, len(tokens), max_tokens):
        yield enc.decode(tokens[start:start + max_tokens])

In [8]:
def summarize_chunk(chunk):
    """Ask the GPT-4 chat model to summarize *chunk* and return the summary.

    Args:
        chunk: The text to summarize; it is embedded in the user message.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or an empty string if the model returned no content.
    """
    request = {
        "model": "gpt-4",
        "messages": [
            {"role": "system", "content": "You are a helpful assistant that summarizes text."},
            {"role": "user", "content": f"Summarize this:\n\n{chunk}"},
        ],
        "temperature": 0.3,
        "max_tokens": 1024,
    }
    # openai>=1.0 removed `openai.ChatCompletion` in favor of
    # `chat.completions`; the `OpenAI` client class only exists in the new
    # generation, so its presence tells the two apart.
    if hasattr(openai, "OpenAI"):
        resp = openai.chat.completions.create(**request)
    else:
        resp = openai.ChatCompletion.create(**request)
    # The message content may be None; avoid calling .strip() on it.
    content = resp.choices[0].message.content
    return (content or "").strip()

In [9]:
def summarize_long_text(text):
    """Summarize *text* of arbitrary length by summarizing it chunk by chunk.

    Each piece produced by ``chunk_text`` is summarized with
    ``summarize_chunk``.  When there is more than one partial summary, they
    are joined with blank lines and summarized once more into a single result.

    Args:
        text: The full document to summarize.

    Returns:
        The summary string, or an empty string when *text* yields no chunks
        (for example, when it is empty).
    """
    summaries = [summarize_chunk(piece) for piece in chunk_text(text)]
    # Empty input produces no chunks; indexing summaries[0] would raise.
    if not summaries:
        return ""
    if len(summaries) == 1:
        return summaries[0]
    return summarize_chunk("\n\n".join(summaries))

In [13]:
# Load the three metrics used to score the summary, in the order
# ROUGE, METEOR, BLEU.
rouge, meteor, bleu = (
    evaluate.load(metric_name) for metric_name in ("rouge", "meteor", "bleu")
)

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/tomtaulli/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to /Users/tomtaulli/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/tomtaulli/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [14]:
if __name__ == "__main__":
    # Read both input files before calling the model, so a missing file
    # fails before any API calls are made.  Context managers close the
    # handles promptly.
    with open("long_doc.txt", encoding="utf-8") as f:
        text = f.read()
    with open("ref_summary.txt", encoding="utf-8") as f:
        reference = f.read()

    summary = summarize_long_text(text)

    # Compute metrics
    rouge_scores = rouge.compute(predictions=[summary], references=[reference])
    meteor_scores = meteor.compute(predictions=[summary], references=[reference])
    bleu_scores = bleu.compute(predictions=[summary], references=[reference])

    print("Summary:\n", summary)
    print("\nEvaluation Metrics:")
    print(f"ROUGE‑1/2/L: {rouge_scores['rouge1']:.3f}, {rouge_scores['rouge2']:.3f}, {rouge_scores['rougeL']:.3f}")
    # The METEOR metric reports its value under the 'meteor' key.
    print(f"METEOR: {meteor_scores['meteor']:.3f}")
    print(f"BLEU: {bleu_scores['bleu']:.3f}")

FileNotFoundError: [Errno 2] No such file or directory: 'long_doc.txt'