In [1]:
import os
import anthropic
import weave
from datetime import datetime, timezone
from dotenv import load_dotenv
from pydantic import BaseModel
import requests
import io
from PyPDF2 import PdfReader

# Populate the process environment (python-dotenv) before anything below reads
# it; this must run ahead of the os.getenv call further down.
load_dotenv()
# Setup
# Initialise Weave under this project name.
weave.init("summarization-chain-of-density-cookbook")
# Shared Anthropic client used by every LLM call in the notebook. os.getenv
# returns None when ANTHROPIC_API_KEY is unset, so a missing key is not
# detected on this line.
anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

In [None]:
# Define ArxivPaper model
# Pydantic model holding the metadata of a single arXiv paper. Only `pdf_url`
# is read by the pipeline code visible in this notebook; the remaining fields
# are carried along as descriptive metadata.
class ArxivPaper(BaseModel):
    entry_id: str  # paper identifier; the sample below uses the abstract-page URL
    updated: datetime  # last-updated timestamp
    published: datetime  # publication timestamp
    title: str
    authors: list[str]  # author names, one string per author
    summary: str  # abstract text
    pdf_url: str  # location of the PDF that load_pdf downloads

# Create sample ArxivPaper
# Hard-coded metadata for one paper, used as the single dataset row later in
# the notebook. Both timestamps are timezone-aware (UTC) and identical.
arxiv_paper = ArxivPaper(
    entry_id="http://arxiv.org/abs/2406.04744v1",
    updated=datetime(2024, 6, 7, 8, 43, 7, tzinfo=timezone.utc),
    published=datetime(2024, 6, 7, 8, 43, 7, tzinfo=timezone.utc),
    title="CRAG -- Comprehensive RAG Benchmark",
    authors=["Xiao Yang", "Kai Sun", "Hao Xin"],  # Truncated for brevity
    summary="Retrieval-Augmented Generation (RAG) has recently emerged as a promising solution...",  # Truncated
    pdf_url="https://arxiv.org/pdf/2406.04744"
)

In [None]:
@weave.op()
def load_pdf(pdf_url: str) -> str:
    """Download a PDF and return the text of all its pages.

    Args:
        pdf_url: URL of the PDF document to fetch.

    Returns:
        The text extracted from every page, concatenated in page order with no
        separator. A page that yields no text contributes an empty string.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    # Bound the wait so a stalled server cannot hang the notebook, and fail on
    # HTTP errors instead of handing an error page to the PDF parser.
    response = requests.get(pdf_url, timeout=60)
    response.raise_for_status()

    pdf_reader = PdfReader(io.BytesIO(response.content))

    # `or ""` guards against a page whose extraction returns nothing; join
    # avoids rebuilding the string once per page.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)

In [None]:
# Chain of Density Summarization
@weave.op()
def summarize_current_summary(document: str, instruction: str, current_summary: str = "", iteration: int = 1, model: str = "claude-3-sonnet-20240229"):
    """Run a single densification pass and return the model's reply text.

    The document, the summary so far, the focus instruction and the iteration
    number are all interpolated into one user message sent to `model`.
    """
    densify_prompt = f"""
    Document: {document}
    Current summary: {current_summary}
    Instruction to focus on: {instruction}
    Iteration: {iteration}

    Generate an increasingly concise, entity-dense, and highly technical summary from the provided document that specifically addresses the given instruction.
    """
    user_turn = {"role": "user", "content": densify_prompt}
    completion = anthropic_client.messages.create(
        messages=[user_turn],
        max_tokens=4096,
        model=model,
    )
    # Only the first content block of the reply is used.
    first_block = completion.content[0]
    return first_block.text

@weave.op()
def iterative_density_summarization(document: str, instruction: str, current_summary: str, density_iterations: int, model: str = "claude-3-sonnet-20240229"):
    """Apply `density_iterations` densification passes in sequence.

    Each pass receives the summary produced by the previous one (the first pass
    receives `current_summary`). Returns a tuple of the last summary and the
    list of every intermediate summary, in order.
    """
    history = []
    latest = current_summary
    for pass_index in range(density_iterations):
        # Iteration numbers passed to the model are 1-based.
        latest = summarize_current_summary(document, instruction, latest, pass_index + 1, model)
        history.append(latest)
    return latest, history

@weave.op()
def final_summary(instruction: str, current_summary: str, model: str = "claude-3-sonnet-20240229"):
    """Ask `model` to condense `current_summary` one last time.

    The source document is not sent here; the prompt contains only the summary
    and the focus instruction. Returns the text of the reply's first block.
    """
    condense_prompt = f"""
    Given this summary: {current_summary}
    And this instruction to focus on: {instruction}
    Create an extremely dense, final summary that captures all key technical information in the most concise form possible, while specifically addressing the given instruction.
    """
    user_turn = {"role": "user", "content": condense_prompt}
    completion = anthropic_client.messages.create(
        messages=[user_turn],
        max_tokens=4096,
        model=model,
    )
    first_block = completion.content[0]
    return first_block.text

@weave.op()
def chain_of_density_summarization(document: str, instruction: str, current_summary: str = "", model: str = "claude-3-sonnet-20240229", density_iterations: int = 2):
    """Run the full pipeline: iterative densification, then a final condensation.

    Returns a dict with three keys: "final_summary" (output of the final
    condensation), "accumulated_summary" (summary after the last densification
    pass) and "iteration_summaries" (every intermediate summary, in order).
    """
    densified = iterative_density_summarization(document, instruction, current_summary, density_iterations, model)
    accumulated, per_iteration = densified

    condensed = final_summary(instruction, accumulated, model)

    outcome = {}
    outcome["final_summary"] = condensed
    outcome["accumulated_summary"] = accumulated
    outcome["iteration_summaries"] = per_iteration
    return outcome

In [None]:
# Weave Model
class ArxivChainOfDensityPipeline(weave.Model):
    """Weave model that summarizes a paper's PDF with Chain of Density."""

    # Anthropic model name forwarded to every summarization call.
    model: str = "claude-3-sonnet-20240229"
    # Number of densification passes run before the final condensation step.
    density_iterations: int = 3

    @weave.op()
    def predict(self, paper: ArxivPaper, instruction: str) -> dict:
        """Download the paper's PDF and summarize it for `instruction`.

        Args:
            paper: The paper to summarize. Accepted either as a mapping with a
                "pdf_url" key or as an object exposing a `pdf_url` attribute
                (such as an ArxivPaper instance).
            instruction: The question or focus the summary should address.

        Returns:
            The dict produced by chain_of_density_summarization.
        """
        # The annotation says ArxivPaper, but a pydantic model is not
        # subscriptable. Subscripting is tried first so mapping-style rows keep
        # working exactly as before; attribute access covers model instances.
        try:
            pdf_url = paper["pdf_url"]
        except TypeError:
            pdf_url = paper.pdf_url

        text = load_pdf(pdf_url)
        return chain_of_density_summarization(
            text,
            instruction,
            model=self.model,
            density_iterations=self.density_iterations,
        )


In [None]:
import json

def _parse_json_object(text: str) -> dict:
    """Parse `text` as JSON, tolerating prose or code fences around the object."""
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Model replies are often wrapped in commentary or ```json fences.
        # Retry on the outermost {...} span; if there is none, re-raise the
        # original decode error unchanged.
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(text[start:end + 1])


@weave.op()
def evaluate_summary(summary: str, instruction: str, model: str = "claude-3-sonnet-20240229") -> dict:
    """Have an LLM grade `summary` against `instruction` on three criteria.

    Args:
        summary: The summary text to grade.
        instruction: The instruction the summary was meant to address.
        model: Anthropic model name used as the judge.

    Returns:
        A dict with the integer scores under "relevance", "conciseness" and
        "technical_accuracy", their mean under "average_score", and the raw
        model reply under "evaluation_text".

    Raises:
        json.JSONDecodeError: If no JSON object can be parsed from the reply.
        KeyError: If the parsed reply lacks one of the three criteria or its
            "score" entry.
    """
    prompt = f"""
    Summary: {summary}
    Instruction: {instruction}

    Evaluate the summary based on the following criteria:
    1. Relevance (1-5): How well does the summary address the given instruction?
    2. Conciseness (1-5): How concise is the summary while retaining key information?
    3. Technical Accuracy (1-5): How accurately does the summary convey technical details?

    Your response MUST be in the following JSON format:
    {{
        "relevance": {{
            "score": <int>,
            "explanation": "<string>"
        }},
        "conciseness": {{
            "score": <int>,
            "explanation": "<string>"
        }},
        "technical_accuracy": {{
            "score": <int>,
            "explanation": "<string>"
        }}
    }}

    Ensure that the scores are integers between 1 and 5, and that the explanations are concise.
    """
    response = anthropic_client.messages.create(
        model=model,
        max_tokens=1000,
        messages=[{"role": "user", "content": prompt}]
    )
    response_text = response.content[0].text
    # Echo the raw reply so a malformed response is visible in the cell output.
    print(response_text)

    eval_dict = _parse_json_object(response_text)

    # Average only over the criteria that were requested, so an extra key in
    # the reply can neither skew the mean nor break the sum.
    criteria = ("relevance", "conciseness", "technical_accuracy")
    scores = {name: eval_dict[name]["score"] for name in criteria}

    return {
        **scores,
        "average_score": sum(scores.values()) / len(criteria),
        "evaluation_text": response_text,
    }

In [None]:
# Create a Weave Dataset
# One row per evaluation example. The row keys "paper" and "instruction" match
# the parameter names of ArxivChainOfDensityPipeline.predict, and "instruction"
# is also a parameter of quality_scorer -- presumably this is how Weave routes
# row values to them; confirm against the Weave Evaluation docs.
dataset = weave.Dataset(
    name="arxiv_papers",
    rows=[
        {
            "paper": arxiv_paper,
            "instruction": "What was the approach to experimenting with different data mixtures?"
        },
    ]
)

# Publish the dataset to the Weave project initialised at the top of the notebook.
weave.publish(dataset)

In [None]:
# Define the scorer function
@weave.op()
def quality_scorer(instruction: str, model_output: dict) -> dict:
    """Score the pipeline's final summary for the given instruction.

    Only the "final_summary" entry of `model_output` is graded; the judge runs
    with evaluate_summary's default model.
    """
    summary_to_grade = model_output["final_summary"]
    return evaluate_summary(summary_to_grade, instruction)

In [None]:
# Run evaluation
# Pair the dataset with its single scorer, then evaluate a pipeline built with
# its default model and density_iterations.
evaluation = weave.Evaluation(dataset=dataset, scorers=[quality_scorer])
arxiv_chain_of_density_pipeline = ArxivChainOfDensityPipeline()
# Top-level `await` relies on the notebook's already-running event loop; in a
# plain script this call would need to be wrapped, e.g. with asyncio.run(...).
results = await evaluation.evaluate(arxiv_chain_of_density_pipeline)