<a href="https://colab.research.google.com/github/withpi/cookbook-withpi/blob/main/colabs/Braintrust_PI_Integration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture
%pip install -U braintrust openai datasets autoevals

In [None]:
# @title Setup API Keys

from google.colab import userdata
import os

# Copy each secret from Colab's userdata store into the process environment,
# in the same order as before (Braintrust first, then Pi).
# Get PI API key: https://build.withpi.ai/account/keys
for _secret_name in ("BRAINTRUST_API_KEY", "WITHPI_API_KEY"):
    os.environ[_secret_name] = userdata.get(_secret_name)

In [None]:
# @title Load a sample dataset

from datasets import load_dataset

# Fetch the "train" split and keep the first five entries of its "topic" column.
ds = load_dataset("withpi/mlmastery_com_blogs_condensed", split="train")
topics = ds["topic"][:5]

# One record per topic, stored under the "input" key; `data` is what the
# eval cell later hands to braintrust.Eval.
data = [dict(input=topic) for topic in topics]

display(data)

[{'input': 'Tips for beginners to get started with deep learning, including mastering machine learning fundamentals, choosing a framework, understanding neural network architectures, starting with simple projects, and practicing regularly while engaging with the community.'},
 {'input': 'The topic of this blog post is: "Understanding the Data Science Mind Map: A comprehensive guide to essential Python packages for data preparation, visualization, and statistical analysis, with an emphasis on storytelling in data science."'},
 {'input': 'The 5 Most Influential Machine Learning Research Papers of 2024 and Their Contributions to AI Advancement'},
 {'input': 'Cross-validation techniques for comprehensive model evaluation beyond simple train-test splits'},
 {'input': 'Creating an effective machine learning portfolio that demonstrates practical skills and helps land job opportunities in the competitive ML industry.'}]

In [None]:
# @title Braintrust tracing setup

import braintrust
from openai import OpenAI

# Chat model requested by generate_blog_post.
MODEL = "gpt-4o-mini"

# OpenAI client pointed at the Braintrust proxy endpoint and authenticated
# with the Braintrust key, then wrapped with braintrust.wrap_openai.
client = braintrust.wrap_openai(
    OpenAI(
        api_key=os.environ["BRAINTRUST_API_KEY"],
        base_url="https://api.braintrust.dev/v1/proxy",
    )
)

@braintrust.traced
def generate_blog_post(input):
    """Ask the chat model to write a technical blog post about a topic.

    Args:
        input: Topic text; sent verbatim as the user message.

    Returns:
        The message content of the first choice in the completion response.
    """
    # Built from adjacent literals; the resulting text (including the trailing
    # newline) is the same prompt the model has always received.
    system_prompt = (
        "You are a specialized blog post writer. Given a topic, write a technical blog post. Here are specific instructions:\n"
        "- Make sure that the blog is approximately under 500 words\n"
        "- The blog should be technical in nature with clear instructions\n"
    )
    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": input}

    completion = client.chat.completions.create(
        model=MODEL,
        messages=[system_message, user_message],
        max_tokens=4096,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content



In [None]:
# @title Pi-Scorer Setup

import os
import requests
from autoevals import ScorerWithPartial
from braintrust_core.score import Score

# Endpoint that the Pi scorer classes POST their payloads to.
PI_API_URL = "https://api.withpi.ai/v1/scoring_system/score"

# Request headers shared by every scoring call. The API key is read once,
# when this cell runs; it is None if WITHPI_API_KEY is not set at that point.
HEADERS = {"Content-Type": "application/json"}
HEADERS["x-api-key"] = os.environ.get("WITHPI_API_KEY")

class PiScorerBase(ScorerWithPartial):
    """Base scorer that grades one LLM output against a single Pi question.

    Subclasses set ``question`` (sent to the Pi scoring endpoint) and ``label``
    (used as the name of the resulting score).
    """

    # Natural-language question sent as the only entry of the scoring spec.
    question: str = ""
    # Name given to the returned Score; when empty, self._name() is used instead.
    label: str = ""
    # Seconds to wait on the Pi API. requests applies no timeout by default, so
    # without this a stalled connection would block the eval indefinitely.
    request_timeout: float = 60.0

    def _run_eval_sync(self, output, expected=None, **kwargs):
        """Score ``output`` by POSTing it to the Pi scoring endpoint.

        Args:
            output: The generated text to score.
            expected: Accepted for interface compatibility; not used here.
            **kwargs: Must contain ``input``, which is sent as ``llm_input``.

        Returns:
            A ``Score`` named ``label`` (or ``self._name()`` when ``label`` is
            empty) whose value is the ``total_score`` field of the response.

        Raises:
            AssertionError: If ``input`` is missing from ``kwargs``.
            requests.Timeout: If the API does not respond within
                ``request_timeout`` seconds.
            requests.HTTPError: If the API answers with a 4xx/5xx status.
        """
        assert "input" in kwargs, "Missing 'input' in kwargs"
        payload = {
            "llm_input": kwargs["input"],
            "llm_output": output,
            "scoring_spec": [{"question": self.question}],
        }
        response = requests.post(
            PI_API_URL,
            headers=HEADERS,
            json=payload,
            timeout=self.request_timeout,
        )
        # Surface auth/validation failures as an HTTP error here instead of a
        # confusing KeyError or JSON decode error further down.
        response.raise_for_status()
        pi_score = response.json()
        return Score(name=self.label or self._name(), score=pi_score["total_score"])


class PiScorerConclusion(PiScorerBase):
    """Pi scorer asking whether the post has a conclusion section."""

    label = "conclusion"
    question = "Does the post have a conclusion section?"

class PiScorerPitfall(PiScorerBase):
    """Pi scorer asking whether the post calls out pitfalls or common mistakes."""

    label = "pitfall"
    question = "Does the post call out potential pitfalls or common mistakes?"




In [None]:
# @title Run eval

await braintrust.Eval(
    "Blog Post Generator",
    data=data,
    task=generate_blog_post,
    scores=[PiScorerConclusion, PiScorerPitfall],
    experiment_name="Pi Blog Post",
)

Experiment Pi Blog Post-47e9fc56 is running at https://www.braintrust.dev/app/Pi%20Labs/p/Blog%20Post%20Generator/experiments/Pi%20Blog%20Post-47e9fc56
Blog Post Generator [experiment_name=Pi Blog Post] (data): 5it [00:00, 15022.58it/s]


Blog Post Generator [experiment_name=Pi Blog Post] (tasks):   0%|          | 0/5 [00:00<?, ?it/s]


Pi Blog Post-47e9fc56 compared to Pi Blog Post-f3d5ea05:
100.00% 'conclusion' score
00.58% 'pitfall'    score

1745614988.48s start
1745615011.10s end
22.06s (-262.47%) 'duration'         	(2 improvements, 0 regressions)
12.42s (-316.88%) 'llm_duration'     	(2 improvements, 0 regressions)
85.80tok (-) 'prompt_tokens'    	(0 improvements, 0 regressions)
819.80tok (-2000.00%) 'completion_tokens'	(1 improvements, 1 regressions)
905.60tok (-2000.00%) 'total_tokens'     	(1 improvements, 1 regressions)
0.00$ (-00.00%) 'estimated_cost'   	(0 improvements, 0 regressions)

See results for Pi Blog Post-47e9fc56 at https://www.braintrust.dev/app/Pi%20Labs/p/Blog%20Post%20Generator/experiments/Pi%20Blog%20Post-47e9fc56


EvalResultWithSummary(summary="...", results=[...])

In [None]:
# See results: https://www.braintrust.dev/app/Pi%20Labs/p/Blog%20Post%20Generator/experiments/Pi%20Blog%20Post-47e9fc56?c=