# Import the Zenbase Library

In [None]:
import sys
import subprocess

def install_package(package):
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
    except subprocess.CalledProcessError as e:
        print(f"Failed to install {package}: {e}")
        raise

def install_packages(packages):
    for package in packages:
        install_package(package)

try:
    # Check if running in Google Colab
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    # Install the zenbase package if running in Google Colab
    # install_package('zenbase')
    # Install the zenbse package from a GitHub branch if running in Google Colab
    install_package('git+https://github.com/zenbase-ai/lib.git@main#egg=zenbase&subdirectory=py')

    # List of other packages to install in Google Colab
    additional_packages = [
        'python-dotenv',
        'parea-ai==0.2.164',
        'openai',
        'langchain',
        'langchain_openai'
    ]
    
    # Install additional packages
    install_packages(additional_packages)

# Now import the zenbase library
try:
    import zenbase
except ImportError as e:
    print("Failed to import zenbase: ", e)
    raise

# Configure the Environment

In [1]:
from pathlib import Path
from dotenv import load_dotenv

# import os
#
# os.environ["OPENAI_API_KEY"] = "..."
# os.environ["PAREA_API_KEY"] = "..."

load_dotenv(Path("../../.env.test"), override=True)

True

In [2]:
import nest_asyncio

nest_asyncio.apply()

# Initial Setup

In [3]:
from openai import OpenAI
from parea import Parea, trace

parea = Parea()
openai = OpenAI()

parea.wrap_openai_client(openai)

# What you already have should look like below:

## Your Scoring Function should look like this:

In [None]:
from parea.schemas import Log, EvaluationResult


def score_answer(log: Log) -> EvaluationResult:
    print(log)
    if log.target:
        output = log.output.split("#### ")[-1]
        target = log.target.split("#### ")[-1]
        return EvaluationResult("correctness", int(output == target))


## Your OpenAI Call should look like this with LangChain (It could be with OpenAI too, doesn't matter)


In [4]:
@trace(eval_funcs=[score_answer])
def langchain_chain(inputs):
    from langchain_openai import ChatOpenAI
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_core.output_parsers import StrOutputParser

    messages = [
        (
            "system",
            "You are an expert math solver. Your answer must be just the number with no separators, and nothing else. Follow the format of the examples.",
        ),
        ("user", "{question}")
    ]

    chain = (
        ChatPromptTemplate.from_messages(messages)
        | ChatOpenAI(model="gpt-3.5-turbo")
        | StrOutputParser()
    )

    answer = chain.invoke(inputs)
    return answer


## Your Evaluation should look like this:

In [5]:
experiment_kwargs = dict(
    data="GSM8K_test_set_parea_dataset_2inu4fT1X5IZKj361H21BbD2Mc4",
    n_workers=1,
)

parea.experiment(
    name="wibbly-wobbly",
    func=langchain_chain,
    **experiment_kwargs
).run()

Run name set to: dotty-woes, since a name was not provided.
Fetching test collection: GSM8K_test_set_parea_dataset_2inu4fT1X5IZKj361H21BbD2Mc4
Fetched 5 test cases from collection: GSM8K_test_set_parea_dataset_2inu4fT1X5IZKj361H21BbD2Mc4 


 20%|██        | 1/5 [00:01<00:07,  1.99s/it]

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'inputs': '{"question": "Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?"}'}, output='72', target='Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72', latency=2.01057, time_to_first_token=None, input_tokens=0, output_tokens=0, total_tokens=0, cost=0.0, scores=[], trace_id='9599e222-6634-446b-b760-5f09fa9ad536', parent_trace_id='9599e222-6634-446b-b760-5f09fa9ad536', root_trace_id='9599e222-6634-446b-b760-5f09fa9ad536', start_timestamp='2024-07-05T00:51:30.214305+00:00', organization_id=None, project_uuid=None, error=None, status='success', deployment_id=None, cache_hit=False, output_for_eval_metrics='72', evaluation_metric_names=None, apply_eval_frac=1.0, feedback_s

 40%|████      | 2/5 [00:02<00:03,  1.15s/it]

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'inputs': '{"question": "Weng earns $12 an hour for babysitting. Yesterday, she just did 50 minutes of babysitting. How much did she earn?"}'}, output='10', target='Weng earns 12/60 = $<<12/60=0.2>>0.2 per minute.\nWorking 50 minutes, she earned 0.2 x 50 = $<<0.2*50=10>>10.\n#### 10', latency=0.569417, time_to_first_token=None, input_tokens=0, output_tokens=0, total_tokens=0, cost=0.0, scores=[], trace_id='4766f4cb-b60b-412d-8df6-535bf31133c3', parent_trace_id='4766f4cb-b60b-412d-8df6-535bf31133c3', root_trace_id='4766f4cb-b60b-412d-8df6-535bf31133c3', start_timestamp='2024-07-05T00:51:32.226771+00:00', organization_id=None, project_uuid=None, error=None, status='success', deployment_id=None, cache_hit=False, output_for_eval_metrics='10', evaluation_metric_names=None, apply_eval_frac=1.0, feedback_score=None, trace_name='langchain_chain', children=[

 60%|██████    | 3/5 [00:03<00:01,  1.20it/s]

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'inputs': '{"question": "Betty is saving money for a new wallet which costs $100. Betty has only half of the money she needs. Her parents decided to give her $15 for that purpose, and her grandparents twice as much as her parents. How much more money does Betty need to buy the wallet?"}'}, output='20', target="In the beginning, Betty has only 100 / 2 = $<<100/2=50>>50.\nBetty's grandparents gave her 15 * 2 = $<<15*2=30>>30.\nThis means, Betty needs 100 - 50 - 30 - 15 = $<<100-50-30-15=5>>5 more.\n#### 5", latency=0.452865, time_to_first_token=None, input_tokens=0, output_tokens=0, total_tokens=0, cost=0.0, scores=[], trace_id='fde92030-4868-4e6b-8153-fd6fda30dc35', parent_trace_id='fde92030-4868-4e6b-8153-fd6fda30dc35', root_trace_id='fde92030-4868-4e6b-8153-fd6fda30dc35', start_timestamp='2024-07-05T00:51:32.797150+00:00', organization_id=None, pro

 80%|████████  | 4/5 [00:03<00:00,  1.41it/s]

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'inputs': '{"question": "Julie is reading a 120-page book. Yesterday, she was able to read 12 pages and today, she read twice as many pages as yesterday. If she wants to read half of the remaining pages tomorrow, how many pages should she read?"}'}, output='72', target='Maila read 12 x 2 = <<12*2=24>>24 pages today.\nSo she was able to read a total of 12 + 24 = <<12+24=36>>36 pages since yesterday.\nThere are 120 - 36 = <<120-36=84>>84 pages left to be read.\nSince she wants to read half of the remaining pages tomorrow, then she should read 84/2 = <<84/2=42>>42 pages.\n#### 42', latency=0.518624, time_to_first_token=None, input_tokens=0, output_tokens=0, total_tokens=0, cost=0.0, scores=[], trace_id='4718810f-6c1b-429b-9050-66de256d44a6', parent_trace_id='4718810f-6c1b-429b-9050-66de256d44a6', root_trace_id='4718810f-6c1b-429b-9050-66de256d44a6', st

100%|██████████| 5/5 [00:04<00:00,  1.24it/s]

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'inputs': '{"question": "James writes a 3-page letter to 2 different friends twice a week.  How many pages does he write a year?"}'}, output='624', target='He writes each friend 3*2=<<3*2=6>>6 pages a week\nSo he writes 6*2=<<6*2=12>>12 pages every week\nThat means he writes 12*52=<<12*52=624>>624 pages a year\n#### 624', latency=0.490708, time_to_first_token=None, input_tokens=0, output_tokens=0, total_tokens=0, cost=0.0, scores=[], trace_id='875af573-96fb-47da-94c6-526d0ecace03', parent_trace_id='875af573-96fb-47da-94c6-526d0ecace03', root_trace_id='875af573-96fb-47da-94c6-526d0ecace03', start_timestamp='2024-07-05T00:51:33.770497+00:00', organization_id=None, project_uuid=None, error=None, status='success', deployment_id=None, cache_hit=False, output_for_eval_metrics='624', evaluation_metric_names=None, apply_eval_frac=1.0, feedback_score=None, t


0it [00:04, ?it/s]


Experiment wibbly-wobbly Run dotty-woes stats:
{
  "latency": "0.81",
  "input_tokens": "0.00",
  "output_tokens": "0.00",
  "total_tokens": "0.00",
  "cost": "0.00000",
  "correctness": "0.60"
}


View experiment & traces at: https://app.parea.ai/experiments/wibbly-wobbly/dfb8442c-6ee3-4fde-a1ed-dfde37123e82


# How you should do the few-shot learning

## Rewrite your langchain_chain function to use the `zenbase` decorators

In [6]:
from zenbase.types import LMRequest, deflm

# Step 1: Add the deflm decorator
# Step 2: Incorporate few-shot demonstrations

@deflm # Step 1
@trace(eval_funcs=[score_answer])
def zen_chain(request: LMRequest):
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI

    messages = [
        (
            "system",
            "You are an expert math solver. Your answer must be just the number with no separators, and nothing else. Follow the format of the examples.",
        )
    ]

    # Step 2
    for demo in request.zenbase.task_demos:
        messages += [
            ("user", str(demo.inputs).replace("{", "").replace("}", "")),
            ("assistant", str(demo.outputs['target'])),
        ]
    
    messages.append(("user", "{question}"))
    chain = (
        ChatPromptTemplate.from_messages(messages)
        | ChatOpenAI(model="gpt-3.5-turbo")
        | StrOutputParser()
    )

    answer = chain.invoke(request.inputs)
    return answer


It is still working like before:

In [7]:
zen_chain({"question": "What is 2+2?"})

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'request': '{"zenbase": {"task_demos": [], "model_params": {}}, "inputs": {"question": "What is 2+2?"}, "id": "request_2inyb3jIGYhlocjDNk8w4EnTYQa"}'}, output='4', target=None, latency=0.555932, time_to_first_token=None, input_tokens=0, output_tokens=0, total_tokens=0, cost=0.0, scores=[], trace_id='27b4d578-58ea-46e4-93ad-6548f1bbc7e3', parent_trace_id='27b4d578-58ea-46e4-93ad-6548f1bbc7e3', root_trace_id='27b4d578-58ea-46e4-93ad-6548f1bbc7e3', start_timestamp='2024-07-05T00:51:43.403718+00:00', organization_id=None, project_uuid=None, error=None, status='success', deployment_id=None, cache_hit=False, output_for_eval_metrics='4', evaluation_metric_names=None, apply_eval_frac=1.0, feedback_score=None, trace_name='zen_chain', children=[], end_timestamp='2024-07-05T00:51:43.959650+00:00', end_user_identifier=None, session_id=None, metadata=None, tags=

'4'

### Define your optimizer:

In [8]:
from zenbase.optim.metric.labeled_few_shot import LabeledFewShot
from zenbase.adaptors.parea import ZenParea

zen_parea_helper = ZenParea(parea)

demoset = zen_parea_helper.fetch_dataset_demos("GSM8K_validation_set_parea_dataset_2inu4oPWWEZYS4eBsWLJ3ZR4P1f")
optimizer = LabeledFewShot(demoset=demoset, shots=1)

Run name set to: moldy-kobs, since a name was not provided.
Fetching test collection: GSM8K_test_set_parea_dataset_2inu4fT1X5IZKj361H21BbD2Mc4
Fetched 5 test cases from collection: GSM8K_test_set_parea_dataset_2inu4fT1X5IZKj361H21BbD2Mc4 


 20%|██        | 1/5 [00:01<00:05,  1.31s/it]

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'request': '{"zenbase": {"task_demos": [{"inputs": {"inputs": "{\\"question\\": \\"Toulouse has twice as many sheep as Charleston. Charleston has 4 times as many sheep as Seattle. How many sheep do Toulouse, Charleston, and Seattle have together if Seattle has 20 sheep?\\"}"}, "outputs": {"target": "If Seattle has 20 sheep, Charleston has 4 * 20 sheep = <<20*4=80>>80 sheep\\nToulouse has twice as many sheep as Charleston, which is 2 * 80 sheep = <<2*80=160>>160 sheep\\nTogether, the three has 20 sheep + 160 sheep + 80 sheep = <<20+160+80=260>>260 sheep\\n#### 260"}, "original_object": null}], "model_params": {}}, "inputs": "{\\"question\\": \\"Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?\\"}", "id": "request_2inybPvI3uItoFfQWJNSoIByHDC"}'},

 40%|████      | 2/5 [00:02<00:03,  1.09s/it]

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'request': '{"zenbase": {"task_demos": [{"inputs": {"inputs": "{\\"question\\": \\"Toulouse has twice as many sheep as Charleston. Charleston has 4 times as many sheep as Seattle. How many sheep do Toulouse, Charleston, and Seattle have together if Seattle has 20 sheep?\\"}"}, "outputs": {"target": "If Seattle has 20 sheep, Charleston has 4 * 20 sheep = <<20*4=80>>80 sheep\\nToulouse has twice as many sheep as Charleston, which is 2 * 80 sheep = <<2*80=160>>160 sheep\\nTogether, the three has 20 sheep + 160 sheep + 80 sheep = <<20+160+80=260>>260 sheep\\n#### 260"}, "original_object": null}], "model_params": {}}, "inputs": "{\\"question\\": \\"Weng earns $12 an hour for babysitting. Yesterday, she just did 50 minutes of babysitting. How much did she earn?\\"}", "id": "request_2inybZLdSk25SUpDiSTx5awN7oM"}'}, output='Weng earns $12 an hour for babysi

 60%|██████    | 3/5 [00:03<00:02,  1.10s/it]

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'request': '{"zenbase": {"task_demos": [{"inputs": {"inputs": "{\\"question\\": \\"Toulouse has twice as many sheep as Charleston. Charleston has 4 times as many sheep as Seattle. How many sheep do Toulouse, Charleston, and Seattle have together if Seattle has 20 sheep?\\"}"}, "outputs": {"target": "If Seattle has 20 sheep, Charleston has 4 * 20 sheep = <<20*4=80>>80 sheep\\nToulouse has twice as many sheep as Charleston, which is 2 * 80 sheep = <<2*80=160>>160 sheep\\nTogether, the three has 20 sheep + 160 sheep + 80 sheep = <<20+160+80=260>>260 sheep\\n#### 260"}, "original_object": null}], "model_params": {}}, "inputs": "{\\"question\\": \\"Betty is saving money for a new wallet which costs $100. Betty has only half of the money she needs. Her parents decided to give her $15 for that purpose, and her grandparents twice as much as her parents. How

 80%|████████  | 4/5 [00:04<00:01,  1.29s/it]

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'request': '{"zenbase": {"task_demos": [{"inputs": {"inputs": "{\\"question\\": \\"Toulouse has twice as many sheep as Charleston. Charleston has 4 times as many sheep as Seattle. How many sheep do Toulouse, Charleston, and Seattle have together if Seattle has 20 sheep?\\"}"}, "outputs": {"target": "If Seattle has 20 sheep, Charleston has 4 * 20 sheep = <<20*4=80>>80 sheep\\nToulouse has twice as many sheep as Charleston, which is 2 * 80 sheep = <<2*80=160>>160 sheep\\nTogether, the three has 20 sheep + 160 sheep + 80 sheep = <<20+160+80=260>>260 sheep\\n#### 260"}, "original_object": null}], "model_params": {}}, "inputs": "{\\"question\\": \\"Julie is reading a 120-page book. Yesterday, she was able to read 12 pages and today, she read twice as many pages as yesterday. If she wants to read half of the remaining pages tomorrow, how many pages should

100%|██████████| 5/5 [00:06<00:00,  1.29s/it]

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'request': '{"zenbase": {"task_demos": [{"inputs": {"inputs": "{\\"question\\": \\"Toulouse has twice as many sheep as Charleston. Charleston has 4 times as many sheep as Seattle. How many sheep do Toulouse, Charleston, and Seattle have together if Seattle has 20 sheep?\\"}"}, "outputs": {"target": "If Seattle has 20 sheep, Charleston has 4 * 20 sheep = <<20*4=80>>80 sheep\\nToulouse has twice as many sheep as Charleston, which is 2 * 80 sheep = <<2*80=160>>160 sheep\\nTogether, the three has 20 sheep + 160 sheep + 80 sheep = <<20+160+80=260>>260 sheep\\n#### 260"}, "original_object": null}], "model_params": {}}, "inputs": "{\\"question\\": \\"James writes a 3-page letter to 2 different friends twice a week.  How many pages does he write a year?\\"}", "id": "request_2inyc1J0lAIkqI2GZDT5qbFb4nQ"}'}, output='James writes 3 pages to 2 friends, twice a 


0it [00:04, ?it/s]


Experiment zenbase-open-source-grid-enabled Run moldy-kobs stats:
{
  "latency": "1.29",
  "input_tokens": "0.00",
  "output_tokens": "0.00",
  "total_tokens": "0.00",
  "cost": "0.00000",
  "correctness": "1.00"
}


View experiment & traces at: https://app.parea.ai/experiments/zenbase-open-source-grid-enabled/004f5366-661e-43ce-b299-400f8954218b

Run name set to: sedgy-huia, since a name was not provided.
Fetching test collection: GSM8K_test_set_parea_dataset_2inu4fT1X5IZKj361H21BbD2Mc4
Fetched 5 test cases from collection: GSM8K_test_set_parea_dataset_2inu4fT1X5IZKj361H21BbD2Mc4 


 20%|██        | 1/5 [00:01<00:05,  1.30s/it]

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'request': '{"zenbase": {"task_demos": [{"inputs": {"inputs": "{\\"question\\": \\"Toulouse has twice as many sheep as Charleston. Charleston has 4 times as many sheep as Seattle. How many sheep do Toulouse, Charleston, and Seattle have together if Seattle has 20 sheep?\\"}"}, "outputs": {"target": "If Seattle has 20 sheep, Charleston has 4 * 20 sheep = <<20*4=80>>80 sheep\\nToulouse has twice as many sheep as Charleston, which is 2 * 80 sheep = <<2*80=160>>160 sheep\\nTogether, the three has 20 sheep + 160 sheep + 80 sheep = <<20+160+80=260>>260 sheep\\n#### 260"}, "original_object": null}], "model_params": {}}, "inputs": "{\\"question\\": \\"Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?\\"}", "id": "request_2inyd0iC1Su5M7LvkmPWrtvMzFQ"}'},

 40%|████      | 2/5 [00:02<00:04,  1.34s/it]

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'request': '{"zenbase": {"task_demos": [{"inputs": {"inputs": "{\\"question\\": \\"Toulouse has twice as many sheep as Charleston. Charleston has 4 times as many sheep as Seattle. How many sheep do Toulouse, Charleston, and Seattle have together if Seattle has 20 sheep?\\"}"}, "outputs": {"target": "If Seattle has 20 sheep, Charleston has 4 * 20 sheep = <<20*4=80>>80 sheep\\nToulouse has twice as many sheep as Charleston, which is 2 * 80 sheep = <<2*80=160>>160 sheep\\nTogether, the three has 20 sheep + 160 sheep + 80 sheep = <<20+160+80=260>>260 sheep\\n#### 260"}, "original_object": null}], "model_params": {}}, "inputs": "{\\"question\\": \\"Weng earns $12 an hour for babysitting. Yesterday, she just did 50 minutes of babysitting. How much did she earn?\\"}", "id": "request_2inyd6XvgIjwSBpt23iehUVT3ks"}'}, output='Weng earns $12 for 60 minutes of 

 60%|██████    | 3/5 [00:03<00:02,  1.26s/it]

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'request': '{"zenbase": {"task_demos": [{"inputs": {"inputs": "{\\"question\\": \\"Toulouse has twice as many sheep as Charleston. Charleston has 4 times as many sheep as Seattle. How many sheep do Toulouse, Charleston, and Seattle have together if Seattle has 20 sheep?\\"}"}, "outputs": {"target": "If Seattle has 20 sheep, Charleston has 4 * 20 sheep = <<20*4=80>>80 sheep\\nToulouse has twice as many sheep as Charleston, which is 2 * 80 sheep = <<2*80=160>>160 sheep\\nTogether, the three has 20 sheep + 160 sheep + 80 sheep = <<20+160+80=260>>260 sheep\\n#### 260"}, "original_object": null}], "model_params": {}}, "inputs": "{\\"question\\": \\"Betty is saving money for a new wallet which costs $100. Betty has only half of the money she needs. Her parents decided to give her $15 for that purpose, and her grandparents twice as much as her parents. How

 80%|████████  | 4/5 [00:05<00:01,  1.43s/it]

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'request': '{"zenbase": {"task_demos": [{"inputs": {"inputs": "{\\"question\\": \\"Toulouse has twice as many sheep as Charleston. Charleston has 4 times as many sheep as Seattle. How many sheep do Toulouse, Charleston, and Seattle have together if Seattle has 20 sheep?\\"}"}, "outputs": {"target": "If Seattle has 20 sheep, Charleston has 4 * 20 sheep = <<20*4=80>>80 sheep\\nToulouse has twice as many sheep as Charleston, which is 2 * 80 sheep = <<2*80=160>>160 sheep\\nTogether, the three has 20 sheep + 160 sheep + 80 sheep = <<20+160+80=260>>260 sheep\\n#### 260"}, "original_object": null}], "model_params": {}}, "inputs": "{\\"question\\": \\"Julie is reading a 120-page book. Yesterday, she was able to read 12 pages and today, she read twice as many pages as yesterday. If she wants to read half of the remaining pages tomorrow, how many pages should

100%|██████████| 5/5 [00:06<00:00,  1.34s/it]

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'request': '{"zenbase": {"task_demos": [{"inputs": {"inputs": "{\\"question\\": \\"Toulouse has twice as many sheep as Charleston. Charleston has 4 times as many sheep as Seattle. How many sheep do Toulouse, Charleston, and Seattle have together if Seattle has 20 sheep?\\"}"}, "outputs": {"target": "If Seattle has 20 sheep, Charleston has 4 * 20 sheep = <<20*4=80>>80 sheep\\nToulouse has twice as many sheep as Charleston, which is 2 * 80 sheep = <<2*80=160>>160 sheep\\nTogether, the three has 20 sheep + 160 sheep + 80 sheep = <<20+160+80=260>>260 sheep\\n#### 260"}, "original_object": null}], "model_params": {}}, "inputs": "{\\"question\\": \\"James writes a 3-page letter to 2 different friends twice a week.  How many pages does he write a year?\\"}", "id": "request_2inyddBDMk4RbgMhHaiIbKKOHTx"}'}, output='James writes 3 pages to 2 friends, twice a 


0it [00:04, ?it/s]


Experiment zenbase-balanced-methodical Run sedgy-huia stats:
{
  "latency": "1.34",
  "input_tokens": "0.00",
  "output_tokens": "0.00",
  "total_tokens": "0.00",
  "cost": "0.00000",
  "correctness": "1.00"
}


View experiment & traces at: https://app.parea.ai/experiments/zenbase-balanced-methodical/e6bb2b6a-2673-4eea-b712-64c4fdcb33d1


### Perform the optimization

In [None]:
best_fn, candidate_results, _= optimizer.perform(
    zen_chain,
    evaluator=ZenParea.metric_evaluator(**experiment_kwargs),
    samples=2,
    rounds=1,
)

### Use the best function

In [9]:
output = best_fn({"question": "What is 2+2?"})
output

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'request': '{"zenbase": {"task_demos": [{"inputs": {"inputs": "{\\"question\\": \\"Toulouse has twice as many sheep as Charleston. Charleston has 4 times as many sheep as Seattle. How many sheep do Toulouse, Charleston, and Seattle have together if Seattle has 20 sheep?\\"}"}, "outputs": {"target": "If Seattle has 20 sheep, Charleston has 4 * 20 sheep = <<20*4=80>>80 sheep\\nToulouse has twice as many sheep as Charleston, which is 2 * 80 sheep = <<2*80=160>>160 sheep\\nTogether, the three has 20 sheep + 160 sheep + 80 sheep = <<20+160+80=260>>260 sheep\\n#### 260"}, "original_object": null}], "model_params": {}}, "inputs": {"question": "What is 2+2?"}, "id": "request_2inyeOhT2stVxhoszw9ygae3vVE"}'}, output='4', target=None, latency=0.570808, time_to_first_token=None, input_tokens=0, output_tokens=0, total_tokens=0, cost=0.0, scores=[], trace_id='814

'4'

### Save the best function

In [11]:
# You can also save the zenbase params for re-use
import pickle

pickled_zenbase = pickle.dumps(best_fn.zenbase)
zen_chain.zenbase = pickle.loads(pickled_zenbase)

zen_chain({"question": "What is 2 + 2?"}) # uses the best few-shot demos

TraceLog(configuration=LLMInputs(model=None, provider=None, model_params=None, messages=None, functions=None, function_call=None), inputs={'request': '{"zenbase": {"task_demos": [{"inputs": {"inputs": "{\\"question\\": \\"Toulouse has twice as many sheep as Charleston. Charleston has 4 times as many sheep as Seattle. How many sheep do Toulouse, Charleston, and Seattle have together if Seattle has 20 sheep?\\"}"}, "outputs": {"target": "If Seattle has 20 sheep, Charleston has 4 * 20 sheep = <<20*4=80>>80 sheep\\nToulouse has twice as many sheep as Charleston, which is 2 * 80 sheep = <<2*80=160>>160 sheep\\nTogether, the three has 20 sheep + 160 sheep + 80 sheep = <<20+160+80=260>>260 sheep\\n#### 260"}, "original_object": null}], "model_params": {}}, "inputs": {"question": "What is 2 + 2?"}, "id": "request_2inyijg5rjE6KfwqZUbKUnRGLof"}'}, output='4', target=None, latency=0.519367, time_to_first_token=None, input_tokens=0, output_tokens=0, total_tokens=0, cost=0.0, scores=[], trace_id='0

'4'