# Import the Zenbase Library

In [1]:
import sys
import subprocess

def install_package(package):
    """Install one requirement with pip, using the interpreter running this notebook.

    Args:
        package: A pip requirement specifier (a package name, a pinned
            ``name==version``, or a VCS URL).

    Raises:
        subprocess.CalledProcessError: If the pip process exits with a non-zero
            status. The failure is printed first and then re-raised.
    """
    # Going through sys.executable targets the same environment as the kernel.
    pip_command = [sys.executable, "-m", "pip", "install", package]
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as error:
        print(f"Failed to install {package}: {error}")
        raise

def install_packages(packages):
    """Install every requirement in ``packages``, one at a time and in order.

    Each entry is handed to ``install_package``. Nothing is caught here, so the
    first failing install stops the loop and propagates its exception.
    """
    for requirement in packages:
        install_package(requirement)

# Detect Google Colab by checking whether its helper module can be imported.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

if IN_COLAB:
    # Install zenbase from the `main` branch of its GitHub repository.
    # To use the published release instead, swap in: install_package('zenbase')
    install_package('git+https://github.com/zenbase-ai/lib.git@main#egg=zenbase&subdirectory=py')

    # Remaining packages to install when running in Google Colab.
    additional_packages = [
        'python-dotenv',
        'parea-ai==0.2.164',
        'openai',
        'langchain',
        'langchain_openai'
    ]

    # Install them one by one, in the order listed.
    for requirement in additional_packages:
        install_package(requirement)

# Import zenbase; if that fails, print the reason and let the error propagate
# so the notebook stops here rather than at a later cell.
try:
    import zenbase
except ImportError as import_error:
    print("Failed to import zenbase: ", import_error)
    raise

# Configure the Environment

In [2]:
from pathlib import Path
from dotenv import load_dotenv

# Alternative to the .env file: uncomment the lines below and replace "..."
# with your own keys to set them directly in the process environment.
# import os
#
# os.environ["OPENAI_API_KEY"] = "..."
# os.environ["PAREA_API_KEY"] = "..."

# Load variables from `.env.test`, located two directories above the working
# directory. This is the cell's last expression, so its return value is shown
# as the cell output.
# NOTE(review): override=True presumably lets values from the file replace
# variables that are already set — confirm against the python-dotenv docs.
load_dotenv(Path("../../.env.test"), override=True)

True

# Initial Setup

In [3]:
import nest_asyncio

# Patch asyncio to allow nested event loops.
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop — confirm which later call depends on this.
nest_asyncio.apply()

# Set Up the Parea and OpenAI Clients


In [4]:
from openai import OpenAI
from parea import Parea, trace

# Create the Parea client with no explicit arguments.
# NOTE(review): presumably it reads PAREA_API_KEY from the environment — confirm.
parea = Parea()
# Create the OpenAI client. Note that this binds the name `openai` to a client
# instance; a later cell runs `import openai`, which rebinds the same name to
# the package itself.
openai = OpenAI()

# Hand the OpenAI client to Parea.
# NOTE(review): presumably so that calls made through this client show up in
# Parea traces — confirm against the Parea docs.
parea.wrap_openai_client(openai)

# Now, you probably already have some LLM code.

## And let's say you have an eval function like this

In [17]:
from parea.schemas import Log, EvaluationResult

def score_answer_with_json(log: Log) -> EvaluationResult:
    """Score a traced call as 1 when its answer equals the dataset target, else 0.

    Args:
        log: The log of one traced call. ``log.output`` is parsed with
            ``expand_nested_json`` and must contain an ``"answer"`` key;
            ``log.target`` is the expected answer text.

    Returns:
        An ``EvaluationResult`` named ``"correctness"`` with score 0 or 1, or
        ``None`` when the log has no target to compare against.
    """
    # Imported locally: the notebook only imports this helper in a later cell,
    # but this function is already called by the first experiment run.
    from zenbase.utils import expand_nested_json

    if not log.target:
        # Nothing to compare against, so no score is produced.
        return None

    output = str(expand_nested_json(log.output)["answer"])
    # Only the text after the last "#### " marker in the target is compared.
    target = log.target.split("#### ")[-1]
    return EvaluationResult("correctness", int(output == target))


Your LLM code could use the OpenAI SDK, LangChain, or anything else, really. Either way, it probably looks something like this:

In [39]:
@trace(eval_funcs=[score_answer_with_json])
def solver(inputs):
    """Answer a math question in three LLM steps: plan, pick an operation, solve.

    Args:
        inputs: A mapping with a ``"question"`` key, or a string, which is
            first converted with ``expand_nested_json``.

    Returns:
        A dict ``{"answer": ...}`` holding the ``"answer"`` value from the
        model's JSON reply.

    Raises:
        KeyError: If the model's JSON reply has no ``"answer"`` key.
    """
    # Imported locally: the notebook's own imports of these names live in a
    # later cell, so a fresh top-to-bottom run would otherwise hit a NameError.
    import json

    from zenbase.utils import expand_nested_json

    if isinstance(inputs, str):
        inputs = expand_nested_json(inputs)

    # Step 1: ask for a plan. Step 2: ask which operation the plan needs.
    plan = planner_chain(inputs)
    operation = operation_finder({
        "plan": plan["plan"],
        "question": inputs["question"],
    })

    # Step 3: solve, giving the model the question, the plan and the operation.
    messages = [
        {
            "role": "system",
            "content": "You are an expert math solver. Solve the given problem using the provided plan and operation. Return only the final numerical answer in JSON format."
        },
        {"role": "user", "content": f"Question: {inputs['question']}"},
        {"role": "user", "content": f"Plan: {plan['plan']}"},
        {"role": "user", "content": f"Required mathematical operation: {operation['operation']}"},
        {"role": "user", "content": "Provide the final answer as a number in JSON format: {\"answer\": YOUR_NUMERICAL_ANSWER}"},
    ]

    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        response_format={"type": "json_object"},
    )

    answer = json.loads(response.choices[0].message.content)
    return {"answer": answer["answer"]}

@trace
def planner_chain(inputs):
    """Ask the model for a step-by-step plan to solve a math question.

    Args:
        inputs: A mapping with a ``"question"`` key, or a string, which is
            first converted with ``expand_nested_json``.

    Returns:
        A dict ``{"plan": ...}`` holding the ``"plan"`` value from the model's
        JSON reply, passed through unchanged.

    Raises:
        KeyError: If the model's JSON reply has no ``"plan"`` key.
    """
    # Imported locally: the notebook's own imports of these names live in a
    # later cell, so a fresh top-to-bottom run would otherwise hit a NameError.
    import json

    from zenbase.utils import expand_nested_json

    if isinstance(inputs, str):
        inputs = expand_nested_json(inputs)

    messages = [
        {
            "role": "system",
            "content": "You are an expert math solver. Create a step-by-step plan to solve the given math problem. Return the plan as a JSON object with a 'plan' key."
        },
        {"role": "user", "content": inputs["question"]},
    ]

    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        response_format={"type": "json_object"},
    )

    answer = json.loads(response.choices[0].message.content)
    return {"plan": answer["plan"]}

@trace
def operation_finder(inputs):
    """Ask the model which basic operation the plan for a question relies on.

    Args:
        inputs: A mapping with ``"question"`` and ``"plan"`` keys, or a string,
            which is first converted with ``expand_nested_json``.

    Returns:
        A dict ``{"operation": ...}`` holding the ``"operation"`` value from
        the model's JSON reply.

    Raises:
        KeyError: If the model's JSON reply has no ``"operation"`` key.
    """
    # Imported locally: the notebook's own imports of these names live in a
    # later cell, so a fresh top-to-bottom run would otherwise hit a NameError.
    import json

    from zenbase.utils import expand_nested_json

    if isinstance(inputs, str):
        inputs = expand_nested_json(inputs)

    messages = [
        {
            "role": "system",
            "content": "You are an expert math solver. Identify the primary mathematical operation needed to solve the problem based on the given question and plan. Use simple operations like addition, subtraction, multiplication, or division. Return the operation as a JSON object with an 'operation' key."
        },
        {"role": "user", "content": f"Question: {inputs['question']}"},
        {"role": "user", "content": f"Plan: {inputs['plan']}"},
    ]

    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        response_format={"type": "json_object"},
    )

    answer = json.loads(response.choices[0].message.content)
    return {"operation": answer["operation"]}

Test your function:

In [40]:
# Quick check of the whole pipeline on a trivial question; the returned dict
# is displayed as the cell output.
solver({"question": "What is 2+2?"})

{'answer': 4}

## Then you're probably evaluating like this

In [41]:
# Settings shared by the experiment: which dataset to evaluate on and how many
# workers to use.
experiment_kwargs = {
    "data": "GSM8K_test_set_parea_dataset_2iyvfWLaZFAhZ6V6NGa8gp3WKBx",
    "n_workers": 1,
}

# Build the experiment around `solver` and run it straight away.
parea.experiment(name="wibbly-wobbly", func=solver, **experiment_kwargs).run()

Run name set to: funny-hard, since a name was not provided.
Fetching test collection: GSM8K_test_set_parea_dataset_2iyvfWLaZFAhZ6V6NGa8gp3WKBx
Fetched 5 test cases from collection: GSM8K_test_set_parea_dataset_2iyvfWLaZFAhZ6V6NGa8gp3WKBx 


 20%|██        | 1/5 [00:02<00:09,  2.42s/it]Future exception was never retrieved
future: <Future finished exception=TypeError('string indices must be integers')>
Traceback (most recent call last):
  File "/Users/amir/.rye/py/cpython@3.10.13/lib/python3.10/concurrent/futures/thread.py", line 58, in run
    result = self.fn(*self.args, **self.kwargs)
  File "/Users/amir/workspace/zenbase-lib/py/.venv/lib/python3.10/site-packages/parea/experiment/experiment.py", line 140, in limit_concurrency_sync
    return func(_parea_target_field=target, **sample_copy)
  File "/Users/amir/workspace/zenbase-lib/py/.venv/lib/python3.10/site-packages/parea/utils/trace_utils.py", line 278, in wrapper
    raise e
  File "/Users/amir/workspace/zenbase-lib/py/.venv/lib/python3.10/site-packages/parea/utils/trace_utils.py", line 271, in wrapper
    result = func(*args, **kwargs)
  File "/var/folders/47/_rbxm97j0kl5nm_6vv2m8h540000gn/T/ipykernel_39150/751121847.py", line 6, in solver
    "question": inputs["que

Experiment wibbly-wobbly Run funny-hard stats:
{
  "latency": "3.13",
  "input_tokens": "0.00",
  "output_tokens": "0.00",
  "total_tokens": "0.00",
  "cost": "0.00000",
  "correctness": "0.60"
}


View experiment & traces at: https://app.parea.ai/experiments/wibbly-wobbly/9c2a54e4-ae0d-4195-ac9e-03e4cee92153


 # Now, how can we optimize this score?

## First, initialize the `ZenbaseTracer`

In [43]:
from zenbase.core.managers import ZenbaseTracer

# One tracer instance, used below both as the decorator on the LLM functions
# and as the `trace_manager` passed to the optimizer.
zenbase_tracer = ZenbaseTracer()

## For Parea, we can keep using the same eval function

In [48]:
def score_answer_with_json(log: Log) -> EvaluationResult:
    """Compare the logged answer with the dataset target.

    Returns a "correctness" result scored 1 on an exact string match and 0
    otherwise, or None when the log carries no target.
    """
    if not log.target:
        return None

    predicted = str(expand_nested_json(log.output)["answer"])
    # The expected answer is whatever follows the last "#### " marker.
    expected = log.target.split("#### ")[-1]
    is_match = predicted == expected
    return EvaluationResult("correctness", int(is_match))


## Hook up Zenbase to your functions

1. Decorate each function with `@zenbase_tracer`.
2. Change the function's input parameter to `request: LMRequest`, and read the original inputs from `request.inputs`.
3. Use `request.zenbase.task_demos` to get the few-shot examples for the task, and add them to your prompt however you like.
4. If you only need a few examples, slice the list: for instance, `request.zenbase.task_demos[:2]` gives you the first two.

In [64]:
from zenbase.types import LMRequest
import json
from zenbase.utils import expand_nested_json
from parea.schemas import EvaluationResult
import openai

@zenbase_tracer  # step 1: trace with Zenbase
@trace(eval_funcs=[score_answer_with_json])
def solver(request: LMRequest):  # step 2: take a request instead of raw inputs
    """Solve the question in ``request.inputs`` using few-shot demos, a plan and an operation.

    Every demo in ``request.zenbase.task_demos`` is added to the prompt as an
    example question/answer pair. The plan and the operation come from
    ``planner_chain`` and ``operation_finder``.

    Returns:
        A dict ``{"answer": ...}`` holding the ``"answer"`` value from the
        model's JSON reply.
    """
    system_prompt = "You are an expert math solver. Solve the given problem using the provided plan and operation. Return only the final numerical answer in JSON format and the key of answer."
    messages = [{"role": "system", "content": system_prompt}]

    # step 3: turn each demo into a user/assistant example pair
    for demo in request.zenbase.task_demos:
        messages.append({"role": "user", "content": f"Example Question: {demo.inputs!s}"})
        messages.append({"role": "assistant", "content": f"Example Answer: {demo.outputs!s}"})

    the_plan = planner_chain(request.inputs)["plan"]
    the_operation = operation_finder(
        {"plan": the_plan, "question": request.inputs["question"]}
    )

    # The actual task follows the examples, one user message per piece.
    user_prompts = (
        f"Question: {request.inputs['question']}",
        f"Plan: {the_plan}",
        f"Mathematical Operation needed: {the_operation['operation']}",
        "Provide the answer as a number in JSON format.",
    )
    messages.extend({"role": "user", "content": text} for text in user_prompts)

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        response_format={"type": "json_object"},
    )

    parsed = json.loads(completion.choices[0].message.content)
    return {"answer": parsed["answer"]}

@zenbase_tracer  # step 1: trace with Zenbase
@trace
def planner_chain(request: LMRequest):  # step 2: take a request instead of raw inputs
    """Ask the model for a step-by-step plan for the question in ``request.inputs``.

    Up to two demos from ``request.zenbase.task_demos`` are added to the prompt
    as user/assistant example pairs.

    Returns:
        A dict ``{"plan": <str>}``. A plan returned as a string is used as is;
        a plan returned as a list of steps is joined with single spaces.

    Raises:
        KeyError: If the model's JSON reply has no ``"plan"`` key.
    """
    messages = [
        {
            "role": "system",
            "content": "You are an expert math solver. Create a step-by-step plan to solve the given math problem. Return the plan as a JSON object with a 'plan' key."
        },
    ]

    if request.zenbase.task_demos:  # step 3: use the few-shot demos when present
        for demo in request.zenbase.task_demos[:2]:  # step 4: only the first two
            messages += [
                {"role": "user", "content": str(demo.inputs)},
                {"role": "assistant", "content": str(demo.outputs)},
            ]

    # NOTE(review): a missing "question" silently falls back to a stock
    # question instead of failing — confirm this is intended.
    messages.append({"role": "user", "content": request.inputs.get("question", "What is 2 + 2?")})

    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        response_format={"type": "json_object"},
    )

    answer = json.loads(response.choices[0].message.content)
    plan = answer["plan"]
    # The prompt does not fix the type of "plan". Joining a plain string would
    # put a space between every character, so only join when it is not a str;
    # steps are stringified so non-string items do not raise.
    if isinstance(plan, str):
        plan_text = plan
    else:
        plan_text = " ".join(str(step) for step in plan)
    return {"plan": plan_text}

@zenbase_tracer  # step 1: trace with Zenbase
@trace
def operation_finder(request: LMRequest):  # step 2: take a request instead of raw inputs
    """Ask the model which basic operation the plan in ``request.inputs`` relies on.

    ``request.inputs`` must provide ``"question"`` and ``"plan"``. Up to two
    demos from ``request.zenbase.task_demos`` are added as example pairs.

    Returns:
        A dict ``{"operation": ...}`` holding the ``"operation"`` value from
        the model's JSON reply.
    """
    system_prompt = "You are an expert math solver. Identify the primary mathematical operation needed to solve the problem based on the given question and plan. Use simple operations like addition, subtraction, multiplication, or division. Return the operation as a JSON object with an 'operation' key."
    messages = [{"role": "system", "content": system_prompt}]

    demos = request.zenbase.task_demos  # step 3: few-shot demos, if any
    if demos:
        for demo in demos[:2]:  # step 4: only the first two
            messages.append({"role": "user", "content": f"Input: {demo.inputs!s}"})
            messages.append({"role": "assistant", "content": str(demo.outputs)})

    messages.extend(
        [
            {"role": "user", "content": f"Question: {request.inputs['question']}"},
            {"role": "user", "content": f"Plan: {request.inputs['plan']}"},
        ]
    )

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        response_format={"type": "json_object"},
    )

    parsed = json.loads(completion.choices[0].message.content)
    return {"operation": parsed["operation"]}

## Now we can optimize!

### Set up your optimizer:

In [65]:
from zenbase.optim.metric.bootstrap_few_shot import BootstrapFewShot
from zenbase.adaptors.parea import ZenParea

# Adaptor that connects Zenbase to the Parea client created earlier.
zen_parea_adaptor = ZenParea(parea)

# Dataset identifiers handed to the optimizer for each split.
TRAIN_SET = "GSM8K_train_set_parea_dataset_2inu4fpM2Q5zIo0JDbtXZOeaG9Y"
TEST_SET = "GSM8K_test_set_parea_dataset_2inu4fT1X5IZKj361H21BbD2Mc4"
VALIDATION_SET = "GSM8K_validation_set_parea_dataset_2inu3r5jX6ruauOovt2rf5L5LHG"
# SHOTS is passed to the optimizer as `shots`; SAMPLES is passed to
# `perform(samples=...)` in the next cell.
SHOTS = 2
SAMPLES = 2

# Keyword arguments forwarded to the evaluator.
evaluator_kwargs = {"p": parea, "n_workers": 1}

bootstrap_few_shot = BootstrapFewShot(
    zen_adaptor=zen_parea_adaptor,
    evaluator_kwargs=evaluator_kwargs,
    shots=SHOTS,
    training_set=TRAIN_SET,
    validation_set=VALIDATION_SET,
    test_set=TEST_SET,
)


### Do the optimization

In [66]:
# Reset the tracer's trace store to an empty dict before optimizing.
zenbase_tracer.all_traces = {}
# Run the optimization on `solver`. The result is unpacked into two values:
# `best_fn`, which is called like `solver` in the next section, and `candidates`.
best_fn, candidates = bootstrap_few_shot.perform(
    solver,
    samples=SAMPLES,
    rounds=1,
    trace_manager=zenbase_tracer,
)

Run name set to: hilly-soya, since a name was not provided.


100%|██████████| 5/5 [00:13<00:00,  2.68s/it]
0it [00:04, ?it/s]


Experiment zenbase-triple-buffered-needs-based Run hilly-soya stats:
{
  "latency": "2.68",
  "input_tokens": "0.00",
  "output_tokens": "0.00",
  "total_tokens": "0.00",
  "cost": "0.00000",
  "correctness": "0.60"
}


View experiment & traces at: https://app.parea.ai/experiments/zenbase-triple-buffered-needs-based/3f017db0-b941-4915-889a-f53b4b5ecaab
Num. experiments: 1
Run name set to: jazzy-tamp, since a name was not provided.


100%|██████████| 2/2 [00:11<00:00,  5.56s/it]
0it [00:04, ?it/s]


Experiment zenbase-sharable-5thgeneration Run jazzy-tamp stats:
{
  "latency": "5.57",
  "input_tokens": "0.00",
  "output_tokens": "0.00",
  "total_tokens": "0.00",
  "cost": "0.00000",
  "correctness": "0.50"
}


View experiment & traces at: https://app.parea.ai/experiments/zenbase-sharable-5thgeneration/86c1596b-e8c2-499c-99d1-a6fd6440972c
Num. experiments: 1
Run name set to: snuff-loot, since a name was not provided.


100%|██████████| 2/2 [00:08<00:00,  4.05s/it]
0it [00:04, ?it/s]


Experiment zenbase-profound-composite Run snuff-loot stats:
{
  "latency": "4.11",
  "input_tokens": "0.00",
  "output_tokens": "0.00",
  "total_tokens": "0.00",
  "cost": "0.00000",
  "correctness": "0.50"
}


View experiment & traces at: https://app.parea.ai/experiments/zenbase-profound-composite/7fbe7752-0f7f-4161-9d3f-f93d37eade11
Num. experiments: 1
Run name set to: girly-kale, since a name was not provided.


100%|██████████| 5/5 [00:16<00:00,  3.36s/it]
0it [00:04, ?it/s]


Experiment zenbase-integrated-encompassing Run girly-kale stats:
{
  "latency": "3.37",
  "input_tokens": "0.00",
  "output_tokens": "0.00",
  "total_tokens": "0.00",
  "cost": "0.00000",
  "correctness": "1.00"
}


View experiment & traces at: https://app.parea.ai/experiments/zenbase-integrated-encompassing/a3b4de9d-601d-44f0-8248-0cb0444bb01c
Num. experiments: 1
Run name set to: boned-reef, since a name was not provided.


100%|██████████| 5/5 [00:14<00:00,  3.00s/it]
0it [00:04, ?it/s]


Experiment zenbase-profound-static Run boned-reef stats:
{
  "latency": "3.00",
  "input_tokens": "0.00",
  "output_tokens": "0.00",
  "total_tokens": "0.00",
  "cost": "0.00000",
  "correctness": "1.00"
}


View experiment & traces at: https://app.parea.ai/experiments/zenbase-profound-static/7a50a5b9-8122-4915-bc6d-c4392e71af24
Num. experiments: 1


### Use your optimized function

In [68]:
# Reset the trace store to an empty dict before the call; the next cell reads
# the first entry recorded after this reset.
zenbase_tracer.all_traces = {}
# Call the optimized function with the same kind of input `solver` takes.
best_fn({"question": "What is 2 + 2?"})

{'answer': 4}

### Introspect function traces


In [69]:
# Take the first recorded trace and keep its "optimized" entry, which is
# indexed by function name in the cells below.
function_traces = list(zenbase_tracer.all_traces.values())[0]["optimized"]

### Check the optimized parameters for solver


In [70]:
from pprint import pprint

# Demos attached to the request that `solver` received in the recorded trace.
pprint(function_traces["solver"]["args"]["request"].zenbase.task_demos)


[LMDemo(inputs={'question': 'Weng earns $12 an hour for babysitting. '
                            'Yesterday, she just did 50 minutes of '
                            'babysitting. How much did she earn?'},
        outputs={'answer': '10'},
        adaptor_object=None),
 LMDemo(inputs={'question': 'Betty is saving money for a new wallet which '
                            'costs $100. Betty has only half of the money she '
                            'needs. Her parents decided to give her $15 for '
                            'that purpose, and her grandparents twice as much '
                            'as her parents. How much more money does Betty '
                            'need to buy the wallet?'},
        outputs={'answer': '5'},
        adaptor_object=None),
 LMDemo(inputs={'question': 'Julie is reading a 120-page book. Yesterday, she '
                            'was able to read 12 pages and today, she read '
                            'twice as many pages as yesterda

### Check the optimized parameters for planner_chain


In [71]:
from pprint import pprint

# Demos attached to the request that `planner_chain` received in the recorded trace.
pprint(function_traces["planner_chain"]["args"]["request"].zenbase.task_demos)


[LMDemo(inputs={'question': 'Weng earns $12 an hour for babysitting. '
                            'Yesterday, she just did 50 minutes of '
                            'babysitting. How much did she earn?'},
        outputs={'plan': 'First, convert 50 minutes to hours by dividing by 60 '
                         'minutes/hour to get 50/60 = 5/6 hour. Next, multiply '
                         'the hours babysat by the hourly rate to find the '
                         'total earnings: (5/6) * $12 = $10.'},
        adaptor_object=None),
 LMDemo(inputs={'question': 'Betty is saving money for a new wallet which '
                            'costs $100. Betty has only half of the money she '
                            'needs. Her parents decided to give her $15 for '
                            'that purpose, and her grandparents twice as much '
                            'as her parents. How much more money does Betty '
                            'need to buy the wallet?'},
        out

### Check the optimized parameters for operation_finder


In [72]:
from pprint import pprint

# Demos attached to the request that `operation_finder` received in the recorded trace.
pprint(function_traces["operation_finder"]["args"]["request"].zenbase.task_demos)


[LMDemo(inputs={'plan': 'First, convert 50 minutes to hours by dividing by 60 '
                        'minutes/hour to get 50/60 = 5/6 hour. Next, multiply '
                        'the hours babysat by the hourly rate to find the '
                        'total earnings: (5/6) * $12 = $10.',
                'question': 'Weng earns $12 an hour for babysitting. '
                            'Yesterday, she just did 50 minutes of '
                            'babysitting. How much did she earn?'},
        outputs={'operation': 'multiplication'},
        adaptor_object=None),
 LMDemo(inputs={'plan': 'The cost of the wallet is $100 Betty has only half of '
                        'the money she needs, which is $100/2 = $50 Her '
                        'parents gave her $15 Her grandparents gave her twice '
                        'as much as her parents, which is 2 * $15 = $30 The '
                        'total amount of money she has now is $50 + $15 + $30 '
                      

## How to save the function and load it later


### Save the optimized function args to a file


In [73]:
# Save the optimizer's arguments to a file at this relative path.
bootstrap_few_shot.save_optimizer_args("bootstrap_few_shot_args.zenbase")

### Load the optimized function args with the function


In [74]:
# NOTE(review): this repeats the save from the previous cell — presumably so
# this cell can be run on its own; drop it if that is not needed.
bootstrap_few_shot.save_optimizer_args("bootstrap_few_shot_args.zenbase")

# Load from the saved file, passing the original `solver` and the tracer; the
# result is called like `solver` in the next cell.
optimized_function = bootstrap_few_shot.load_optimizer_and_function("bootstrap_few_shot_args.zenbase", solver, zenbase_tracer)

### Use the loaded function and make sure it loaded the demos.


In [75]:
from pprint import pprint

# Reset the trace store, then make one call so there is a trace to inspect.
zenbase_tracer.all_traces = {}
optimized_function({"question": "If I have 30% of shares, and Mo has 24.5% of shares, how many of our 10M shares are unassigned?"})

# Take the first recorded trace and keep its "optimized" entry.
function_traces = list(zenbase_tracer.all_traces.values())[0]["optimized"]

# Print the demos attached to each traced function's request.
for traced_name in ("solver", "planner_chain", "operation_finder"):
    pprint(function_traces[traced_name]["args"]["request"].zenbase.task_demos)

[LMDemo(inputs={'question': 'Weng earns $12 an hour for babysitting. '
                            'Yesterday, she just did 50 minutes of '
                            'babysitting. How much did she earn?'},
        outputs={'answer': '10'},
        adaptor_object=None),
 LMDemo(inputs={'question': 'Betty is saving money for a new wallet which '
                            'costs $100. Betty has only half of the money she '
                            'needs. Her parents decided to give her $15 for '
                            'that purpose, and her grandparents twice as much '
                            'as her parents. How much more money does Betty '
                            'need to buy the wallet?'},
        outputs={'answer': '5'},
        adaptor_object=None),
 LMDemo(inputs={'question': 'Julie is reading a 120-page book. Yesterday, she '
                            'was able to read 12 pages and today, she read '
                            'twice as many pages as yesterda