Install packages:

In [1]:
import sys
import subprocess

def install_package(package):
    """Install one requirement with pip, using the interpreter running this notebook.

    Args:
        package: Requirement specifier passed verbatim to ``pip install``
            (a package name or a VCS URL).

    Raises:
        subprocess.CalledProcessError: Re-raised, after a message is printed,
            when the pip process exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as error:
        # Report which requirement failed, then let the caller see the error.
        print(f"Failed to install {package}: {error}")
        raise

def install_packages(packages):
    """Install each requirement in ``packages``, in order, via ``install_package``.

    Args:
        packages: Iterable of requirement specifiers.

    Any exception raised by ``install_package`` propagates, so requirements
    after the failing one are not attempted.
    """
    for requirement in packages:
        install_package(requirement)

# Detect Google Colab by probing for the `google.colab` package.
try:
    import google.colab  # noqa: F401 -- imported only to probe the environment
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

if IN_COLAB:
    # Install the zenbase package from the `main` branch of its GitHub repository.
    # (Alternative: install the published package with install_package('zenbase').)
    install_package('git+https://github.com/zenbase-ai/lib.git@main#egg=zenbase&subdirectory=py')

    # Other packages this notebook needs when running in Google Colab.
    additional_packages = [
        'python-dotenv',
        'langsmith[vcr]',
        'openai',
        'langchain',
        'langchain_openai',
    ]
    install_packages(additional_packages)

# zenbase must be importable from here on; report the failure and stop otherwise.
try:
    import zenbase
except ImportError as import_error:
    print("Failed to import zenbase: ", import_error)
    raise

In [2]:
from pathlib import Path
from dotenv import load_dotenv

# Credentials are loaded from a dotenv file below. To set them in code instead,
# uncomment these lines and fill in the values:
#
# import os
#
# os.environ["OPENAI_API_KEY"] = "..."
# os.environ["LANGCHAIN_API_KEY"] = "..."
# os.environ["LANGCHAIN_TRACING_V2"] = "true"

# The path is relative, so it is resolved against the current working directory.
# override=True lets values from the file replace variables already set in the environment.
load_dotenv(Path("../../.env.test"), override=True)

True

In [3]:
import nest_asyncio

# Patch asyncio to allow nested event loops.
# NOTE(review): presumably needed because the notebook kernel already runs a loop
# and later cells run async code under it — confirm.
nest_asyncio.apply()

In [4]:
import json

from langsmith import traceable
from langsmith.schemas import Run, Example
from langsmith.wrappers import wrap_openai
from openai import OpenAI

# OpenAI client wrapped with LangSmith's `wrap_openai`
# (presumably so its calls show up in LangSmith traces — confirm against the langsmith docs).
openai = wrap_openai(OpenAI())

# Define your LLM function
@traceable
def openai_json_response(inputs: dict) -> dict:
    """Send a math problem to gpt-3.5-turbo and return its JSON reply as a dict.

    Args:
        inputs: Problem payload; it is JSON-serialised and sent as the user message.

    Returns:
        The model's reply parsed from JSON. The system prompt asks for an
        object with a key of 'answer'.
    """
    system_turn = {
        "role": "system",
        "content": "You are an expert math solver. Your answer must be just the number with no separators, and nothing else. Follow the format of the examples. Think step by step. Respond with a JSON object with a key of 'answer'.",
    }
    user_turn = {"role": "user", "content": json.dumps(inputs)}

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_turn, user_turn],
        response_format={"type": "json_object"},
    )

    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)

# Define your Langsmith evaluator
def score_answer(run: Run, example: Example):
    print(run.outputs)
    match (answer := run.outputs["answer"]):
        case int():
            output = str(answer).strip()
        case str():
            output = answer.split("#### ")[-1].strip()
    target = example.outputs["answer"].split("#### ")[-1].strip()
    return {
        "key": "correctness",
        "score": int(output == target),
    }

In [5]:
# Evaluate using LangSmith
from langsmith import Client, evaluate

# LangSmith client shared by the evaluation below and by later cells.
langsmith = Client()

# Materialise the test examples into a list so the same data can be reused.
evalset = [
    *langsmith.list_examples(
        dataset_name="GSM8K_test_set_langsmith_dataset_2ii5SKBzVHu3UVUmiFIxFxSvsFm"
    )
]

# Keyword arguments for `evaluate`; reused later when building the Zenbase evaluator.
evaluate_kwargs = {
    "data": evalset,
    "evaluators": [score_answer],
    "client": langsmith,
    "max_concurrency": 2,
}

# Baseline run of the un-optimized function.
evaluate(openai_json_response, **evaluate_kwargs)

View the evaluation results for experiment: 'healthy-juice-57' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/ab14f6ad-3e00-43b8-9ad6-3ce6bdb510f0/compare?selectedSessions=c88b49d5-9e79-4a9f-8151-2d5aa895f882


0it [00:00, ?it/s]

{'answer': '3'}{'answer': 360}


<ExperimentResults healthy-juice-57>

In [6]:
from zenbase.adaptors.langchain import ZenLangSmith
from zenbase.core.managers import ZenbaseTracer

# Define trace manager for Zenbase; its `trace_function` decorator is applied
# to the LLM functions defined later in the notebook.
trace_manager = ZenbaseTracer()

# Zenbase adaptor built around the existing LangSmith client.
zen_langsmith = ZenLangSmith(langsmith)

In [7]:
from zenbase.types import LMRequest

# Wrap your existing chain with @trace_manager.trace_function and take in an `LMRequest` object.
# An LMRequest has the inputs for your chain and has a `zenbase` attribute.
# This `zenbase` attribute includes the fields that Zenbase optimizes.

# LMRequest.inputs => LM function inputs
# LMRequest.zenbase => optimized LLM params

@trace_manager.trace_function
@traceable
def openai_json_response_2(request: LMRequest) -> dict:
    """Solve the math problem in ``request.inputs`` with gpt-3.5-turbo, using few-shot demos.

    Args:
        request: Carries the problem in ``request.inputs`` and the demos in
            ``request.zenbase.task_demos``; each demo exposes ``inputs`` and ``outputs``.

    Returns:
        The model's reply parsed from JSON into a dict.
    """
    system_turn = {
        "role": "system",
        "content": "You are an expert math solver. Your answer must be just the number with no separators, and nothing else. Follow the format of the examples. Think step by step. Respond with a JSON object.",
    }

    # Each demo becomes one user/assistant exchange placed before the real question.
    demo_turns = [
        turn
        for demo in request.zenbase.task_demos
        for turn in (
            {"role": "user", "content": json.dumps(demo.inputs)},
            {"role": "assistant", "content": json.dumps(demo.outputs)},
        )
    ]
    question_turn = {"role": "user", "content": json.dumps(request.inputs)}

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_turn, *demo_turns, question_turn],
        response_format={"type": "json_object"},
    )

    return json.loads(completion.choices[0].message.content)


@trace_manager.trace_function
@traceable
def openai_json_response(request: LMRequest) -> dict:
    """Traced few-shot math solver; also triggers a nested traced call.

    This definition rebinds the name ``openai_json_response`` used by the
    earlier, un-optimized function of the same name.

    Args:
        request: Carries the problem in ``request.inputs`` and the demos in
            ``request.zenbase.task_demos``; each demo exposes ``inputs`` and ``outputs``.

    Returns:
        The model's reply parsed from JSON into a dict.
    """
    # Nested traced call; its return value is discarded.
    # NOTE(review): it receives `request.inputs`, not an LMRequest — presumably the
    # trace decorator builds the request for the wrapped function; confirm.
    openai_json_response_2(request.inputs)

    conversation = [
        {
            "role": "system",
            "content": "You are an expert math solver. Your answer must be just the number with no separators, and nothing else. Follow the format of the examples. Think step by step. Respond with a JSON object.",
        }
    ]

    # Replay every demo as a user question followed by the assistant's answer.
    for demo in request.zenbase.task_demos:
        conversation.extend(
            (
                {"role": "user", "content": json.dumps(demo.inputs)},
                {"role": "assistant", "content": json.dumps(demo.outputs)},
            )
        )
    conversation.append({"role": "user", "content": json.dumps(request.inputs)})

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        response_format={"type": "json_object"},
    )

    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)



In [8]:
from zenbase.adaptors.langchain import ZenLangSmith
from zenbase.optim.metric.labeled_few_shot import LabeledFewShot

# Convert the LangSmith training examples into Zenbase demos.
demoset = ZenLangSmith.examples_to_demos(
    langsmith.list_examples(dataset_name="GSM8K_train_set_langsmith_dataset_2ii5SPh2qGRjlCzFIkp8d8qcKhH")
)
# NOTE(review): shots=3 is presumably the number of demos placed in each candidate prompt — confirm.
optimizer = LabeledFewShot(demoset=demoset, shots=3)

# perform() returns three values; the third is not used here.
best_fn, candidates, _ = optimizer.perform(
    # Pass the function decorated with @trace_manager.trace_function
    openai_json_response,
    # Exactly the same as what you are passing to your evaluate function
    evaluator=ZenLangSmith.metric_evaluator(**evaluate_kwargs),
    samples=5,
    rounds=1,
)

View the evaluation results for experiment: 'zenbase-automated-demand-driven-25d9ff96' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/ab14f6ad-3e00-43b8-9ad6-3ce6bdb510f0/compare?selectedSessions=c7f466ad-6041-436e-91fc-b7d1e226a782


0it [00:00, ?it/s]

{'answer': "Janet uses 3+4=<<3+4=7>>7 eggs for breakfast and muffins daily.\nTherefore, she has 16-7=<<16-7=9>>9 eggs to sell daily.\nThese 9 eggs will make her 9*2=<<9*2=18>>18 dollars every day at the farmers' market.\n#### 18"}{'answer': 'A robe takes 2 bolts of blue fiber and 2/2 = <<2/2=1>>1 bolt of white fiber.\nTherefore, it takes 2 + 1 = <<2+1=3>>3 bolts in total.\n#### 3'}
View the evaluation results for experiment: 'zenbase-open-architected-asynchronous-13537dd9' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/ab14f6ad-3e00-43b8-9ad6-3ce6bdb510f0/compare?selectedSessions=0ed1f346-1398-4300-ba47-53d4f3d36089


0it [00:00, ?it/s]

{'answer': 'A robe takes 2 + 2/2 = <<2+2/2=3>>3 bolts in total.\n#### 3'}{'answer': "Janet uses 3 + 4 = <<3+4=7>>7 eggs every day for breakfast and muffins.\nThe number of eggs she sells daily is 16 - 7 = <<16-7=9>>9.\nShe makes 9 x 2 = $<<9*2=18>>18 every day at the farmers' market.\n#### 18"}
View the evaluation results for experiment: 'zenbase-progressive-non-volatile-3cafce42' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/ab14f6ad-3e00-43b8-9ad6-3ce6bdb510f0/compare?selectedSessions=24a50358-6420-4217-8efe-f74c24ae53b8


0it [00:00, ?it/s]

{'answer': 'A robe takes 2 bolts of blue fiber and 2 / 2 = 1 bolt of white fiber.\nIn total, a robe takes 2 + 1 = 3 bolts.\n#### 3'}
{'answer': "Janet uses 3 + 4 = 7 eggs for breakfast and baking.\nSo, she sells 16 - 7 = 9 eggs at the farmers' market.\nShe makes 9 x 2 = $<<9*2=18>>18 every day at the farmers' market.\n#### 18"}
View the evaluation results for experiment: 'zenbase-implemented-bandwidth-monitored-8ea39662' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/ab14f6ad-3e00-43b8-9ad6-3ce6bdb510f0/compare?selectedSessions=cf93bf56-1b89-4279-b362-4fd51f6f9d33


0it [00:00, ?it/s]

{'answer': 'A robe takes 2 bolts of blue fiber and 2/2=<<2/2=1>>1 bolt of white fiber.\nSo in total, a robe takes 2 + 1 = <<2+1=3>>3 bolts.\n#### 3'}{'answer': "Janet uses 3+4 = <<3+4=7>>7 eggs every day for breakfast and muffins.\nSo the ducks lay 16-7 = <<16-7=9>>9 eggs that she sells every day.\nShe makes 9*2 = <<9*2=18>>18 dollars every day at the farmers' market.\n#### 18"}
View the evaluation results for experiment: 'zenbase-upgradable-grid-enabled-615e3717' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/ab14f6ad-3e00-43b8-9ad6-3ce6bdb510f0/compare?selectedSessions=962d3f51-1d18-49b8-9dd1-5abe094b7e6b


0it [00:00, ?it/s]

{'answer': 'It takes 2/2 = <<2/2=1>>1 bolt of white fiber.\nSo, in total, it takes 2 + 1 = <<2+1=3>>3 bolts.\n#### 3'}
{'answer': "Janet has 16-3-4=<<16-3-4=9>>9 eggs left for sale every day.\nSince she sells the eggs at $2 each, she makes 9*2=$<<9*2=18>>18 every day at the farmers' market.\n#### 18"}


In [9]:
from zenbase.optim.metric.bootstrap_few_shot import BootstrapFewShot

# LangSmith client and the Zenbase helper built around it.
langsmith = Client()
langsmith_helper = ZenLangSmith(client=langsmith)

# Dataset names and optimizer settings.
TRAIN_SET = "GSM8K_train_set_langsmith_dataset_2iiXubwwzCNQo4yokckJiUn98UG"
TEST_SET = "GSM8K_test_set_langsmith_dataset_2iiXuiuySmWoINORDrPuvWHgbaT"
VALIDATION_SET = "GSM8K_validation_set_langsmith_dataset_2iiXukhQxsrsHiwRdOh3O9j43wQ"
SHOTS = 2
SAMPLES = 2

# Fetch the three datasets in the same order as before: train, test, validation.
train_set, test_set, validation_set = (
    langsmith_helper.fetch_dataset(dataset_name=dataset_name)
    for dataset_name in (TRAIN_SET, TEST_SET, VALIDATION_SET)
)

# Evaluator settings handed to the optimizer (no `data` entry here; the
# datasets are passed to BootstrapFewShot separately).
evaluator_kwargs = {
    "evaluators": [score_answer],
    "client": langsmith,
    "max_concurrency": 1,
}

bootstrap_few_shot = BootstrapFewShot(
    shots=SHOTS,
    training_set=train_set,
    test_set=test_set,
    validation_set=validation_set,
    evaluator_kwargs=evaluator_kwargs,
    zen_adaptor=langsmith_helper,
)

# Run the optimizer on the traced function; the result exposes `best_function`.
teacher_lm = bootstrap_few_shot.perform(
    openai_json_response,
    samples=SAMPLES,
    rounds=1,
    trace_manager=trace_manager,
)


View the evaluation results for experiment: 'zenbase-synergistic-didactic-a9f25e3c' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/10fd5ab6-4eb1-43a3-9a82-1b7d1c40300a/compare?selectedSessions=e66c2026-e170-4a2d-9d3e-49ce7cc01f7d


0it [00:00, ?it/s]

{'answer': 144}
{'answer': 3}
View the evaluation results for experiment: 'zenbase-customer-focused-modular-d9adfd8a' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/d8e98c14-0b6d-4380-bb1e-05a8f3b1bf34/compare?selectedSessions=6219c3d2-a4b6-4adc-9a12-d2ed6642507a


0it [00:00, ?it/s]

{'answer': 'He bought 2 large pizzas, each with 16 slices, so 2*16=<<2*16=32>>32 slices of large pizza\nHe also bought 2 small pizzas, each with 8 slices, so 2*8=<<2*8=16>>16 slices of small pizza\nIn total, he eats 32+16=<<32+16=48>>48 slices of pizza that day\n#### 48'}
{'answer': 'The weight after adding jelly beans was 2*3=<<2*3=6>>6 pounds.\nThen he added another 2 pounds of jelly beans making it 6+2=<<6+2=8>>8 pounds.\nFinally, the weight doubled after adding gummy worms, making it 8*2=<<8*2=16>>16 pounds.\n#### 16'}
View the evaluation results for experiment: 'zenbase-customizable-fault-tolerant-db4a51c2' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/d8e98c14-0b6d-4380-bb1e-05a8f3b1bf34/compare?selectedSessions=5183a483-3167-4477-abe5-d3d1b867c18a


0it [00:00, ?it/s]

{'answer': 'In total, Albert buys 2 x 16 = <<2*16=32>>32 slices of large pizzas.\nHe also buys 2 x 8 = <<2*8=16>>16 slices of small pizzas.\nTherefore, Albert eats in total 32 + 16 = <<32+16=48>>48 pieces that day.\n#### 48'}
{'answer': 'The initial weight was 2 pounds from the jelly beans.\nAdding brownies tripled the weight to 2 x 3 = 6 pounds.\nAdding more jelly beans brought the weight to 6 + 2 = 8 pounds.\nAdding gummy worms doubled the weight to 8 x 2 = 16 pounds.\n#### 16'}
View the evaluation results for experiment: 'zenbase-cross-group-bandwidth-monitored-affcea0a' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/571de8fd-a35b-4524-b03f-6463f3362552/compare?selectedSessions=91a5b21b-5668-4924-90c4-d5708a769cc9


0it [00:00, ?it/s]

{'answer': '72'}
{'answer': 'Weng earns $12/60 = <<12/60=0.2>>0.2 dollars per minute.\nTherefore, for 50 minutes, she earned 50*0.2 = <<50*0.2=10>>10 dollars.\n#### 10'}
{'answer': 'Betty has 100/2 = <<100/2=50>>50 dollars.\nHer grandparents give her 15*2 = <<15*2=30>>30 dollars.\nIn total, Betty has 50+15+30 = <<50+15+30=95>>95 dollars.\nBetty still needs 100-95 = <<100-95=5>>5 dollars to buy the wallet.\n#### 5'}
{'answer': 'Julie read 12*2 = <<12*2=24>>24 pages today.\nJulie has read a total of 12+24 = <<12+24=36>>36 pages.\nShe has 120-36 = <<120-36=84>>84 pages remaining.\nTherefore, she should read 84/2 = <<84/2=42>>42 pages tomorrow.\n#### 42'}
{'answer': '624'}
View the evaluation results for experiment: 'zenbase-focused-logistical-e64da8aa' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/10fd5ab6-4eb1-43a3-9a82-1b7d1c40300a/compare?selectedSessions=66bea0dd-5872-4070-b617-f98cf1bc59a6


0it [00:00, ?it/s]

{'answer': "She uses 3+4=7 eggs every day for breakfast and muffins.\nThe remaining eggs are 16-7=9 eggs.\nShe makes 9*2=$18 every day at the farmers' market.\n#### 18"}
{'answer': '3'}


In [11]:
# Now you can use your zenbase fn: call the optimizer's best function with a plain inputs dict.
teacher_lm.best_function({"question": "If I have 30% of shares, and Mo has 24.5% of shares, how many of our 10M shares are unassigned?"})

{'answer': '45'}

In [12]:
# Clear the recorded traces so that the next call's trace is the first entry read back later.
trace_manager.all_traces = {}

In [14]:
# Call the optimized function again; this call is made after the trace store was reset.
teacher_lm.best_function({"question": "If I have 30% of shares, and Mo has 24.5% of shares, how many of our 10M shares are unassigned?"})

{'answer': '4500000'}

In [15]:
# Take the first recorded trace entry and keep its "optimized" section.
function_traces = list(trace_manager.all_traces.values())[0]["optimized"]

In [16]:
from pprint import pprint

# Show the few-shot demos recorded on the request of the nested `openai_json_response_2` call.
pprint(function_traces["openai_json_response_2"]["args"]["request"].zenbase.task_demos)


[LMDemo(inputs={'question': 'Natalia sold clips to 48 of her friends in April, '
                            'and then she sold half as many clips in May. How '
                            'many clips did Natalia sell altogether in April '
                            'and May?'},
        outputs={'answer': '72'},
        original_object=None),
 LMDemo(inputs={'question': 'Weng earns $12 an hour for babysitting. '
                            'Yesterday, she just did 50 minutes of '
                            'babysitting. How much did she earn?'},
        outputs={'answer': '12'},
        original_object=None),
 LMDemo(inputs={'question': 'Betty is saving money for a new wallet which '
                            'costs $100. Betty has only half of the money she '
                            'needs. Her parents decided to give her $15 for '
                            'that purpose, and her grandparents twice as much '
                            'as her parents. How much more money do

In [17]:
from pprint import pprint

# Show the few-shot demos recorded on the request of the outer `openai_json_response` call.
pprint(function_traces["openai_json_response"]["args"]["request"].zenbase.task_demos)

[LMDemo(inputs={'question': 'Natalia sold clips to 48 of her friends in April, '
                            'and then she sold half as many clips in May. How '
                            'many clips did Natalia sell altogether in April '
                            'and May?'},
        outputs={'answer': '72'},
        original_object=None),
 LMDemo(inputs={'question': 'Weng earns $12 an hour for babysitting. '
                            'Yesterday, she just did 50 minutes of '
                            'babysitting. How much did she earn?'},
        outputs={'answer': 'She earns $12/60 = $0.2 per minute.\n'
                           'Therefore, she earned $0.2*50 = $<<0.2*50=10>>10 '
                           'for 50 minutes of babysitting.\n'
                           '#### 10'},
        original_object=None),
 LMDemo(inputs={'question': 'Betty is saving money for a new wallet which '
                            'costs $100. Betty has only half of the money she '
            

In [18]:
# Save the optimizer arguments (presumably to this file path, relative to the working directory — confirm).
bootstrap_few_shot.save_optimizer_args("bootstrap_few_shot_args.zenbase")

# Load them back together with the function to optimize and the trace manager.
# NOTE(review): `optimized_function` is not used in the cells visible here; the
# following cell still calls `teacher_lm.best_function` — confirm that is intended.
optimized_function = bootstrap_few_shot.load_optimizer_and_function("bootstrap_few_shot_args.zenbase", openai_json_response, trace_manager)

In [20]:
from pprint import pprint

# Start from an empty trace store so the call below produces the first entry.
trace_manager.all_traces = {}
teacher_lm.best_function({"question": "If I have 30% of shares, and Mo has 24.5% of shares, how many of our 10M shares are unassigned?"})

# First recorded trace entry, "optimized" section.
function_traces = list(trace_manager.all_traces.values())[0]["optimized"]

# Demos seen by the outer function, then by the nested one.
pprint(function_traces["openai_json_response"]["args"]["request"].zenbase.task_demos)
pprint(function_traces["openai_json_response_2"]["args"]["request"].zenbase.task_demos)


[LMDemo(inputs={'question': 'Natalia sold clips to 48 of her friends in April, '
                            'and then she sold half as many clips in May. How '
                            'many clips did Natalia sell altogether in April '
                            'and May?'},
        outputs={'answer': '72'},
        original_object=None),
 LMDemo(inputs={'question': 'Weng earns $12 an hour for babysitting. '
                            'Yesterday, she just did 50 minutes of '
                            'babysitting. How much did she earn?'},
        outputs={'answer': 'She earns $12/60 = $0.2 per minute.\n'
                           'Therefore, she earned $0.2*50 = $<<0.2*50=10>>10 '
                           'for 50 minutes of babysitting.\n'
                           '#### 10'},
        original_object=None),
 LMDemo(inputs={'question': 'Betty is saving money for a new wallet which '
                            'costs $100. Betty has only half of the money she '
            