Install packages:

In [28]:
import sys
import subprocess

def install_package(package):
    """Install one package with pip, using the interpreter that runs this notebook.

    Args:
        package: Requirement handed to ``pip install`` as a single argument.

    Raises:
        subprocess.CalledProcessError: Re-raised, after the failure has been
            printed, when the pip subprocess reports an error.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as error:
        # Report which package failed, then let the caller see the original error.
        print(f"Failed to install {package}: {error}")
        raise

def install_packages(packages):
    """Install each entry of ``packages`` in order by calling ``install_package``.

    Args:
        packages: Iterable of requirements, each passed on unchanged.
    """
    for requirement in packages:
        install_package(requirement)

# Treat the environment as Google Colab when `google.colab` can be imported.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

if IN_COLAB:
    # On Colab, install the zenbase package from the `main` branch on GitHub.
    # Alternative kept for reference: install_package('zenbase')
    install_package('git+https://github.com/zenbase-ai/lib.git@main#egg=zenbase&subdirectory=py')

    # Other packages this notebook installs on Colab.
    additional_packages = [
        'python-dotenv',
        'langsmith[vcr]',
        'openai',
        'langchain',
        'langchain_openai'
    ]
    install_packages(additional_packages)

# zenbase has to be importable from here on: print the failure, then re-raise it.
try:
    import zenbase
except ImportError as import_error:
    print("Failed to import zenbase: ", import_error)
    raise

In [23]:
from pathlib import Path
from dotenv import load_dotenv

# import os
#
# os.environ["OPENAI_API_KEY"] = "..."
# os.environ["LANGCHAIN_API_KEY"] = "..."
# os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Load environment variables from ../../.env.test (relative to the working directory).
# override=True is passed so values from the file take precedence — see python-dotenv docs.
load_dotenv(dotenv_path=Path("../../.env.test"), override=True)

True

In [24]:
import nest_asyncio

# Apply the nest_asyncio patch once, before any evaluation code runs.
# NOTE(review): presumably needed so event loops can be nested inside the
# notebook's already-running loop — confirm against the nest_asyncio docs.
nest_asyncio.apply()

In [25]:
import json

from langsmith import traceable
from langsmith.schemas import Run, Example
from langsmith.wrappers import wrap_openai
from openai import OpenAI

# Chat client used by every LM function below: an OpenAI() client passed through
# LangSmith's wrap_openai (presumably so its calls are traced — confirm).
# NOTE(review): the variable shares its name with the `openai` package.
openai = wrap_openai(OpenAI())

# Define your LLM function
@traceable
def openai_json_response(inputs: dict) -> dict:
    """Send one problem to gpt-3.5-turbo and return the decoded JSON reply.

    Args:
        inputs: Problem payload; it is serialised with ``json.dumps`` and sent
            as the user message.

    Returns:
        The value decoded by ``json.loads`` from the first choice's message content.
    """
    system_message = {
        "role": "system",
        "content": "You are an expert math solver. Your answer must be just the number with no separators, and nothing else. Follow the format of the examples. Think step by step. Respond with a JSON object with a key of 'answer'.",
    }
    user_message = {"role": "user", "content": json.dumps(inputs)}

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_message, user_message],
        response_format={"type": "json_object"},
    )

    reply_text = completion.choices[0].message.content
    return json.loads(reply_text)

# Define your Langsmith evaluator
def score_answer(run: Run, example: Example):
    print(run.outputs)
    match (answer := run.outputs["answer"]):
        case int():
            output = str(answer).strip()
        case str():
            output = answer.split("#### ")[-1].strip()
    target = example.outputs["answer"].split("#### ")[-1].strip()
    return {
        "key": "correctness",
        "score": int(output == target),
    }

In [26]:
# Evaluate using LangSmith
from langsmith import Client, evaluate

# Connect to LangSmith and materialise the evaluation examples into a list.
langsmith = Client()
evalset = [
    *langsmith.list_examples(
        dataset_name="GSM8K_test_set_langsmith_dataset_2ii5SKBzVHu3UVUmiFIxFxSvsFm"
    )
]

# Keyword arguments for `evaluate`, kept in one dict so they can be reused.
evaluate_kwargs = {
    "data": evalset,
    "evaluators": [score_answer],
    "client": langsmith,
    "max_concurrency": 2,
}

# Score the LM function on the evaluation set.
evaluate(openai_json_response, **evaluate_kwargs)

View the evaluation results for experiment: 'worthwhile-limit-24' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/ab14f6ad-3e00-43b8-9ad6-3ce6bdb510f0/compare?selectedSessions=ef7b63cd-058c-4556-8b21-54425c00dd8a


0it [00:00, ?it/s]

{'answer': 16}{'answer': '3'}


<ExperimentResults worthwhile-limit-24>

In [27]:
from zenbase.core.managers import TraceManager
from zenbase.adaptors.langchain import ZenLangSmith

# Define trace manager for Zenbase
trace_manager = TraceManager()

# Zenbase's LangSmith adaptor, built around the LangSmith client
zen_langsmith = ZenLangSmith(langsmith)

In [7]:
from zenbase.types import LMRequest, deflm

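# Decorate your existing chain (below: `@trace_manager.trace_function`; `deflm` is
# imported but not applied in this cell) and have it accept an `LMRequest` object.
# An `LMRequest` carries the inputs for your chain plus a `zenbase` attribute.
# This `zenbase` attribute holds the fields that Zenbase optimises (e.g. `task_demos`).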

# LMRequest.inputs => LM function inputs
# LMRequest.zenbase => optimized LLM params


# @trace_manager.trace_function
# @traceable
# def planner_openai_call(request: LMRequest) -> dict:
#     messages = [
#         {
#             "role": "system",
#             "content": "You are an expert math solver. Your answer must be just the number with no separators, and nothing else. Follow the format of the examples. Think step by step. Respond with a JSON object. return it with the \{\"plan\": \"...\"\} format.",
#         },
#     ]
# 
#     for demo in request.zenbase.task_demos:
#         messages += [
#             {"role": "user", "content": json.dumps(demo.inputs)},
#             {"role": "assistant", "content": json.dumps(demo.outputs)},
#         ]
#     messages.append({"role": "user", "content": json.dumps(request.inputs)})
#     
#     response = openai.chat.completions.create(
#         model="gpt-3.5-turbo",
#         messages=messages,
#         response_format={"type": "json_object"},
#     )
#     print(response)
#     return json.loads(response.choices[0].message.content)

def _solve_with_demos(request: LMRequest) -> dict:
    """Run the few-shot chat completion shared by both traced LM functions.

    The conversation is: the system prompt, then one user/assistant pair per
    demo in ``request.zenbase.task_demos``, then the actual inputs as the final
    user message.

    Args:
        request: Request carrying ``inputs`` and ``zenbase.task_demos``.

    Returns:
        The value decoded by ``json.loads`` from the first choice's message content.
    """
    messages = [
        {
            "role": "system",
            "content": "You are an expert math solver. Your answer must be just the number with no separators, and nothing else. Follow the format of the examples. Think step by step. Respond with a JSON object.",
        },
    ]

    # Each demo becomes a worked example: its inputs as the user turn and its
    # outputs as the assistant turn.
    for demo in request.zenbase.task_demos:
        messages += [
            {"role": "user", "content": json.dumps(demo.inputs)},
            {"role": "assistant", "content": json.dumps(demo.outputs)},
        ]
    messages.append({"role": "user", "content": json.dumps(request.inputs)})

    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        response_format={"type": "json_object"},
    )

    return json.loads(response.choices[0].message.content)


@trace_manager.trace_function
@traceable
def openai_json_response_2(request: LMRequest) -> dict:
    """Inner traced LM function: answer ``request.inputs`` using the request's demos."""
    return _solve_with_demos(request)


# NOTE(review): this reuses the name of the earlier `openai_json_response(inputs: dict)`
# and replaces it once this cell has run.
@trace_manager.trace_function
@traceable
def openai_json_response(request: LMRequest) -> dict:
    """Outer traced LM function: call the inner function, then answer the request itself.

    Args:
        request: Request carrying ``inputs`` and ``zenbase.task_demos``.

    Returns:
        The decoded JSON reply of this function's own chat completion.
    """
    # The decorated inner function is called with the raw inputs and its result is
    # discarded; presumably the call exists so that a second function shows up in
    # the recorded traces — confirm.
    openai_json_response_2(request.inputs)
    return _solve_with_demos(request)



In [8]:
from zenbase.adaptors.langchain import ZenLangSmith
from zenbase.optim.metric.labeled_few_shot import LabeledFewShot

# Convert the LangSmith training examples into Zenbase demos.
train_examples = langsmith.list_examples(
    dataset_name="GSM8K_train_set_langsmith_dataset_2ii5SPh2qGRjlCzFIkp8d8qcKhH"
)
demoset = ZenLangSmith.examples_to_demos(train_examples)
optimizer = LabeledFewShot(demoset=demoset, shots=3)

# Built from exactly the same keyword arguments that were passed to `evaluate`.
metric_evaluator = ZenLangSmith.metric_evaluator(**evaluate_kwargs)

best_fn, candidates, _ = optimizer.perform(
    # The traced LM function to optimise
    openai_json_response,
    evaluator=metric_evaluator,
    samples=5,
    rounds=1,
)

View the evaluation results for experiment: 'zenbase-focused-leadingedge-a935c30c' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/ab14f6ad-3e00-43b8-9ad6-3ce6bdb510f0/compare?selectedSessions=b110cf5c-1ebf-4bb2-b811-75e8cb26c25d


0it [00:00, ?it/s]

{'answer': 'The robe takes 2 / 2 = 1 bolt of white fiber.\nIn total, the robe takes 2 + 1 = 3 bolts.\n#### 3'}{'answer': "Janet uses 3+4=<<3+4=7>>7 eggs for breakfast and baking each day.\nThat means she has 16-7=<<16-7=9>>9 eggs left to sell daily.\nIf she sells the remaining eggs at $2 each, she makes 9*2=$<<9*2=18>>18 every day at the farmers' market.\n#### 18"}
View the evaluation results for experiment: 'zenbase-mandatory-next-f732ce7e' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/ab14f6ad-3e00-43b8-9ad6-3ce6bdb510f0/compare?selectedSessions=9e03ad55-8f17-4f7e-bc47-998ceda05828


0it [00:00, ?it/s]

{'answer': 'Janet has 16 - 3 - 4 = <<16-3-4=9>>9 eggs left for selling.\nSelling them at $2 per egg she makes 9 x $2 = $<<9*2=18>>18 every day.\n#### 18'}{'answer': 'The robe takes 2/2 = <<2/2=1>>1 bolt of white fiber.\nThe total number of bolts needed is 2 + 1 = <<2+1=3>>3.\n#### 3'}
View the evaluation results for experiment: 'zenbase-horizontal-context-sensitive-4ec3f21c' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/ab14f6ad-3e00-43b8-9ad6-3ce6bdb510f0/compare?selectedSessions=c0c6d1c2-e9c1-4317-aacf-210850692d0e


0it [00:00, ?it/s]

{'answer': 'A robe takes 2 bolts of blue fiber and 2 / 2 = 1 bolt of white fiber.\nIn total, a robe takes 2 + 1 = <<2+1=3>>3 bolts.\n#### 3'}{'answer': "Janet uses 3 + 4 = <<3+4=7>>7 eggs every day.\nShe has 16 - 7 = <<16-7=9>>9 eggs left to sell.\nShe makes 9 x 2 = $<<9*2=18>>18 every day at the farmers' market.\n#### 18"}
View the evaluation results for experiment: 'zenbase-integrated-empowering-1f2afc8a' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/ab14f6ad-3e00-43b8-9ad6-3ce6bdb510f0/compare?selectedSessions=890ad3e1-45cf-4347-b1f1-208159e41cfc


0it [00:00, ?it/s]

{'answer': 'A robe takes 2 bolts of blue fiber and 2/2 = 1 bolt of white fiber.\nSo in total, it takes 2 + 1 = <<2+1=3>>3 bolts.\n#### 3'}{'answer': "Janet uses 3+4 = <<3+4=7>>7 eggs every day for breakfast and baking muffins.\nThe number of eggs left for selling is 16-7 = <<16-7=9>>9 eggs.\nSo, Janet makes 9 * 2 = $<<9*2=18>>18 every day at the farmers' market.\n#### 18"}
View the evaluation results for experiment: 'zenbase-integrated-3rdgeneration-4a9242d8' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/ab14f6ad-3e00-43b8-9ad6-3ce6bdb510f0/compare?selectedSessions=dae2283d-e808-4424-8e4e-90813a612609


0it [00:00, ?it/s]

{'answer': "Janet has 16-3-4=<<16-3-4=9>>9 eggs left to sell each day.\nShe makes 9 x $2 = $<<9*2=18>>18 every day at the farmers' market.\n#### 18"}
{'answer': 'It takes 2/2=<<2/2=1>>1 bolt of white fiber\nSo it takes 2+1=<<2+1=3>>3 bolts in total\n#### 3'}


In [9]:
from zenbase.optim.metric.bootstrap_few_shot import BootstrapFewShot

# LangSmith client and the Zenbase helper built around it
langsmith = Client()
langsmith_helper = ZenLangSmith(client=langsmith)

# Dataset names
TRAIN_SET = "GSM8K_train_set_langsmith_dataset_2iiXubwwzCNQo4yokckJiUn98UG"
TEST_SET = "GSM8K_test_set_langsmith_dataset_2iiXuiuySmWoINORDrPuvWHgbaT"
VALIDATION_SET = "GSM8K_validation_set_langsmith_dataset_2iiXukhQxsrsHiwRdOh3O9j43wQ"

# Optimiser settings
SHOTS = 2
SAMPLES = 2

# Fetch the three dataset splits through the helper.
train_set = langsmith_helper.fetch_dataset(dataset_name=TRAIN_SET)
test_set = langsmith_helper.fetch_dataset(dataset_name=TEST_SET)
validation_set = langsmith_helper.fetch_dataset(dataset_name=VALIDATION_SET)

# Evaluation settings handed to the optimiser (no `data` entry here; the
# dataset splits are passed to BootstrapFewShot separately).
evaluator_kwargs = {
    "evaluators": [score_answer],
    "client": langsmith,
    "max_concurrency": 1,
}

bootstrap_few_shot = BootstrapFewShot(
    shots=SHOTS,
    training_set_original=train_set,
    test_set_original=test_set,
    validation_set_original=validation_set,
    evaluator_kwargs=evaluator_kwargs,
    zen_adaptor=langsmith_helper,
)

# Run the optimiser on the traced LM function; the trace manager is passed
# along with it.
teacher_lm, candidates = bootstrap_few_shot.perform(
    openai_json_response,
    samples=SAMPLES,
    rounds=1,
    trace_manager=trace_manager,
)


View the evaluation results for experiment: 'zenbase-re-engineered-asynchronous-a3e123a2' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/10fd5ab6-4eb1-43a3-9a82-1b7d1c40300a/compare?selectedSessions=2e353811-914f-4baa-87f6-1b1fba363643


0it [00:00, ?it/s]

{'answer': 46}
{'answer': 3}
View the evaluation results for experiment: 'zenbase-progressive-fresh-thinking-559b9428' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/d8e98c14-0b6d-4380-bb1e-05a8f3b1bf34/compare?selectedSessions=dedb3ab1-8979-4c20-af9e-79edd837f3d8


0it [00:00, ?it/s]

{'answer': 'He buys 2 large pizzas * 16 slices per large pizza = <<2*16=32>>32 slices from the large pizzas\nHe buys 2 small pizzas * 8 slices per small pizza = <<2*8=16>>16 slices from the small pizzas\nSo, he eats 32+16 = <<32+16=48>>48 slices that day\n#### 48'}
{'answer': 'Initial weight of the box + jelly beans = 2\nTriple the weight after adding brownies = 2*3 = <<2*3=6>>6\nAdd another 2 pounds of jelly beans = 6+2 = <<6+2=8>>8\nDouble the weight after adding gummy worms = 8*2 = <<8*2=16>>16\n#### 16'}
View the evaluation results for experiment: 'zenbase-advanced-didactic-0e817654' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/d8e98c14-0b6d-4380-bb1e-05a8f3b1bf34/compare?selectedSessions=fb4bc277-f801-47ee-9d75-a1a6e7063990


0it [00:00, ?it/s]

{'answer': 'Two large pizzas have 2 x 16 = <<2*16=32>>32 slices.\nTwo small pizzas have 2 x 8 = <<2*8=16>>16 slices.\nIn total, he eats 32 + 16 = <<32+16=48>>48 slices.\n#### 48'}
{'answer': 'Initially, the box weighed 2 pounds.\nAfter adding brownies, the weight tripled to 2 x 3 = 6 pounds.\nThen 2 more pounds of jelly beans were added, making the weight 6 + 2 = 8 pounds.\nFinally, gummy worms are added to double the weight to 8 x 2 = 16 pounds.\n#### 16'}
View the evaluation results for experiment: 'zenbase-fully-configurable-content-based-9e65ed69' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/571de8fd-a35b-4524-b03f-6463f3362552/compare?selectedSessions=b1e021b6-376b-482c-bc83-41c2f1e063c4


0it [00:00, ?it/s]

{'answer': '72'}
{'answer': 'Weng earns $12/60 = $<<12/60=0.2>>0.2 per minute.\nFor 50 minutes, she earned 50*0.2 = <<50*0.2=10>>10\n#### 10'}
{'answer': 'Betty needs 100/2 = <<100/2=50>>50 dollars for the wallet.\nHer grandparents give her 15*2 = <<15*2=30>>30 dollars.\nIn total, Betty will have 50+15+30 = <<50+15+30=95>>95 dollars.\nSo, she still needs 100-95 = <<100-95=5>>5 dollars to buy the wallet.\n#### 5'}
{'answer': 'Today she read 12*2=<<12*2=24>>24 pages.\nJulie has read 12+24=<<12+24=36>>36 pages so far.\nShe still needs to read 120-36=<<120-36=84>>84 pages.\nTo read half of the remaining pages, she needs to read 84/2=<<84/2=42>>42 pages tomorrow.\n#### 42'}
{'answer': '624'}
View the evaluation results for experiment: 'zenbase-progressive-24/7-d5c353bf' at:
https://smith.langchain.com/o/f145e0fe-631c-5153-984d-08acb624f83e/datasets/10fd5ab6-4eb1-43a3-9a82-1b7d1c40300a/compare?selectedSessions=166d4c01-5320-420e-9d3a-94f3797a548e


0it [00:00, ?it/s]

{'answer': "Janet has 16-3-4 = 9 eggs left to sell.\nShe makes 9*$2 = $18 every day at the farmers' market.\n#### 18"}
{'answer': '3'}


In [10]:
# Now you can use your zenbase fn: call the function returned by
# `bootstrap_few_shot.perform` directly with a plain inputs dict.
teacher_lm({"question": "If I have 30% of shares, and Mo has 24.5% of shares, how many of our 10M shares are unassigned?"})

{'answer': '45'}

In [11]:
# Replace the recorded traces with an empty dict, so that whatever the next
# call records is the only entry.
trace_manager.all_traces = {}

In [12]:
# Call the zenbase fn again with the same question; the traces of this call
# are inspected in the cells that follow.
teacher_lm({"question": "If I have 30% of shares, and Mo has 24.5% of shares, how many of our 10M shares are unassigned?"})

{'answer': '31.5%'}

In [13]:
# Take the first recorded trace entry and keep its "optimized" section.
function_traces = list(trace_manager.all_traces.values())[0]["optimized"]

In [14]:
from pprint import pprint

# Show the task demos carried by the request that `openai_json_response_2`
# received during the traced call.
pprint(function_traces["openai_json_response_2"]["args"]["request"].zenbase.task_demos)


[LMDemo(inputs={'question': 'Natalia sold clips to 48 of her friends in April, '
                            'and then she sold half as many clips in May. How '
                            'many clips did Natalia sell altogether in April '
                            'and May?'},
        outputs={'answer': '72'},
        original_object=None),
 LMDemo(inputs={'question': 'Weng earns $12 an hour for babysitting. '
                            'Yesterday, she just did 50 minutes of '
                            'babysitting. How much did she earn?'},
        outputs={'answer': '10'},
        original_object=None),
 LMDemo(inputs={'question': 'Betty is saving money for a new wallet which '
                            'costs $100. Betty has only half of the money she '
                            'needs. Her parents decided to give her $15 for '
                            'that purpose, and her grandparents twice as much '
                            'as her parents. How much more money do

In [17]:
from pprint import pprint

# Show the task demos carried by the request that `openai_json_response`
# received during the traced call.
pprint(function_traces["openai_json_response"]["args"]["request"].zenbase.task_demos)

[LMDemo(inputs={'question': 'Natalia sold clips to 48 of her friends in April, '
                            'and then she sold half as many clips in May. How '
                            'many clips did Natalia sell altogether in April '
                            'and May?'},
        outputs={'answer': '72'},
        original_object=None),
 LMDemo(inputs={'question': 'Weng earns $12 an hour for babysitting. '
                            'Yesterday, she just did 50 minutes of '
                            'babysitting. How much did she earn?'},
        outputs={'answer': 'She earns $12/60 = $0.2 per minute\n'
                           'Therefore, for 50 minutes, she earns 50*$0.2 = $10 '
                           'yesterday.\n'
                           '#### 10'},
        original_object=None),
 LMDemo(inputs={'question': 'Betty is saving money for a new wallet which '
                            'costs $100. Betty has only half of the money she '
                            'ne

In [19]:
# Write the optimiser arguments to a file, then load them back together with
# the LM function and the trace manager.
optimizer_args_path = "bootstrap_few_shot_args.zenbase"
bootstrap_few_shot.save_optimizer_args(optimizer_args_path)

# NOTE(review): `optimized_function` is not used by any later cell shown here.
optimized_function = bootstrap_few_shot.load_optimizer_and_function(
    optimizer_args_path,
    openai_json_response,
    trace_manager,
)

In [20]:
from pprint import pprint

# Empty the trace store, make one call, then read back what that call recorded.
# NOTE(review): this calls `teacher_lm`, not the `optimized_function` loaded in
# the previous cell — confirm which one is meant to be checked here.
trace_manager.all_traces = {}
teacher_lm({"question": "If I have 30% of shares, and Mo has 24.5% of shares, how many of our 10M shares are unassigned?"})
function_traces = list(trace_manager.all_traces.values())[0]["optimized"]

# Task demos seen by the outer function, then by the inner one.
pprint(function_traces["openai_json_response"]["args"]["request"].zenbase.task_demos)
pprint(function_traces["openai_json_response_2"]["args"]["request"].zenbase.task_demos)


[LMDemo(inputs={'question': 'Natalia sold clips to 48 of her friends in April, '
                            'and then she sold half as many clips in May. How '
                            'many clips did Natalia sell altogether in April '
                            'and May?'},
        outputs={'answer': '72'},
        original_object=None),
 LMDemo(inputs={'question': 'Weng earns $12 an hour for babysitting. '
                            'Yesterday, she just did 50 minutes of '
                            'babysitting. How much did she earn?'},
        outputs={'answer': 'She earns $12/60 = $0.2 per minute\n'
                           'Therefore, for 50 minutes, she earns 50*$0.2 = $10 '
                           'yesterday.\n'
                           '#### 10'},
        original_object=None),
 LMDemo(inputs={'question': 'Betty is saving money for a new wallet which '
                            'costs $100. Betty has only half of the money she '
                            'ne