# Import the Zenbase Library

In [1]:
import sys
import subprocess

def install_package(package):
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
    except subprocess.CalledProcessError as e:
        print(f"Failed to install {package}: {e}")
        raise

def install_packages(packages):
    for package in packages:
        install_package(package)

try:
    # Check if running in Google Colab
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    # Install the zenbase package if running in Google Colab
    # install_package('zenbase')
    # Install the zenbse package from a GitHub branch if running in Google Colab
    install_package('git+https://github.com/zenbase-ai/lib.git@main#egg=zenbase&subdirectory=py')

    # List of other packages to install in Google Colab
    additional_packages = [
        'python-dotenv',
        'langfuse',
        'openai',
        'langchain',
        'langchain_openai'
    ]
    
    # Install additional packages
    install_packages(additional_packages)

# Now import the zenbase library
try:
    import zenbase
except ImportError as e:
    print("Failed to import zenbase: ", e)
    raise

# Configure the Environment

In [2]:
from pathlib import Path
from dotenv import load_dotenv

# import os
#
# os.environ["OPENAI_API_KEY"] = "..."
# os.environ["LANGFUSE_HOST"] = "..."
# os.environ["LANGFUSE_PUBLIC_KEY"] = "..."
# os.environ["LANGFUSE_SECRET_KEY"] = "..."

load_dotenv(Path("../../.env.test"), override=True)

True

In [3]:
import nest_asyncio

nest_asyncio.apply()

# Initial Setup


In [4]:
from openai import OpenAI
from langfuse import Langfuse

langfuse = Langfuse()
langfuse.auth_check()

openai = OpenAI()

# Now, you probably already have some LLM code.

It could use the OpenAI SDK, LangChain, or anything really. But it looks something like this:

In [5]:
from langfuse.decorators import observe

@observe()
def solver(inputs):
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI

    messages = [
        (
            "system",
            "You are an expert math solver. "
            "You have an question that you should answer, "
            "You have step by step actions that you should take to solve the problem."
            "You have the opertaions that you should do to solve the problem"
            "You should come just with the number for the answer, just the actual number like examples that you have."  # noqa
            ""
            ""
            "Follow the format of the examples as they have the final answer, you need to came up to the plan for solving them.",  # noqa
        )
    ]

    messages.append(("user", "Question: {question}"))
    messages.append(("user", "Plan: {plan}"))
    messages.append(("user", "Mathematical Operation that needed: {operation}"))
    messages.append(
        ("user", "Now come with the answer as number, just return the number, nothing else, just NUMBERS.")
    )

    chain = ChatPromptTemplate.from_messages(messages) | ChatOpenAI(model="gpt-3.5-turbo") | StrOutputParser()

    plan = planner_chain(inputs)
    the_plan = plan["plan"]
    the_operation = operation_finder(
        {
            "plan": the_plan,
            "question": inputs["question"],
        }
    )
    inputs_to_answer = {
        "question": inputs["question"],
        "plan": the_plan,
        "operation": the_operation["operation"],
    }
    answer = chain.invoke(inputs_to_answer)
    return {"answer": answer}

@observe()
def planner_chain(inputs):
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI

    messages = [
        (
            "system",
            "You are an expert math solver. You have an question that you should create step-by-step plan to solve it. "  # noqa
            "Follow the format of the examples.",
            # noqa
        )
    ]

    messages.append(("user", "{question}"))

    chain = ChatPromptTemplate.from_messages(messages) | ChatOpenAI(model="gpt-3.5-turbo") | StrOutputParser()

    answer = chain.invoke(inputs)
    return {"plan": answer}

@observe()
def operation_finder(inputs):
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI

    messages = [
        (
            "system",
            "You are an expert math solver. You have a plan for solve a problem that is step-by-step, you need to find the overal operation in the math to solve it. "  # noqa
            "Just come up with math operation with simple match operations like sum, multiply, division and minus. "
            ""
            "Follow the format of the examples.",
            # noqa
        )
    ]
    messages.append(("user", "{question}"))
    messages.append(("user", "{plan}"))

    chain = ChatPromptTemplate.from_messages(messages) | ChatOpenAI(model="gpt-3.5-turbo") | StrOutputParser()

    answer = chain.invoke(inputs)
    return {"operation": answer}



## And let's say you have an eval function like this

In [6]:
def score_answer(answer: str, expected_output: dict):
    """The first argument is the return value from the `langchain_chain` function above."""
    score = int(answer['answer'] == expected_output.split("#### ")[-1])
    langfuse.score(
        name="correctness",
        value=score,
        trace_id=langfuse.get_trace_id(),
    )
    return {"score": score}


## Then you're probably evaluating like this

In [7]:
evalset = langfuse.get_dataset("gsm8k-testset")

scores = []
for item in evalset.items:
    answer = solver(item.input)
    eval = score_answer(answer, item.expected_output)
    scores.append(eval["score"])

print("Average score", sum(scores) / len(scores))

Average score 0.6


 # Now, how can we optimize this score of 0.6?

## First, initialize the Zenbase ZenbaseTracer and import the Langfuse helper

In [9]:
from zenbase.adaptors.langfuse_helper import ZenLangfuse
from zenbase.core.managers import ZenbaseTracer

zen_langfuse_adaptor = ZenLangfuse(langfuse)
zenbase_tracer = ZenbaseTracer()


## Hook up Zenbase to your functions

1. Use the `zenbase_tracer` decorator.
2. Change function inputs to request
3. Use request's `zenbase.task_demos` to get the few-shot examples for the task and add them however you would like into your prompt.
4. If you need to use just a few examples, you can use `request.zenbase.task_demos[:2]` to get the first two examples.

In [27]:
from zenbase.types import LMRequest, LMDemo

@zenbase_tracer # it is 1
@observe()
def solver(request: LMRequest): # it is 2
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI

    messages = [
        (
            "system",
            "You are an expert math solver. "
            "You have an question that you should answer, "
            "You have step by step actions that you should take to solve the problem."
            "You have the operations that you should do to solve the problem"
            "You should come just with the number for the answer, just the actual number like examples that you have."  # noqa
            ""
            ""
            "Follow the format of the examples as they have the final answer, you need to came up to the plan for solving them.",  # noqa
        )
    ]
    for demo in request.zenbase.task_demos: # it is 3 and 4
        if isinstance(demo.outputs, dict):
            the_output = demo.outputs["answer"]
        else:
            the_output = demo.outputs

        messages += [
            ("user", f'Example Question: {demo.inputs["question"]}'),
            ("assistant", f"Example Answer: {the_output}"),
        ]

    messages.append(("user", "Question: {question}"))
    messages.append(("user", "Plan: {plan}"))
    messages.append(("user", "Mathematical Operation that needed: {operation}"))
    messages.append(
        ("user", "Now come with the answer as number, just return the number, nothing else, just NUMBERS.")
    )

    chain = ChatPromptTemplate.from_messages(messages) | ChatOpenAI(model="gpt-3.5-turbo") | StrOutputParser()

    plan = planner_chain(request.inputs)
    the_plan = plan["plan"]
    the_operation = operation_finder(
        {
            "plan": the_plan,
            "question": request.inputs["question"],
        }
    )
    inputs_to_answer = {
        "question": request.inputs["question"],
        "plan": the_plan,
        "operation": the_operation["operation"],
    }
    answer = chain.invoke(inputs_to_answer)
    return {"answer": answer}

@zenbase_tracer # it is 1
@observe()
def planner_chain(request: LMRequest): # it is 2
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI

    messages = [
        (
            "system",
            "You are an expert math solver. You have an question that you should create step-by-step plan to solve it. "  # noqa
            "Follow the format of the examples.",
            # noqa
        )
    ]
    if request.zenbase.task_demos: # it is 3
        for demo in request.zenbase.task_demos[:2]: # it is 4
            messages += [
                ("user", demo.inputs["question"]),
                ("assistant", demo.outputs["plan"]),
            ]

    messages.append(("user", "{question}"))

    chain = ChatPromptTemplate.from_messages(messages) | ChatOpenAI(model="gpt-3.5-turbo") | StrOutputParser()

    answer = chain.invoke(request.inputs)
    return {"plan": answer}

@zenbase_tracer # it is 1
@observe()
def operation_finder(request: LMRequest): # it is 2 
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI

    messages = [
        (
            "system",
            "You are an expert math solver. You have a plan for solve a problem that is step-by-step, you need to find the overal operation in the math to solve it. "  # noqa
            "Just come up with math operation with simple match operations like sum, multiply, division and minus. "
            ""
            "Follow the format of the examples.",
            # noqa
        )
    ]
    if request.zenbase.task_demos: # it is 3
        for demo in request.zenbase.task_demos[:2]: # it is 4
            messages += [
                ("user", demo.inputs["question"]),
                ("user", demo.inputs["plan"]),
                ("assistant", demo.outputs["operation"]),
            ]

    messages.append(("user", "{question}"))
    messages.append(("user", "{plan}"))

    chain = ChatPromptTemplate.from_messages(messages) | ChatOpenAI(model="gpt-3.5-turbo") | StrOutputParser()

    answer = chain.invoke(request.inputs)
    return {"operation": answer}


## For Langfuse, we have to update our eval function a little bit

In [28]:
def score_answer_with_json(answer: str, demo: LMDemo, langfuse: Langfuse):
    """The first argument is the return value from the `langchain_chain` function above."""
    score = int(answer["answer"] == demo.outputs.split("#### ")[-1])
    langfuse.score(
        name="correctness",
        value=score,
        trace_id=langfuse.get_trace_id(),
    )
    return {"score": score}

## Now we can optimize!

### Set up your optimizer:

In [29]:
from zenbase.optim.metric.bootstrap_few_shot import BootstrapFewShot

train_set = 'GSM8K_train_set_langsmith_dataset_2it3BpoNmwfYa5Nvk6dRButWA56'
validation_set = 'GSM8K_validation_set_langsmith_dataset_2it1pPsf4w75FJ82v5BwweOxnS4'
test_set = 'GSM8K_test_set_langsmith_dataset_2it1pZxHYNfqO8wHTsRP7NiUi1e'
SHOTS = 2
SAMPLES = 2


evaluator_kwargs = dict(
    evaluate=score_answer_with_json,
)


bootstrap_few_shot = BootstrapFewShot(
    shots=SHOTS,
    training_set=train_set,
    test_set=test_set,
    validation_set=validation_set,
    evaluator_kwargs=evaluator_kwargs,
    zen_adaptor=zen_langfuse_adaptor,
)


### Do the optimization

In [31]:
# Empty the traces
zenbase_tracer.all_traces = {}
# Run the optimization
best_fn, candidates = bootstrap_few_shot.perform(
    solver,
    samples=SAMPLES,
    rounds=1,
    trace_manager=zenbase_tracer,
)

### Use your optimized function


In [32]:
zenbase_tracer.all_traces = {}
best_fn({"question": "What is 2 + 2?"})

{'answer': '4'}

### Introspect function traces

In [33]:
function_traces = [v for k, v in zenbase_tracer.all_traces.items()][0]["optimized"]


### Check the optimized parameters for planner_chain

In [34]:
from pprint import pprint

pprint(function_traces["planner_chain"]["args"]["request"].zenbase.task_demos)


[LMDemo(inputs={'question': 'James writes a 3-page letter to 2 different '
                            'friends twice a week.  How many pages does he '
                            'write a year?'},
        outputs={'plan': 'To find the total number of pages James writes in a '
                         'year, we first need to calculate how many pages he '
                         'writes in a week and then multiply that by the '
                         'number of weeks in a year.\n'
                         '\n'
                         '1. Calculate how many pages James writes in a week:\n'
                         '   - James writes a 3-page letter to 2 different '
                         'friends twice a week.\n'
                         '   - He writes 3 pages * 2 friends = 6 pages per '
                         'letter.\n'
                         '   - Since he writes two letters a week, he writes 6 '
                         'pages * 2 = 12 pages per week.\n'
                  

### Check the optimized parameters for operation_finder chain

In [35]:
from pprint import pprint

pprint(function_traces["operation_finder"]["args"]["request"].zenbase.task_demos)


[LMDemo(inputs={'plan': 'To find the total number of pages James writes in a '
                        'year, we first need to calculate how many pages he '
                        'writes in a week and then multiply that by the number '
                        'of weeks in a year.\n'
                        '\n'
                        '1. Calculate how many pages James writes in a week:\n'
                        '   - James writes a 3-page letter to 2 different '
                        'friends twice a week.\n'
                        '   - He writes 3 pages * 2 friends = 6 pages per '
                        'letter.\n'
                        '   - Since he writes two letters a week, he writes 6 '
                        'pages * 2 = 12 pages per week.\n'
                        '\n'
                        '2. Calculate how many pages James writes in a year:\n'
                        '   - There are 52 weeks in a year.\n'
                        '   - Total pages written in a y

### Check the optimized parameters for solver

In [36]:
from pprint import pprint

pprint(function_traces["solver"]["args"]["request"].zenbase.task_demos)


[LMDemo(inputs={'question': 'James writes a 3-page letter to 2 different '
                            'friends twice a week.  How many pages does he '
                            'write a year?'},
        outputs={'answer': '624'},
        adaptor_object=None),
 LMDemo(inputs={'question': 'Julie is reading a 120-page book. Yesterday, she '
                            'was able to read 12 pages and today, she read '
                            'twice as many pages as yesterday. If she wants to '
                            'read half of the remaining pages tomorrow, how '
                            'many pages should she read?'},
        outputs={'answer': '42'},
        adaptor_object=None),
 LMDemo(inputs={'question': 'Natalia sold clips to 48 of her friends in April, '
                            'and then she sold half as many clips in May. How '
                            'many clips did Natalia sell altogether in April '
                            'and May?'},
        outputs=