# Import the Zenbase Library

In [None]:
import sys
import subprocess

def install_package(package):
    """Install ``package`` with pip, using the interpreter running this notebook.

    Runs ``<sys.executable> -m pip install <package>`` as a subprocess.
    If pip exits with a non-zero status, the failure is printed and the
    original ``subprocess.CalledProcessError`` is re-raised.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as error:
        print(f"Failed to install {package}: {error}")
        raise

def install_packages(packages):
    """Install each entry of ``packages``, in order, via ``install_package``.

    An exception raised by ``install_package`` propagates, so the remaining
    entries are not attempted after a failure.
    """
    for requirement in packages:
        install_package(requirement)

# Treat a successful import of `google.colab` as the signal that this notebook
# is running on Google Colab.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

if IN_COLAB:
    # On Colab, install zenbase from the `main` branch of the GitHub repository
    # (the `py` subdirectory). To use the released package instead:
    # install_package('zenbase')
    install_package('git+https://github.com/zenbase-ai/lib.git@main#egg=zenbase&subdirectory=py')

    # Other packages this notebook needs on Colab.
    additional_packages = [
        'python-dotenv',
        'lunary',
        'openai',
        'langchain',
        'langchain_openai'
    ]

    install_packages(additional_packages)

# Import zenbase. If the import fails, print the error and re-raise it so the
# notebook stops here instead of failing later with a less obvious message.
try:
    import zenbase
except ImportError as e:
    print("Failed to import zenbase: ", e)
    raise

# Configure the Environment

In [None]:
from pathlib import Path
from dotenv import load_dotenv

# Alternative to the .env file: set the keys directly in the environment.
#
# import os
#
# os.environ["OPENAI_API_KEY"] = "..."
# os.environ["LUNARY_PUBLIC_KEY"] = "..."

# Load variables from ../../.env.test (the path is relative to the current
# working directory). override=True lets values from the file replace
# variables that are already set in the environment.
load_dotenv(Path("../../.env.test"), override=True)

In [None]:
import nest_asyncio

# Patch asyncio so an already-running event loop can be re-entered.
# NOTE(review): presumably needed because the notebook kernel runs its own
# loop — confirm which later call depends on this.
nest_asyncio.apply()

# Initial Setup

In [None]:
from openai import OpenAI
import lunary

# Create an OpenAI client (presumably configured from the environment, e.g.
# OPENAI_API_KEY) and register it with Lunary for monitoring.
# NOTE(review): later cells run `import openai`, which rebinds the name
# `openai` from this client instance to the module. Confirm that calls made
# through the module are still monitored by Lunary.
openai = OpenAI()
lunary.monitor(openai)

# Now, you probably already have some LLM code.

It could use the OpenAI SDK, LangChain, or anything really. But it looks something like this:

In [None]:
import json
from typing import Dict

import openai

def solver(inputs: str) -> str:
    """Answer a math question using a plan and an operation found by helper calls.

    First asks ``planner_chain`` for a plan, then ``operation_finder`` for the
    operation, and finally sends the question, plan and operation to the
    chat model in JSON mode.

    Args:
        inputs: The question text.

    Returns:
        The value stored under ``"answer"`` in the model's JSON reply.

    Raises:
        KeyError: If the plan, operation or answer key is missing from a reply.
    """
    plan = planner_chain(inputs)
    operation = operation_finder({"plan": plan["plan"], "question": inputs})

    system_prompt = {
        "role": "system",
        "content": "You are an expert math solver. Provide the numerical answer to the given question based on the provided plan and operation. Return only the number as a JSON object in the format: {\"answer\": \"<number>\"}"
    }
    user_turns = [
        {"role": "user", "content": f"Question: {inputs}"},
        {"role": "user", "content": f"Plan: {plan['plan']}"},
        {"role": "user", "content": f"Mathematical Operation needed: {operation['operation']}"},
        {"role": "user", "content": "Provide only the numerical answer."},
    ]

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_prompt, *user_turns],
        response_format={"type": "json_object"},
    )

    # The reply content is a JSON document; only its "answer" field is returned.
    parsed_reply = json.loads(completion.choices[0].message.content)
    return parsed_reply["answer"]

def planner_chain(inputs: str) -> Dict[str, str]:
    """Ask the chat model for a step-by-step plan to solve a question.

    Args:
        inputs: The question text, sent as the single user message.

    Returns:
        The model's JSON reply parsed into a dict. The system prompt requests
        the shape ``{"plan": ...}``, but the shape is not validated here.
    """
    system_prompt = {
        "role": "system",
        "content": "You are an expert math solver. Create a step-by-step plan to solve the given question. Return the plan as a JSON object in the format: {\"plan\": \"<step-by-step plan>\"}"
    }
    question_turn = {"role": "user", "content": inputs}

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_prompt, question_turn],
        response_format={"type": "json_object"},
    )

    raw_json = completion.choices[0].message.content
    return json.loads(raw_json)

def operation_finder(inputs: Dict[str, str]) -> Dict[str, str]:
    """Ask the chat model which basic operation a question's plan requires.

    Args:
        inputs: Dict with the keys ``"question"`` and ``"plan"``; each value
            is sent to the model as its own user message.

    Returns:
        The model's JSON reply parsed into a dict. The system prompt requests
        the shape ``{"operation": ...}``, but the shape is not validated here.

    Raises:
        KeyError: If ``inputs`` lacks ``"question"`` or ``"plan"``.
    """
    system_prompt = {
        "role": "system",
        "content": "You are an expert math solver. Identify the primary mathematical operation needed to solve the problem based on the given plan. Use simple operations like addition, subtraction, multiplication, or division. Return the operation as a JSON object in the format: {\"operation\": \"<operation>\"}"
    }
    question_turn = {"role": "user", "content": f"Question: {inputs['question']}"}
    plan_turn = {"role": "user", "content": f"Plan: {inputs['plan']}"}

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[system_prompt, question_turn, plan_turn],
        response_format={"type": "json_object"},
    )

    raw_json = completion.choices[0].message.content
    return json.loads(raw_json)

Test the function to check that it works:

In [None]:
# Smoke test: runs the whole chain (planner, operation finder, solver) on a
# trivial question, which makes three live chat-completion requests.
solver("What is 2 + 2?")

## Then you're probably evaluating like this

In [None]:
import lunary 
# Baseline evaluation: run `solver` on every item of the Lunary dataset and
# score each answer with the "exact-match" checklist.
evalset = lunary.get_dataset("gsm8k-evalset")

scores = []
for item in evalset:
    answer = solver(item.input)
    # Only the text after the last "#### " marker of the reference solution is
    # used as the expected output (the GSM8K final-answer convention).
    passed, results = lunary.evaluate(
        "exact-match",
        input=item.input,
        output=answer,
        ideal_output=item.ideal_output.split("#### ")[-1],
    )
    # Record 1 for a pass and 0 for a failure so the mean is the pass rate.
    scores.append(int(passed))

# Guard the average: an empty dataset would otherwise raise ZeroDivisionError.
if scores:
    print("Average score", sum(scores) / len(scores))
else:
    print("Average score: n/a (the evaluation set is empty)")

# Now, how can we optimize this score?

## First, initialize the `ZenbaseTracer` and `ZenLunary` objects

In [None]:
from zenbase.core.managers import ZenbaseTracer
from zenbase.adaptors.lunary import ZenLunary

# Tracer used below as a decorator on the LLM functions and passed to the
# optimizer as `trace_manager`.
zenbase_tracer = ZenbaseTracer()
# Adaptor built around the `lunary` module; handed to BootstrapFewShot as
# `zen_adaptor` and used to fetch the datasets.
lunary_adaptor = ZenLunary(client=lunary)

In [None]:
import json
from typing import Dict, Any
from zenbase.types import LMRequest
import openai

@zenbase_tracer
def solver(request: LMRequest) -> str:
    """Traced solver: answer the question in ``request.inputs``.

    Builds the prompt from the system instructions, one user/assistant pair
    per demo in ``request.zenbase.task_demos``, and the question together
    with the plan and operation obtained from ``planner_chain`` and
    ``operation_finder``.

    NOTE(review): the helpers are declared to take an ``LMRequest`` but are
    called with plain values here; presumably the tracer decorator wraps the
    argument into a request — confirm against the zenbase documentation.

    Returns:
        The value stored under ``"answer"`` in the model's JSON reply.
    """
    instructions = (
        "You are an expert math solver. Analyze the given question, follow the provided step-by-step plan, "
        "and perform the specified mathematical operation. Your response should be a single number representing "
        "the final answer. Format your response as a JSON object: {\"answer\": \"[numerical result]\"}"
    )
    messages = [{"role": "system", "content": instructions}]

    # Few-shot examples: every available demo becomes a question/answer pair.
    for demo in request.zenbase.task_demos:
        messages.append({"role": "user", "content": f"Example Question: {demo.inputs}"})
        messages.append({"role": "assistant", "content": f"Example Answer: {demo.outputs}"})

    plan = planner_chain(request.inputs)
    operation = operation_finder({"plan": plan["plan"], "question": request.inputs})

    messages += [
        {"role": "user", "content": f"Question: {request.inputs}"},
        {"role": "user", "content": f"Plan: {plan['plan']}"},
        {"role": "user", "content": f"Required Operation: {operation['operation']}"},
        {"role": "user", "content": "Provide only the numerical answer in the specified JSON format."},
    ]

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        response_format={"type": "json_object"},
    )

    parsed_reply = json.loads(completion.choices[0].message.content)
    return parsed_reply["answer"]

@zenbase_tracer
def planner_chain(request: LMRequest) -> Dict[str, str]:
    """Traced planner: ask the model for a step-by-step plan.

    At most the first two demos in ``request.zenbase.task_demos`` are added
    as examples (demo input as the user turn, the demo output's ``"plan"``
    as the assistant turn) before the question in ``request.inputs``.

    Returns:
        The model's JSON reply parsed into a dict. The system prompt requests
        the shape ``{"plan": ...}``, but the shape is not validated here.
    """
    instructions = (
        "As an expert math solver, create a detailed step-by-step plan to solve the given problem. "
        "Your plan should be clear, concise, and easy to follow. "
        "Format your response as a JSON object: {\"plan\": \"[step-by-step plan]\"}"
    )
    messages = [{"role": "system", "content": instructions}]

    # An empty or missing demo collection contributes no examples.
    for demo in (request.zenbase.task_demos or [])[:2]:
        messages += [
            {"role": "user", "content": demo.inputs},
            {"role": "assistant", "content": demo.outputs["plan"]},
        ]

    messages.append({"role": "user", "content": request.inputs})

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        response_format={"type": "json_object"},
    )

    raw_json = completion.choices[0].message.content
    return json.loads(raw_json)

@zenbase_tracer
def operation_finder(request: LMRequest) -> Dict[str, str]:
    """Traced operation finder: ask the model which basic operation is needed.

    ``request.inputs`` is indexed with ``"question"`` and ``"plan"``. At most
    the first two demos in ``request.zenbase.task_demos`` are added as
    examples: question and plan as user turns, and the demo output's
    ``"operation"`` as the assistant turn.

    Returns:
        The model's JSON reply parsed into a dict. The system prompt requests
        the shape ``{"operation": ...}``, but the shape is not validated here.
    """
    instructions = (
        "Based on the given question and solution plan, identify the primary mathematical operation required. "
        "Focus on basic operations: addition, subtraction, multiplication, or division. "
        "Format your response as a JSON object: {\"operation\": \"[primary operation]\"}"
    )
    messages = [{"role": "system", "content": instructions}]

    # An empty or missing demo collection contributes no examples.
    for demo in (request.zenbase.task_demos or [])[:2]:
        messages += [
            {"role": "user", "content": f"Question: {demo.inputs['question']}"},
            {"role": "user", "content": f"Plan: {demo.inputs['plan']}"},
            {"role": "assistant", "content": demo.outputs["operation"]},
        ]

    messages += [
        {"role": "user", "content": f"Question: {request.inputs['question']}"},
        {"role": "user", "content": f"Plan: {request.inputs['plan']}"},
    ]

    completion = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        response_format={"type": "json_object"},
    )

    raw_json = completion.choices[0].message.content
    return json.loads(raw_json)

## Now we can optimize!

### Set up your optimizer:

In [None]:
# Keyword arguments handed to BootstrapFewShot below as `evaluator_kwargs`.
evaluator_kwargs = dict(
    checklist="exact-match",
    concurrency=2,
)

# Lunary has no feature for creating datasets from code, so these datasets were
# created manually in the Lunary UI. To replicate this notebook yourself, create
# datasets with the names below and fill them with GSM8K examples.
# NOTE(review): "gsmk8k" in TRAIN_SET looks like a typo for "gsm8k". The name
# must match the dataset that actually exists in Lunary — confirm before
# changing it.
TRAIN_SET = "gsmk8k-train-set"
TEST_SET = "gsm8k-test-set"
VALIDATION_SET = "gsm8k-validation-set"

# Sanity check: fetching each dataset through the adaptor must not return None.
assert lunary_adaptor.fetch_dataset_demos(TRAIN_SET) is not None
assert lunary_adaptor.fetch_dataset_demos(TEST_SET) is not None
assert lunary_adaptor.fetch_dataset_demos(VALIDATION_SET) is not None

# SAMPLES is passed as `samples=` to perform() in the next cell; SHOTS is
# passed as `shots=` to BootstrapFewShot below.
SAMPLES = 2
SHOTS = 3

from zenbase.optim.metric.bootstrap_few_shot import BootstrapFewShot

# Optimizer configured with the three dataset names, the evaluator settings and
# the Lunary adaptor defined earlier.
bootstrap_few_shot = BootstrapFewShot(
    shots=SHOTS,
    training_set=TRAIN_SET,
    test_set=TEST_SET,
    validation_set=VALIDATION_SET,
    evaluator_kwargs=evaluator_kwargs,
    zen_adaptor=lunary_adaptor,
)

### Do the optimization

In [None]:
# Run the optimizer on the traced `solver`. perform() returns a pair; the first
# element is kept as `best_fn` and the second is discarded.
best_fn, _ = bootstrap_few_shot.perform(
    solver,
    samples=SAMPLES,
    rounds=1,
    trace_manager=zenbase_tracer,
)

### Introspect evaluation improvement

You can see in this example that the best function has improved the evaluation score by 50%.

In [None]:
# Display the `evals` recorded on the optimizer's base evaluation
# (presumably the unoptimized function — confirm).
bootstrap_few_shot.base_evaluation.evals

In [None]:
# Display the `evals` recorded on the optimizer's best evaluation, for
# comparison with the base evaluation.
bootstrap_few_shot.best_evaluation.evals

### Use your optimized function


In [None]:
# NOTE(review): presumably clears the traces collected so far, so that the
# next call's trace is the first entry in `all_traces` — confirm.
zenbase_tracer.flush()

In [None]:
# Call the function returned by the optimizer on a new question.
best_fn("If I have 30% of shares, and Mo has 24.5% of shares, how many of our 10M shares are unassigned?")

### Introspect function traces

In [None]:
# Take the first recorded trace entry and keep its "optimized" section.
function_traces = list(zenbase_tracer.all_traces.values())[0]["optimized"]

### Check the optimized parameters for solver


In [None]:
from pprint import pprint

# Demos attached to the request that the traced `solver` call received.
pprint(function_traces["solver"]["args"]["request"].zenbase.task_demos)


### Check the optimized parameters for planner_chain


In [None]:
from pprint import pprint

# Demos attached to the request that the traced `planner_chain` call received.
pprint(function_traces["planner_chain"]["args"]["request"].zenbase.task_demos)

### Check the optimized parameters for operation_finder

In [None]:
from pprint import pprint

# Demos attached to the request that the traced `operation_finder` call received.
pprint(function_traces["operation_finder"]["args"]["request"].zenbase.task_demos)

## How to save the function and load it later


### Save the optimized function args to a file


In [None]:
# Write the optimizer's arguments to a file (path relative to the working
# directory) so they can be loaded again in the next step.
bootstrap_few_shot.save_optimizer_args("bootstrap_few_shot_args.zenbase")


### Load the optimized function args with the function

In [None]:
# Load the saved arguments from the file and combine them with `solver` and
# the tracer; the result is called like `solver` in the next cell.
optimized_function = bootstrap_few_shot.load_optimizer_and_function("bootstrap_few_shot_args.zenbase", solver, zenbase_tracer)


### Use the loaded function and make sure it loaded the demos.


In [None]:
# Flush the tracer, call the loaded function once, then print the demos that
# each traced function received during that call.
zenbase_tracer.flush()
optimized_function("If I have 30% of shares, and Mo has 24.5% of shares, how many of our 10M shares are unassigned?")
function_traces = list(zenbase_tracer.all_traces.values())[0]["optimized"]
from pprint import pprint

for traced_name in ("solver", "planner_chain", "operation_finder"):
    pprint(function_traces[traced_name]["args"]["request"].zenbase.task_demos)
