In [1]:
import sys
import subprocess

def install_package(package):
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", package])
    except subprocess.CalledProcessError as e:
        print(f"Failed to install {package}: {e}")
        raise

def install_packages(packages):
    for package in packages:
        install_package(package)

try:
    # Check if running in Google Colab
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

if IN_COLAB:
    # Install the zenbase package if running in Google Colab
    # install_package('zenbase')
    # Install the zenbse package from a GitHub branch if running in Google Colab
    install_package('git+https://github.com/zenbase-ai/lib.git@main#egg=zenbase&subdirectory=py')

    # List of other packages to install in Google Colab
    additional_packages = [
        'python-dotenv',
        'lunary',
        'openai',
        'langchain',
        'langchain_openai'
    ]
    
    # Install additional packages
    install_packages(additional_packages)

# Now import the zenbase library
try:
    import zenbase
except ImportError as e:
    print("Failed to import zenbase: ", e)
    raise

In [2]:
from pathlib import Path
from dotenv import load_dotenv

# import os
#
# os.environ["OPENAI_API_KEY"] = "..."
# os.environ["LUNARY_PUBLIC_KEY"] = "..."

load_dotenv(Path("../../.env.test"), override=True)

True

In [3]:
import nest_asyncio

nest_asyncio.apply()

In [4]:
from openai import OpenAI
import lunary

openai = OpenAI()
lunary.monitor(openai)

In [5]:
from zenbase.core.managers import TraceManager
zenbase_manager = TraceManager()

In [6]:

import json
from zenbase.types import LMRequest


@zenbase_manager.trace_function
# @retry(
#     stop=stop_after_attempt(3),
#     wait=wait_exponential_jitter(max=8),
#     before_sleep=before_sleep_log(log, logging.WARN),
# )
def solver(request: LMRequest):
    messages = [
        {
            "role": "system",
            "content": "You are an expert math solver. You have a question that you should answer. You have step by step actions that you should take to solve the problem. You have the operations that you should do to solve the problem. You should come just with the number for the answer, just the actual number like examples that you have. Follow the format of the examples as they have the final answer, you need to come up with the plan for solving them."  # noqa
            'return it with json like return it in the {"answer": " the answer "}',
        }
    ]

    for demo in request.zenbase.task_demos:
        messages += [
            {"role": "user", "content": f"Example Question: {demo.inputs}"},
            {"role": "assistant", "content": f"Example Answer: {demo.outputs}"},
        ]

    plan = planner_chain(request.inputs)
    the_plan = plan["plan"]
    # the_plan = 'plan["plan"]'
    the_operation = operation_finder(
        {
            "plan": the_plan,
            "question": request.inputs,
        }
    )
    # the_operation = {"operation": "operation_finder"}

    messages.append({"role": "user", "content": f"Question: {request.inputs}"})
    messages.append({"role": "user", "content": f"Plan: {the_plan}"})
    messages.append(
        {"role": "user", "content": f"Mathematical Operation that needed: {the_operation['operation']}"}
    )
    messages.append(
        {
            "role": "user",
            "content": "Now come with the answer as number, just return the number, nothing else, just NUMBERS.",
        }
    )

    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        response_format={"type": "json_object"},
    )

    print("Mathing...")
    answer = json.loads(response.choices[0].message.content)
    return answer["answer"]

@zenbase_manager.trace_function
# @retry(
#     stop=stop_after_attempt(3),
#     wait=wait_exponential_jitter(max=8),
#     before_sleep=before_sleep_log(log, logging.WARN),
# )
def planner_chain(request: LMRequest):
    messages = [
        {
            "role": "system",
            "content": "You are an expert math solver. You have a question that you should create a step-by-step plan to solve it. Follow the format of the examples and return JSON object."  # noqa
            'return it in the {"plan": " the plan "}',
        }
    ]

    if request.zenbase.task_demos:
        for demo in request.zenbase.task_demos[:2]:
            messages += [
                {"role": "user", "content": demo.inputs},
                {"role": "assistant", "content": demo.outputs["plan"]},
            ]
    messages.append({"role": "user", "content": request.inputs})

    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        response_format={"type": "json_object"},
    )

    print("Planning...")
    answer = json.loads(response.choices[0].message.content)
    return {"plan": " ".join(i for i in answer["plan"])}

@zenbase_manager.trace_function
# @retry(
#     stop=stop_after_attempt(3),
#     wait=wait_exponential_jitter(max=8),
#     before_sleep=before_sleep_log(log, logging.WARN),
# )
def operation_finder(request: LMRequest):
    messages = [
        {
            "role": "system",
            "content": "You are an expert math solver. You have a plan for solving a problem that is step-by-step, you need to find the overall operation in the math to solve it. Just come up with math operation with simple math operations like sum, multiply, division and minus. Follow the format of the examples."  # noqa
            'return it with json like return it in the {"operation": " the operation "}',
        }
    ]

    if request.zenbase.task_demos:
        for demo in request.zenbase.task_demos[:2]:
            messages += [
                {"role": "user", "content": f"Question: {demo.inputs['question']}"},
                {"role": "user", "content": f"Plan: {demo.inputs['plan']}"},
                {"role": "assistant", "content": demo.outputs["operation"]},
            ]

    messages.append({"role": "user", "content": f"Question: {request.inputs['question']}"})
    messages.append({"role": "user", "content": f"Plan: {request.inputs['plan']}"})

    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        response_format={"type": "json_object"},
    )

    print("Finding operation...")
    answer = json.loads(response.choices[0].message.content)
    return {"operation": answer["operation"]}


In [7]:
from zenbase.adaptors.lunary import ZenLunary

lunary_helper = ZenLunary(client=lunary)

In [8]:
solver("What is 2 + 2?")

Planning...
Finding operation...
Mathing...


'4'

In [9]:
evalset = lunary.get_dataset("gsm8k-evalset")

scores = []
for item in evalset:
    answer = solver(item.input)
    passed, results = lunary.evaluate(
        "exact-match",
        input=item.input,
        output=answer,
        ideal_output=item.ideal_output.split("#### ")[-1],
    )
    scores.append(int(passed))

print("Average score", sum(scores) / len(scores))

Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Average score 0.6


In [10]:
evaluator_kwargs = dict(
    checklist="exact-match",
    concurrency=2,
)

In [11]:
# for lunary there is not feature to create dataset with code, so dataset are created
# manually with UI, if you want to replicate the test on your own, you should put
# GSM8K examples to dataset name like below:
TRAIN_SET = "gsmk8k-train-set"
TEST_SET = "gsm8k-test-set"
VALIDATION_SET = "gsm8k-validation-set"

assert lunary_helper.fetch_dataset_demos(TRAIN_SET) is not None
assert lunary_helper.fetch_dataset_demos(TEST_SET) is not None
assert lunary_helper.fetch_dataset_demos(VALIDATION_SET) is not None

In [12]:
SAMPLES = 2
SHOTS = 3

In [13]:
from zenbase.optim.metric.bootstrap_few_shot import BootstrapFewShot

bootstrap_few_shot = BootstrapFewShot(
    shots=SHOTS,
    training_set=TRAIN_SET,
    test_set=TEST_SET,
    validation_set=VALIDATION_SET,
    evaluator_kwargs=evaluator_kwargs,
    zen_adaptor=lunary_helper,
)

teacher_lm = bootstrap_few_shot.perform(
    solver,
    samples=SAMPLES,
    rounds=1,
    trace_manager=zenbase_manager,
)

Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...
Planning...
Finding operation...
Mathing...


In [17]:
bootstrap_few_shot.base_evaluation.evals

{'score': 1.0}

In [16]:
bootstrap_few_shot.best_evaluation.evals

{'score': 0.5}

In [19]:
# Now you can use your zenbase fn
teacher_lm.best_function("If I have 30% of shares, and Mo has 24.5% of shares, how many of our 10M shares are unassigned?")

Planning...
Finding operation...
Mathing...


'4550000'

In [28]:
zenbase_manager.all_traces = {}


In [30]:
# Now you can use your zenbase fn
teacher_lm.best_function("If I have 30% of shares, and Mo has 24.5% of shares, how many of our 10M shares are unassigned?")

Planning...
Finding operation...
Mathing...


'45.5'

In [33]:
function_traces = [v for k, v in zenbase_manager.all_traces.items()][0]["optimized"]

In [34]:
function_traces

{'planner_chain': {'args': {'request': LMRequest(zenbase=LMZenbase(task_demos=[LMDemo(inputs='Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?', outputs={'plan': '1 .   C a l c u l a t e   h a l f   t h e   n u m b e r   o f   c l i p s   s o l d   i n   A p r i l :   4 8   /   2   =   2 4   c l i p s   s o l d   i n   M a y .   2 .   A d d   t h e   n u m b e r   o f   c l i p s   s o l d   i n   A p r i l   a n d   M a y :   4 8   ( A p r i l )   +   2 4   ( M a y )   =   7 2   c l i p s   s o l d   a l t o g e t h e r   i n   A p r i l   a n d   M a y .'}, original_object=None), LMDemo(inputs='Weng earns $12 an hour for babysitting. Yesterday, she just did 50 minutes of babysitting. How much did she earn?', outputs={'plan': "T o   f i n d   o u t   h o w   m u c h   W e n g   e a r n e d   f o r   b a b y s i t t i n g   5 0   m i n u t e s ,   f i r s t   c o n v e r t   5 0  

In [35]:
from pprint import pprint

pprint(function_traces["solver"]["args"]["request"].zenbase.task_demos)


[LMDemo(inputs='Natalia sold clips to 48 of her friends in April, and then she '
               'sold half as many clips in May. How many clips did Natalia '
               'sell altogether in April and May?',
        outputs='72',
        original_object=None),
 LMDemo(inputs='Weng earns $12 an hour for babysitting. Yesterday, she just '
               'did 50 minutes of babysitting. How much did she earn?',
        outputs='10',
        original_object=None),
 LMDemo(inputs='Betty is saving money for a new wallet which costs $100. Betty '
               'has only half of the money she needs. Her parents decided to '
               'give her $15 for that purpose, and her grandparents twice as '
               'much as her parents. How much more money does Betty need to '
               'buy the wallet?',
        outputs=5,
        original_object=None)]


In [36]:
from pprint import pprint

pprint(function_traces["planner_chain"]["args"]["request"].zenbase.task_demos)

[LMDemo(inputs='Natalia sold clips to 48 of her friends in April, and then she '
               'sold half as many clips in May. How many clips did Natalia '
               'sell altogether in April and May?',
        outputs={'plan': '1 .   C a l c u l a t e   h a l f   t h e   n u m b '
                         'e r   o f   c l i p s   s o l d   i n   A p r i l '
                         ':   4 8   /   2   =   2 4   c l i p s   s o l d   i '
                         'n   M a y .   2 .   A d d   t h e   n u m b e r   o '
                         'f   c l i p s   s o l d   i n   A p r i l   a n d   '
                         'M a y :   4 8   ( A p r i l )   +   2 4   ( M a y '
                         ')   =   7 2   c l i p s   s o l d   a l t o g e t h '
                         'e r   i n   A p r i l   a n d   M a y .'},
        original_object=None),
 LMDemo(inputs='Weng earns $12 an hour for babysitting. Yesterday, she just '
               'did 50 minutes of babysitting. How much 

In [37]:
from pprint import pprint

pprint(function_traces["operation_finder"]["args"]["request"].zenbase.task_demos)

[LMDemo(inputs={'plan': '1 .   C a l c u l a t e   h a l f   t h e   n u m b e '
                        'r   o f   c l i p s   s o l d   i n   A p r i l :   4 '
                        '8   /   2   =   2 4   c l i p s   s o l d   i n   M a '
                        'y .   2 .   A d d   t h e   n u m b e r   o f   c l i '
                        'p s   s o l d   i n   A p r i l   a n d   M a y :   4 '
                        '8   ( A p r i l )   +   2 4   ( M a y )   =   7 2   c '
                        'l i p s   s o l d   a l t o g e t h e r   i n   A p r '
                        'i l   a n d   M a y .',
                'question': 'Natalia sold clips to 48 of her friends in April, '
                            'and then she sold half as many clips in May. How '
                            'many clips did Natalia sell altogether in April '
                            'and May?'},
        outputs={'operation': '48 + (48 / 2)'},
        original_object=None),
 LMDemo(inputs={'plan': 