# Import the Zenbase Library

In [None]:
import sys
import subprocess

def install_package(package):
    """Install ``package`` with pip, using the interpreter running this notebook.

    A failed pip run is reported on stdout and the original
    ``subprocess.CalledProcessError`` is re-raised so execution stops.
    """
    pip_command = [sys.executable, "-m", "pip", "install", package]
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as error:
        print(f"Failed to install {package}: {error}")
        raise

def install_packages(packages):
    """Install each requirement in ``packages`` in order, one pip run per entry.

    Every entry is handed to ``install_package``; an exception raised there
    propagates, so later entries are not attempted after a failure.
    """
    for requirement in packages:
        install_package(requirement)

# Treat the session as Google Colab when the `google.colab` module imports.
try:
    import google.colab  # noqa: F401
except ImportError:
    IN_COLAB = False
else:
    IN_COLAB = True

if IN_COLAB:
    # On Colab, install the zenbase package from the `main` branch of the
    # GitHub repository (its `py` subdirectory).
    # Alternative using the published package: install_package('zenbase')
    install_package('git+https://github.com/zenbase-ai/lib.git@main#egg=zenbase&subdirectory=py')

    # The other packages this notebook needs on Colab, installed in this order.
    additional_packages = [
        'python-dotenv',
        'arize-phoenix[evals]',
        'openai',
        'langchain',
        'langchain_openai',
    ]
    install_packages(additional_packages)

# Import zenbase; report the reason on stdout and re-raise if it is unavailable.
try:
    import zenbase
except ImportError as import_error:
    print("Failed to import zenbase: ", import_error)
    raise

# Configure the Environment

In [None]:
from pathlib import Path
from dotenv import load_dotenv

# Alternative to the .env file: set the API key directly in this process.
# import os
#
# os.environ["OPENAI_API_KEY"] = "..."

# Load environment variables from ../../.env.test (resolved against the current
# working directory). override=True is passed so that values from the file take
# precedence over variables that are already set.
load_dotenv(Path("../../.env.test"), override=True)

In [None]:
import nest_asyncio

# Apply the nest_asyncio patch before any of the cells below run.
# NOTE(review): presumably required because the notebook kernel already runs an
# event loop and later calls start their own — confirm.
nest_asyncio.apply()

# Initial Setup

In [None]:
# Launch the Phoenix app
import phoenix as px
px.launch_app()
# Create the Phoenix client; later cells use it to build the Zenbase adaptor
# and to look datasets up by name.
arize_phoenix = px.Client()

In [None]:
from openai import OpenAI
# Create an OpenAI client with default settings; note that the name `openai`
# is bound to the client instance here.
# NOTE(review): no credentials are passed, so they presumably come from the
# environment loaded earlier — confirm.
openai = OpenAI()

In [None]:
from zenbase.utils import ksuid
from zenbase.adaptors.arize import ZenArizeAdaptor
# Set up the datasets: load GSM8K (configuration "main") and wrap the Phoenix
# client in the Zenbase adaptor.
import datasets
gsm8k_dataset = datasets.load_dataset("gsm8k", "main")
zen_arize_adaptor = ZenArizeAdaptor(arize_phoenix)
# Number of GSM8K examples copied into each dataset created below.
TESTSET_SIZE = 2
TRAINSET_SIZE = 5
VALIDATIONSET_SIZE = 2


def create_dataset_with_examples(zen_arize_adaptor: ZenArizeAdaptor, prefix: str, item_set: list) -> str:
    """Create a uniquely named dataset from question/answer examples.

    Args:
        zen_arize_adaptor: Adaptor whose ``add_examples_to_dataset`` receives the data.
        prefix: Prefix passed to ``ksuid`` to generate the dataset name.
        item_set: Examples, each providing ``"question"`` and ``"answer"`` keys.

    Returns:
        The generated dataset name.
    """
    generated_name = ksuid(prefix=prefix)

    def column(field):
        # One single-key dict per example, in example order.
        return [{field: row[field]} for row in item_set]

    questions = column("question")
    answers = column("answer")
    zen_arize_adaptor.add_examples_to_dataset(generated_name, questions, answers)
    return generated_name

# Copy three small slices of GSM8K into separate datasets. Each call returns
# the generated dataset name, which later cells use to look the dataset up.

# Training examples: the first TRAINSET_SIZE rows of the "train" split.
train_set = create_dataset_with_examples(
    zen_arize_adaptor,
    "GSM8K_train_set",
    list(gsm8k_dataset["train"].select(range(TRAINSET_SIZE))),
)

# Validation examples: the rows of the "train" split immediately after the
# training slice, i.e. indices [TRAINSET_SIZE, TRAINSET_SIZE + VALIDATIONSET_SIZE).
# Starting at TRAINSET_SIZE (not TRAINSET_SIZE + 1) keeps the slices adjacent
# and non-overlapping without silently skipping a row.
validation_set = create_dataset_with_examples(
    zen_arize_adaptor,
    "GSM8K_validation_set",
    list(gsm8k_dataset["train"].select(range(TRAINSET_SIZE, TRAINSET_SIZE + VALIDATIONSET_SIZE))),
)

# Test examples: the first TESTSET_SIZE rows of the "test" split.
test_set = create_dataset_with_examples(
    zen_arize_adaptor,
    "GSM8K_test_set",
    list(gsm8k_dataset["test"].select(range(TESTSET_SIZE))),
)

# What you already have should look like this:

## Your OpenAI call should look like this with LangChain (it could use the OpenAI client directly instead; it doesn't matter)

In [None]:
def langchain_chain(inputs) -> str:
    """Answer one math question with a LangChain pipeline that uses no examples.

    Args:
        inputs: Mapping whose ``"inputs"`` entry holds the prompt variables
            (the prompt template references ``{question}``).

    Returns:
        The result of invoking the prompt | model | string-parser chain.
    """
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI

    system_turn = (
        "system",
        "You are an expert math solver. Your answer must be just the number with no separators, and nothing else. Follow the format of the examples.",  # noqa
    )
    question_turn = ("user", "{question}")

    prompt = ChatPromptTemplate.from_messages([system_turn, question_turn])
    model = ChatOpenAI(model="gpt-3.5-turbo")
    pipeline = prompt | model | StrOutputParser()

    print("Mathing...")
    return pipeline.invoke(inputs["inputs"])

## Your Scoring Function should look like this:

In [None]:
def score_answer(output: str, expected: dict) -> int:
    """Score a model answer against a reference answer by exact match.

    Args:
        output: The return value from the `langchain_chain` function above.
        expected: Example record; ``expected["outputs"]["answer"]`` is the
            reference text, whose final answer is whatever follows the last
            ``"#### "`` marker (or the whole text when there is no marker).

    Returns:
        1 if ``output`` equals the reference's final answer exactly, otherwise 0.
    """
    # Debug output: show the prediction next to the reference record.
    print(output, expected["outputs"])
    final_answer = expected["outputs"]["answer"].split("#### ")[-1]
    return int(output == final_answer)

## Your Evaluation should look like this:

In [None]:
from phoenix.experiments import run_experiment

# Run the baseline: `langchain_chain` is the task, executed against the test
# dataset (looked up by its generated name) and scored with `score_answer`.
experiment = run_experiment(
                arize_phoenix.get_dataset(name=test_set),
                langchain_chain,
                experiment_name="Experiment-Name",
                evaluators=[score_answer],
            )

# How you should do the few-shot learning

## Rewrite your langchain_chain function to use the `zenbase` decorators

In [None]:
from zenbase.core.managers import ZenbaseTracer

# Tracer instance; it is applied as a decorator to `zen_chain` below.
zenbase_tracer = ZenbaseTracer()


In [None]:
from zenbase.types import LMRequest, LMDemo
@zenbase_tracer
def zen_chain(request: LMRequest) -> str:
    """Answer one math question, prepending the request's few-shot demos.

    Each demo in ``request.zenbase.task_demos`` contributes a user turn (its
    question) and an assistant turn (its answer) ahead of the real question.

    Args:
        request: Request whose ``inputs["inputs"]`` holds the prompt variables
            (the prompt template references ``{question}``).

    Returns:
        The result of invoking the prompt | model | string-parser chain.
    """
    print(request)
    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.prompts import ChatPromptTemplate
    from langchain_openai import ChatOpenAI

    system_turn = (
        "system",
        "You are an expert math solver. Your answer must be just the number with no separators, and nothing else. Follow the format of the examples.",  # noqa
    )
    # NOTE(review): demo text goes through the same prompt template as
    # "{question}", so braces inside a demo would presumably be read as
    # template variables — confirm.
    demo_turns = [
        turn
        for demo in request.zenbase.task_demos
        for turn in (
            ("user", demo.inputs["question"]),
            ("assistant", demo.outputs["answer"]),
        )
    ]
    question_turn = ("user", "{question}")

    prompt = ChatPromptTemplate.from_messages([system_turn, *demo_turns, question_turn])
    model = ChatOpenAI(model="gpt-3.5-turbo")
    pipeline = prompt | model | StrOutputParser()

    print("Mathing...")
    return pipeline.invoke(request.inputs["inputs"])


In [None]:
# Call the traced function once with a plain dict shaped like the function's
# inputs, and keep the returned answer.
return_langchain = zen_chain({"inputs": {"question": "What is 2 + 2?"}})


## Update your scoring function so it also handles outputs written in the few-shot demo format (`... #### <answer>`)

In [None]:
def score_answer(output: str, expected: dict) -> int:
    """Score a model answer against a reference answer by exact match.

    Args:
        output: The return value from the `zen_chain` function above.
        expected: Example record; ``expected["outputs"]["answer"]`` is the
            reference text, whose final answer is whatever follows the last
            ``"#### "`` marker (or the whole text when there is no marker).

    Returns:
        1 if the final answers match exactly, otherwise 0.
    """
    # Debug output: show the prediction next to the reference record.
    print(output, expected["outputs"])
    reference = expected["outputs"]["answer"]
    # When the reference contains the "####" marker, reduce the output to the
    # text after its last "#### " as well, so an output written in the same
    # "... #### N" form is compared on its final answer only.
    if "####" in reference:
        output = output.split("#### ")[-1]

    return int(output == reference.split("#### ")[-1])

## Optimize the few-shot learning

### Define your optimizer:


In [None]:
from zenbase.optim.metric.labeled_few_shot import LabeledFewShot

# Build the few-shot optimizer from the demos stored in the training dataset.
# NOTE(review): `shots=3` is presumably the number of demos placed in each
# candidate prompt — confirm against the LabeledFewShot documentation.
optimizer = LabeledFewShot(
    demoset=zen_arize_adaptor.fetch_dataset_demos(train_set),  # demos fetched from the training dataset; used for the few-shot learning
    shots=3,
)

### Perform the optimization

In [None]:
# Run the optimizer on `zen_chain`, scoring candidates with `score_answer`
# through the adaptor's metric evaluator. The first two returned values are
# kept as `best_fn` and `candidate_results`; the third is discarded.
# NOTE(review): candidates are evaluated on `test_set`, while `validation_set`
# is not referenced anywhere in the cells visible here — confirm which dataset
# was intended.
best_fn, candidate_results, _ = optimizer.perform(
    zen_chain,
    evaluator=zen_arize_adaptor.metric_evaluator(
        dataset=arize_phoenix.get_dataset(name=test_set), evaluators=[score_answer]
    ),
    samples=4,
    concurrency=1,
    rounds=1,
)

### Use the best function

In [None]:
# Call the optimized function with the same input shape as `zen_chain`;
# the bare name on the last line displays the result in the notebook.
output = best_fn({"inputs":{"question": "What is 2+2?"}})
output

### Save the best function

In [None]:
# You can also save the zenbase params for re-use
import pickle

# Serialize the `zenbase` attribute of the best function to bytes, then load it
# back and attach it to `zen_chain`. Only unpickle bytes you produced yourself:
# `pickle.loads` must not be used on untrusted data.
pickled_zenbase = pickle.dumps(best_fn.zenbase)
zen_chain.zenbase = pickle.loads(pickled_zenbase)

zen_chain({"inputs":{"question": "What is 2+2?"}}) # uses the best few-shot demos