This notebook shows how to use an [OpenAI assistant](https://platform.openai.com/docs/guides/function-calling) to do function calling.
- Steps:
  - Create an assistant (an LLM plus a set of predefined functions).
  - Call the LLM to determine which predefined function to use and what its input parameters are.
  - Return the function results to the LLM.
  - Get the final answer from the LLM.

In [None]:
from openai import AzureOpenAI
from dotenv import load_dotenv
import os
import pandas as pd
from jinja2 import Environment, FileSystemLoader
from pathlib import Path
import sys
import time

# Put the parent directory on the import path so the `utils` package imported
# below can be resolved when the notebook runs from its own folder.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from utils.utils import convert_types, eval
from utils.vars import DATA_DIR, DATASET_FILES, QUESTION_FILES
from utils.assistants import AzureOpenAIAssistant
from utils.customized_func_tools import (
    TOOLS,
)

# Prefix shared by every assistant name this notebook creates, deletes or looks up.
ASSISTANT_NAME_PREFIX = "customized_func"

# Load variables from a .env file into the environment; the Azure OpenAI
# credentials are read from the environment when the client is built.
load_dotenv()

In [None]:
# Azure OpenAI client; the key and endpoint are taken from the environment.
client = AzureOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    # original note: this API version is "different from assistant"
    api_version="2025-03-01-preview",
)

# Wrapper used by the rest of the notebook to manage and query assistants.
assistant = AzureOpenAIAssistant(client=client)

# Location of the Jinja2 prompt template, relative to the notebook folder.
prompt_path = "prompts/prompt.jinja2"

In [None]:
# Show the assistants that currently exist before anything is deleted or created.
existing_assistants = assistant.list_all_assistants()
display(existing_assistants)

In [None]:
# Remove this notebook's assistants (one per dataset), then show what is left.
for dataset_stem in ("nyc_taxi", "melbourne_temp", "air_passengers"):
    assistant.delete_assistant(name=f"{ASSISTANT_NAME_PREFIX}_{dataset_stem}")

remaining_assistants = assistant.list_all_assistants()
display(remaining_assistants)

## Run 3 files x all questions

In [None]:
# Render the prompt template once (no template arguments are supplied here).
# NOTE(review): `instruction` is not referenced by any other visible cell; the
# assistant is given `prompt_path` directly below. Confirm before removing.
instruction = (
    Environment(loader=FileSystemLoader(".")).get_template(prompt_path).render()
)

# One record (dict) is appended per (question file, dataset, question) run;
# the evaluation cell turns this list into a DataFrame.
df_result = []

for question_path in QUESTION_FILES:
    question_path = Path(question_path)
    print(f"Question file: {question_path.name}")
    # read questions
    df_questions = pd.read_csv(DATA_DIR / question_path)
    # loop through each csv file
    for dataset_path in DATASET_FILES:
        dataset_path = Path(DATA_DIR, dataset_path)
        print(f"file: {dataset_path.name}")

        # One assistant per dataset, named "<prefix>_<dataset file stem>".
        assistant_id = assistant.create_or_retrieve(
            prompt_path=prompt_path,
            assistant_name=f"{ASSISTANT_NAME_PREFIX}_{dataset_path.stem}",
            tools=TOOLS,
            tool_resources=None,
            prompt_args={"dataset_path": dataset_path},
        )
        # loop through questions
        for _, row in df_questions.iterrows():
            question = row["question"]
            # The expected answer is stored in the column named after the dataset file.
            answer_true = row[dataset_path.name]

            # perf_counter() is a monotonic clock meant for measuring elapsed time;
            # time.time() is wall-clock time and can jump if the system clock is adjusted.
            start_time = time.perf_counter()
            result = assistant.ask_a_question(
                question=question, assistant_id=assistant_id, tools=TOOLS
            )
            execution_time_s = round(time.perf_counter() - start_time, 2)

            df_result.append(
                {
                    **result,
                    "question_file": question_path.name,
                    "question": question,
                    "execution_time_s": execution_time_s,
                    "dataset_file": dataset_path.name,
                    "org_answer_true": answer_true,
                }
            )

In [None]:
# Turn the collected run records into a DataFrame for evaluation.
df_result = pd.DataFrame(df_result)

# Normalise the raw predicted and expected answers with `convert_types`
# so they can be compared; each target column is derived from its raw column.
for target_column, raw_column in {
    "answer_pred": "org_answer_pred",
    "answer_true": "org_answer_true",
}.items():
    df_result[target_column] = df_result[raw_column].apply(convert_types)

# `eval` here is the helper imported from utils.utils (it shadows the builtin).
eval(df=df_result, details=True)

In [None]:
# Mean token usage and mean execution time per (question file, dataset) pair.
metric_columns = ["total_tokens", "execution_time_s"]
grouped_stats = df_result.groupby(["question_file", "dataset_file"])[
    metric_columns
].describe()
grouped_stats[[(metric, "mean") for metric in metric_columns]].round(2)

In [None]:
# Summary statistics over all runs, rounded to two decimals.
overall_stats = df_result.describe()
overall_stats.round(2)

In [None]:
# Per-dataset summary statistics, displayed without row/column truncation.
per_dataset_stats = df_result.groupby(["dataset_file"]).describe()
with pd.option_context("display.max_rows", None, "display.max_columns", None):
    display(per_dataset_stats)

## Run one question

In [None]:
# Show the assistants that currently exist (useful for picking one to query).
available_assistants = assistant.list_all_assistants()
available_assistants

In [None]:
# Ask a single question against one dataset's assistant.
question = "Forecast the next data point using the naïve method."

# Resolve the assistant by name rather than pasting a hard-coded assistant id.
# The cleanup cell earlier in this notebook deletes these assistants and the
# batch run obtains them again via create_or_retrieve, so a pasted id is
# likely stale after a fresh run (and is specific to one account anyway).
# Defaults to the first configured dataset; change `single_dataset_path`
# to query a different one.
single_dataset_path = Path(DATA_DIR, next(iter(DATASET_FILES)))
single_assistant_id = assistant.create_or_retrieve(
    prompt_path=prompt_path,
    assistant_name=f"{ASSISTANT_NAME_PREFIX}_{single_dataset_path.stem}",
    tools=TOOLS,
    tool_resources=None,
    prompt_args={"dataset_path": single_dataset_path},
)

result = assistant.ask_a_question(
    question=question, assistant_id=single_assistant_id, tools=TOOLS
)
result