This notebooks shows how to use [OpenAI assistant](https://platform.openai.com/docs/guides/function-calling) to do function calling.
- steps:
  - create an assistant (LLM + a lot of predefined func)
  - call LLM to see which predefined func to use and what are the input params
  - return the results to LLM
  - get the answer from LLM

In [1]:
from openai import AzureOpenAI
from dotenv import load_dotenv
import os
import pandas as pd
from jinja2 import Environment, FileSystemLoader
from pathlib import Path
import sys
import time

module_path = os.path.abspath(os.path.join(".."))
if module_path not in sys.path:
    sys.path.append(module_path)

from utils.utils import convert_types, eval
from utils.vars import DATA_DIR, DATASET_FILES, QUESTION_FILES
from utils.assistants import AzureOpenAIAssistant
from utils.customized_func_tools import (
    TOOLS,
)

load_dotenv()
ASSISTANT_NAME_PREFIX = "customized_func"

In [2]:
# get the prompt template
prompt_path = "prompts/prompt.jinja2"
# get the client object
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version="2025-03-01-preview",  # different from assistant
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
)
assistant = AzureOpenAIAssistant(client=client)

In [3]:
display(assistant.list_all_assistants())

Unnamed: 0,id,created_at,description,instructions,metadata,model,name,object,tools,response_format,temperature,tool_resources,top_p
0,asst_tihDxvHK3TSBDoabAYk4nrpB,1747654063,,You are a data scientist specializing in time ...,{},gpt-4o,customized_func_nyc_taxi,assistant,[{'function': {'name': 'get_time_col_and_targe...,auto,0.0,{},1.0
1,asst_G6lW78uX2ZCBvPhkTNkpCY0P,1747654004,,You are a data scientist specializing in time ...,{},gpt-4o,customized_func_melbourne_temp,assistant,[{'function': {'name': 'get_time_col_and_targe...,auto,0.0,{},1.0
2,asst_PyqoRwGSDRSliYUZFpuyRQGT,1747653943,,You are a data scientist specializing in time ...,{},gpt-4o,customized_func_air_passengers,assistant,[{'function': {'name': 'get_time_col_and_targe...,auto,0.0,{},1.0
3,asst_zEURRNR06p0UK9YYlNLNdDPt,1747185462,,You are a data scientist specializing in time ...,{},gpt-4o,code_interpreter_nyc_taxi,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0
4,asst_yb8Ab2f244pJlmrZQjn8uTCO,1747185389,,You are a data scientist specializing in time ...,{},gpt-4o,code_interpreter_melbourne_temp,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0
5,asst_WHmQuNTDclu5t75pTbZh48bT,1747185310,,You are a data scientist specializing in time ...,{},gpt-4o,code_interpreter_air_passengers,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0
6,asst_x4c0gSGzdmoljHa4DrmZjPZO,1734135165,,You are a python expert in univariate time ser...,{},gpt-4o,code_interpreter_nyc_taxi_plot,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0


In [4]:
# delete assistant
assistant.delete_assistant(name=f"{ASSISTANT_NAME_PREFIX}_nyc_taxi")
assistant.delete_assistant(name=f"{ASSISTANT_NAME_PREFIX}_melbourne_temp")
assistant.delete_assistant(name=f"{ASSISTANT_NAME_PREFIX}_air_passengers")

display(assistant.list_all_assistants())

Unnamed: 0,id,created_at,description,instructions,metadata,model,name,object,tools,response_format,temperature,tool_resources,top_p
0,asst_zEURRNR06p0UK9YYlNLNdDPt,1747185462,,You are a data scientist specializing in time ...,{},gpt-4o,code_interpreter_nyc_taxi,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0
1,asst_yb8Ab2f244pJlmrZQjn8uTCO,1747185389,,You are a data scientist specializing in time ...,{},gpt-4o,code_interpreter_melbourne_temp,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0
2,asst_WHmQuNTDclu5t75pTbZh48bT,1747185310,,You are a data scientist specializing in time ...,{},gpt-4o,code_interpreter_air_passengers,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0
3,asst_x4c0gSGzdmoljHa4DrmZjPZO,1734135165,,You are a python expert in univariate time ser...,{},gpt-4o,code_interpreter_nyc_taxi_plot,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0


## Run 3 files x all questions

In [5]:
# cons
# cannot pass a lot of values to the prompt due to token limit
# really need to design the function properly

In [6]:
# read the prompt
instruction = (
    Environment(loader=FileSystemLoader(".")).get_template(prompt_path).render()
)

df_result = []

for question_path in QUESTION_FILES:
    question_path = Path(question_path)
    print(f"Question file: {question_path.name}")
    # read questions
    df_questions = pd.read_csv(DATA_DIR / question_path)
    # loop through each csv file
    for dataset_path in DATASET_FILES:
        dataset_path = Path(DATA_DIR, dataset_path)
        print(f"file: {dataset_path.name}")

        assistant_id = assistant.create_or_retrieve(
            prompt_path=prompt_path,
            assistant_name=f"{ASSISTANT_NAME_PREFIX}_{dataset_path.stem}",
            tools=TOOLS,
            tool_resources=None,
            prompt_args={"dataset_path": dataset_path},
        )
        # loop through questions
        for _, row in df_questions.iterrows():
            question = row["question"]
            answer_true = row[dataset_path.name]

            start_time = time.time()

            result = assistant.ask_a_question(
                question=question, assistant_id=assistant_id, tools=TOOLS
            )

            df_result.append(
                {
                    **result,
                    "question_file": question_path.name,
                    "question": question,
                    "execution_time_s": round(time.time() - start_time, 2),
                    "dataset_file": dataset_path.name,
                    "org_answer_true": answer_true,
                }
            )

Question file: easy_questions.csv
file: air_passengers.csv
file: melbourne_temp.csv
file: nyc_taxi.csv
Question file: medium_questions.csv
file: air_passengers.csv
file: melbourne_temp.csv
file: nyc_taxi.csv
Question file: hard_questions.csv
file: air_passengers.csv
file: melbourne_temp.csv
file: nyc_taxi.csv


In [9]:
# eval
df_result = pd.DataFrame(df_result)

df_result["answer_pred"] = df_result["org_answer_pred"].apply(
    lambda x: convert_types(x)
)
df_result["answer_true"] = df_result["org_answer_true"].apply(
    lambda x: convert_types(x)
)

# loop through each file
eval(df=df_result, details=True)

Question file: easy_questions.csv; Dataset File: air_passengers.csv; Accuracy: 1.0
Question file: easy_questions.csv; Dataset File: melbourne_temp.csv; Accuracy: 1.0
Question file: easy_questions.csv; Dataset File: nyc_taxi.csv; Accuracy: 1.0
Question file: medium_questions.csv; Dataset File: air_passengers.csv; Accuracy: 0.9
question: what is the typical value?
answer_pred: 265.5
answer_true: 280.3
messages: user: what is the typical value?
assistant: {"output": "265.5"}
steps: [{'name': 'get_descriptive_statistics', 'input': None, 'output': '265.5', 'args': '{"file_path":"../../data/air_passengers.csv","statistic_name":"50%","col_name":"target_col"}'}]
**************************************************
Question file: medium_questions.csv; Dataset File: melbourne_temp.csv; Accuracy: 1.0
Question file: medium_questions.csv; Dataset File: nyc_taxi.csv; Accuracy: 0.8
question: what is the typical value?
answer_pred: 16778.0
answer_true: 15137.57
messages: user: what is the typical value?

In [10]:
# check detailed observability metrics
df_result.groupby(["question_file", "dataset_file"])[
    ["total_tokens", "execution_time_s"]
].describe()[[("total_tokens", "mean"), ("execution_time_s", "mean")]].round(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_tokens,execution_time_s
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean
question_file,dataset_file,Unnamed: 2_level_2,Unnamed: 3_level_2
easy_questions.csv,air_passengers.csv,5109.2,5.94
easy_questions.csv,melbourne_temp.csv,5113.7,6.92
easy_questions.csv,nyc_taxi.csv,5117.1,7.04
hard_questions.csv,air_passengers.csv,5156.4,6.13
hard_questions.csv,melbourne_temp.csv,4944.1,6.4
hard_questions.csv,nyc_taxi.csv,5177.6,5.99
medium_questions.csv,air_passengers.csv,3918.0,5.41
medium_questions.csv,melbourne_temp.csv,3919.5,5.88
medium_questions.csv,nyc_taxi.csv,4129.4,5.11


In [11]:
# check overall observability metrics
df_result.describe().round(2)

Unnamed: 0,completion_tokens,prompt_tokens,total_tokens,execution_time_s_in_code_interpreter,execution_time_s
count,90.0,90.0,90.0,90.0,90.0
mean,59.76,4671.91,4731.67,4.94,6.09
std,30.11,1211.75,1233.52,2.47,2.45
min,29.0,3832.0,3862.0,2.0,3.68
25%,38.0,3843.25,3880.0,3.0,4.36
50%,46.0,3861.0,3910.0,4.0,5.1
75%,72.75,5857.75,5930.0,6.0,6.89
max,188.0,7963.0,8077.0,15.0,16.17


In [12]:
with pd.option_context("display.max_rows", None, "display.max_columns", None):
    display(df_result.groupby(["dataset_file"]).describe())

Unnamed: 0_level_0,completion_tokens,completion_tokens,completion_tokens,completion_tokens,completion_tokens,completion_tokens,completion_tokens,completion_tokens,prompt_tokens,prompt_tokens,prompt_tokens,prompt_tokens,prompt_tokens,prompt_tokens,prompt_tokens,prompt_tokens,total_tokens,total_tokens,total_tokens,total_tokens,total_tokens,total_tokens,total_tokens,total_tokens,execution_time_s_in_code_interpreter,execution_time_s_in_code_interpreter,execution_time_s_in_code_interpreter,execution_time_s_in_code_interpreter,execution_time_s_in_code_interpreter,execution_time_s_in_code_interpreter,execution_time_s_in_code_interpreter,execution_time_s_in_code_interpreter,execution_time_s,execution_time_s,execution_time_s,execution_time_s,execution_time_s,execution_time_s,execution_time_s,execution_time_s
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
dataset_file,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2
air_passengers.csv,30.0,58.833333,29.996647,29.0,38.0,45.5,72.0,155.0,30.0,4669.033333,1262.358042,3832.0,3841.25,3860.5,5853.0,7950.0,30.0,4727.866667,1284.279987,3862.0,3878.25,3907.0,5925.0,8060.0,30.0,4.6,2.077797,2.0,3.0,4.0,5.0,12.0,30.0,5.825333,2.063473,3.78,4.17,5.09,6.8275,13.32
melbourne_temp.csv,30.0,57.333333,26.028278,30.0,38.25,45.5,72.0,130.0,30.0,4601.766667,1127.15041,3832.0,3842.0,3860.5,5858.0,7918.0,30.0,4659.1,1146.223705,3863.0,3880.0,3907.0,5930.0,8019.0,30.0,5.266667,3.279333,2.0,3.0,4.0,6.0,15.0,30.0,6.398667,3.234532,3.77,4.2475,5.105,6.77,16.17
nyc_taxi.csv,30.0,63.1,34.458721,32.0,39.0,50.0,75.75,188.0,30.0,4744.933333,1277.347168,3835.0,3844.75,3863.5,5860.75,7963.0,30.0,4808.033333,1301.905592,3869.0,3882.0,3913.0,5936.5,8077.0,30.0,4.966667,1.865907,3.0,3.25,4.0,6.0,9.0,30.0,6.046333,1.85925,3.68,4.9375,5.22,7.925,10.43


## Run one question

In [13]:
# list all assistants
assistant.list_all_assistants()

Unnamed: 0,id,created_at,description,instructions,metadata,model,name,object,tools,response_format,temperature,tool_resources,top_p
0,asst_ezGHJBYCFh3nx0JdQtUrBsdw,1747655525,,You are a data scientist specializing in time ...,{},gpt-4o,customized_func_nyc_taxi,assistant,[{'function': {'name': 'get_time_col_and_targe...,auto,0.0,{},1.0
1,asst_KdIz1fUmJQnC9f0omQb1rGBk,1747655455,,You are a data scientist specializing in time ...,{},gpt-4o,customized_func_melbourne_temp,assistant,[{'function': {'name': 'get_time_col_and_targe...,auto,0.0,{},1.0
2,asst_VjGjUNyjAj31MAgDbGbmgjlh,1747655395,,You are a data scientist specializing in time ...,{},gpt-4o,customized_func_air_passengers,assistant,[{'function': {'name': 'get_time_col_and_targe...,auto,0.0,{},1.0
3,asst_zEURRNR06p0UK9YYlNLNdDPt,1747185462,,You are a data scientist specializing in time ...,{},gpt-4o,code_interpreter_nyc_taxi,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0
4,asst_yb8Ab2f244pJlmrZQjn8uTCO,1747185389,,You are a data scientist specializing in time ...,{},gpt-4o,code_interpreter_melbourne_temp,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0
5,asst_WHmQuNTDclu5t75pTbZh48bT,1747185310,,You are a data scientist specializing in time ...,{},gpt-4o,code_interpreter_air_passengers,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0
6,asst_x4c0gSGzdmoljHa4DrmZjPZO,1734135165,,You are a python expert in univariate time ser...,{},gpt-4o,code_interpreter_nyc_taxi_plot,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0


In [14]:
question = "Forecast the next data point using the naïve method."
result = assistant.ask_a_question(
    question=question, assistant_id="asst_ggryg6Pu49DHOFuOB6l6awG4", tools=TOOLS
)
result

NotFoundError: Error code: 404 - {'error': {'message': "No assistant found with id 'asst_ggryg6Pu49DHOFuOB6l6awG4'.", 'type': 'invalid_request_error', 'param': None, 'code': None}}