This notebooks shows how to use code interpreter [OpenAI assistant](https://platform.openai.com/docs/assistants/tools/code-interpreter). This tool generates the code and executes the code in the sandbox.
- steps:
  - create an assistant (LLM with code interpreter)
  - call the assistant to write the code, and executes the code in the sandbox (this might contain multiple rounds in order to get the answer)
  - get the answer from the assistant
- the input data are: [`air_passengers.csv`](../../data/air_passengers.csv), [`melbourne_temp.csv`](../../data/melbourne_temp.csv), [`nyc_taxi.csv`](../../data/nyc_taxi.csv)
- the question is: [`easy_precise_questions.csv`](../../data/easy_precise_questions.csv)
- the question is: [`medium_precise_questions.csv`](../../data/medium_precise_questions.csv)

In [None]:
from openai import AzureOpenAI
from dotenv import load_dotenv
import pandas as pd
import os
from jinja2 import Environment, FileSystemLoader
from pathlib import Path
import time
import sys
from PIL import Image

module_path = os.path.abspath(os.path.join(".."))
if module_path not in sys.path:
    sys.path.append(module_path)

from utils.utils import convert_types, eval
from utils.vars import DATA_DIR, DATASET_FILES, QUESTION_FILES
from utils.assistants import AzureOpenAIAssistant

load_dotenv()
ASSISTANT_NAME_PREFIX = "code_interpreter"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# don't need to define function provide more flexibility but more prompt tokens and execution time but less accurate (need to give more accurate instruction). slower compared to function calling.

In [3]:
# get the prompt template
prompt_path = "prompts/prompt.jinja2"

# get the client object
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version="2025-03-01-preview",  # different from assistant
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
)
assistant = AzureOpenAIAssistant(client=client)

In [4]:
display(assistant.list_all_assistants())

Unnamed: 0,id,created_at,description,instructions,metadata,model,name,object,tools,response_format,temperature,tool_resources,top_p
0,asst_zHHFD4m7JuPigNgC0v7UYVaW,1747184961,,You are a data scientist specializing in time ...,{},gpt-4o,code_interpreter_nyc_taxi,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0
1,asst_39J6tadJ2D4Eg1jUXlR77ROc,1747184893,,You are a data scientist specializing in time ...,{},gpt-4o,code_interpreter_melbourne_temp,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0
2,asst_m1LrmgJyTMPKZspr3jmMHWhx,1747184798,,You are a data scientist specializing in time ...,{},gpt-4o,code_interpreter_air_passengers,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0
3,asst_vOt2vULsZnzSZfqASNWCA1uv,1735007676,,You are a data scientist in univariate time se...,{},gpt-4o,customized_func_nyc_taxi,assistant,[{'function': {'name': 'get_time_col_and_targe...,auto,0.0,{},1.0
4,asst_qXfVisu4RHEIYa5Qs4L3I1Xa,1735007596,,You are a data scientist in univariate time se...,{},gpt-4o,customized_func_melbourne_temp,assistant,[{'function': {'name': 'get_time_col_and_targe...,auto,0.0,{},1.0
5,asst_kFyF79QuX27kMYRnguWb80n9,1735007502,,You are a data scientist in univariate time se...,{},gpt-4o,customized_func_air_passengers,assistant,[{'function': {'name': 'get_time_col_and_targe...,auto,0.0,{},1.0
6,asst_x4c0gSGzdmoljHa4DrmZjPZO,1734135165,,You are a python expert in univariate time ser...,{},gpt-4o,code_interpreter_nyc_taxi_plot,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0


In [5]:
# delete assistant
assistant.delete_assistant(name=f"{ASSISTANT_NAME_PREFIX}_nyc_taxi")
assistant.delete_assistant(name=f"{ASSISTANT_NAME_PREFIX}_melbourne_temp")
assistant.delete_assistant(name=f"{ASSISTANT_NAME_PREFIX}_air_passengers")

display(assistant.list_all_assistants())

Unnamed: 0,id,created_at,description,instructions,metadata,model,name,object,tools,response_format,temperature,tool_resources,top_p
0,asst_vOt2vULsZnzSZfqASNWCA1uv,1735007676,,You are a data scientist in univariate time se...,{},gpt-4o,customized_func_nyc_taxi,assistant,[{'function': {'name': 'get_time_col_and_targe...,auto,0.0,{},1.0
1,asst_qXfVisu4RHEIYa5Qs4L3I1Xa,1735007596,,You are a data scientist in univariate time se...,{},gpt-4o,customized_func_melbourne_temp,assistant,[{'function': {'name': 'get_time_col_and_targe...,auto,0.0,{},1.0
2,asst_kFyF79QuX27kMYRnguWb80n9,1735007502,,You are a data scientist in univariate time se...,{},gpt-4o,customized_func_air_passengers,assistant,[{'function': {'name': 'get_time_col_and_targe...,auto,0.0,{},1.0
3,asst_x4c0gSGzdmoljHa4DrmZjPZO,1734135165,,You are a python expert in univariate time ser...,{},gpt-4o,code_interpreter_nyc_taxi_plot,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0


In [6]:
# read the prompt
instruction = (
    Environment(loader=FileSystemLoader(".")).get_template(prompt_path).render()
)

df_result = []

# loop through files
# loop over the question files
for question_path in QUESTION_FILES:
    question_path = Path(question_path)
    print(f"Question file: {question_path.name}")
    # read questions
    df_questions = pd.read_csv(DATA_DIR / question_path)
    # loop through each csv file
    for dataset_path in DATASET_FILES:
        dataset_path = Path(dataset_path)
        print(f"file: {dataset_path.name}")

        # check if file has been uploaded to the client
        file_id = assistant.upload_or_retrieve_file(file_path=dataset_path)

        # create or retrieve an assistant
        assistant_id = assistant.create_or_retrieve(
            prompt_path=prompt_path,
            assistant_name=f"{ASSISTANT_NAME_PREFIX}_{dataset_path.stem}",
            tools=[{"type": "code_interpreter"}],
            tool_resources={"code_interpreter": {"file_ids": [file_id]}},
        )

        # loop through questions
        for _, row in df_questions.iterrows():
            question = row["question"]
            answer_true = row[dataset_path.name]

            start_time = time.time()

            result = assistant.ask_a_question(
                question=question, assistant_id=assistant_id
            )

            df_result.append(
                {
                    **result,
                    "question_file": question_path.name,
                    "question": question,
                    "execution_time_s": round(time.time() - start_time, 2),
                    "dataset_file": dataset_path.name,
                    "org_answer_true": answer_true,
                }
            )

Question file: easy_questions.csv
file: air_passengers.csv
JSONDecodeError: There are no missing values in the `y` column.

The target column is `y`.

{"output": "y"}
file: melbourne_temp.csv
file: nyc_taxi.csv
Question file: medium_questions.csv
file: air_passengers.csv
JSONDecodeError: Based on the dataset structure, it contains two columns: `ds` (time column) and `y` (target column). This indicates that it is a univariate time series.

{"output": "univariate"}
file: melbourne_temp.csv
file: nyc_taxi.csv
JSONDecodeError: The dataset contains two columns: "time" and "#Passengers". This indicates that it is a univariate time series, as there is only one target variable being measured over time.

{"output": "univariate"}
Question file: hard_questions.csv
file: air_passengers.csv
file: melbourne_temp.csv
file: nyc_taxi.csv


In [7]:
# eval
df_result = pd.DataFrame(df_result)

df_result["answer_pred"] = df_result["org_answer_pred"].apply(
    lambda x: convert_types(x)
)
df_result["answer_true"] = df_result["org_answer_true"].apply(
    lambda x: convert_types(x)
)

# loop through each file
eval(df=df_result, details=True)

Question file: easy_questions.csv; Dataset File: air_passengers.csv; Accuracy: 0.9
question: What is the target column?
answer_pred: there are no missing values in the `y` column.

the target column is `y`.

{"output": "y"}
answer_true: y
messages: user: What is the target column?
assistant: To identify the target column, I will first load the dataset and inspect the column names and data types. Let's start by loading the dataset and displaying the first few rows.
assistant: The dataset contains two columns: `ds` and `y`. Based on the column names and the data, it appears that `ds` is the time column and `y` is the target column.

Let's confirm the data types of these columns to ensure they are appropriate for time series analysis.
assistant: The `ds` column is of type `object`, which needs to be converted to datetime format. The `y` column is of type `int64`, which is appropriate for the target column.

Let's proceed with the conversion of the `ds` column to datetime format and check 

In [8]:
# check detailed observability metrics
df_result.groupby(["question_file", "dataset_file"])[
    ["total_tokens", "execution_time_s"]
].describe()[[("total_tokens", "mean"), ("execution_time_s", "mean")]].round(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_tokens,execution_time_s
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean
question_file,dataset_file,Unnamed: 2_level_2,Unnamed: 3_level_2
easy_questions.csv,air_passengers.csv,2644.2,7.82
easy_questions.csv,melbourne_temp.csv,1544.1,7.21
easy_questions.csv,nyc_taxi.csv,2269.5,7.8
hard_questions.csv,air_passengers.csv,3743.2,12.99
hard_questions.csv,melbourne_temp.csv,3644.8,13.09
hard_questions.csv,nyc_taxi.csv,3547.9,12.85
medium_questions.csv,air_passengers.csv,3126.1,10.21
medium_questions.csv,melbourne_temp.csv,2627.1,10.5
medium_questions.csv,nyc_taxi.csv,2697.4,10.13


In [9]:
# check overall observability metrics
df_result.describe().round(2)

Unnamed: 0,completion_tokens,prompt_tokens,total_tokens,execution_time_s_in_code_interpreter,execution_time_s
count,90.0,90.0,90.0,90.0,90.0
mean,287.79,2583.8,2871.59,8.79,10.29
std,121.18,1127.09,1220.14,3.34,3.44
min,72.0,808.0,885.0,3.0,4.8
25%,210.75,1588.75,1808.25,7.0,7.93
50%,299.5,2796.0,3123.0,8.0,9.59
75%,376.0,2964.0,3332.25,11.0,12.44
max,548.0,6301.0,6847.0,23.0,24.28


In [10]:
with pd.option_context("display.max_rows", None, "display.max_columns", None):
    display(df_result.groupby(["dataset_file"]).describe())

Unnamed: 0_level_0,completion_tokens,completion_tokens,completion_tokens,completion_tokens,completion_tokens,completion_tokens,completion_tokens,completion_tokens,prompt_tokens,prompt_tokens,prompt_tokens,prompt_tokens,prompt_tokens,prompt_tokens,prompt_tokens,prompt_tokens,total_tokens,total_tokens,total_tokens,total_tokens,total_tokens,total_tokens,total_tokens,total_tokens,execution_time_s_in_code_interpreter,execution_time_s_in_code_interpreter,execution_time_s_in_code_interpreter,execution_time_s_in_code_interpreter,execution_time_s_in_code_interpreter,execution_time_s_in_code_interpreter,execution_time_s_in_code_interpreter,execution_time_s_in_code_interpreter,execution_time_s,execution_time_s,execution_time_s,execution_time_s,execution_time_s,execution_time_s,execution_time_s,execution_time_s
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
dataset_file,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2
air_passengers.csv,30.0,300.166667,109.862636,77.0,251.75,310.0,377.5,546.0,30.0,2871.0,1206.359556,812.0,2624.5,2785.0,2898.25,6301.0,30.0,3171.166667,1286.059367,889.0,2890.25,3105.0,3300.5,6847.0,30.0,9.066667,3.226328,4.0,7.0,8.0,11.0,17.0,30.0,10.342,3.272943,5.4,8.29,9.575,12.4125,18.42
melbourne_temp.csv,30.0,278.1,134.010383,72.0,204.75,257.5,372.5,546.0,30.0,2327.233333,1048.610926,808.0,1551.75,2726.5,2883.5,4887.0,30.0,2605.333333,1167.008357,885.0,1756.5,3019.5,3274.0,5433.0,30.0,8.3,3.14204,4.0,6.25,8.0,10.0,16.0,30.0,10.263333,3.478243,4.8,7.6975,9.54,13.06,17.08
nyc_taxi.csv,30.0,285.1,121.549295,77.0,205.25,306.0,362.0,548.0,30.0,2553.166667,1090.561705,820.0,1641.25,2886.5,3025.75,6035.0,30.0,2838.266667,1177.16925,901.0,1852.75,3203.5,3431.5,6303.0,30.0,9.0,3.695291,3.0,7.0,9.0,10.75,23.0,30.0,10.258333,3.666827,4.91,8.235,9.66,11.835,24.28


## Run one question

In [11]:
# list all assistants
assistant.list_all_assistants()

Unnamed: 0,id,created_at,description,instructions,metadata,model,name,object,tools,response_format,temperature,tool_resources,top_p
0,asst_zEURRNR06p0UK9YYlNLNdDPt,1747185462,,You are a data scientist specializing in time ...,{},gpt-4o,code_interpreter_nyc_taxi,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0
1,asst_yb8Ab2f244pJlmrZQjn8uTCO,1747185389,,You are a data scientist specializing in time ...,{},gpt-4o,code_interpreter_melbourne_temp,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0
2,asst_WHmQuNTDclu5t75pTbZh48bT,1747185310,,You are a data scientist specializing in time ...,{},gpt-4o,code_interpreter_air_passengers,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0
3,asst_vOt2vULsZnzSZfqASNWCA1uv,1735007676,,You are a data scientist in univariate time se...,{},gpt-4o,customized_func_nyc_taxi,assistant,[{'function': {'name': 'get_time_col_and_targe...,auto,0.0,{},1.0
4,asst_qXfVisu4RHEIYa5Qs4L3I1Xa,1735007596,,You are a data scientist in univariate time se...,{},gpt-4o,customized_func_melbourne_temp,assistant,[{'function': {'name': 'get_time_col_and_targe...,auto,0.0,{},1.0
5,asst_kFyF79QuX27kMYRnguWb80n9,1735007502,,You are a data scientist in univariate time se...,{},gpt-4o,customized_func_air_passengers,assistant,[{'function': {'name': 'get_time_col_and_targe...,auto,0.0,{},1.0
6,asst_x4c0gSGzdmoljHa4DrmZjPZO,1734135165,,You are a python expert in univariate time ser...,{},gpt-4o,code_interpreter_nyc_taxi_plot,assistant,[{'type': 'code_interpreter'}],auto,0.0,{'code_interpreter': {'file_ids': ['assistant-...,1.0


In [12]:
question = "What is the target column?"
assistant_id = "asst_VGmOogPrbDPXHIJeUbVmWWzZ"
print(
    f'answer_true: {df_questions[df_questions["question"] == question]["nyc_taxi.csv"].values[0]}'
)
result = assistant.ask_a_question(question=question, assistant_id=assistant_id)
result

IndexError: index 0 is out of bounds for axis 0 with size 0

## Appendix: Generate an image

In [2]:
prompt_path = "prompts/draw_picture.jinja2"
question = "Generate a box plot of the target column using seaborn with text annotation for min, max, q1, q3, and median."
file_path = Path(DATA_DIR) / "nyc_taxi.csv"
file_id = assistant.upload_or_retrieve_file(file_path=file_path)

NameError: name 'Path' is not defined

In [None]:
assistant_id = assistant.create_or_retrieve(
    assistant_name=f"{ASSISTANT_NAME_PREFIX}_{file_path.stem}_plot",
    prompt_path=prompt_path,
    tools=[{"type": "code_interpreter"}],
    tool_resources={"code_interpreter": {"file_ids": [file_id]}},
)
result = assistant.ask_a_question(question=question, assistant_id=assistant_id)

# print the output
print(f'messages: {"\n".join(result["messages"])}')
print(f'steps: {result["steps"]}')

In [None]:
img = result["attachments"][0]
with open(img["file_name"], "wb") as file:
    file.write(img["file_bytes"])
Image(filename=img["file_name"], width=1000)