This notebook shows how to use a pure prompt to get the answer to each question.
- steps:
  - inject the data and the question into the prompt.
  - call the LLM
  - get the answer from LLM
- Note: [`nyc_taxi.csv`](../../data/nyc_taxi.csv) is skipped because it does not fit into the prompt (it exceeds the token limit).

In [1]:
from openai import AzureOpenAI
from dotenv import load_dotenv
import pandas as pd
import os
from jinja2 import Environment, FileSystemLoader
from pathlib import Path
import time
import sys
import json

# Put the parent directory on sys.path (once) so the project-local `utils`
# package used below can be imported from this notebook.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

# NOTE: `eval` imported here is the project's helper and shadows Python's built-in `eval`.
from utils.utils import (
    convert_types,
    eval,
)
from utils.vars import (
    DATA_DIR,
    DATASET_FILES,
    QUESTION_FILES,
    MODEL_ARGS,
)

# Load variables from a .env file into the process environment; the returned
# flag is displayed as the cell output.
load_dotenv()

True

In [2]:
# Location of the Jinja2 prompt template, relative to the notebook's working directory.
prompt_path = "prompts/prompt.jinja2"

# Azure OpenAI client; the API key and endpoint are looked up in the process environment.
client = AzureOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    api_version="2025-03-01-preview",  # different from assistant
)

In [3]:
# Render the system prompt once; the template is rendered without any variables.
instruction = (
    Environment(loader=FileSystemLoader(".")).get_template(prompt_path).render()
)

# One record (dict) per (question file, dataset, question); converted to a
# DataFrame in a later cell.
df_result = []

# loop over the question files
for question_path in QUESTION_FILES:
    question_path = Path(question_path)
    print(f"Question file: {question_path.name}")
    # read questions
    df_questions = pd.read_csv(DATA_DIR / question_path)
    # loop through each csv file
    for dataset_path in DATASET_FILES:
        dataset_path = Path(dataset_path)
        # nyc_taxi.csv does not fit into the prompt (token limit), so skip it.
        if dataset_path.name in ["nyc_taxi.csv"]:
            continue
        print(f"file: {dataset_path.name}")
        # read the data
        df = pd.read_csv(DATA_DIR / dataset_path)

        # The dataset message is identical for every question about this
        # dataset, so render the markdown table once instead of per question.
        dataset_message = {
            "role": "user",
            "content": f"Here is the dataset in the markdown format. {df.to_markdown()}",
        }

        # call openai
        for _, row in df_questions.iterrows():
            question = row["question"]
            # The expected answer is stored in the column named after the dataset file.
            answer_true = row[dataset_path.name]

            start_time = time.time()

            response = client.chat.completions.create(
                **{
                    **MODEL_ARGS,
                    "seed": 42,
                    "messages": [
                        {"role": "system", "content": instruction},
                        dataset_message,
                        {"role": "user", "content": question},
                    ],
                }
            )
            # Measure the request latency only, before any response parsing.
            execution_time_s = round(time.time() - start_time, 2)

            # format the output: strip optional markdown code fences, then read
            # the "output" field of the JSON reply.
            raw_output = response.choices[0].message.content
            try:
                processed_response = (
                    raw_output.replace("```json", "").replace("```", "").strip()
                )
                answer_pred = json.loads(processed_response)["output"]
            except (json.JSONDecodeError, KeyError, TypeError, AttributeError) as err:
                # JSONDecodeError: reply is not valid JSON.
                # KeyError / TypeError: valid JSON without an "output" field.
                # AttributeError: the message content is None.
                # Record the question as unanswered rather than aborting the run.
                answer_pred = None
                print(f"Original output: {raw_output}; {type(err).__name__}: {err}")

            df_result.append(
                {
                    **response.usage.to_dict(),
                    "question_file": question_path.name,
                    "question": question,
                    "execution_time_s": execution_time_s,
                    "dataset_file": dataset_path.name,
                    "org_answer_pred": answer_pred,
                    "org_answer_true": answer_true,
                }
            )

Question file: easy_questions.csv
file: air_passengers.csv


APITimeoutError: Request timed out.

In [4]:
# Turn the collected per-question records into a DataFrame for evaluation.
df_result = pd.DataFrame(data=df_result)

In [5]:
# Run the predicted and the reference answers through the project helper
# `convert_types`, storing the results in new columns; the `org_*` columns
# keep the unconverted values.
for raw_column, converted_column in (
    ("org_answer_pred", "answer_pred"),
    ("org_answer_true", "answer_true"),
):
    df_result[converted_column] = df_result[raw_column].apply(convert_types)

In [7]:
# Overall observability metrics: summary statistics of the numeric columns
# (token usage and execution time), rounded to two decimals.
overall_stats = df_result.describe()
overall_stats.round(decimals=2)

Unnamed: 0,completion_tokens,prompt_tokens,total_tokens,execution_time_s
count,60.0,60.0,60.0,60.0
mean,7.9,35801.83,35809.73,3.55
std,1.22,33599.67,33599.88,9.81
min,7.0,2477.0,2484.0,0.46
25%,7.0,2482.75,2490.0,0.53
50%,7.0,35805.5,35812.5,1.19
75%,9.0,69119.25,69128.0,1.41
max,12.0,69134.0,69143.0,47.5


In [8]:
# Detailed observability metrics: mean total tokens and mean execution time
# for every (question file, dataset file) pair, rounded to two decimals.
metric_columns = ["total_tokens", "execution_time_s"]
df_result.groupby(["question_file", "dataset_file"])[metric_columns].agg(
    ["mean"]
).round(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,total_tokens,execution_time_s
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean
question_file,dataset_file,Unnamed: 2_level_2,Unnamed: 3_level_2
easy_questions.csv,air_passengers.csv,2488.6,0.59
easy_questions.csv,melbourne_temp.csv,69126.3,6.58
hard_questions.csv,air_passengers.csv,2491.6,0.57
hard_questions.csv,melbourne_temp.csv,69128.6,6.5
medium_questions.csv,air_passengers.csv,2492.9,0.58
medium_questions.csv,melbourne_temp.csv,69130.4,6.46


In [9]:
# Number of rows with a missing converted prediction, per question file and dataset file.
missing_prediction = df_result["answer_pred"].isna()
df_result.loc[missing_prediction].groupby(["question_file", "dataset_file"]).size()

Series([], dtype: int64)

In [10]:
# Score the predictions with the project's `eval` helper (imported from
# utils.utils; it shadows Python's built-in `eval`).
eval(details=False, df=df_result)

Question file: easy_questions.csv; Dataset File: air_passengers.csv; Accuracy: 0.9
question: What is the standard deviation of the target variable?
answer_pred: 140.62
answer_true: 119.97
**************************************************
Question file: easy_questions.csv; Dataset File: melbourne_temp.csv; Accuracy: 0.7
question: What is the mean of the target variable?
answer_pred: 13.34
answer_true: 11.18
**************************************************
question: What is the standard deviation of the target variable?
answer_pred: 4.31
answer_true: 4.07
**************************************************
question: What is the Q1 of the target variable?
answer_pred: 10.1
answer_true: 8.3
**************************************************
Question file: medium_questions.csv; Dataset File: air_passengers.csv; Accuracy: 0.6
question: what is the typical value?
answer_pred: 280.25
answer_true: 280.3
**************************************************
question: Give me the total amount when

In [11]:
# Full summary statistics per dataset file; the display limits are lifted
# only for this one rendering so no row or column is truncated.
per_dataset_stats = df_result.groupby(["dataset_file"]).describe()
with pd.option_context("display.max_rows", None, "display.max_columns", None):
    display(per_dataset_stats)

Unnamed: 0_level_0,completion_tokens,completion_tokens,completion_tokens,completion_tokens,completion_tokens,completion_tokens,completion_tokens,completion_tokens,prompt_tokens,prompt_tokens,prompt_tokens,prompt_tokens,prompt_tokens,prompt_tokens,prompt_tokens,prompt_tokens,total_tokens,total_tokens,total_tokens,total_tokens,total_tokens,total_tokens,total_tokens,total_tokens,execution_time_s,execution_time_s,execution_time_s,execution_time_s,execution_time_s,execution_time_s,execution_time_s,execution_time_s
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
dataset_file,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2
air_passengers.csv,30.0,7.7,1.178836,7.0,7.0,7.0,8.75,12.0,30.0,2483.333333,4.751285,2477.0,2481.0,2482.5,2484.75,2497.0,30.0,2491.033333,5.054758,2484.0,2488.0,2490.0,2493.0,2504.0,30.0,0.581,0.179949,0.46,0.5,0.53,0.56,1.26
melbourne_temp.csv,30.0,8.1,1.241523,7.0,7.0,8.0,9.0,12.0,30.0,69120.333333,4.751285,69114.0,69118.0,69119.5,69121.75,69134.0,30.0,69128.433333,5.103639,69121.0,69125.0,69128.0,69130.0,69143.0,30.0,6.512,13.327872,1.17,1.3125,1.415,3.3425,47.5
