This notebook shows how to use a pure prompt to get the answer to each question.
- steps:
  - inject the data and the question into the prompt.
  - call the LLM
  - get the answer from LLM
- Note: [`nyc_taxi.csv`](../../data/nyc_taxi.csv) is skipped because it does not fit into the prompt within the token limit.

In [None]:
from openai import AzureOpenAI
from dotenv import load_dotenv
import pandas as pd
import os
from jinja2 import Environment, FileSystemLoader
from pathlib import Path
import time
import sys
import json

# Put the parent directory on sys.path (once) so the `utils` imports resolve.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

from utils.utils import convert_types, eval
from utils.vars import DATA_DIR, DATASET_FILES, QUESTION_FILES, MODEL_ARGS

# Load variables from a .env file into the process environment; the Azure
# OpenAI key and endpoint are later read with os.getenv.
load_dotenv()

In [None]:
# Template path, resolved relative to the current working directory.
prompt_path = "prompts/prompt.jinja2"

# Azure OpenAI client; the key and endpoint are taken from the environment.
client = AzureOpenAI(
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    api_version="2025-03-01-preview",  # different from assistant
)

In [None]:
# Render the system instruction from the Jinja2 template (no template variables).
instruction = (
    Environment(loader=FileSystemLoader(".")).get_template(prompt_path).render()
)

# Accumulates one record per (question file, dataset file, question) call.
df_result = []

# loop over the question files
for question_path in QUESTION_FILES:
    question_path = Path(question_path)
    print(f"Question file: {question_path.name}")
    # Each question file has a "question" column plus one column per dataset
    # file name holding the expected answer for that dataset.
    df_questions = pd.read_csv(DATA_DIR / question_path)
    # loop through each csv file
    for dataset_path in DATASET_FILES:
        dataset_path = Path(dataset_path)
        # The whole table is injected into the prompt, so the dataset that
        # exceeds the token limit is skipped.
        if dataset_path.name in ["nyc_taxi.csv"]:
            continue
        print(f"file: {dataset_path.name}")
        # read the data
        df = pd.read_csv(DATA_DIR / dataset_path)
        # The markdown rendering depends only on the dataset, so build it once
        # per dataset instead of once per question.
        dataset_prompt = (
            f"Here is the dataset in the markdown format. {df.to_markdown()}"
        )

        # call openai
        for _, row in df_questions.iterrows():
            question = row["question"]
            answer_true = row[dataset_path.name]

            start_time = time.time()
            response = client.chat.completions.create(
                **{
                    **MODEL_ARGS,
                    "seed": 42,
                    "messages": [
                        {"role": "system", "content": instruction},
                        {"role": "user", "content": dataset_prompt},
                        {"role": "user", "content": question},
                    ],
                }
            )
            # Stop the clock right after the API call so the metric excludes
            # local parsing and printing.
            execution_time_s = round(time.time() - start_time, 2)

            # format the output: strip optional markdown code fences, then read
            # the "output" field of the JSON payload.
            raw_content = response.choices[0].message.content
            try:
                # A missing (None) content is treated as an empty reply, which
                # fails JSON decoding and is recorded as "no prediction".
                cleaned = (
                    (raw_content or "")
                    .replace("```json", "")
                    .replace("```", "")
                    .strip()
                )
                answer_pred = json.loads(cleaned)["output"]
            except (json.JSONDecodeError, KeyError, TypeError) as err:
                # Malformed JSON, a payload without "output", or a non-object
                # payload must not abort the whole run: record None and move on.
                answer_pred = None
                print(
                    f"Original output: {raw_content}; {type(err).__name__}: {err}"
                )

            df_result.append(
                {
                    **response.usage.to_dict(),
                    "question_file": question_path.name,
                    "question": question,
                    "execution_time_s": execution_time_s,
                    "dataset_file": dataset_path.name,
                    "org_answer_pred": answer_pred,
                    "org_answer_true": answer_true,
                }
            )

In [None]:
# Turn the collected per-call records into a DataFrame for evaluation.
df_result = pd.DataFrame(data=df_result)

In [None]:
# Run the raw predicted and expected answers through convert_types, storing
# the converted values in new columns next to the originals.
for raw_col, converted_col in (
    ("org_answer_pred", "answer_pred"),
    ("org_answer_true", "answer_true"),
):
    df_result[converted_col] = df_result[raw_col].apply(convert_types)

In [None]:
# Summary statistics over all calls, rounded to two decimals.
overall_stats = df_result.describe()
overall_stats.round(2)

In [None]:
# Mean token usage and mean latency per (question file, dataset file) pair.
grouped_stats = df_result.groupby(["question_file", "dataset_file"])[
    ["total_tokens", "execution_time_s"]
].describe()
mean_columns = [("total_tokens", "mean"), ("execution_time_s", "mean")]
grouped_stats[mean_columns].round(2)

In [None]:
# Count rows with a missing converted prediction per (question file, dataset file).
missing_pred_mask = df_result["answer_pred"].isna()
df_result[missing_pred_mask].groupby(["question_file", "dataset_file"]).size()

In [None]:
# Run the project's `eval` helper (imported from utils.utils; it shadows the
# builtin) on the results, with details turned off.
eval(details=False, df=df_result)

In [None]:
# Per-dataset summary statistics, displayed without row/column truncation.
per_dataset_stats = df_result.groupby(["dataset_file"]).describe()
with pd.option_context("display.max_rows", None, "display.max_columns", None):
    display(per_dataset_stats)