In [1]:
from openai import OpenAI
import pandas as pd
import wandb
import os
import time
from tqdm import tqdm

In [2]:
data_file = "golden_test_data.csv"

df = pd.read_csv(data_file)

In [3]:
from dotenv import load_dotenv

load_dotenv()

True

In [4]:
api_key = os.getenv("OPENAI_API_KEY")

In [5]:
# ✅ Init W&B
# run = wandb.init(
#     project="chatbot-openai-multi-prompt-eval",
#     name="chatbot-multimetric-eval",
#     config={
#         "model": "gpt-4.1-mini",
#         "eval_model": "gpt-4.1-mini",
#         "temperature": 0.7,
#         "max_tokens": 500
#     },
#     save_code=True
# )
# config = wandb.config

wandb: Currently logged in as: aayush-drishte (aayush-drishte-tredence) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


In [6]:
MODEL_PRICES = {
    "gpt-3.5-turbo": 0.0015,
    
    "gpt-4.1-mini": 0.00015,
    "gpt-4.1-nano": 0.00000525
}

In [7]:
df.head()

Unnamed: 0.1,Unnamed: 0,persona,category,customer_query,bot_response,correctness,helpfulness,tone,safety
0,62,polite user,Account Access,"Hi, I’m having trouble logging into my account...",Hello! I’m sorry to hear you’re having trouble...,5,5,5,5
1,41,polite user,Billing,"Hello, I noticed an unexpected charge on my bi...",Hello! I’m sorry to hear about the unexpected ...,5,5,5,5
2,46,confused first-time user,Account Access,"Hi, I just signed up but I can’t seem to log i...",I’m sorry to hear you’re having trouble loggin...,5,5,5,5
3,20,frustrated user,Technical Issue,I've been trying to upload my files for the pa...,I'm really sorry to hear you're having trouble...,5,5,5,5
4,5,angry user,Account Access,I’ve been trying to log into my account for ov...,I’m really sorry for the frustration this is c...,5,5,5,5


In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   Unnamed: 0      20 non-null     int64 
 1   persona         20 non-null     object
 2   category        20 non-null     object
 3   customer_query  20 non-null     object
 4   bot_response    20 non-null     object
 5   correctness     20 non-null     int64 
 6   helpfulness     20 non-null     int64 
 7   tone            20 non-null     int64 
 8   safety          20 non-null     int64 
dtypes: int64(5), object(4)
memory usage: 1.5+ KB


In [9]:
#loading test samples for prompt trials

CUSTOMER_QUERIES = [i for i in df['customer_query']]
CUSTOMER_QUERIES

['Hi, I’m having trouble logging into my account. It keeps saying my password is incorrect, but I’m sure I’m entering the right one. Can you help me regain access?',
 'Hello, I noticed an unexpected charge on my bill this month. Could you please help me understand what it is and how to resolve it?',
 'Hi, I just signed up but I can’t seem to log into my account. It says my password is incorrect, but I’m sure I typed it right. What should I do?',
 'I\'ve been trying to upload my files for the past hour, but every time I get an error message saying "Upload failed." I\'ve checked my internet connection and it\'s working fine. This is really frustrating because I have a deadline. Can you please help me fix this?',
 'I’ve been trying to log into my account for over an hour and it keeps saying my password is wrong! I KNOW it’s right. Why is this happening? I need access NOW!',
 'Hi, I recently noticed that my subscription was renewed, but I’m not sure I meant to continue it. Is there any way

In [10]:
PROMPT_VARIANTS = {
   
    "bare-minimum": "Answer this: {customer_query}",
    "no-context": "Reply to a customer: {customer_query}",
    "basic-support": "You are a support agent. {customer_query}",
    "no-tone": "You're a chatbot. Help the customer with their question: {customer_query}",
    "robotic": "Provide a formal and structured answer to this: {customer_query}",

    "ok-tone": "Answer politely and clearly: {customer_query}",
    "good-clarity": "You are a helpful support agent. Assist this user: {customer_query}",
    "with-role": "You are a customer service rep. Here's a query: {customer_query}",
    "contextualized": "You are a support agent for an e-commerce platform. Help the customer: {customer_query}",
    "tone-aware": "Respond empathetically and clearly to this user question: {customer_query}",

    
    "context-tone": "You’re a friendly and knowledgeable support agent. Help with this query: {customer_query}",
    "product-aware": "You support an online subscription service. Respond empathetically and informatively: {customer_query}",
    "task-aware": "You're an expert in billing, subscriptions, and tech help. Help the customer: {customer_query}",
    "persona-aware": "You support frustrated customers. Respond helpfully: {customer_query}",
    "rich-context": "You're a support agent for a SaaS product. Be empathetic and precise: {customer_query}",
    "top-tier": "You are an award-winning support agent. Give clear, helpful, warm answers: {customer_query}",
    "holistic": "Respond with a helpful solution, emotional tone, and safe advice to: {customer_query}",

    
    "cot-clarification": 
        """You are a support agent. First, think about what the customer's question is asking. 
        Then break the problem down and respond in a step-by-step manner.\n\n
        Customer Query: {customer_query}"""
    ,
    "cot-safety-check": """
        Think carefully before answering. First, consider if the query involves any sensitive or risky topics. 
        Then, answer in a polite and safe way.\n\nCustomer Query: {customer_query}
   """,

    "fewshot-basic": """
        You are a customer support agent. Here are examples of good responses:\n\n"
        Example 1:\nCustomer: I can't log in to my account.\nSupport: I'm sorry you're having trouble logging in. Let's get this fixed. Can you try resetting your password using the 'Forgot Password' link?\n\n
        Example 2:\nCustomer: I was charged twice for my subscription.\nSupport: I'm really sorry about that. I've checked your account and see the duplicate charge. I've issued a refund, and you should see it within 3–5 business days.\n\n
        Now answer this query:\nCustomer: {customer_query}"""
    ,
    "fewshot-tone-aware": """
        You are a warm, empathetic support agent. Learn from the examples:\n\n
        Example 1:\nCustomer: My package is late.\nSupport: I completely understand how frustrating that is. Let me check on the status for you and get it sorted right away.\n\n
        Example 2:\nCustomer: I'm getting error 403.\nSupport: That error usually means there's a permission issue. Could you try logging out and back in?\n\n
        Now handle this:\nCustomer: {customer_query}"""
    ,
    "fewshot-with-cot": """
        You are a thoughtful support agent. Here’s how to handle queries step-by-step:\n\n
        Example:\nCustomer: I want to cancel my subscription.\nSupport: First, check if the user is on a free or paid plan. Then, provide the cancellation link or instructions. Respond with empathy.\n\n
        Now apply the same reasoning:\nCustomer: {customer_query}"""
    ,

 
    "optimized": """
        "You're a highly trained support rep for a SaaS company. Read the customer's question carefully, think step-by-step if needed, and answer clearly, empathetically, and safely.\n\n
        "Customer: {customer_query}"""
    ,
    "Finalized_prompt": """
        You're a customer support expert at a subscription-based SaaS company. First, think about the intent of the query. 
        Make sure your answer is:\n1. Accurate\n2. Helpful\n3. Polite and empathetic\n4. Safe\n\n
        Then write your response:\n\nCustomer: {customer_query}"""
    
}


In [11]:


def call_chat(client,model, messages, temperature=0.7, max_tokens=500):
    start = time.time()
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
        max_tokens=max_tokens
    )
    latency = time.time() - start
    usage = response.usage
    content = response.choices[0].message.content
    cost = (
        (usage.prompt_tokens / 1000) * MODEL_PRICES[model] +
        (usage.completion_tokens / 1000) * MODEL_PRICES.get(f"{model}-completion", MODEL_PRICES[model])
    )
    time.sleep(0.7)
    return content.strip(), usage, cost, latency

In [12]:
client = OpenAI(api_key=api_key)

model = "gpt-4.1-mini"

In [13]:
for variant_name, prompt_template in tqdm(PROMPT_VARIANTS.items()):
    run = wandb.init(project="customer-support-prompt-experiments",
               name=f"{variant_name}")
    table = wandb.Table(columns=["customer_query",
                "prompt",
                "output_content",
                "latency",
                "completion_tokens",
                "prompt_tokens",
                "total_tokens",
                "cost"
    ])
    for i, customer_query in enumerate(CUSTOMER_QUERIES):
        filled_prompt = prompt_template.format(customer_query=customer_query)
        messages = [{"role":"user","content":filled_prompt}]
        content, usage, cost, latency = call_chat(client, model, messages)
        completion_token=usage.completion_tokens
        prompt_tokens = usage.prompt_tokens
        total_tokens = usage.total_tokens
        # print(type(content), type(usage), type(cost), type(latency))
        table.add_data(customer_query, filled_prompt, content, latency,completion_token,prompt_tokens,total_tokens,cost)
        logg = {#"customer_query":customer_query,
                #"prompt":filled_prompt,
                #"output_content":content,
                "latency":latency,
                "completion_tokens":completion_token,
                "prompt_tokens":prompt_tokens,
                "total_tokens":total_tokens,
                "cost":cost}
        wandb.log(logg)
        run.log({"prompt_table":table})
        
    wandb.finish()
    
        
        
  

  0%|                                                                                           | 0/24 [00:00<?, ?it/s]

0,1
completion_tokens,▅▂▃█▅▂▁▁▁▄▇▁▄▄▂▄▃▁▁▂
cost,▅▂▃█▅▂▂▂▂▄▇▁▄▄▃▄▄▁▂▂
latency,▆▂▃█▅▂▁▁▁▃▇▁▃▄▃▄▃▁▁▂
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▅▂▃█▅▂▂▂▂▄▇▁▄▄▃▄▄▁▂▂

0,1
completion_tokens,82.0
cost,2e-05
latency,2.19281
prompt_tokens,45.0
total_tokens,127.0


  4%|███▍                                                                               | 1/24 [01:30<34:30, 90.02s/it]

0,1
completion_tokens,▆▃▁▇▇▄▂▅▅▅▆▄▅▆█▂▇▁▅▄
cost,▅▃▂█▆▄▂▅▅▅▆▃▅▆█▃▇▁▄▄
latency,█▂▁██▂▂█▅▆▅▂▃▅▅▄▆▃▂▄
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▅▃▂█▆▄▂▅▅▅▆▃▅▆█▃▇▁▄▄

0,1
completion_tokens,103.0
cost,2e-05
latency,2.58921
prompt_tokens,47.0
total_tokens,150.0


  8%|██████▉                                                                            | 2/24 [02:45<29:54, 81.55s/it]

0,1
completion_tokens,▂▂▆▇▅▂▁▃▃▅█▁▆█▆▃▄▁▂▃
cost,▃▂▆▇▆▃▂▃▃▅█▁▆█▆▃▅▁▂▄
latency,▁▁▃▅▄▁▁▂▂▂▄▁█▆▅▂▃▁▂▃
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▃▂▆▇▆▃▂▃▃▅█▁▆█▆▃▅▁▂▄

0,1
completion_tokens,97.0
cost,2e-05
latency,3.11908
prompt_tokens,48.0
total_tokens,145.0


 12%|██████████▍                                                                        | 3/24 [03:59<27:20, 78.13s/it]

0,1
completion_tokens,▅▂▄█▅▃▂▂▂▆█▁▅█▆▄▄▁▁▅
cost,▅▂▄█▅▃▂▂▃▅▇▁▅▇▆▄▅▁▂▄
latency,▄▃▄▆▇▃▁▂▂▅█▁▅▇▄▃▃▂▁▄
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▅▂▄█▅▃▂▂▃▅▇▁▅▇▆▄▅▁▂▄

0,1
completion_tokens,131.0
cost,3e-05
latency,2.70628
prompt_tokens,53.0
total_tokens,184.0


 17%|█████████████▊                                                                     | 4/24 [05:12<25:21, 76.08s/it]

0,1
completion_tokens,▃▂▄▄▅▂▁▂▂▃█▁▃▅▄▃▃▃▁▂
cost,▃▂▄▅▅▂▁▂▂▃█▁▄▅▄▃▄▂▁▂
latency,▃▂▄▂▄▁▁▃▄▃█▁▂▅▄▃▄▄▂▄
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▃▂▄▅▅▂▁▂▂▃█▁▄▅▄▃▄▂▁▂

0,1
completion_tokens,169.0
cost,3e-05
latency,5.55415
prompt_tokens,51.0
total_tokens,220.0


 21%|█████████████████▎                                                                 | 5/24 [07:08<28:38, 90.45s/it]

0,1
completion_tokens,▂▂▄▆▄▃▂▃▆▂▆▂▅█▅▃▃▁▂▅
cost,▃▂▄▇▄▄▂▄▆▃▆▂▆█▆▃▅▁▃▅
latency,▃▂▄█▄▃▃▃▇▂▆▃▄█▇▂▅▁▃▄
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▃▂▄▇▄▄▂▄▆▃▆▂▆█▆▃▅▁▃▅

0,1
completion_tokens,107.0
cost,2e-05
latency,2.11365
prompt_tokens,47.0
total_tokens,154.0


 25%|████████████████████▊                                                              | 6/24 [08:10<24:15, 80.83s/it]

0,1
completion_tokens,▄▁▂▆▄▁▁▃▂▄█▁▃▅▄▄▂▁▁▃
cost,▄▂▃▇▄▂▁▃▂▄█▁▃▆▄▄▃▁▂▃
latency,▃█▃▅▄▁▁▂▂▄▇▂▂▅▃▃▃▁▁▂
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▄▂▃▇▄▂▁▃▂▄█▁▃▆▄▄▃▁▂▃

0,1
completion_tokens,107.0
cost,2e-05
latency,2.35062
prompt_tokens,53.0
total_tokens,160.0


 29%|████████████████████████▏                                                          | 7/24 [09:32<23:01, 81.28s/it]

0,1
completion_tokens,▄▂▅▆▄▂▂▂▃▃█▁▅▆▅▄▄▁▂▄
cost,▄▂▅▇▅▂▂▃▃▄█▁▅▆▅▄▅▁▂▄
latency,▂▂▄▄▄▂▂▂▄▅▇▁█▅▃▄▃▁▂▃
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▄▂▅▇▅▂▂▃▃▄█▁▅▆▅▄▅▁▂▄

0,1
completion_tokens,115.0
cost,3e-05
latency,2.68166
prompt_tokens,53.0
total_tokens,168.0


 33%|███████████████████████████▋                                                       | 8/24 [10:51<21:27, 80.45s/it]

0,1
completion_tokens,▄▂▄█▅▂▁▂▂▅▇▁▄▇▅▃▃▁▁▃
cost,▄▂▄█▅▂▁▂▂▅▇▁▄▇▅▄▄▁▁▃
latency,▅▂▄█▆▂▁▂▂▄█▁▅▇▅▄▃▁▁▃
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▄▂▄█▅▂▁▂▂▅▇▁▄▇▅▄▄▁▁▃

0,1
completion_tokens,104.0
cost,2e-05
latency,2.44091
prompt_tokens,57.0
total_tokens,161.0


 38%|███████████████████████████████▏                                                   | 9/24 [12:06<19:41, 78.79s/it]

0,1
completion_tokens,▃▂▃█▆▂▂▂▃▆▇▁▃▃▃▃▆▁▁▃
cost,▃▂▃█▆▃▂▃▃▆▇▁▄▃▄▃▇▁▂▃
latency,▂▂▃▄▃▂▂▃▃▄█▁▂▂▂▂▁▁▁▂
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▃▂▃█▆▃▂▃▃▆▇▁▄▃▄▃▇▁▂▃

0,1
completion_tokens,100.0
cost,2e-05
latency,2.41638
prompt_tokens,52.0
total_tokens,152.0


 42%|██████████████████████████████████▏                                               | 10/24 [13:16<17:42, 75.88s/it]

0,1
completion_tokens,▄▂▄█▄▂▁▂▂▄█▁▄▄▄▄▄▁▂▃
cost,▄▂▄█▄▂▁▂▂▄▇▁▅▄▄▄▄▁▂▃
latency,▅▂▄▇▄▂▁▂▃▄█▂▄▄▃▄▄▁▂▃
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▄▂▄█▄▂▁▂▂▄▇▁▅▄▄▄▄▁▂▃

0,1
completion_tokens,128.0
cost,3e-05
latency,2.78043
prompt_tokens,56.0
total_tokens,184.0


 46%|█████████████████████████████████████▌                                            | 11/24 [14:41<17:03, 78.71s/it]

0,1
completion_tokens,▃▂▄█▅▃▂▂▃▄▆▁▃▆▄▃▆▂▂▄
cost,▃▂▄█▅▃▂▃▃▄▆▁▃▆▄▃▆▂▂▄
latency,▃▂▃▅▃▂▁▃▂▃▃▁▃▅▂▄█▂▃▂
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▃▂▄█▅▃▂▃▃▄▆▁▃▆▄▃▆▂▂▄

0,1
completion_tokens,121.0
cost,3e-05
latency,2.47914
prompt_tokens,56.0
total_tokens,177.0


 50%|█████████████████████████████████████████                                         | 12/24 [15:59<15:41, 78.47s/it]

0,1
completion_tokens,▄▁▄█▅▃▁▃▂▄▆▁▅▇▅▄▃▁▁▅
cost,▄▁▄█▅▃▂▃▂▄▆▁▅▆▅▄▄▁▁▅
latency,▅▁▅█▄▃▂▂▂▅█▁▆▆▃▄▃▁▂▇
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▄▁▄█▅▃▂▃▂▄▆▁▅▆▅▄▄▁▁▅

0,1
completion_tokens,216.0
cost,4e-05
latency,5.7304
prompt_tokens,58.0
total_tokens,274.0


 54%|████████████████████████████████████████████▍                                     | 13/24 [17:27<14:56, 81.52s/it]

0,1
completion_tokens,▃▂▃█▅▂▂▂▃▃▇▁▄▆▂▃▄▁▂▃
cost,▃▂▃█▅▂▂▃▃▃▇▁▅▆▃▃▅▁▂▃
latency,▄▂▄█▇▃▂▆▃▄█▁▄▆▃▄▄▁▂▃
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▃▂▃█▅▂▂▃▃▃▇▁▅▆▃▃▅▁▂▃

0,1
completion_tokens,95.0
cost,2e-05
latency,2.21271
prompt_tokens,51.0
total_tokens,146.0


 58%|███████████████████████████████████████████████▊                                  | 14/24 [18:39<13:06, 78.66s/it]

0,1
completion_tokens,▄▂▃█▆▂▂▂▄▃▄▁▅▄▆▃▃▂▂▃
cost,▄▂▃█▆▃▂▂▄▃▄▁▅▄▆▃▄▁▂▃
latency,▅▂▄▆█▂▂▂▃▁▆▂▅▃▃▁▁▂▁▃
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▄▂▃█▆▃▂▂▄▃▄▁▅▄▆▃▄▁▂▃

0,1
completion_tokens,110.0
cost,3e-05
latency,2.42585
prompt_tokens,58.0
total_tokens,168.0


 62%|███████████████████████████████████████████████████▎                              | 15/24 [19:45<11:13, 74.87s/it]

0,1
completion_tokens,▃▂▄█▅▃▂▄▃▄▇▁▅▇▄▅▃▁▂▄
cost,▃▂▄█▅▃▂▄▃▄▇▁▅▇▅▅▄▁▂▄
latency,▂▂▅█▃▁▃▁▁▅▂▂▆▄▂▄▂▁▂▄
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▃▂▄█▅▃▂▄▃▄▇▁▅▇▅▅▄▁▂▄

0,1
completion_tokens,151.0
cost,3e-05
latency,3.41868
prompt_tokens,58.0
total_tokens,209.0


 67%|██████████████████████████████████████████████████████▋                           | 16/24 [20:58<09:53, 74.13s/it]

0,1
completion_tokens,▅▂▄█▇▂▂▄▆▆█▄▄▇▆▅█▁▁▃
cost,▅▂▄█▇▂▂▄▆▅▇▃▄▆▆▅█▁▂▃
latency,▄▂▃▅█▂▁▆▆▃▅▆▂▄█▃▇▂▁▂
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▅▂▄█▇▂▂▄▆▅▇▃▄▆▆▅█▁▂▃

0,1
completion_tokens,158.0
cost,3e-05
latency,1.93479
prompt_tokens,56.0
total_tokens,214.0


 71%|██████████████████████████████████████████████████████████                        | 17/24 [22:19<08:54, 76.39s/it]

0,1
completion_tokens,▅▂▄▇▆▂▂▂▂▅█▁▃▅▇▅▅▄▂▅
cost,▅▂▄▇▆▂▂▃▃▅█▁▃▅▇▅▆▃▂▅
latency,▇▂▄▅▅▂▄▃▂▄▅▁▃▅█▄▄▅▃▅
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▅▂▄▇▆▂▂▃▃▅█▁▃▅▇▅▆▃▂▅

0,1
completion_tokens,318.0
cost,6e-05
latency,5.54172
prompt_tokens,79.0
total_tokens,397.0


 75%|█████████████████████████████████████████████████████████████▌                    | 18/24 [24:18<08:55, 89.18s/it]

0,1
completion_tokens,▆▂▄▅▃▄▁▂▃▄█▁▄█▃▅▃▁▂▂
cost,▆▂▄▆▃▄▂▃▃▄█▁▅▇▄▅▄▁▂▂
latency,▄▁▅▅▂▃▁▁▃▃▅▂▅█▆▃▂▁▂▁
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▆▂▄▆▃▄▂▃▃▄█▁▅▇▄▅▄▁▂▂

0,1
completion_tokens,76.0
cost,2e-05
latency,1.92259
prompt_tokens,78.0
total_tokens,154.0


 79%|████████████████████████████████████████████████████████████████▉                 | 19/24 [25:35<07:06, 85.36s/it]

0,1
completion_tokens,▃▃▃▆▄▃▁▅▄▃▆▁▇▇█▃█▂▁▄
cost,▃▂▃▆▄▃▁▄▄▃▅▁▆▆▇▃█▁▂▃
latency,▂▄▃▃▃▂▁▃▃▂█▂▆▆█▃▆▂▂▇
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▃▂▃▆▄▃▁▄▄▃▅▁▆▆▇▃█▁▂▃

0,1
completion_tokens,64.0
cost,3e-05
latency,1.87681
prompt_tokens,162.0
total_tokens,226.0


 83%|████████████████████████████████████████████████████████████████████▎             | 20/24 [26:22<04:55, 73.85s/it]

0,1
completion_tokens,▃▁▂█▄▂▂▂▅▂▆▃▅▆▇▃▅▃▃▂
cost,▃▁▂█▄▃▂▃▅▂▆▂▅▆▇▃▇▂▃▂
latency,▂▁█▄▄▁▁▁▅▂▄▄▄▂▇▂▃▂▃▂
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▃▁▂█▄▃▂▃▅▂▆▂▅▆▇▃▇▂▃▂

0,1
completion_tokens,42.0
cost,3e-05
latency,1.27252
prompt_tokens,136.0
total_tokens,178.0


 88%|███████████████████████████████████████████████████████████████████████▊          | 21/24 [27:13<03:21, 67.10s/it]

0,1
completion_tokens,▅▃▃██▃▂▂▃▃▅▁▄█▅▂▃▁▂▃
cost,▅▂▃█▇▃▂▃▃▃▅▁▄▇▆▃▅▁▂▃
latency,▅▂▃▇█▃▂▃▃▃▄▁▂▆▄▂▂▁▃▃
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▅▂▃█▇▃▂▃▃▃▅▁▄▇▆▃▅▁▂▃

0,1
completion_tokens,95.0
cost,3e-05
latency,2.21753
prompt_tokens,111.0
total_tokens,206.0


 92%|███████████████████████████████████████████████████████████████████████████▏      | 22/24 [28:19<02:13, 66.72s/it]

0,1
completion_tokens,▂▂▂▇▆▃▂▄▃▃▆▁▅█▄▄▃▄▂▄
cost,▃▂▃▇▆▃▂▄▄▃▆▁▅█▄▄▄▄▂▄
latency,▂▂▂▆█▃▁▄▃▂▅▁▆▆▄▃▃▄▂▄
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▃▂▃▇▆▃▂▄▄▃▆▁▅█▄▄▄▄▂▄

0,1
completion_tokens,166.0
cost,4e-05
latency,3.15354
prompt_tokens,84.0
total_tokens,250.0


 96%|██████████████████████████████████████████████████████████████████████████████▌   | 23/24 [29:40<01:11, 71.03s/it]

0,1
completion_tokens,▄▃▃█▆▃▂▃▃▅▇▁▅▆▅▄▄▄▂▃
cost,▄▂▃█▆▃▂▄▄▅▇▁▅▆▅▄▅▄▂▃
latency,▅▄▅▇█▂▃▂▅▅▆▁▅▄▆▃▄▄▂▄
prompt_tokens,▃▂▃▆▃▄▃▄▄▃▄▁▄▄▆▃█▁▃▃
total_tokens,▄▂▃█▆▃▂▄▄▅▇▁▅▆▅▄▅▄▂▃

0,1
completion_tokens,112.0
cost,3e-05
latency,2.84139
prompt_tokens,103.0
total_tokens,215.0


100%|██████████████████████████████████████████████████████████████████████████████████| 24/24 [30:55<00:00, 77.32s/it]
