### Lendo o arquivo

In [3]:
import pandas as pd
# Load the supermarket sales CSV into a DataFrame and preview its first ten rows.
sales_csv = 'files/supermarket_sales.csv'
dataset = pd.read_csv(sales_csv)
dataset.head(n=10)

Unnamed: 0,Invoice ID,Branch,City,Customer type,Gender,Product line,Unit price,Quantity,Tax 5%,Total,Date,Time,Payment,cogs,gross margin percentage,gross income,Rating
0,750-67-8428,A,Yangon,Member,Female,Health and beauty,74.69,7,26.1415,548.9715,1/5/2019,13:08,Ewallet,522.83,4.761905,26.1415,9.1
1,226-31-3081,C,Naypyitaw,Normal,Female,Electronic accessories,15.28,5,3.82,80.22,3/8/2019,10:29,Cash,76.4,4.761905,3.82,9.6
2,631-41-3108,A,Yangon,Normal,Male,Home and lifestyle,46.33,7,16.2155,340.5255,3/3/2019,13:23,Credit card,324.31,4.761905,16.2155,7.4
3,123-19-1176,A,Yangon,Member,Male,Health and beauty,58.22,8,23.288,489.048,1/27/2019,20:33,Ewallet,465.76,4.761905,23.288,8.4
4,373-73-7910,A,Yangon,Normal,Male,Sports and travel,86.31,7,30.2085,634.3785,2/8/2019,10:37,Ewallet,604.17,4.761905,30.2085,5.3
5,699-14-3026,C,Naypyitaw,Normal,Male,Electronic accessories,85.39,7,29.8865,627.6165,3/25/2019,18:30,Ewallet,597.73,4.761905,29.8865,4.1
6,355-53-5943,A,Yangon,Member,Female,Electronic accessories,68.84,6,20.652,433.692,2/25/2019,14:36,Ewallet,413.04,4.761905,20.652,5.8
7,315-22-5665,C,Naypyitaw,Normal,Female,Home and lifestyle,73.56,10,36.78,772.38,2/24/2019,11:38,Ewallet,735.6,4.761905,36.78,8.0
8,665-32-9167,A,Yangon,Member,Female,Health and beauty,36.26,2,3.626,76.146,1/10/2019,17:15,Credit card,72.52,4.761905,3.626,7.2
9,692-92-5582,B,Mandalay,Member,Female,Food and beverages,54.84,3,8.226,172.746,2/20/2019,13:27,Credit card,164.52,4.761905,8.226,5.9


In [1]:

import openai
import os
from dotenv import find_dotenv, load_dotenv

# Locate the nearest .env file and load its variables into the environment.
env_file = find_dotenv()
_ = load_dotenv(env_file)
# Build the OpenAI client from the key stored under the API_KEY variable.
client = openai.Client(api_key=os.getenv("API_KEY"))

In [4]:
# Upload the sales CSV with purpose "assistants" so it can be attached to an
# assistant tool later. The context manager closes the local file handle as
# soon as the upload call returns, instead of leaving it open for the life of
# the kernel.
with open("files/supermarket_sales.csv", "rb") as csv_file:
    file = client.files.create(
        file=csv_file,
        purpose="assistants"
    )

In [6]:
# Display the ID of the uploaded file object returned by the upload call.
file.id

'file-EcUEzp7wAso56trYptqHCu'

### Criando o assistente

In [23]:
# Create the assistant: a financial-analyst persona running on gpt-4o, with the
# code interpreter tool enabled and the uploaded CSV attached to that tool.
# NOTE(review): the instructions text contains the typo "Vcoê" (for "Você");
# it is left unchanged here because it is a runtime string sent to the API.
analyst_tools = [{"type": "code_interpreter"}]
analyst_resources = {"code_interpreter": {"file_ids": [file.id]}}
assistant = client.beta.assistants.create(
    model="gpt-4o",
    name="Analista financeiro",
    instructions="Você é um analista financeiro de um supermercado. Vcoê é responsável por analisar os dados financeiros do arquivo CSV de vendas do supermercado e fornecer insights para ajudar a tomar decisões estratégicas.",
    tools=analyst_tools,
    tool_resources=analyst_resources,
)

### Criando uma Thread

In [25]:
# Create a new thread (no initial messages are passed to the call).
thread = client.beta.threads.create()

In [34]:
# Post the user's question to the thread as a "user" message.
user_question = "Qual cidade tem o maior número de vendas?"
message = client.beta.threads.messages.create(
    content=user_question,
    role="user",
    thread_id=thread.id,
)

### Rodando a thread

In [35]:
# Start a run of the assistant on the thread.
# NOTE(review): per the Assistants API reference, a run-level `instructions`
# value overrides the assistant's own instructions for this run, whereas
# `additional_instructions` appends to them — confirm which is intended.
run_instructions = "Analise o arquivo CSV informado."
run = client.beta.threads.runs.create(
    assistant_id=assistant.id,
    thread_id=thread.id,
    instructions=run_instructions,
)

### Aguardando a thread rodar

In [36]:
import time
# Poll the run once per second, printing each refreshed status, until it is no
# longer queued, in progress, or cancelling.
pending_states = {"queued", "in_progress", "cancelling"}
while True:
    if run.status not in pending_states:
        break
    time.sleep(1)
    run = client.beta.threads.runs.retrieve(run_id=run.id, thread_id=thread.id)
    print(run.status)

in_progress
in_progress
completed


### Quando terminar de rodar

In [37]:
# After polling has finished: on success, list the thread's messages and print
# the raw page; otherwise print the failure details recorded on the run.
if run.status == "completed":
    messages = client.beta.threads.messages.list(
        thread_id=thread.id,
    )
    print(messages)
else:
    # Run objects expose failure details as `last_error`; they have no `error`
    # attribute, so reading `run.error` would raise AttributeError.
    print(run.last_error)

SyncCursorPage[Message](data=[Message(id='msg_MMSoqxVtAlZfl1q0oTj3alG3', assistant_id='asst_aYk5MmHNLzR3rsIAR401TECJ', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value='A cidade com o maior número de vendas é Yangon, com 340 transações.'), type='text')], created_at=1741717916, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='assistant', run_id='run_S2mesb22SvFuhpa35H3nvTtp', status=None, thread_id='thread_qk90So22Q7LBTloIkPyMEYeN'), Message(id='msg_E8b8ImF8hhy3xW43mDQThsEx', assistant_id='asst_aYk5MmHNLzR3rsIAR401TECJ', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value='Para determinar qual cidade tem o maior número de vendas, podemos analisar o número de transações (linhas) associadas a cada cidade. Vamos calcular isso agora.'), type='text')], created_at=1741717912, incomplete_at=None, incomplete_details=None, metadata={}, object='thread.message', role='assist

In [38]:
# Print the text of the first content block of the first message in the page
# (in the output shown above, that first message is the most recent one).
print(messages.data[0].content[0].text.value)

A cidade com o maior número de vendas é Yangon, com 340 transações.


### Analisando os passos do modelo

In [39]:
# Fetch the list of steps recorded for this run on the thread.
run_steps = client.beta.threads.runs.steps.list(run_id=run.id, thread_id=thread.id)

In [32]:
# Display the details of the first step in the returned list.
run_steps.data[0].step_details

MessageCreationStepDetails(message_creation=MessageCreation(message_id='msg_bKv3lZo4WFkMVXXPCHz8Tub3'), type='message_creation')

In [40]:
# Walk the run's steps in the reverse of the listed order and print what each
# one did: the code and outputs of code-interpreter calls, and the text of any
# message the step created. (The listing appears to arrive newest first, so the
# reversal prints the steps chronologically — confirm against the API docs.)
for step in reversed(run_steps.data):
    details = step.step_details
    print("#step:", details.type)
    if details.type == "tool_calls":
        for tool_call in details.tool_calls:
            print("> tool_call type:", tool_call.type)
            # Only code-interpreter calls carry a `code_interpreter` payload;
            # other tool types would raise AttributeError on that access.
            if tool_call.type != "code_interpreter":
                continue
            print("> input:")
            print(tool_call.code_interpreter.input)
            print("> outputs:", tool_call.code_interpreter.outputs)
    elif details.type == "message_creation":
        message = client.beta.threads.messages.retrieve(
            thread_id=thread.id, message_id=details.message_creation.message_id
        )
        # A message may hold non-text blocks (e.g. an image produced by the
        # code interpreter) or no blocks at all, so do not assume that
        # content[0] exists and is text.
        for block in message.content:
            if block.type == "text":
                print("--- message_text:", block.text.value)
            else:
                print("--- message_content:", block.type)


#step: message_creation
--- message_text: Para determinar qual cidade tem o maior número de vendas, podemos analisar o número de transações (linhas) associadas a cada cidade. Vamos calcular isso agora.
#step: tool_calls
> tool_call type: code_interpreter
> input:
# Contar o número de vendas (transações) por cidade
sales_by_city = data['City'].value_counts()
sales_by_city
> outputs: []
#step: message_creation
--- message_text: A cidade com o maior número de vendas é Yangon, com 340 transações.
