# Identify products contained in each sale

Read sales and costs DataFrames

In [1]:
import pandas as pd
from unidecode import unidecode
from functions.df_helpers import replace_decimal_character
from services.read_data import read_tabular
from services.save_data import save_df
from utils.constants import *

In [2]:
# Read the three input tables for 2025/11: final sales (read with 'gold') and
# the two cost tables (read with 'bronze')
sales_df = read_tabular('gold', 'sales_final', '2025', '11')
products_costs_df, other_costs_df = (
    read_tabular('bronze', table_name, '2025', '11')
    for table_name in ('products_costs', 'other_costs')
)

# Lowercased, ASCII-transliterated version of every name in COSTS_COLS
normalized_columns_names = list(
    map(lambda name: unidecode(name.lower()), COSTS_COLS)
)

# Both cost tables get the same normalized header; this is a positional
# overwrite, so it assumes each table's columns are in COSTS_COLS order
for _costs_frame in (products_costs_df, other_costs_df):
    _costs_frame.columns = normalized_columns_names

In [3]:
# Lookup table: product description ('descricao') -> cost ('custo').
# If a description appears more than once, the last row wins.
product_costs_dict = {
    description: cost
    for description, cost in zip(
        products_costs_df['descricao'], products_costs_df['custo']
    )
}

# Identify products in each sale

Select columns and filter rows of interest in sales df

In [4]:
# Columns needed downstream, plus the flag column used only for filtering
columns_to_select = ['id_venda', 'unidades_vendidas', 'titulo_anuncio', 'count_produto']

# Keep only the rows whose count_produto flag equals True, restricted to the
# columns of interest
is_flagged_sale = sales_df['count_produto'] == True
sales_df = sales_df.loc[is_flagged_sale, columns_to_select]

# The flag has served its purpose: drop it from both the column list and the frame
columns_to_select.remove('count_produto')
sales_df = sales_df.drop(columns='count_produto')

Create products per sale DataFrame

In [5]:
# Empty frame that will hold one row per product contained in a sale, so a
# single sale row may be split into several rows here
products_per_sale_columns = [
    'id_venda',
    'unidades_vendidas',
    'categoria_produto',
    'custo_producao',
]
products_per_sale_df = pd.DataFrame(columns=products_per_sale_columns)

Iterate through sales DataFrame to identify products in each sale

In [6]:
# Collect one record per (sale, product) pair and build the DataFrame once at
# the end. Growing a DataFrame row by row with .loc re-allocates its data on
# every insertion (quadratic in the number of rows), and re-running the cell
# would append the same rows a second time.
product_rows = []

for row in sales_df.itertuples():

    # Lowercase the ad title so the keyword checks below are case-insensitive
    ad_title = row.titulo_anuncio.lower()

    # Every sale has exactly one main product: 'AKAI/FEM' when the title
    # mentions 'akai' or 'feminin', otherwise 'STR/BZ'
    if ('akai' in ad_title) or ('feminin' in ad_title):
        product_list = ['AKAI/FEM']
    else:
        product_list = ['STR/BZ']

    # 'FX' is added on top of the main product when the title mentions 'faixa'
    if 'faixa' in ad_title:
        product_list.append('FX')

    # One record per product; production cost = units sold * unit cost.
    # A product missing from product_costs_dict raises KeyError.
    for product in product_list:
        product_rows.append({
            'id_venda': row.id_venda,
            'unidades_vendidas': row.unidades_vendidas,
            'categoria_produto': product,
            'custo_producao': row.unidades_vendidas * product_costs_dict[product],
        })

# Reuse the column layout already declared for products_per_sale_df so the
# schema stays defined in one place (and is kept even when there are no rows)
products_per_sale_df = pd.DataFrame(
    product_rows,
    columns=list(products_per_sale_df.columns),
)

Save DataFrames

In [7]:
# Run both output frames through replace_decimal_character before saving
# (presumably swaps the decimal separator; see functions.df_helpers to confirm)
other_costs_df, products_per_sale_df = map(
    replace_decimal_character,
    (other_costs_df, products_per_sale_df),
)

# Persist each frame under its own table name for 2025/11
for _frame, _table_name in (
    (other_costs_df, 'other_costs'),
    (products_per_sale_df, 'products_per_sale'),
):
    save_df(_frame, 'gold', _table_name, '2025', '11')