In [241]:
# packages gerais
import pandas as pd
import joblib


# df_sales_ dataset
df_sales_filtered_2019 = joblib.load('df_sales_filtered_2019.pkl')
df_sales_filtered = joblib.load('df_sales_filtered.pkl')

In [242]:
    df_sales_filtered_2019.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 61507 entries, 0 to 61506
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   store_id    61507 non-null  object 
 1   product_id  61507 non-null  object 
 2   year        61507 non-null  UInt32 
 3   week        61507 non-null  UInt32 
 4   sales       61507 non-null  float64
 5   revenue     61507 non-null  float64
 6   stock       61507 non-null  float64
 7   price       61507 non-null  float64
dtypes: UInt32(2), float64(4), object(2)
memory usage: 3.4+ MB


In [243]:
import datetime
# Function to get the last day of the week using ISO calendar
def get_last_day_of_iso_week(year, week):
    first_day_of_year = datetime.datetime(year, 1, 4)  # 4th January is always in the first ISO week
    first_monday_of_year = first_day_of_year - datetime.timedelta(days=first_day_of_year.weekday())
    week_start_date = first_monday_of_year + datetime.timedelta(weeks=week-1)
    return week_start_date + datetime.timedelta(days=6)

# Applying function to DataFrame
df_sales_filtered_2019['last_day_of_week'] = df_sales_filtered_2019.apply(
    lambda x: get_last_day_of_iso_week(x['year'], x['week']), axis=1
)

In [244]:
df_sales_filtered_2019[(df_sales_filtered_2019['store_id'] == 'S0097') & (df_sales_filtered_2019['product_id'] == 'P0704')].tail(30)

df_1 = df_sales_filtered_2019[(df_sales_filtered_2019['store_id'] == 'S0097') & (df_sales_filtered_2019['product_id'].isin(['', 'P0001']))]

In [245]:
df_sales_filtered_2019 = df_1

df_1

Unnamed: 0,store_id,product_id,year,week,sales,revenue,stock,price,last_day_of_week
49063,S0097,P0001,2019,33,0.0,0.0,1.0,10.95,2019-08-18
49064,S0097,P0001,2019,34,1.0,9.28,0.0,10.95,2019-08-25


In [246]:
# Set 'date' column as index and sort by date
df_sales_filtered_2019.set_index('last_day_of_week', inplace=True)
df_sales_filtered_2019.sort_index(inplace=True)

In [247]:
# Group by 'store_id' and 'product_id'
grouped = df_sales_filtered_2019.groupby(['store_id', 'product_id'])

# Create a DataFrame to store forecasts and a data frame to have those product/store with error
df_forecasts = pd.DataFrame(columns=['store_id', 'product_id', 'forecast_week_1', 'forecast_week_2', 'forecast_week_3'])
df_product_error = pd.DataFrame(columns=['store_id', 'product_id', 'error_message'])

In [248]:
from statsmodels.tsa.statespace.sarimax import SARIMAX
import warnings
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
import pandas as pd
import logging


# Criar o DataFrame exog a partir das colunas 'price' e 'stock'
df_exog = df_sales_filtered_2019[['price']]

# Codificar 'price' se necessário (caso tenha valores categóricos)
df_exog = pd.get_dummies(df_exog, columns=['price'], drop_first=True)

# Agrupar por store_id e product_id
grouped = df_sales_filtered_2019.groupby(['store_id', 'product_id'])

# Função para realizar busca de parâmetros para ARIMAX
def optimize_arimax(series, exog, p_values, d_value, q_values):
    best_aic = float("inf")
    best_order = None
    best_model = None
    for p, q in product(p_values, q_values):
        try:
            model = SARIMAX(series, exog=exog, order=(p, d_value, q))
            model_fit = model.fit(disp=False)
            aic = model_fit.aic
            if aic < best_aic:
                best_aic = aic
                best_order = (p, d_value, q)
                best_model = model_fit
        except ValueError as ve:
            logging.error(f"ValueError ao ajustar o modelo ARIMAX: {str(ve)}")
            continue
        except Exception as e:
            logging.error(f"Erro ao ajustar o modelo ARIMAX: {str(e)}")
            continue
    return best_order, best_model

# Função para calcular métricas
def calculate_metrics(actual, predicted):
    mse = mean_squared_error(actual, predicted)
    rmse = mean_squared_error(actual, predicted, squared=False)
    mae = mean_absolute_error(actual, predicted)
    mape = mean_absolute_percentage_error(actual, predicted)
    return mse, rmse, mae, mape

# Parâmetros para a busca de grid
p_values = range(0, 3)
d_value = 1
q_values = range(0, 3)

# DataFrames para armazenar resultados
df_forecasts_arimax = pd.DataFrame(columns=['store_id', 'product_id', 'forecast_week_1', 'forecast_week_2', 'forecast_week_3', 'ARIMAX'])
df_product_error_arimax = pd.DataFrame(columns=['store_id', 'product_id', 'error_message'])
df_metrics_arimax = pd.DataFrame(columns=['store_id', 'product_id', 'mse', 'rmse', 'mae', 'mape'])
print('antes do for')
# Iterar sobre cada grupo
for (store_id, product_id), group in grouped:
    # Reindexar para garantir intervalos semanais completos
    group = group.asfreq('W-SUN', method='pad')
    print('depois do for')    
    # Garantir que há pontos de dados suficientes para ajustar o modelo (mínimo 5 neste exemplo)
    if len(group) < 3:
        print('depois do len')
        print(store_id)    
        print(product_id)
        
        df_product_error_arimax = pd.concat([df_product_error_arimax, pd.DataFrame([{
            'store_id': 'S097',
            'product_id': 'P001',
            'error_message': 'Not enough data points to fit ARIMAX model'
        }])], ignore_index=False)
        continue
    print('antes do warning')
    warnings.filterwarnings("ignore")  # especificar para ignorar mensagens de aviso
    print('antes do exog')
    # Preparar variáveis exógenas (certificar-se de que 'df_exog' tenha o mesmo índice que 'group')
    exog = df_exog.loc[group.index]
    print('depois do exog')
    # Ajustar o modelo ARIMAX
    try:
        best_order, best_model = optimize_arimax(group['sales'], exog, p_values, d_value, q_values)
       
        if best_model is not None:
            # Prever vendas futuras (próximas 3 semanas)
            forecast = best_model.get_forecast(steps=3, exog=exog[-3:])
            print('antes do predict')
            forecast_values = forecast.predicted_mean
            print('depois do predict')
            # Adicionar a previsão ao DataFrame
            df_forecasts_arimax = pd.concat([df_forecasts_arimax, pd.DataFrame([{
                'store_id': store_id,
                'product_id': product_id,
                'forecast_week_1': forecast_values.iloc[0],
                'forecast_week_2': forecast_values.iloc[1],
                'forecast_week_3': forecast_values.iloc[2],
                'ARIMAX': best_order
            }])], ignore_index=True)
            
            # Calcular métricas (assumindo que você tenha vendas futuras reais para comparação)
            # Substituir 'actual_future_sales' pelos seus dados reais de vendas para as próximas 3 semanas
            actual_future_sales = group['sales'][-3:]  # Ajustar com base na disponibilidade dos dados reais
            if len(actual_future_sales) == 3:
                mse, rmse, mae, mape = calculate_metrics(actual_future_sales, forecast_values)
                # Adicionar as métricas ao DataFrame
                df_metrics_arimax = pd.concat([df_metrics_arimax, pd.DataFrame([{
                    'store_id': store_id,
                    'product_id': product_id,
                    'mse': mse,
                    'rmse': rmse,
                    'mae': mae,
                    'mape': mape
                }])], ignore_index=True)
            else:
                df_product_error_arimax = pd.concat([df_product_error_arimax, pd.DataFrame([{
                    'store_id': store_id,
                    'product_id': product_id,
                    'error_message': 'Not enough actual future data to calculate metrics'
                }])], ignore_index=True)
        else:
            df_product_error_arimax = pd.concat([df_product_error_arimax, pd.DataFrame([{
                'store_id': store_id,
                'product_id': product_id,
                'error_message': 'Failed to find suitable ARIMAX model'
            }])], ignore_index=True)
    except ValueError as ve:
        logging.error(f"ValueError fitting ARIMAX for Store: {store_id}, Product: {product_id}")
        logging.error(str(ve))
        df_product_error_arimax = pd.concat([df_product_error_arimax, pd.DataFrame([{
            'store_id': store_id,
            'product_id': product_id,
            'error_message': str(ve)
        }])], ignore_index=True)
    except Exception as e:
        logging.error(f"Error fitting ARIMAX for Store: {store_id}, Product: {product_id}")
        logging.error(str(e))
        df_product_error_arimax = pd.concat([df_product_error_arimax, pd.DataFrame([{
            'store_id': store_id,
            'product_id': product_id,
            'error_message': str(e)
        }])], ignore_index=True)

# Salvar os DataFrames em arquivos CSV (ou qualquer outro formato desejado)
df_forecasts_arimax.to_csv('forecasts_arimax.csv', index=False)
df_product_error_arimax.to_csv('product_errors_arimax.csv', index=False)
df_metrics_arimax.to_csv('metrics_results_arimax.csv', index=False)

# Exibir as primeiras linhas dos DataFrames de resultados
print("Previsões ARIMAX:")
print(df_forecasts_arimax.head())

print("\nErros de Produtos ARIMAX:")
print(df_product_error_arimax.head())

print("\nMétricas ARIMAX:")
print(df_metrics_arimax.head())


antes do for
depois do for
depois do len
S0097
P0001
Previsões ARIMAX:
Empty DataFrame
Columns: [store_id, product_id, forecast_week_1, forecast_week_2, forecast_week_3, ARIMAX]
Index: []

Erros de Produtos ARIMAX:
  store_id product_id                               error_message
0     S097       P001  Not enough data points to fit ARIMAX model

Métricas ARIMAX:
Empty DataFrame
Columns: [store_id, product_id, mse, rmse, mae, mape]
Index: []


In [249]:
df_metrics_arimax

Unnamed: 0,store_id,product_id,mse,rmse,mae,mape
