In [None]:
# general packages
import pandas as pd
import joblib


# Load the four pre-filtered sales frames from their joblib pickle files.
# NOTE: joblib.load unpickles arbitrary Python objects -- only load files
# that come from a trusted source.
(
    df_sales_filtered_all,
    df_sales_filtered_2018_2019,
    df_sales_filtered_2019,
    df_sales_filtered_last_6_month,
) = (
    joblib.load(pickle_path)
    for pickle_path in (
        'df_sales_filtered_all.pkl',
        'df_sales_filtered_2018_2019.pkl',
        'df_sales_filtered_2019.pkl',
        'df_sales_filtered_last_6_month.pkl',
    )
)

## 1 Create a date field based on the year and the week of the year

### 1.1 To be meaningful, the date should be the last day of that specific week

In [None]:
import datetime
# Function to get the last day of the week using ISO calendar
def get_last_day_of_iso_week(year, week):
    """Return the Sunday that closes ISO week ``week`` of ISO year ``year``.

    Args:
        year: ISO year number. Anything ``int()`` accepts is allowed (plain
            int, NumPy integer, integral float such as ``2019.0``).
        week: 1-based ISO week number, same accepted types as ``year``.
            The value is not range-checked: a week beyond the last ISO week
            of the year simply rolls over into the following weeks.

    Returns:
        ``datetime.datetime`` at midnight on the Sunday of the requested week.
    """
    # datetime.timedelta rejects NumPy integer scalars (the type pandas hands
    # out when a column is read directly), so normalise to built-in ints first.
    year = int(year)
    week = int(week)

    # 4 January always falls in ISO week 1; step back to that week's Monday.
    jan_fourth = datetime.datetime(year, 1, 4)
    week_one_monday = jan_fourth - datetime.timedelta(days=jan_fourth.weekday())

    # Monday of the requested week plus six days is its Sunday.
    return week_one_monday + datetime.timedelta(weeks=week - 1, days=6)

# Derive the week-ending date of every row from its 'year' and 'week' values
def _week_end_for_row(row):
    """Return the last day of the ISO week described by one sales row."""
    return get_last_day_of_iso_week(row['year'], row['week'])


df_sales_filtered_last_6_month['last_day_of_week'] = (
    df_sales_filtered_last_6_month.apply(_week_end_for_row, axis=1)
)

In [None]:
# Spot-check a single store/product pair: show the last 30 of its rows
is_store_s0097 = df_sales_filtered_last_6_month['store_id'] == 'S0097'
is_product_p0704 = df_sales_filtered_last_6_month['product_id'] == 'P0704'
df_sales_filtered_last_6_month[is_store_s0097 & is_product_p0704].tail(30)

### 1.2 Convert the data to a time series by setting the date as the index and sorting by it

In [None]:
# Use the 'last_day_of_week' column as the index and sort chronologically.
# The guard keeps this cell re-runnable: once the column has been moved into
# the index, calling set_index on it again would raise KeyError.
if df_sales_filtered_last_6_month.index.name != 'last_day_of_week':
    df_sales_filtered_last_6_month.set_index('last_day_of_week', inplace=True)
df_sales_filtered_last_6_month.sort_index(inplace=True)

In [None]:
# Print the frame summary (index type, column dtypes, non-null counts)
df_sales_filtered_last_6_month.info()

### 1.3 Prepare to apply ARIMA

In [None]:
# One group per (store_id, product_id) pair
series_keys = ['store_id', 'product_id']
grouped = df_sales_filtered_last_6_month.groupby(series_keys)

# Empty result tables: one for the three weekly forecasts of each pair,
# one for the pairs whose model could not be fitted
forecast_columns = series_keys + ['forecast_week_1', 'forecast_week_2', 'forecast_week_3']
error_columns = series_keys + ['error_message']
df_forecasts = pd.DataFrame(columns=forecast_columns)
df_product_error = pd.DataFrame(columns=error_columns)

In [None]:
from statsmodels.tsa.arima.model import ARIMA
import warnings

# Fit an ARIMA(1, 1, 1) model for every (store, product) series and forecast
# the next 3 weeks.
#
# Rows are collected in plain lists and appended to the result frames once at
# the end; growing a DataFrame with pd.concat inside the loop re-copies the
# whole frame on every iteration.
forecast_records = []
error_records = []

# Silence model-fitting warnings for this loop only; the previous warning
# filters are restored when the `with` block exits.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    for (store_id, product_id), group in grouped:
        try:
            # Reindex to complete weekly intervals (weeks ending on Sunday),
            # forward-filling missing weeks. Done inside the try so that a
            # series that cannot be reindexed (e.g. duplicate dates) is
            # logged as an error instead of aborting the whole run.
            weekly = group.asfreq('W-SUN', method='pad')

            # Ensure there are enough data points to fit the model
            if len(weekly) < 2:
                error_records.append({
                    'store_id': store_id,
                    'product_id': product_id,
                    'error_message': 'Not enough data points to fit ARIMA model'
                })
                continue

            model_fit = ARIMA(weekly['sales'], order=(1, 1, 1)).fit()

            # Forecast future sales (next 3 weeks)
            forecast = model_fit.forecast(steps=3)

            # The forecast Series is indexed by date, so the values must be
            # read by position with .iloc; forecast[0] relies on a positional
            # fallback that pandas has deprecated.
            forecast_records.append({
                'store_id': store_id,
                'product_id': product_id,
                'forecast_week_1': forecast.iloc[0],
                'forecast_week_2': forecast.iloc[1],
                'forecast_week_3': forecast.iloc[2]
            })
        except Exception as e:
            print(f"Error fitting ARIMA for Store: {store_id}, Product: {product_id}")
            print(str(e))
            error_records.append({
                'store_id': store_id,
                'product_id': product_id,
                'error_message': str(e)
            })

# Append everything gathered above to the result tables in a single step
if forecast_records:
    df_forecasts = pd.concat([df_forecasts, pd.DataFrame(forecast_records)], ignore_index=True)
if error_records:
    df_product_error = pd.concat([df_product_error, pd.DataFrame(error_records)], ignore_index=True)


In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima import auto_arima
from sklearn.metrics import mean_squared_error
import warnings
import joblib

# Reload the last-six-months sales frame from its pickle file
df_sales_filtered_last_6_month = joblib.load('df_sales_filtered_last_6_month.pkl')

# Fresh, empty frames that will collect the forecasts and the failed series
df_forecasts, df_product_error = pd.DataFrame(), pd.DataFrame()

# One group per (store_id, product_id) pair
group_keys = ['store_id', 'product_id']
grouped = df_sales_filtered_last_6_month.groupby(group_keys)

# Convert a year and week number into a date
def year_week_to_date(year, week):
    """Return the Sunday that closes ISO week ``week`` of ISO year ``year``.

    The previous format ``'%Y-W%U-%w'`` with weekday ``1`` produced the Monday
    of a Sunday-based (non-ISO) week. The result of this function is resampled
    with ``asfreq('W-SUN', method='pad')``; a Monday is never a ``'W-SUN'``
    label, so every value was carried to a different date and the most recent
    observation fell outside the resampled range. Returning the ISO-week
    Sunday makes each date land exactly on a ``'W-SUN'`` label.

    Args:
        year: ISO year number (anything ``int()`` accepts).
        week: 1-based ISO week number (anything ``int()`` accepts).

    Returns:
        ``pandas.Timestamp`` at midnight on the Sunday of the requested week.

    Raises:
        ValueError: if the year/week combination cannot be parsed.
    """
    # %G = ISO year, %V = ISO week number, %u = ISO weekday (7 = Sunday)
    return pd.to_datetime(f'{int(year)}-W{int(week):02d}-7', format='%G-W%V-%u')

# Compare ARIMA, SARIMA, ARIMAX and auto-ARIMA for every (store, product)
# series and store each model's forecast for the next weeks.

# Number of weeks to forecast; the same number of weeks is held out for the
# error estimate.
FORECAST_STEPS = 3
# Columns passed to ARIMAX as exogenous regressors.
# Assumption carried over from the original code -- confirm these are the
# intended exogenous variables.
EXOG_COLUMNS = ['revenue', 'stock', 'price']
MODEL_NAMES = ('arima', 'sarima', 'arimax', 'auto_arima')


def _fit_candidate_forecasts(history, future_exog, steps, trace=True):
    """Fit the four candidate models on ``history`` and forecast ``steps`` weeks.

    Args:
        history: weekly frame containing a 'sales' column and EXOG_COLUMNS.
        future_exog: exogenous values for the forecast horizon (``steps`` rows).
        steps: number of weeks to forecast.
        trace: forwarded to ``auto_arima`` (prints its model search when True).

    Returns:
        dict mapping each name in MODEL_NAMES to a NumPy array holding the
        ``steps`` forecast values of that model.
    """
    sales = history['sales']

    arima_forecast = ARIMA(sales, order=(1, 1, 1)).fit().forecast(steps=steps)

    # NOTE(review): a seasonal period of 12 is kept from the original code;
    # confirm it is the intended period for weekly data.
    sarima_forecast = SARIMAX(
        sales, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12)
    ).fit().forecast(steps=steps)

    arimax_forecast = SARIMAX(
        sales, order=(1, 1, 1), exog=history[EXOG_COLUMNS]
    ).fit().forecast(steps=steps, exog=future_exog)

    # Let pmdarima search for the best non-seasonal ARIMA order automatically
    auto_forecast = auto_arima(sales, seasonal=False, trace=trace).predict(n_periods=steps)

    candidates = {
        'arima': arima_forecast,
        'sarima': sarima_forecast,
        'arimax': arimax_forecast,
        'auto_arima': auto_forecast,
    }
    # The forecasts may come back as date-indexed Series; converting to arrays
    # makes positional access ([0], [1], ...) unambiguous.
    return {name: np.asarray(values) for name, values in candidates.items()}


# Rows are collected in lists and appended to the result frames once at the
# end instead of re-copying the frames with pd.concat on every iteration.
forecast_records = []
error_records = []

# Ignore warnings for this loop only; filters are restored afterwards.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    for (store_id, product_id), group in grouped:
        try:
            # Build the date index and reindex to complete weekly intervals.
            # assign() returns a new frame, so the groupby slice is not
            # written to. This step is inside the try so that one malformed
            # series is logged instead of aborting the whole run.
            week_dates = group.apply(
                lambda row: year_week_to_date(row['year'], row['week']), axis=1
            )
            weekly = (
                group.assign(date=week_dates)
                .set_index('date')
                .asfreq('W-SUN', method='pad')
            )

            # Check that there is enough data
            if len(weekly) < 2:
                error_records.append({
                    'store_id': store_id,
                    'product_id': product_id,
                    'error_message': 'Not enough data points to fit ARIMA model'
                })
                continue

            # Model comparison on a hold-out: fit on everything except the
            # last FORECAST_STEPS weeks and score the forecasts against those
            # weeks. Scoring forecasts of *future* weeks against the last
            # observed weeks (as was done before) compares different periods
            # and is not an error measure. This costs a second round of fits.
            train = weekly.iloc[:-FORECAST_STEPS]
            holdout = weekly.iloc[-FORECAST_STEPS:]
            holdout_forecasts = _fit_candidate_forecasts(
                train, holdout[EXOG_COLUMNS], FORECAST_STEPS, trace=False
            )
            mse = {
                name: mean_squared_error(holdout['sales'], holdout_forecasts[name])
                for name in MODEL_NAMES
            }

            # Final forecasts: refit on the full history. The future values of
            # the exogenous variables are unknown, so the last observed rows
            # are reused for them (same assumption as the original code).
            forecasts = _fit_candidate_forecasts(
                weekly, weekly[EXOG_COLUMNS].iloc[-FORECAST_STEPS:], FORECAST_STEPS
            )

            # Print the MSE results
            print(f'Store: {store_id}, Product: {product_id}')
            print(f'MSE ARIMA: {mse["arima"]}')
            print(f'MSE SARIMA: {mse["sarima"]}')
            print(f'MSE ARIMAX: {mse["arimax"]}')
            print(f'MSE Auto ARIMA: {mse["auto_arima"]}')

            # One output row per series: forecast_<model>_week_<n> columns
            record = {'store_id': store_id, 'product_id': product_id}
            for name in MODEL_NAMES:
                for step in range(FORECAST_STEPS):
                    record[f'forecast_{name}_week_{step + 1}'] = forecasts[name][step]
            forecast_records.append(record)
        except Exception as e:
            print(f"Error fitting models for Store: {store_id}, Product: {product_id}")
            print(str(e))
            error_records.append({
                'store_id': store_id,
                'product_id': product_id,
                'error_message': str(e)
            })

# Append everything gathered above to the result tables in a single step
if forecast_records:
    df_forecasts = pd.concat([df_forecasts, pd.DataFrame(forecast_records)], ignore_index=True)
if error_records:
    df_product_error = pd.concat([df_product_error, pd.DataFrame(error_records)], ignore_index=True)


In [None]:
# Display the forecast table filled by the model-comparison loop above
df_forecasts