# **SARIMA MODEL**

## **Libraries**

In [1]:
# Install seaborn
%pip install seaborn
import pandas as pd
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error
import matplotlib.pyplot as plt
import seaborn as sns
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 8, Finished, Available, Finished)


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.



## **Functions**

In [2]:
# This function tests if a time series is stationary

def check_stationarity(ts, name="series"):
    df_test = adfuller(ts) #Runs ADF test for stationarity
    adf = df_test[0] # Test statistic
    pvalue = df_test[1] # if pvalue < 0.05 reject H0 (The series is non-stationary)
    critical_value = df_test[4]["5%"]
    if (pvalue < 0.05) and (adf < critical_value):
        print(f"The {name} series is stationary")
    else:
        print(f"The {name} series is not stationary")

# This function makes the series differenciated and plots

def differencing(diff, name="series"):
    diff_data = diff.diff().dropna()
    diff_data.plot(title=f"Differenciated Series {name}")
    plt.show()

# This function checks again if the series is differenciated

def check_diff(diff_df, name="series"):
    diff_data = diff_df.diff().dropna() # apply diff and dropna to remove na on first row
    df_test = adfuller(diff_data) #Runs ADF test for stationarity
    adf = df_test[0] # Test statistic
    pvalue = df_test[1] # if pvalue < 0.05 reject H0 (The series is non-stationary)
    critical_value = df_test[4]["5%"]
    if (pvalue < 0.05) and (adf < critical_value):
        print(f"The {name} series is stationary")
    else:
        print(f"The {name} series is not stationary")

# This function plot auto & partial autocorrelation for each hierarchy df:

def auto_partial_corr(df, name="series"):
    diff_df = df.diff().dropna()
    plot_acf(diff_df, title=f"Autocorrelation {name}")
    plot_pacf(diff_df, title=f"Partial Autocorrelation {name}")


# This function creates train_test split to each hierarchy

def train_test_split(df, name="series"):
    train_data = df.iloc[:-4] # Train data without the last 4 weeks
    test_data = df.iloc[-4:] # Test data with the last 4 weeks

    print(f"DataFrame {name} train/test split done")
    print("\tTrain_data:",len(train_data)) #check how many observations on train set
    print("\tTest_data:",len(test_data)) #check how many observations on test set
    print("\tLast Obs. Train set",train_data.index.max()) #check last observations on train set
    print("\tFirst Obs. Test set",test_data.index.min()) #check first observations on test set

    return train_data, test_data

# Creation of a SARIMA _grid
def evaluate_sarima_model(y_train, y_test, order, seasonal_order):
    try:
        #Fit Sarima on training data
        smodel = SARIMAX(
            y_train,
            order=order,
            seasonal_order=seasonal_order,
            enforce_stationarity=False,
            enforce_invertibility=False
        )
        results_smodel = smodel.fit(disp=False)

        #Forecast the same number of weeks as in test set
        forecast_smodel = results_smodel.forecast(steps=len(y_test))

        #Evaluate forecast_grid against the true values
        mae_smodel = mean_absolute_error(y_test, forecast_smodel)
        return mae_smodel, results_smodel, forecast_smodel
    except:
        return float("inf")


# Sarima Manual Hyperparameter tuning (Find the best parameters)
def tune_sarima_model(
    y_train,
    y_test,
    #Example of parameters to test given PACF and ACF
    p_values=[0, 1, 2], 
    d_values=[1], 
    q_values=[0, 1], 
    P_values=[0, 1], 
    D_values=[1], 
    Q_values=[0, 1], 
    s=52, #weekly, assumes yearly sazonality
    name="series"
):
    best_mae = float("inf")
    best_parameters = None
    # For loop to run the order and seasonal parameters
    for p in p_values:
        for d in d_values:
            for q in q_values:
                for P in P_values:
                    for D in D_values:
                        for Q in Q_values:
                            order = (p, d, q)
                            seasonal_order = (P, D, Q, s)
                            try:
                                # SARIMA model implementation
                                model_grid = SARIMAX(
                                    y_train, # h0*["weekly_sales"] Target variable
                                    order=order, #(p, d, q)
                                    seasonal_order=seasonal_order, #(P, D, Q, s)
                                    enforce_stationarity=False,
                                    enforce_invertibility=False
                                )
                                results_model_grid = model_grid.fit(disp=False) # fit the model
                                forecast_grid = results_model_grid.forecast(steps=len(y_test)) # selection of time to forecast (4 weeks)
                                mae = mean_absolute_error(y_test, forecast_grid) # get mean absolute error
                                print(f"SARIMA{order}x{seasonal_order} - MAE: {mae:.2f}")
                                if mae < best_mae: 
                                    best_mae = mae # selects the lower mae since best_mae will always be greater since its "inf"
                                    best_parameters = (order, seasonal_order) # selects the best mae
                            except: # in case of error the model does not stop
                                print(f" Failed for SARIMA{order}x{seasonal_order}")
                                continue

    print(f"\n Best SARIMA config for {name}: {best_parameters} with MAE = {best_mae:.2f}")
    return best_parameters, best_mae, forecast_grid, results_model_grid

# Creation of a funtion to plot Forecast vs actual values
def plt_forecast_actuals(y_train, y_test, forecast_smodel, forecast_grid, name="series"):
    plt.figure(figsize=(10, 4))
    plt.plot(y_train.index, y_train, label="Train")
    plt.plot(y_test.index, y_test, label="Test")
    plt.plot(y_test.index, forecast_smodel, label="Forescast_Smodel")
    plt.plot(y_test.index, forecast_grid, label="Forecast_Grid")
    plt.title(f"SARIMA Forecast vs Actuals {name}")
    plt.legend()
    plt.show()

#Let comments on functions

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 10, Finished, Available, Finished)

## **Load Tables**

In [3]:
# Load tables
df = spark.read.table("ML_Area_.features.features_table")
df.show()

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 11, Finished, Available, Finished)

+--------+----------+----+----+-------------------+------------------+------------------+----------------+----------------+-------+-------------------+------------------+-------------------+-------------------+-------------------+--------------+-------------+-------------+----------+-------------+-------------+-------------+-------------+-------------+------------+----------+---------+----------+
|store_id|product_id|year|week|       weekly_sales|         avg_stock|         avg_price|promo_bin_1_rate|promo_bin_2_rate|num_obs|       holiday_rate|      weekend_rate|       lag_sales_1w|       lag_sales_2w|       lag_sales_3w|product_length|product_depth|product_width|cluster_id|hierarchy1_id|hierarchy2_id|hierarchy3_id|hierarchy4_id|hierarchy5_id|storetype_id|store_size|city_name|week_start|
+--------+----------+----+----+-------------------+------------------+------------------+----------------+----------------+-------+-------------------+------------------+-------------------+----------

## **Convert to Pandas**

In [4]:
#To able to manipulate the data, conversion to pandas was made
features_table = df.toPandas()
features_table.head()

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 12, Finished, Available, Finished)

Unnamed: 0,store_id,product_id,year,week,weekly_sales,avg_stock,avg_price,promo_bin_1_rate,promo_bin_2_rate,num_obs,...,cluster_id,hierarchy1_id,hierarchy2_id,hierarchy3_id,hierarchy4_id,hierarchy5_id,storetype_id,store_size,city_name,week_start
0,S0002,P0001,2017,4,0.0,1.0,6.5,0.0,0.0,7,...,cluster_5,H01,H0105,H010501,H01050100,H0105010006,ST04,39,Adana,2017-01-22
1,S0002,P0001,2017,5,0.0,1.0,6.5,0.0,0.0,7,...,cluster_5,H01,H0105,H010501,H01050100,H0105010006,ST04,39,Adana,2017-01-29
2,S0002,P0001,2017,6,0.0,1.0,6.5,0.0,0.0,7,...,cluster_5,H01,H0105,H010501,H01050100,H0105010006,ST04,39,Adana,2017-02-05
3,S0002,P0001,2017,7,0.0,1.0,6.5,0.0,0.0,7,...,cluster_5,H01,H0105,H010501,H01050100,H0105010006,ST04,39,Adana,2017-02-12
4,S0002,P0001,2017,8,0.0,1.0,6.5,0.0,0.0,7,...,cluster_5,H01,H0105,H010501,H01050100,H0105010006,ST04,39,Adana,2017-02-19


## **Filter Hierarchy & Store**

### **features_table_all**

In [5]:
features_table.head()

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 13, Finished, Available, Finished)

Unnamed: 0,store_id,product_id,year,week,weekly_sales,avg_stock,avg_price,promo_bin_1_rate,promo_bin_2_rate,num_obs,...,cluster_id,hierarchy1_id,hierarchy2_id,hierarchy3_id,hierarchy4_id,hierarchy5_id,storetype_id,store_size,city_name,week_start
0,S0002,P0001,2017,4,0.0,1.0,6.5,0.0,0.0,7,...,cluster_5,H01,H0105,H010501,H01050100,H0105010006,ST04,39,Adana,2017-01-22
1,S0002,P0001,2017,5,0.0,1.0,6.5,0.0,0.0,7,...,cluster_5,H01,H0105,H010501,H01050100,H0105010006,ST04,39,Adana,2017-01-29
2,S0002,P0001,2017,6,0.0,1.0,6.5,0.0,0.0,7,...,cluster_5,H01,H0105,H010501,H01050100,H0105010006,ST04,39,Adana,2017-02-05
3,S0002,P0001,2017,7,0.0,1.0,6.5,0.0,0.0,7,...,cluster_5,H01,H0105,H010501,H01050100,H0105010006,ST04,39,Adana,2017-02-12
4,S0002,P0001,2017,8,0.0,1.0,6.5,0.0,0.0,7,...,cluster_5,H01,H0105,H010501,H01050100,H0105010006,ST04,39,Adana,2017-02-19


In [6]:
features_table["store_id"].unique()

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 14, Finished, Available, Finished)

array(['S0002', 'S0003', 'S0005', 'S0007', 'S0010', 'S0012', 'S0014',
       'S0015', 'S0016', 'S0020', 'S0022', 'S0023', 'S0026', 'S0030',
       'S0032', 'S0036', 'S0038', 'S0039', 'S0040', 'S0041', 'S0045',
       'S0046', 'S0050', 'S0052', 'S0055', 'S0056', 'S0058', 'S0059',
       'S0061', 'S0062', 'S0067', 'S0068', 'S0071', 'S0072', 'S0073',
       'S0076', 'S0077', 'S0080', 'S0083', 'S0085', 'S0086', 'S0088',
       'S0089', 'S0091', 'S0092', 'S0094', 'S0095', 'S0097', 'S0099',
       'S0102', 'S0104', 'S0107', 'S0108', 'S0109', 'S0120', 'S0122',
       'S0126', 'S0131', 'S0132', 'S0136', 'S0141', 'S0142', 'S0143'],
      dtype=object)

### **features_table_h00**

In [7]:
# Creation of Hierarchy per store features_table's
features_table_h00_s0085 = features_table[(features_table["store_id"].isin(["S0085"])) & (features_table["hierarchy1_id"] == "H00")]
features_table_h00_s0095 = features_table[(features_table["store_id"].isin(["S0095"])) & (features_table["hierarchy1_id"] == "H00")]
features_table_h00_s0026 = features_table[(features_table["store_id"].isin(["S0026"])) & (features_table["hierarchy1_id"] == "H00")]
features_table_h00_s0080 = features_table[(features_table["store_id"].isin(["S0080"])) & (features_table["hierarchy1_id"] == "H00")]

print(f"S0085",features_table_h00_s0085.shape)
print(f"S0095",features_table_h00_s0095.shape)
print(f"S0026",features_table_h00_s0026.shape)
print(f"S0080",features_table_h00_s0080.shape)

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 15, Finished, Available, Finished)

S0085 (15414, 28)
S0095 (14349, 28)
S0026 (15214, 28)
S0080 (5499, 28)


### **features_table_h01**

In [8]:
# Creation of Hierarchy per store features_table's
features_table_h01_s0085 = features_table[(features_table["store_id"].isin(["S0085"])) & (features_table["hierarchy1_id"] == "H01")]
features_table_h01_s0095 = features_table[(features_table["store_id"].isin(["S0095"])) & (features_table["hierarchy1_id"] == "H01")]
features_table_h01_s0026 = features_table[(features_table["store_id"].isin(["S0026"])) & (features_table["hierarchy1_id"] == "H01")]
features_table_h01_s0080 = features_table[(features_table["store_id"].isin(["S0080"])) & (features_table["hierarchy1_id"] == "H01")]

print(f"S0085",features_table_h01_s0085.shape)
print(f"S0095",features_table_h01_s0095.shape)
print(f"S0026",features_table_h01_s0026.shape)
print(f"S0080",features_table_h01_s0080.shape)

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 16, Finished, Available, Finished)

S0085 (13842, 28)
S0095 (6426, 28)
S0026 (11710, 28)
S0080 (1459, 28)


### **features_table_h02**

In [9]:
# Creation of Hierarchy per store features_table's
features_table_h02_s0085 = features_table[(features_table["store_id"].isin(["S0085"])) & (features_table["hierarchy1_id"] == "H02")]
features_table_h02_s0095 = features_table[(features_table["store_id"].isin(["S0095"])) & (features_table["hierarchy1_id"] == "H02")]
features_table_h02_s0026 = features_table[(features_table["store_id"].isin(["S0026"])) & (features_table["hierarchy1_id"] == "H02")]
features_table_h02_s0080 = features_table[(features_table["store_id"].isin(["S0080"])) & (features_table["hierarchy1_id"] == "H02")]

print(f"S0085",features_table_h02_s0085.shape)
print(f"S0095",features_table_h02_s0095.shape)
print(f"S0026",features_table_h02_s0026.shape)
print(f"S0080",features_table_h02_s0080.shape)

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 17, Finished, Available, Finished)

S0085 (132, 28)
S0095 (21, 28)
S0026 (136, 28)
S0080 (30, 28)


### **features_table_h03**

In [10]:
# Creation of Hierarchy per store features_table's
features_table_h03_s0085 = features_table[(features_table["store_id"].isin(["S0085"])) & (features_table["hierarchy1_id"] == "H03")]
features_table_h03_s0095 = features_table[(features_table["store_id"].isin(["S0095"])) & (features_table["hierarchy1_id"] == "H03")]
features_table_h03_s0026 = features_table[(features_table["store_id"].isin(["S0026"])) & (features_table["hierarchy1_id"] == "H03")]
features_table_h03_s0080 = features_table[(features_table["store_id"].isin(["S0080"])) & (features_table["hierarchy1_id"] == "H03")]

print(f"S0085",features_table_h03_s0085.shape)
print(f"S0095",features_table_h03_s0095.shape)
print(f"S0026",features_table_h03_s0026.shape)
print(f"S0080",features_table_h03_s0080.shape)

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 18, Finished, Available, Finished)

S0085 (16695, 28)
S0095 (4090, 28)
S0026 (10754, 28)
S0080 (2127, 28)


## **Modelling Tables**

### **features_table_model_full_dataset**

In [11]:
features_table_model = (round(features_table
       .groupby("week_start")[["weekly_sales", "avg_stock", "promo_bin_1_rate", "promo_bin_2_rate", "avg_price", "store_id", "product_id", "store_size", "holiday_rate", "weekend_rate", "lag_sales_1w", "lag_sales_2w", "lag_sales_3w"]]
       .agg({
           "weekly_sales": "sum", # total weekly weekly_sales 
           "avg_stock": "mean", #avg avg_stock for the week #maybe test median
          "promo_bin_1_rate": "sum", # % of products with promo
           "promo_bin_2_rate": "sum", # % of products with promo
           "avg_price": "sum", # average avg_price in the week
           "product_id": pd.Series.nunique, # how many unique products were sold this week (breadth)
           "store_size": "mean", # average store size
           "holiday_rate": "sum", # rate of holidays per week 
            "weekend_rate": "mean", #rate of weekend per week
            "lag_sales_1w": "mean", # sales from one week prior
            "lag_sales_2w": "mean", # sales from two weeks prior
            "lag_sales_3w": "mean", # sales from three weeks prior
       })
       .rename(columns={"product_id": "n_unique_products"})
       ,2))

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 19, Finished, Available, Finished)

### **features_table_h00_model**

In [12]:
# Creation of modelling tables grouped by weekly_sales of each hierarchy_id

features_table_h00_model_s0085 = (round(features_table_h00_s0085
       .groupby("week_start")[["weekly_sales", "avg_stock", "promo_bin_1_rate", "promo_bin_2_rate", "avg_price", "store_id", "product_id", "store_size", "holiday_rate", "weekend_rate", "lag_sales_1w", "lag_sales_2w", "lag_sales_3w"]]
       .agg({
           "weekly_sales": "sum", # total weekly weekly_sales 
           "avg_stock": "mean", #avg avg_stock for the week #maybe test median
          "promo_bin_1_rate": "sum", # % of products with promo
           "promo_bin_2_rate": "sum", # % of products with promo
           "avg_price": "sum", # average avg_price in the week
           "product_id": pd.Series.nunique, # how many unique products were sold this week (breadth)
           "store_size": "mean", # average store size
           "holiday_rate": "sum", # rate of holidays per week 
            "weekend_rate": "mean", #rate of weekend per week
            "lag_sales_1w": "mean", # sales from one week prior
            "lag_sales_2w": "mean", # sales from two weeks prior
            "lag_sales_3w": "mean", # sales from three weeks prior
       })
       .rename(columns={"product_id": "n_unique_products"})
       ,2))


features_table_h00_model_s0095 = (round(features_table_h00_s0095
       .groupby("week_start")[["weekly_sales", "avg_stock", "promo_bin_1_rate", "promo_bin_2_rate", "avg_price", "store_id", "product_id", "store_size", "holiday_rate", "weekend_rate", "lag_sales_1w", "lag_sales_2w", "lag_sales_3w"]]
       .agg({
           "weekly_sales": "sum",  
           "avg_stock": "mean", 
          "promo_bin_1_rate": "sum", 
           "promo_bin_2_rate": "sum", 
           "avg_price": "sum", 
           "product_id": pd.Series.nunique, 
           "store_size": "mean", 
           "holiday_rate": "sum",
            "weekend_rate": "mean",
            "lag_sales_1w": "mean", 
            "lag_sales_2w": "mean",
            "lag_sales_3w": "mean",
       })
       .rename(columns={"product_id": "n_unique_products"})
       ,2))

features_table_h00_model_s0026 = (round(features_table_h00_s0026
       .groupby("week_start")[["weekly_sales", "avg_stock", "promo_bin_1_rate", "promo_bin_2_rate", "avg_price", "store_id", "product_id", "store_size", "holiday_rate", "weekend_rate", "lag_sales_1w", "lag_sales_2w", "lag_sales_3w"]]
       .agg({
           "weekly_sales": "sum", 
           "avg_stock": "mean", 
          "promo_bin_1_rate": "sum", 
           "promo_bin_2_rate": "sum", 
           "avg_price": "sum", 
           "product_id": pd.Series.nunique, 
           "store_size": "mean", 
           "holiday_rate": "sum", 
            "weekend_rate": "mean", 
            "lag_sales_1w": "mean", 
            "lag_sales_2w": "mean", 
            "lag_sales_3w": "mean", 
       })
       .rename(columns={"product_id": "n_unique_products"})
       ,2))

features_table_h00_model_s0080 = (round(features_table_h00_s0080
       .groupby("week_start")[["weekly_sales", "avg_stock", "promo_bin_1_rate", "promo_bin_2_rate", "avg_price", "store_id", "product_id", "store_size", "holiday_rate", "weekend_rate", "lag_sales_1w", "lag_sales_2w", "lag_sales_3w"]]
       .agg({
           "weekly_sales": "sum", 
           "avg_stock": "mean", 
          "promo_bin_1_rate": "sum",
           "promo_bin_2_rate": "sum", 
           "avg_price": "sum", 
           "product_id": pd.Series.nunique, 
           "store_size": "mean", 
           "holiday_rate": "sum",
            "weekend_rate": "mean",
            "lag_sales_1w": "mean", 
            "lag_sales_2w": "mean",
            "lag_sales_3w": "mean",
       })
       .rename(columns={"product_id": "n_unique_products"})
       ,2))

features_table_h00_model_s0085.head()

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 20, Finished, Available, Finished)

Unnamed: 0_level_0,weekly_sales,avg_stock,promo_bin_1_rate,promo_bin_2_rate,avg_price,n_unique_products,store_size,holiday_rate,weekend_rate,lag_sales_1w,lag_sales_2w,lag_sales_3w
week_start,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017-01-22,421.6,56.7,0.0,0.0,587.05,93,60.0,0.0,0.28,4.37,3.97,3.15
2017-01-29,400.24,48.66,0.0,0.0,587.16,93,60.0,0.0,0.28,4.58,4.44,3.98
2017-02-05,489.18,75.88,0.0,0.0,587.33,94,60.0,0.0,0.28,5.36,5.63,5.29
2017-02-12,393.42,64.03,0.0,0.0,572.95,92,60.0,0.0,0.29,5.3,5.47,5.72
2017-02-19,369.52,69.12,0.0,0.0,573.0,92,60.0,0.0,0.29,4.28,5.3,5.47


### **features_table_h01_model**

In [13]:
# Creation of modelling tables grouped by weekly_sales of each hierarchy_id

features_table_h01_model_s0085 = (round(features_table_h01_s0085
       .groupby("week_start")[["weekly_sales", "avg_stock", "promo_bin_1_rate", "promo_bin_2_rate", "avg_price", "store_id", "product_id", "store_size", "holiday_rate", "weekend_rate", "lag_sales_1w", "lag_sales_2w", "lag_sales_3w"]]
       .agg({
           "weekly_sales": "sum", # total weekly weekly_sales 
           "avg_stock": "mean", #avg avg_stock for the week #maybe test median
          "promo_bin_1_rate": "sum", # % of products with promo
           "promo_bin_2_rate": "sum", # % of products with promo
           "avg_price": "sum", # average avg_price in the week
           "product_id": pd.Series.nunique, # how many unique products were sold this week (breadth)
           "store_size": "mean", # average store size
           "holiday_rate": "sum", # rate of holidays per week 
            "weekend_rate": "mean", #rate of weekend per week
            "lag_sales_1w": "mean", # sales from one week prior
            "lag_sales_2w": "mean", # sales from two weeks prior
            "lag_sales_3w": "mean", # sales from three weeks prior
       })
       .rename(columns={"product_id": "n_unique_products"})
       ,2))


features_table_h01_model_s0095 = (round(features_table_h01_s0095
       .groupby("week_start")[["weekly_sales", "avg_stock", "promo_bin_1_rate", "promo_bin_2_rate", "avg_price", "store_id", "product_id", "store_size", "holiday_rate", "weekend_rate", "lag_sales_1w", "lag_sales_2w", "lag_sales_3w"]]
       .agg({
           "weekly_sales": "sum",  
           "avg_stock": "mean", 
          "promo_bin_1_rate": "sum", 
           "promo_bin_2_rate": "sum", 
           "avg_price": "sum", 
           "product_id": pd.Series.nunique, 
           "store_size": "mean", 
           "holiday_rate": "sum",
            "weekend_rate": "mean",
            "lag_sales_1w": "mean", 
            "lag_sales_2w": "mean",
            "lag_sales_3w": "mean",
       })
       .rename(columns={"product_id": "n_unique_products"})
       ,2))

features_table_h01_model_s0026 = (round(features_table_h01_s0026
       .groupby("week_start")[["weekly_sales", "avg_stock", "promo_bin_1_rate", "promo_bin_2_rate", "avg_price", "store_id", "product_id", "store_size", "holiday_rate", "weekend_rate", "lag_sales_1w", "lag_sales_2w", "lag_sales_3w"]]
       .agg({
           "weekly_sales": "sum", 
           "avg_stock": "mean", 
          "promo_bin_1_rate": "sum", 
           "promo_bin_2_rate": "sum", 
           "avg_price": "sum", 
           "product_id": pd.Series.nunique, 
           "store_size": "mean", 
           "holiday_rate": "sum", 
            "weekend_rate": "mean", 
            "lag_sales_1w": "mean", 
            "lag_sales_2w": "mean", 
            "lag_sales_3w": "mean", 
       })
       .rename(columns={"product_id": "n_unique_products"})
       ,2))

features_table_h01_model_s0080 = (round(features_table_h01_s0080
       .groupby("week_start")[["weekly_sales", "avg_stock", "promo_bin_1_rate", "promo_bin_2_rate", "avg_price", "store_id", "product_id", "store_size", "holiday_rate", "weekend_rate", "lag_sales_1w", "lag_sales_2w", "lag_sales_3w"]]
       .agg({
           "weekly_sales": "sum", 
           "avg_stock": "mean", 
          "promo_bin_1_rate": "sum",
           "promo_bin_2_rate": "sum", 
           "avg_price": "sum", 
           "product_id": pd.Series.nunique, 
           "store_size": "mean", 
           "holiday_rate": "sum",
            "weekend_rate": "mean",
            "lag_sales_1w": "mean", 
            "lag_sales_2w": "mean",
            "lag_sales_3w": "mean",
       })
       .rename(columns={"product_id": "n_unique_products"})
       ,2))

features_table_h01_model_s0085.head()

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 21, Finished, Available, Finished)

Unnamed: 0_level_0,weekly_sales,avg_stock,promo_bin_1_rate,promo_bin_2_rate,avg_price,n_unique_products,store_size,holiday_rate,weekend_rate,lag_sales_1w,lag_sales_2w,lag_sales_3w
week_start,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017-01-22,34.48,17.79,0.0,0.0,1268.52,73,60.0,0.0,0.29,1.38,0.43,0.38
2017-01-29,44.34,19.07,0.0,0.0,1278.49,73,60.0,0.0,0.28,0.47,1.38,0.43
2017-02-05,50.61,19.45,0.0,0.0,1323.81,75,60.0,0.0,0.28,0.6,0.46,1.35
2017-02-12,36.55,22.21,0.0,0.0,1303.42,74,60.0,0.0,0.28,0.67,0.6,0.45
2017-02-19,34.06,22.11,0.0,0.0,1312.13,74,60.0,0.0,0.29,0.47,0.66,0.6


### **features_table_h02_model**

In [14]:
# Creation of modelling tables grouped by weekly_sales of each hierarchy_id

features_table_h02_model_s0085 = (round(features_table_h02_s0085
       .groupby("week_start")[["weekly_sales", "avg_stock", "promo_bin_1_rate", "promo_bin_2_rate", "avg_price", "store_id", "product_id", "store_size", "holiday_rate", "weekend_rate", "lag_sales_1w", "lag_sales_2w", "lag_sales_3w"]]
       .agg({
           "weekly_sales": "sum", # total weekly weekly_sales 
           "avg_stock": "mean", #avg avg_stock for the week #maybe test median
          "promo_bin_1_rate": "sum", # % of products with promo
           "promo_bin_2_rate": "sum", # % of products with promo
           "avg_price": "sum", # average avg_price in the week
           "product_id": pd.Series.nunique, # how many unique products were sold this week (breadth)
           "store_size": "mean", # average store size
           "holiday_rate": "sum", # rate of holidays per week 
            "weekend_rate": "mean", #rate of weekend per week
            "lag_sales_1w": "mean", # sales from one week prior
            "lag_sales_2w": "mean", # sales from two weeks prior
            "lag_sales_3w": "mean", # sales from three weeks prior
       })
       .rename(columns={"product_id": "n_unique_products"})
       ,2))


features_table_h02_model_s0095 = (round(features_table_h02_s0095
       .groupby("week_start")[["weekly_sales", "avg_stock", "promo_bin_1_rate", "promo_bin_2_rate", "avg_price", "store_id", "product_id", "store_size", "holiday_rate", "weekend_rate", "lag_sales_1w", "lag_sales_2w", "lag_sales_3w"]]
       .agg({
           "weekly_sales": "sum",  
           "avg_stock": "mean", 
          "promo_bin_1_rate": "sum", 
           "promo_bin_2_rate": "sum", 
           "avg_price": "sum", 
           "product_id": pd.Series.nunique, 
           "store_size": "mean", 
           "holiday_rate": "sum",
            "weekend_rate": "mean",
            "lag_sales_1w": "mean", 
            "lag_sales_2w": "mean",
            "lag_sales_3w": "mean",
       })
       .rename(columns={"product_id": "n_unique_products"})
       ,2))

features_table_h02_model_s0026 = (round(features_table_h02_s0026
       .groupby("week_start")[["weekly_sales", "avg_stock", "promo_bin_1_rate", "promo_bin_2_rate", "avg_price", "store_id", "product_id", "store_size", "holiday_rate", "weekend_rate", "lag_sales_1w", "lag_sales_2w", "lag_sales_3w"]]
       .agg({
           "weekly_sales": "sum", 
           "avg_stock": "mean", 
          "promo_bin_1_rate": "sum", 
           "promo_bin_2_rate": "sum", 
           "avg_price": "sum", 
           "product_id": pd.Series.nunique, 
           "store_size": "mean", 
           "holiday_rate": "sum", 
            "weekend_rate": "mean", 
            "lag_sales_1w": "mean", 
            "lag_sales_2w": "mean", 
            "lag_sales_3w": "mean", 
       })
       .rename(columns={"product_id": "n_unique_products"})
       ,2))

features_table_h02_model_s0080 = (round(features_table_h02_s0080
       .groupby("week_start")[["weekly_sales", "avg_stock", "promo_bin_1_rate", "promo_bin_2_rate", "avg_price", "store_id", "product_id", "store_size", "holiday_rate", "weekend_rate", "lag_sales_1w", "lag_sales_2w", "lag_sales_3w"]]
       .agg({
           "weekly_sales": "sum", 
           "avg_stock": "mean", 
          "promo_bin_1_rate": "sum",
           "promo_bin_2_rate": "sum", 
           "avg_price": "sum", 
           "product_id": pd.Series.nunique, 
           "store_size": "mean", 
           "holiday_rate": "sum",
            "weekend_rate": "mean",
            "lag_sales_1w": "mean", 
            "lag_sales_2w": "mean",
            "lag_sales_3w": "mean",
       })
       .rename(columns={"product_id": "n_unique_products"})
       ,2))

features_table_h02_model_s0085.head()

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 22, Finished, Available, Finished)

Unnamed: 0_level_0,weekly_sales,avg_stock,promo_bin_1_rate,promo_bin_2_rate,avg_price,n_unique_products,store_size,holiday_rate,weekend_rate,lag_sales_1w,lag_sales_2w,lag_sales_3w
week_start,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017-01-22,10.51,7.64,0.0,0.0,12.99,2,60.0,0.0,0.14,4.81,5.03,5.03
2017-01-29,5.22,25.01,0.0,0.0,2.99,1,60.0,0.0,0.29,8.99,8.27,9.56
2017-02-05,6.94,5.32,0.0,0.0,12.99,2,60.0,0.0,0.39,3.37,5.17,4.39
2017-02-12,7.05,32.87,0.0,0.0,3.28,1,60.0,0.0,0.29,6.18,5.22,8.99
2017-02-19,8.58,5.63,0.0,0.0,12.99,2,60.0,0.0,0.14,3.91,3.85,3.28


### **features_table_h03_model**

In [15]:
# Creation of modelling tables grouped by weekly_sales of each hierarchy_id

features_table_h03_model_s0085 = (round(features_table_h03_s0085
       .groupby("week_start")[["weekly_sales", "avg_stock", "promo_bin_1_rate", "promo_bin_2_rate", "avg_price", "store_id", "product_id", "store_size", "holiday_rate", "weekend_rate", "lag_sales_1w", "lag_sales_2w", "lag_sales_3w"]]
       .agg({
           "weekly_sales": "sum", # total weekly weekly_sales 
           "avg_stock": "mean", #avg avg_stock for the week #maybe test median
          "promo_bin_1_rate": "sum", # % of products with promo
           "promo_bin_2_rate": "sum", # % of products with promo
           "avg_price": "sum", # average avg_price in the week
           "product_id": pd.Series.nunique, # how many unique products were sold this week (breadth)
           "store_size": "mean", # average store size
           "holiday_rate": "sum", # rate of holidays per week 
            "weekend_rate": "mean", #rate of weekend per week
            "lag_sales_1w": "mean", # sales from one week prior
            "lag_sales_2w": "mean", # sales from two weeks prior
            "lag_sales_3w": "mean", # sales from three weeks prior
       })
       .rename(columns={"product_id": "n_unique_products"})
       ,2))


features_table_h03_model_s0095 = (round(features_table_h03_s0095
       .groupby("week_start")[["weekly_sales", "avg_stock", "promo_bin_1_rate", "promo_bin_2_rate", "avg_price", "store_id", "product_id", "store_size", "holiday_rate", "weekend_rate", "lag_sales_1w", "lag_sales_2w", "lag_sales_3w"]]
       .agg({
           "weekly_sales": "sum",  
           "avg_stock": "mean", 
          "promo_bin_1_rate": "sum", 
           "promo_bin_2_rate": "sum", 
           "avg_price": "sum", 
           "product_id": pd.Series.nunique, 
           "store_size": "mean", 
           "holiday_rate": "sum",
            "weekend_rate": "mean",
            "lag_sales_1w": "mean", 
            "lag_sales_2w": "mean",
            "lag_sales_3w": "mean",
       })
       .rename(columns={"product_id": "n_unique_products"})
       ,2))

features_table_h03_model_s0026 = (round(features_table_h03_s0026
       .groupby("week_start")[["weekly_sales", "avg_stock", "promo_bin_1_rate", "promo_bin_2_rate", "avg_price", "store_id", "product_id", "store_size", "holiday_rate", "weekend_rate", "lag_sales_1w", "lag_sales_2w", "lag_sales_3w"]]
       .agg({
           "weekly_sales": "sum", 
           "avg_stock": "mean", 
          "promo_bin_1_rate": "sum", 
           "promo_bin_2_rate": "sum", 
           "avg_price": "sum", 
           "product_id": pd.Series.nunique, 
           "store_size": "mean", 
           "holiday_rate": "sum", 
            "weekend_rate": "mean", 
            "lag_sales_1w": "mean", 
            "lag_sales_2w": "mean", 
            "lag_sales_3w": "mean", 
       })
       .rename(columns={"product_id": "n_unique_products"})
       ,2))

features_table_h03_model_s0080 = (round(features_table_h03_s0080
       .groupby("week_start")[["weekly_sales", "avg_stock", "promo_bin_1_rate", "promo_bin_2_rate", "avg_price", "store_id", "product_id", "store_size", "holiday_rate", "weekend_rate", "lag_sales_1w", "lag_sales_2w", "lag_sales_3w"]]
       .agg({
           "weekly_sales": "sum", 
           "avg_stock": "mean", 
          "promo_bin_1_rate": "sum",
           "promo_bin_2_rate": "sum", 
           "avg_price": "sum", 
           "product_id": pd.Series.nunique, 
           "store_size": "mean", 
           "holiday_rate": "sum",
            "weekend_rate": "mean",
            "lag_sales_1w": "mean", 
            "lag_sales_2w": "mean",
            "lag_sales_3w": "mean",
       })
       .rename(columns={"product_id": "n_unique_products"})
       ,2))

features_table_h03_model_s0085.head()

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 23, Finished, Available, Finished)

Unnamed: 0_level_0,weekly_sales,avg_stock,promo_bin_1_rate,promo_bin_2_rate,avg_price,n_unique_products,store_size,holiday_rate,weekend_rate,lag_sales_1w,lag_sales_2w,lag_sales_3w
week_start,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2017-01-22,25.77,26.8,0.0,0.0,1350.06,71,60.0,0.0,0.28,0.39,0.57,0.35
2017-01-29,31.59,25.57,0.0,0.0,1385.59,75,60.0,0.0,0.29,0.37,0.4,0.58
2017-02-05,20.65,25.33,0.0,0.0,1385.19,75,60.0,0.0,0.27,0.42,0.37,0.4
2017-02-12,24.63,25.52,0.0,0.0,1374.13,74,60.0,0.0,0.28,0.27,0.37,0.32
2017-02-19,20.26,25.06,0.0,0.0,1403.74,76,60.0,0.0,0.28,0.36,0.31,0.39


## **Stationarity, Diff, ACF PACF** 

### **Check Stationarity** 

In [16]:
#check_stationarity(features_table_h00_model["weekly_sales"].values, name="H00")
#check_stationarity(features_table_h01_model["weekly_sales"].values, name="H01")
#check_stationarity(features_table_h02_model["weekly_sales"].values, name="H02")
#check_stationarity(features_table_h03_model["weekly_sales"].values, name="H03")

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 24, Finished, Available, Finished)

### **Differencing** 

In [17]:
#differencing(features_table_h00_model["weekly_sales"], name="H00")
#differencing(features_table_h01_model["weekly_sales"], name="H01")
#differencing(features_table_h02_model["weekly_sales"], name="H02")
#differencing(features_table_h03_model["weekly_sales"], name="H03")

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 25, Finished, Available, Finished)

In [18]:
# Checking again after diff:
#check_diff(features_table_h00_model['weekly_sales'], name="H00")
#check_diff(features_table_h01_model['weekly_sales'], name="H01")
#check_diff(features_table_h02_model['weekly_sales'], name="H02")
#check_diff(features_table_h03_model['weekly_sales'], name="H03")

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 26, Finished, Available, Finished)

### **Auto & Partial Correlation**

In [19]:
#auto_partial_corr(features_table_h00_model["weekly_sales"], name="H00")
#auto_partial_corr(features_table_h01_model["weekly_sales"], name="H01")
#auto_partial_corr(features_table_h02_model["weekly_sales"], name="H02")
#auto_partial_corr(features_table_h03_model["weekly_sales"], name="H03")

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 27, Finished, Available, Finished)

## **Train/Test-Split**

#### **features_table_h00_model**

In [20]:
#Set train and test variables to each hierarchy

h00_s0095_y_train, h00_s0095_y_test = train_test_split(features_table_h00_model_s0095["weekly_sales"], name="H00")
h00_s0085_y_train, h00_s0085_y_test = train_test_split(features_table_h00_model_s0085["weekly_sales"], name="H00")
h00_s0026_y_train, h00_s0026_y_test = train_test_split(features_table_h00_model_s0026["weekly_sales"], name="H00")
h00_s0080_y_train, h00_s0080_y_test = train_test_split(features_table_h00_model_s0080["weekly_sales"], name="H00")


StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 28, Finished, Available, Finished)

DataFrame H00 train/test split done
	Train_data: 141
	Test_data: 4
	Last Obs. Train set 2019-09-29
	First Obs. Test set 2019-10-06
DataFrame H00 train/test split done
	Train_data: 141
	Test_data: 4
	Last Obs. Train set 2019-09-29
	First Obs. Test set 2019-10-06
DataFrame H00 train/test split done
	Train_data: 141
	Test_data: 4
	Last Obs. Train set 2019-09-29
	First Obs. Test set 2019-10-06
DataFrame H00 train/test split done
	Train_data: 141
	Test_data: 4
	Last Obs. Train set 2019-09-29
	First Obs. Test set 2019-10-06


#### **features_table_h01_model**

In [21]:
#Set train and test variables to each hierarchy

h01_s0095_y_train, h01_s0095_y_test = train_test_split(features_table_h01_model_s0095["weekly_sales"], name="H01")
h01_s0085_y_train, h01_s0085_y_test = train_test_split(features_table_h01_model_s0085["weekly_sales"], name="H01")
h01_s0026_y_train, h01_s0026_y_test = train_test_split(features_table_h01_model_s0026["weekly_sales"], name="H01")
h01_s0080_y_train, h01_s0080_y_test = train_test_split(features_table_h01_model_s0080["weekly_sales"], name="H01")

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 29, Finished, Available, Finished)

DataFrame H01 train/test split done
	Train_data: 141
	Test_data: 4
	Last Obs. Train set 2019-09-29
	First Obs. Test set 2019-10-06
DataFrame H01 train/test split done
	Train_data: 141
	Test_data: 4
	Last Obs. Train set 2019-09-29
	First Obs. Test set 2019-10-06
DataFrame H01 train/test split done
	Train_data: 141
	Test_data: 4
	Last Obs. Train set 2019-09-29
	First Obs. Test set 2019-10-06
DataFrame H01 train/test split done
	Train_data: 141
	Test_data: 4
	Last Obs. Train set 2019-09-29
	First Obs. Test set 2019-10-06


#### **features_table_h02_model**

In [22]:
#Set train and test variables to each hierarchy

h02_s0095_y_train, h02_s0095_y_test = train_test_split(features_table_h02_model_s0095["weekly_sales"], name="H02")
h02_s0085_y_train, h02_s0085_y_test = train_test_split(features_table_h02_model_s0085["weekly_sales"], name="H02")
h02_s0026_y_train, h02_s0026_y_test = train_test_split(features_table_h02_model_s0026["weekly_sales"], name="H02")
h02_s0080_y_train, h02_s0080_y_test = train_test_split(features_table_h02_model_s0080["weekly_sales"], name="H02")

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 30, Finished, Available, Finished)

DataFrame H02 train/test split done
	Train_data: 17
	Test_data: 4
	Last Obs. Train set 2017-05-14
	First Obs. Test set 2017-06-25
DataFrame H02 train/test split done
	Train_data: 98
	Test_data: 4
	Last Obs. Train set 2019-09-29
	First Obs. Test set 2019-10-06
DataFrame H02 train/test split done
	Train_data: 109
	Test_data: 4
	Last Obs. Train set 2019-09-29
	First Obs. Test set 2019-10-06
DataFrame H02 train/test split done
	Train_data: 26
	Test_data: 4
	Last Obs. Train set 2019-03-03
	First Obs. Test set 2019-03-10


#### **features_table_h03_model**

In [23]:
#Set train and test variables to each hierarchy

h03_s0095_y_train, h03_s0095_y_test = train_test_split(features_table_h03_model_s0095["weekly_sales"], name="H03")
h03_s0085_y_train, h03_s0085_y_test = train_test_split(features_table_h03_model_s0085["weekly_sales"], name="H03")
h03_s0026_y_train, h03_s0026_y_test = train_test_split(features_table_h03_model_s0026["weekly_sales"], name="H03")
h03_s0080_y_train, h03_s0080_y_test = train_test_split(features_table_h03_model_s0080["weekly_sales"], name="H03")

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 31, Finished, Available, Finished)

DataFrame H03 train/test split done
	Train_data: 141
	Test_data: 4
	Last Obs. Train set 2019-09-29
	First Obs. Test set 2019-10-06
DataFrame H03 train/test split done
	Train_data: 141
	Test_data: 4
	Last Obs. Train set 2019-09-29
	First Obs. Test set 2019-10-06
DataFrame H03 train/test split done
	Train_data: 141
	Test_data: 4
	Last Obs. Train set 2019-09-29
	First Obs. Test set 2019-10-06
DataFrame H03 train/test split done
	Train_data: 141
	Test_data: 4
	Last Obs. Train set 2019-09-29
	First Obs. Test set 2019-10-06


## **SARIMA** 

### **Sarima - Model** 

#### **H00** 

In [24]:
# Set a start values for order and seasonal_orders
order = (1, 1, 1)
seasonal_order = (1, 1, 1, 52)

h00_s0095_mae_smodel, h00_s0095_results_smodel, h00_s0095_forecast_smodel = evaluate_sarima_model(h00_s0095_y_train, h00_s0095_y_test, order, seasonal_order)
h00_s0085_mae_smodel, h00_s0085_results_smodel, h00_s0085_forecast_smodel = evaluate_sarima_model(h00_s0085_y_train, h00_s0085_y_test, order, seasonal_order)
h00_s0026_mae_smodel, h00_s0026_results_smodel, h00_s0026_forecast_smodel = evaluate_sarima_model(h00_s0026_y_train, h00_s0026_y_test, order, seasonal_order)
h00_s0080_mae_smodel, h00_s0080_results_smodel, h00_s0080_forecast_smodel = evaluate_sarima_model(h00_s0080_y_train, h00_s0080_y_test, order, seasonal_order)


StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 32, Finished, Available, Finished)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'


#### **H01** 

In [25]:
h01_s0095_mae_smodel, h01_s0095_results_smodel, h01_s0095_forecast_smodel = evaluate_sarima_model(h01_s0095_y_train, h01_s0095_y_test, order, seasonal_order)
h01_s0085_mae_smodel, h01_s0085_results_smodel, h01_s0085_forecast_smodel = evaluate_sarima_model(h01_s0085_y_train, h01_s0085_y_test, order, seasonal_order)
h01_s0026_mae_smodel, h01_s0026_results_smodel, h01_s0026_forecast_smodel = evaluate_sarima_model(h01_s0026_y_train, h01_s0026_y_test, order, seasonal_order)
h01_s0080_mae_smodel, h01_s0080_results_smodel, h01_s0080_forecast_smodel = evaluate_sarima_model(h01_s0080_y_train, h01_s0080_y_test, order, seasonal_order)

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 33, Finished, Available, Finished)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'


#### **H02** 

In [26]:
h02_s0095_mae_smodel, h02_s0095_results_smodel, h02_s0095_forecast_smodel = evaluate_sarima_model(h02_s0095_y_train, h02_s0095_y_test, order, seasonal_order)
h02_s0085_mae_smodel, h02_s0085_results_smodel, h02_s0085_forecast_smodel = evaluate_sarima_model(h02_s0085_y_train, h02_s0085_y_test, order, seasonal_order)
h02_s0026_mae_smodel, h02_s0026_results_smodel, h02_s0026_forecast_smodel = evaluate_sarima_model(h02_s0026_y_train, h02_s0026_y_test, order, seasonal_order)
h02_s0080_mae_smodel, h02_s0080_results_smodel, h02_s0080_forecast_smodel = evaluate_sarima_model(h02_s0080_y_train, h02_s0080_y_test, order, seasonal_order)

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 34, Finished, Available, Finished)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'


  return get_prediction_index(
  return get_prediction_index(
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'
  warn('Too few observations to estimate starting parameters%s.'


  return get_prediction_index(
  return get_prediction_index(


#### **H03** 

In [27]:
h03_s0095_mae_smodel, h03_s0095_results_smodel, h03_s0095_forecast_smodel = evaluate_sarima_model(h03_s0095_y_train, h03_s0095_y_test, order, seasonal_order)
h03_s0085_mae_smodel, h03_s0085_results_smodel, h03_s0085_forecast_smodel = evaluate_sarima_model(h03_s0085_y_train, h03_s0085_y_test, order, seasonal_order)
h03_s0026_mae_smodel, h03_s0026_results_smodel, h03_s0026_forecast_smodel = evaluate_sarima_model(h03_s0026_y_train, h03_s0026_y_test, order, seasonal_order)
h03_s0080_mae_smodel, h03_s0080_results_smodel, h03_s0080_forecast_smodel = evaluate_sarima_model(h03_s0080_y_train, h03_s0080_y_test, order, seasonal_order)

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 35, Finished, Available, Finished)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Too few observations to estimate starting parameters%s.'


### **Hyperparameter Tuning (Manual Grid Search)** 

In [28]:
# Sarima Hyperparameter tuning
#best_parameters_H00, best_mae_H00, forecast_grid_h00, results_model_grid_h00 = tune_sarima_model(h00_train_data["weekly_sales"], h00_test_data["weekly_sales"], name="H00")
#best_parameters_H01, best_mae_H01, forecast_grid_h01, results_model_grid_h01 = tune_sarima_model(h01_train_data["weekly_sales"], h01_test_data["weekly_sales"], name="H01")
#best_parameters_H02, best_mae_H02, forecast_grid_h02, results_model_grid_h02 = tune_sarima_model(h02_train_data["weekly_sales"], h02_test_data["weekly_sales"], name="H02")
#best_parameters_H03, best_mae_H03, forecast_grid_h03, results_model_grid_h03 = tune_sarima_model(h03_train_data["weekly_sales"], h03_test_data["weekly_sales"], name="H03")

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 36, Finished, Available, Finished)

### **Plot Forecast vs Actual Values** 

In [29]:
# Create variables to plot forecasting vs actuals
#plt_h00 = plt_forecast_actuals(h00_train_data["weekly_sales"], h00_test_data["weekly_sales"], h00_forecast_smodel, forecast_grid_h00, name="H00")
#plt_h01 = plt_forecast_actuals(h01_train_data["weekly_sales"], h01_test_data["weekly_sales"], h01_forecast_smodel, forecast_grid_h01, name="H01")
#plt_h02 = plt_forecast_actuals(h02_train_data["weekly_sales"], h02_test_data["weekly_sales"], h02_forecast_smodel, forecast_grid_h02, name="H02")
#plt_h03 = plt_forecast_actuals(h03_train_data["weekly_sales"], h03_test_data["weekly_sales"], h03_forecast_smodel, forecast_grid_h03, name="H03")


StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 37, Finished, Available, Finished)

### **Results** 

#### **H00** 

In [30]:
# Compile results

smodel_results = [
    {
        "Store": "S0095",
        "MAE": h00_s0095_mae_smodel,
        "AIC": h00_s0095_results_smodel.aic,
        "BIC": h00_s0095_results_smodel.bic
    },

    {
        "Store": "S0085",
        "MAE": h00_s0085_mae_smodel,
        "AIC": h00_s0085_results_smodel.aic,
        "BIC": h00_s0085_results_smodel.bic
    },

    {
        "Store": "S0026",
        "MAE": h00_s0026_mae_smodel,
        "AIC": h00_s0026_results_smodel.aic,
        "BIC": h00_s0026_results_smodel.bic
    },
    {
        "Store": "S0080",
        "MAE": h00_s0080_mae_smodel,
        "AIC": h00_s0080_results_smodel.aic,
        "BIC": h00_s0080_results_smodel.bic
    }
]


#Creation of Dataframes
smodel_results_df_h00 = round(pd.DataFrame(smodel_results),2)
#smodel_grid_results_df_h00 = round(pd.DataFrame(smodel_grid_results),2)

# Results without manual search grid
print("H00")
smodel_results_df_h00

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 38, Finished, Available, Finished)

H00


Unnamed: 0,Store,MAE,AIC,BIC
0,S0095,62.3,310.58,318.21
1,S0085,123.76,390.45,398.08
2,S0026,41.01,336.07,343.7
3,S0080,81.46,302.73,310.36


#### **H01** 

In [31]:
# Compile results

smodel_results = [
    {
        "Store": "S0095",
        "MAE": h01_s0095_mae_smodel,
        "AIC": h01_s0095_results_smodel.aic,
        "BIC": h01_s0095_results_smodel.bic
    },

    {
        "Store": "S0095",
        "MAE": h01_s0085_mae_smodel,
        "AIC": h01_s0085_results_smodel.aic,
        "BIC": h01_s0085_results_smodel.bic
    },

    {
        "Store": "S0026",
        "MAE": h01_s0026_mae_smodel,
        "AIC": h01_s0026_results_smodel.aic,
        "BIC": h01_s0026_results_smodel.bic
    },
    {
        "Store": "S0080",
        "MAE": h01_s0080_mae_smodel,
        "AIC": h01_s0080_results_smodel.aic,
        "BIC": h01_s0080_results_smodel.bic
    }
]

#Creation of Dataframes
smodel_results_df_h01 = round(pd.DataFrame(smodel_results),2)
#smodel_grid_results_df_h01 = round(pd.DataFrame(smodel_grid_results),2)

# Results without manual search grid
print("H01")
smodel_results_df_h01


StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 39, Finished, Available, Finished)

H01


Unnamed: 0,Store,MAE,AIC,BIC
0,S0095,6.65,209.5,217.13
1,S0095,32.12,287.07,294.7
2,S0026,29.72,295.72,303.35
3,S0080,1.86,162.65,170.28


#### **H02** 

In [32]:
# Compile results

smodel_results = [
    {
        "Store": "S0095",
        "MAE": h02_s0095_mae_smodel,
        "AIC": h02_s0095_results_smodel.aic,
        "BIC": h02_s0095_results_smodel.bic
    },

    {
        "Store": "S0095",
        "MAE": h02_s0085_mae_smodel,
        "AIC": h02_s0085_results_smodel.aic,
        "BIC": h02_s0085_results_smodel.bic
    },

    {
        "Store": "S0026",
        "MAE": h02_s0026_mae_smodel,
        "AIC": h02_s0026_results_smodel.aic,
        "BIC": h02_s0026_results_smodel.bic
    },
    {
        "Store": "S0080",
        "MAE": h02_s0080_mae_smodel,
        "AIC": h02_s0080_results_smodel.aic,
        "BIC": h02_s0080_results_smodel.bic
    }
]

#Creation of Dataframes
smodel_results_df_h01 = round(pd.DataFrame(smodel_results),2)
#smodel_grid_results_df_h01 = round(pd.DataFrame(smodel_grid_results),2)

# Results without manual search grid
print("H02")
smodel_results_df_h01


StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 40, Finished, Available, Finished)

H02


Unnamed: 0,Store,MAE,AIC,BIC
0,S0095,5.21,10.0,
1,S0095,1.6,10.0,
2,S0026,16.27,2.17,-4.37
3,S0080,0.87,10.0,


#### **H03** 

In [33]:
# Compile results

smodel_results = [
    {
        "Store": "S0095",
        "MAE": h03_s0095_mae_smodel,
        "AIC": h03_s0095_results_smodel.aic,
        "BIC": h03_s0095_results_smodel.bic
    },

    {
        "Store": "S0085",
        "MAE": h03_s0085_mae_smodel,
        "AIC": h03_s0085_results_smodel.aic,
        "BIC": h03_s0085_results_smodel.bic
    },
    {
        "Store": "S0026",
        "MAE": h03_s0026_mae_smodel,
        "AIC": h03_s0026_results_smodel.aic,
        "BIC": h03_s0026_results_smodel.bic
    },
    {
        "Store": "S0080",
        "MAE": h03_s0080_mae_smodel,
        "AIC": h03_s0080_results_smodel.aic,
        "BIC": h03_s0080_results_smodel.bic
    }
]

#Creation of Dataframes
smodel_results_df_h03 = round(pd.DataFrame(smodel_results),2)
#smodel_grid_results_df_h03 = round(pd.DataFrame(smodel_grid_results),2)

# Results without manual search grid
print("H03")
smodel_results_df_h03

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 41, Finished, Available, Finished)

H03


Unnamed: 0,Store,MAE,AIC,BIC
0,S0095,9.97,222.41,230.04
1,S0085,18.11,273.03,280.66
2,S0026,12.72,246.78,254.42
3,S0080,11.72,201.71,209.34


### **Check Overfitting:**

#### **H00** 

In [34]:
# S0095
h00_s0095_train_forecast = h00_s0095_results_smodel.predict(start=h00_s0095_y_train.index[0], end=h00_s0095_y_train.index[-1])
h00_s0095_mae_train = mean_absolute_error(h00_s0095_y_train, h00_s0095_train_forecast)

h00_s0095_test_forecast = h00_s0095_results_smodel.forecast(steps=len(h00_s0095_y_test))
h00_s0095_mae_test = mean_absolute_error(h00_s0095_y_test, h00_s0095_test_forecast)

# S0085
h00_s0085_train_forecast = h00_s0085_results_smodel.predict(start=h00_s0085_y_train.index[0], end=h00_s0085_y_train.index[-1])
h00_s0085_mae_train = mean_absolute_error(h00_s0085_y_train, h00_s0085_train_forecast)

h00_s0085_test_forecast = h00_s0085_results_smodel.forecast(steps=len(h00_s0085_y_test))
h00_s0085_mae_test = mean_absolute_error(h00_s0085_y_test, h00_s0085_test_forecast)

# S0026
h00_s0026_train_forecast = h00_s0026_results_smodel.predict(start=h00_s0026_y_train.index[0], end=h00_s0026_y_train.index[-1])
h00_s0026_mae_train = mean_absolute_error(h00_s0026_y_train, h00_s0026_train_forecast)

h00_s0026_test_forecast = h00_s0026_results_smodel.forecast(steps=len(h00_s0026_y_test))
h00_s0026_mae_test = mean_absolute_error(h00_s0026_y_test, h00_s0026_test_forecast)

# S0080
h00_s0080_train_forecast = h00_s0080_results_smodel.predict(start=h00_s0080_y_train.index[0], end=h00_s0080_y_train.index[-1])
h00_s0080_mae_train = mean_absolute_error(h00_s0080_y_train, h00_s0080_train_forecast)

h00_s0080_test_forecast = h00_s0080_results_smodel.forecast(steps=len(h00_s0080_y_test))
h00_s0080_mae_test = mean_absolute_error(h00_s0080_y_test, h00_s0080_test_forecast)

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 42, Finished, Available, Finished)

#### **H01** 

In [35]:
# S0095
h01_s0095_train_forecast = h01_s0095_results_smodel.predict(start=h01_s0095_y_train.index[0], end=h01_s0095_y_train.index[-1])
h01_s0095_mae_train = mean_absolute_error(h01_s0095_y_train, h01_s0095_train_forecast)

h01_s0095_test_forecast = h01_s0095_results_smodel.forecast(steps=len(h01_s0095_y_test))
h01_s0095_mae_test = mean_absolute_error(h01_s0095_y_test, h01_s0095_test_forecast)

# S0085
h01_s0085_train_forecast = h01_s0085_results_smodel.predict(start=h01_s0085_y_train.index[0], end=h01_s0085_y_train.index[-1])
h01_s0085_mae_train = mean_absolute_error(h01_s0085_y_train, h01_s0085_train_forecast)

h01_s0085_test_forecast = h01_s0085_results_smodel.forecast(steps=len(h01_s0085_y_test))
h01_s0085_mae_test = mean_absolute_error(h01_s0085_y_test, h01_s0085_test_forecast)

# S0026
h01_s0026_train_forecast = h01_s0026_results_smodel.predict(start=h01_s0026_y_train.index[0], end=h01_s0026_y_train.index[-1])
h01_s0026_mae_train = mean_absolute_error(h01_s0026_y_train, h01_s0026_train_forecast)

h01_s0026_test_forecast = h01_s0026_results_smodel.forecast(steps=len(h01_s0026_y_test))
h01_s0026_mae_test = mean_absolute_error(h01_s0026_y_test, h01_s0026_test_forecast)

# S0080
h01_s0080_train_forecast = h01_s0080_results_smodel.predict(start=h01_s0080_y_train.index[0], end=h01_s0080_y_train.index[-1])
h01_s0080_mae_train = mean_absolute_error(h01_s0080_y_train, h01_s0080_train_forecast)

h01_s0080_test_forecast = h01_s0080_results_smodel.forecast(steps=len(h01_s0080_y_test))
h01_s0080_mae_test = mean_absolute_error(h01_s0080_y_test, h01_s0080_test_forecast)

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 43, Finished, Available, Finished)

#### **H02** 

In [36]:
# S0095
h02_s0095_train_forecast = h02_s0095_results_smodel.predict(start=h02_s0095_y_train.index[0], end=h02_s0095_y_train.index[-1])
h02_s0095_mae_train = mean_absolute_error(h02_s0095_y_train, h02_s0095_train_forecast)

h02_s0095_test_forecast = h02_s0095_results_smodel.forecast(steps=len(h02_s0095_y_test))
h02_s0095_mae_test = mean_absolute_error(h02_s0095_y_test, h02_s0095_test_forecast)

# S0085
h02_s0085_train_forecast = h02_s0085_results_smodel.predict(start=h02_s0085_y_train.index[0], end=h02_s0085_y_train.index[-1])
h02_s0085_mae_train = mean_absolute_error(h02_s0085_y_train, h02_s0085_train_forecast)

h02_s0085_test_forecast = h02_s0085_results_smodel.forecast(steps=len(h02_s0085_y_test))
h02_s0085_mae_test = mean_absolute_error(h02_s0085_y_test, h02_s0085_test_forecast)

# S0026
h02_s0026_train_forecast = h02_s0026_results_smodel.predict(start=h02_s0026_y_train.index[0], end=h02_s0026_y_train.index[-1])
h02_s0026_mae_train = mean_absolute_error(h02_s0026_y_train, h02_s0026_train_forecast)

h02_s0026_test_forecast = h02_s0026_results_smodel.forecast(steps=len(h02_s0026_y_test))
h02_s0026_mae_test = mean_absolute_error(h02_s0026_y_test, h02_s0026_test_forecast)

# S0080
h02_s0080_train_forecast = h02_s0080_results_smodel.predict(start=h02_s0080_y_train.index[0], end=h02_s0080_y_train.index[-1])
h02_s0080_mae_train = mean_absolute_error(h02_s0080_y_train, h02_s0080_train_forecast)

h02_s0080_test_forecast = h02_s0080_results_smodel.forecast(steps=len(h02_s0080_y_test))
h02_s0080_mae_test = mean_absolute_error(h02_s0080_y_test, h02_s0080_test_forecast)

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 44, Finished, Available, Finished)

  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(
  return get_prediction_index(


#### **H03** 

In [37]:
# S0095
h03_s0095_train_forecast = h03_s0095_results_smodel.predict(start=h03_s0095_y_train.index[0], end=h03_s0095_y_train.index[-1])
h03_s0095_mae_train = mean_absolute_error(h03_s0095_y_train, h03_s0095_train_forecast)

h03_s0095_test_forecast = h03_s0095_results_smodel.forecast(steps=len(h03_s0095_y_test))
h03_s0095_mae_test = mean_absolute_error(h03_s0095_y_test, h03_s0095_test_forecast)

# S0085
h03_s0085_train_forecast = h03_s0085_results_smodel.predict(start=h03_s0085_y_train.index[0], end=h03_s0085_y_train.index[-1])
h03_s0085_mae_train = mean_absolute_error(h03_s0085_y_train, h03_s0085_train_forecast)

h03_s0085_test_forecast = h03_s0085_results_smodel.forecast(steps=len(h03_s0085_y_test))
h03_s0085_mae_test = mean_absolute_error(h03_s0085_y_test, h03_s0085_test_forecast)

# S0026
h03_s0026_train_forecast = h03_s0026_results_smodel.predict(start=h03_s0026_y_train.index[0], end=h03_s0026_y_train.index[-1])
h03_s0026_mae_train = mean_absolute_error(h03_s0026_y_train, h03_s0026_train_forecast)

h03_s0026_test_forecast = h03_s0026_results_smodel.forecast(steps=len(h03_s0026_y_test))
h03_s0026_mae_test = mean_absolute_error(h03_s0026_y_test, h03_s0026_test_forecast)

# S0080
h03_s0080_train_forecast = h03_s0080_results_smodel.predict(start=h03_s0080_y_train.index[0], end=h03_s0080_y_train.index[-1])
h03_s0080_mae_train = mean_absolute_error(h03_s0080_y_train, h03_s0080_train_forecast)

h03_s0080_test_forecast = h03_s0080_results_smodel.forecast(steps=len(h03_s0080_y_test))
h03_s0080_mae_test = mean_absolute_error(h03_s0080_y_test, h03_s0080_test_forecast)

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 45, Finished, Available, Finished)

### **Results Mae_Values** 

In [38]:
mae_values = [
    {
        "Hierarchy":"H00",
        "Store": "S0095",
        "Train_MAE": h00_s0095_mae_train,
        "Test_MAE": h00_s0095_mae_test,
        "Obs": "⚠️ Severe overfitting"
    },

    {
        "Hierarchy": "H00",
        "Store": "S0085",
        "Train_MAE": h00_s0085_mae_train,
        "Test_MAE": h00_s0085_mae_test,
        "Obs": "⚠️ high errors with overfitting"
    },

    {
        "Hierarchy": "H00",
        "Store": "S0026",
        "Train_MAE": h00_s0026_mae_train,
        "Test_MAE": h00_s0026_mae_test,
        "Obs": "⚠️ high errors week generalization "
    },
    {
        "Hierarchy": "H00",
        "Store": "S0080",
        "Train_MAE": h00_s0080_mae_train,
        "Test_MAE": h00_s0080_mae_test,
        "Obs": "❌ Overfitting"
    },
    {
        "Hierarchy":"H01",
        "Store": "S0095",
        "Train_MAE": h01_s0095_mae_train,
        "Test_MAE": h01_s0095_mae_test,
        "Obs": "✅"
    },

    {
        "Hierarchy": "H01",
        "Store": "S0085",
        "Train_MAE": h01_s0085_mae_train,
        "Test_MAE": h01_s0085_mae_test,
        "Obs": "⚠️ Potential underfitting"

    },

    {
        "Hierarchy": "H01",
        "Store": "S0026",
        "Train_MAE": h01_s0026_mae_train,
        "Test_MAE": h01_s0026_mae_test,
        "Obs": "✅"
    },
    {
        "Hierarchy": "H01",
        "Store": "S0080",
        "Train_MAE": h01_s0080_mae_train,
        "Test_MAE": h01_s0080_mae_test,
        "Obs": "✅"
    },
    {
        "Hierarchy":"H02",
        "Store": "S0095",
        "Train_MAE": h02_s0095_mae_train,
        "Test_MAE": h02_s0095_mae_test,
        "Obs": "✅"
    },

    {
        "Hierarchy": "H02",
        "Store": "S0085",
        "Train_MAE": h02_s0085_mae_train,
        "Test_MAE": h02_s0085_mae_test,
        "Obs": "✅"
    },

    {
        "Hierarchy": "H02",
        "Store": "S0026",
        "Train_MAE": h02_s0026_mae_train,
        "Test_MAE": h02_s0026_mae_test,
        "Obs": "❌ Overfitting"
    },
    {
        "Hierarchy": "H02",
        "Store": "S0080",
        "Train_MAE": h02_s0080_mae_train,
        "Test_MAE": h02_s0080_mae_test,
        "Obs": "✅"
    },
    {
        "Hierarchy":"H03",
        "Store": "S0095",
        "Train_MAE": h03_s0095_mae_train,
        "Test_MAE": h03_s0095_mae_test,
        "Obs": "✅"
    },

    {
        "Hierarchy": "H03",
        "Store": "S0085",
        "Train_MAE": h03_s0085_mae_train,
        "Test_MAE": h03_s0085_mae_test,
        "Obs": "✅"
    },

    {
        "Hierarchy": "H03",
        "Store": "S0026",
        "Train_MAE": h03_s0026_mae_train,
        "Test_MAE": h03_s0026_mae_test,
        "Obs": "✅"
    },
    {
        "Hierarchy": "H03",
        "Store": "S0080",
        "Train_MAE": h03_s0080_mae_train,
        "Test_MAE": h03_s0080_mae_test,
        "Obs": "⚠️ light overfitting"
    }
]

mae_values = round(pd.DataFrame(mae_values),2)
mae_values

StatementMeta(, 34c24fc6-0597-43fd-b475-5ecdef6e2a3a, 46, Finished, Available, Finished)

Unnamed: 0,Hierarchy,Store,Train_MAE,Test_MAE,Obs
0,H00,S0095,14.47,62.3,⚠️ Severe overfitting
1,H00,S0085,53.47,123.76,⚠️ high errors with overfitting
2,H00,S0026,20.79,41.01,⚠️ high errors week generalization
3,H00,S0080,12.63,81.46,❌ Overfitting
4,H01,S0095,3.73,6.65,✅
5,H01,S0085,11.51,32.12,⚠️ Potential underfitting
6,H01,S0026,7.63,29.72,✅
7,H01,S0080,1.49,1.86,✅
8,H02,S0095,3.79,5.21,✅
9,H02,S0085,2.91,1.6,✅
