In [None]:
import pandas as pd
import numpy as np
from darts import TimeSeries
from darts.models import RandomForest
from darts.models import XGBModel
from darts.models import Prophet
from darts.models import LightGBMModel
from sklearn.metrics import mean_squared_error, mean_squared_log_error
import matplotlib.pyplot as plt

In [None]:
# Load the sales history used below (one row per date / store_nbr / family,
# with a "sales" value); the "date" column is parsed to datetimes on read.
df = pd.read_csv(
    "../../Data/Kaggle/StoreSales/train_darts_univariate.csv",
    parse_dates=["date"],
)

In [None]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(n=5)

In [None]:
# One row per distinct (store_nbr, family) pair; each pair is modelled
# as its own series further down.
unique_combinations = df.loc[:, ['store_nbr', 'family']].drop_duplicates()

In [None]:
# Running sums of the best per-series scores, plus the number of series
# evaluated; used later to compute the averages.
total_rmse, total_rmsle, num_models = 0, 0, 0

In [None]:
# Fitted models, one registry per model type, keyed by (store_nbr, family).
trained_models_rf = dict()
trained_models_xgb = dict()
trained_models_p = dict()
trained_models_lgbm = dict()

In [None]:
# Reset the accumulators here so that re-running this cell on its own does
# not add a second pass on top of the totals from a previous run.
total_rmse = 0
total_rmsle = 0
num_models = 0

# For every (store_nbr, family) pair: fit four forecasting models on the first
# 80% of the series, forecast the remaining 20%, and keep the best RMSE and
# best RMSLE among the four models for the running totals.
for index, row in unique_combinations.iterrows():
    store_nbr = row['store_nbr']
    family = row['family']

    print(f"\nTraining model for store {store_nbr} and family {family}...")

    # Rows belonging to this store/family pair only.
    df_subset = df[(df['store_nbr'] == store_nbr) & (df['family'] == family)]

    # Prophet is fed a series built on a 'date' index with daily frequency;
    # the other models use a series built on the frame's existing index.
    df_subset_p = df_subset.set_index('date')

    ts = TimeSeries.from_dataframe(df_subset, value_cols=["sales"])
    ts_p = TimeSeries.from_dataframe(df_subset_p, value_cols=["sales"], freq='d')

    # Chronological split; both series are cut at the same position, which
    # is derived from the length of `ts`.
    split_ratio = 0.8
    training_size = int(len(ts) * split_ratio)
    train = ts[:training_size]
    val = ts[training_size:]

    train_p = ts_p[:training_size]
    val_p = ts_p[training_size:]

    # The regression-based models use the previous two observations as features.
    model_rf = RandomForest(lags=2)
    model_xgb = XGBModel(lags=2)
    model_p = Prophet()
    model_lgbm = LightGBMModel(lags=2)

    model_rf.fit(train)
    model_xgb.fit(train)
    model_p.fit(train_p)
    model_lgbm.fit(train)

    # Keep the fitted models so they can be reused after the loop.
    trained_models_rf[(store_nbr, family)] = model_rf
    trained_models_xgb[(store_nbr, family)] = model_xgb
    trained_models_p[(store_nbr, family)] = model_p
    trained_models_lgbm[(store_nbr, family)] = model_lgbm

    # Forecast as many steps as there are hold-out observations.
    predictions_rf = model_rf.predict(n=len(val))
    predictions_xgb = model_xgb.predict(n=len(val))
    predictions_p = model_p.predict(n=len(val))
    predictions_lgbm = model_lgbm.predict(n=len(val))

    # From here on `val` / `val_p` / `predictions_*` are plain lists of floats,
    # no longer TimeSeries objects.
    val = val.pd_series().tolist()
    val_p = val_p.pd_series().tolist()

    predictions_rf = predictions_rf.pd_series().tolist()
    predictions_xgb = predictions_xgb.pd_series().tolist()
    predictions_p = predictions_p.pd_series().tolist()
    predictions_lgbm = predictions_lgbm.pd_series().tolist()

    # Clip negative forecasts to zero so that log1p below is well defined
    # for the predictions.
    predictions_rf = np.maximum(predictions_rf, 0)
    predictions_xgb = np.maximum(predictions_xgb, 0)
    predictions_p = np.maximum(predictions_p, 0)
    predictions_lgbm = np.maximum(predictions_lgbm, 0)

    # RMSLE is computed as the RMSE of the log1p-transformed values.
    # All four models, Prophet included, are scored against `val`.
    rmse_rf = np.sqrt(mean_squared_error(val, predictions_rf))
    rmsle_rf = np.sqrt(mean_squared_error(np.log1p(val), np.log1p(predictions_rf)))

    rmse_xgb = np.sqrt(mean_squared_error(val, predictions_xgb))
    rmsle_xgb = np.sqrt(mean_squared_error(np.log1p(val), np.log1p(predictions_xgb)))

    rmse_p = np.sqrt(mean_squared_error(val, predictions_p))
    rmsle_p = np.sqrt(mean_squared_error(np.log1p(val), np.log1p(predictions_p)))

    rmse_lgbm = np.sqrt(mean_squared_error(val, predictions_lgbm))
    rmsle_lgbm = np.sqrt(mean_squared_error(np.log1p(val), np.log1p(predictions_lgbm)))

    # Best score per metric across the four models. The RMSLE comparison must
    # use Prophet's RMSLE (`rmsle_p`); it previously used `rmse_p` by mistake,
    # which mixed an RMSE value into the RMSLE aggregate.
    rmse = min(rmse_rf, rmse_xgb, rmse_p, rmse_lgbm)
    rmsle = min(rmsle_rf, rmsle_xgb, rmsle_p, rmsle_lgbm)

    total_rmse += rmse
    total_rmsle += rmsle
    num_models += 1

    print(f"Root Mean Squared Error (RMSE): {rmse}")
    print(f"Root Mean Squared Logarithmic Error (RMSLE): {rmsle}")

#     plt.figure(figsize=(12, 6))
#     plt.plot(val, label='Actual')
#     plt.plot(predictions, label='Predictions')
#     plt.title(f"Model for store {store_nbr} and family {family}")
#     plt.legend()
#     plt.savefig(f"plots/M09.5store{store_nbr}_family{family}.png")
#     plt.close()

In [None]:
# Mean of the per-series best scores over all evaluated series.
avg_rmse, avg_rmsle = (
    metric_total / num_models for metric_total in (total_rmse, total_rmsle)
)

In [None]:
# Report both averaged metrics, one per line.
print(
    f"Average Root Mean Squared Error (RMSE) across all models: {avg_rmse}",
    f"Average Root Mean Squared Logarithmic Error (RMSLE) across all models: {avg_rmsle}",
    sep="\n",
)