In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from darts import TimeSeries
from darts.models import XGBModel
from sklearn.metrics import mean_squared_error, mean_squared_log_error

In [None]:
# Load the training table; the `date` column is parsed into datetimes on read.
df = pd.read_csv(
    "../../Data/Kaggle/StoreSales/train_darts.csv",
    parse_dates=["date"],
)

In [None]:
# Preview the first five rows as a quick sanity check of the loaded columns.
df.head(n=5)

In [None]:
# Every distinct (store_nbr, family) pair; one model is trained per pair below.
key_columns = ['store_nbr', 'family']
unique_combinations = df.loc[:, key_columns].drop_duplicates()

In [None]:
# Running totals used to average the per-model error metrics after training.
total_rmse = total_rmsle = num_models = 0

In [None]:
# Fitted models, keyed by (store_nbr, family).
trained_models = dict()

In [None]:
# Train one XGBoost forecaster per (store_nbr, family) series, score it on the
# last FORECAST_HORIZON observations, and save an actual-vs-predicted plot.

# Hold-out length; also used as the target lag window and the output chunk length.
FORECAST_HORIZON = 30

# savefig does not create missing directories, so make sure the target exists.
PLOT_DIR = Path("XGBoost/Monthly")
PLOT_DIR.mkdir(parents=True, exist_ok=True)

# Reset the running totals here so that re-running this cell does not
# double-count models and skew the averages computed afterwards.
total_rmse = 0
total_rmsle = 0
num_models = 0

for store_nbr, family in unique_combinations[['store_nbr', 'family']].itertuples(index=False, name=None):
    print(f"\nTraining model for store {store_nbr} and family {family}...")

    df_subset = df[(df['store_nbr'] == store_nbr) & (df['family'] == family)]

    # NOTE(review): no `time_col` is passed, so both series take their time
    # axis from df_subset's row index rather than the parsed `date` column --
    # confirm this is intended.
    dcoilwtico_series = TimeSeries.from_dataframe(df_subset, value_cols=["dcoilwtico"])
    ts = TimeSeries.from_dataframe(df_subset, value_cols=["sales"])

    # Chronological split: everything except the last FORECAST_HORIZON points
    # is used for training, the remainder for validation.
    training_size = len(ts) - FORECAST_HORIZON
    train = ts[:training_size]
    val = ts[training_size:]

    # One future-covariate lag per validation step: 0, 1, ..., len(val) - 1.
    future_covariates_lags = list(range(len(val)))

    model = XGBModel(
        lags=FORECAST_HORIZON,
        lags_future_covariates=future_covariates_lags,
        output_chunk_length=FORECAST_HORIZON,
    )
    model.fit(train, future_covariates=dcoilwtico_series)
    trained_models[(store_nbr, family)] = model

    # NOTE(review): predict() is called without covariates; presumably darts
    # reuses the ones supplied to fit() -- verify against the darts version in use.
    forecast_series = model.predict(n=len(val))

    actual = val.pd_series().to_numpy()
    # Clip negative forecasts to zero; this also keeps log1p below well-defined.
    forecast = np.maximum(forecast_series.pd_series().to_numpy(), 0)

    rmse = np.sqrt(mean_squared_error(actual, forecast))
    rmsle = np.sqrt(mean_squared_error(np.log1p(actual), np.log1p(forecast)))

    total_rmse += rmse
    total_rmsle += rmsle
    num_models += 1

    print(f"Root Mean Squared Error (RMSE): {rmse}")
    print(f"Root Mean Squared Logarithmic Error (RMSLE): {rmsle}")

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(actual, label='Actual')
    ax.plot(forecast, label='Predictions')
    ax.set_title(f"Model for store {store_nbr} and family {family}")
    ax.legend()

    # A family label containing a path separator would otherwise be read as a
    # subdirectory and make savefig fail, so replace separators in the filename.
    safe_family = str(family).replace("/", "_").replace("\\", "_")
    fig.savefig(PLOT_DIR / f"Oil_M06.6_store{store_nbr}_family{safe_family}.png")
    plt.close(fig)  # release the figure; many are created across iterations

In [None]:
# Mean of each error metric over all trained models.
avg_rmse, avg_rmsle = (
    metric_total / num_models for metric_total in (total_rmse, total_rmsle)
)

In [None]:
# Report the averaged validation metrics, one per line.
summary_lines = [
    f"Average Root Mean Squared Error (RMSE) across all models: {avg_rmse}",
    f"Average Root Mean Squared Logarithmic Error (RMSLE) across all models: {avg_rmsle}",
]
print(*summary_lines, sep="\n")