In [None]:
import pandas as pd
import numpy as np
from darts import TimeSeries
from darts.models import XGBModel
from sklearn.metrics import mean_squared_error, mean_squared_log_error
import matplotlib.pyplot as plt
from darts import concatenate

In [None]:
# Load the prepared training table; the "date" column is parsed into datetimes.
df = pd.read_csv(
    "../../Data/Kaggle/StoreSales/train_darts.csv",
    parse_dates=["date"],
)

In [None]:
# Preview the first rows of the loaded frame as a quick sanity check.
df.head()

In [None]:
# Every distinct (store_nbr, family) pair, in order of first appearance.
# One target series is later built per pair.
key_columns = ['store_nbr', 'family']
unique_combinations = df.loc[:, key_columns].drop_duplicates()

In [None]:
# Running sums of the per-series error metrics and the number of series scored.
total_rmse, total_rmsle, num_models = 0, 0, 0

In [None]:
# Parallel lists: position i in each list refers to the same (store_nbr, family) pair.
all_train_series, all_val_series, all_future_covariates = [], [], []

In [None]:
# Build, for every (store_nbr, family) pair in `unique_combinations`:
#   * a training series (everything except the trailing holdout),
#   * a validation series (the trailing holdout),
#   * one multivariate covariate series spanning the full history.
covariate_columns = [
    "dcoilwtico",
    "onpromotion",
    "typeholiday",
    "day_of_week",
    "day",
    "month",
    "year",
]
holdout_length = 30  # trailing observations kept aside for validation

# Start from empty lists so re-running this cell does not append duplicates.
all_train_series = []
all_val_series = []
all_future_covariates = []

# Group once up front instead of masking the whole frame for every pair.
# Each group keeps the original row order and index of `df`.
rows_by_combination = df.groupby(["store_nbr", "family"], sort=False)

for store_nbr, family in unique_combinations[["store_nbr", "family"]].itertuples(index=False, name=None):
    df_subset = rows_by_combination.get_group((store_nbr, family))

    # No time_col is passed, so darts derives the time axis from the frame's index.
    ts = TimeSeries.from_dataframe(df_subset, value_cols=["sales"])

    # Single multivariate series; components follow the order of `covariate_columns`.
    future_covariates = TimeSeries.from_dataframe(df_subset, value_cols=covariate_columns)

    training_size = len(ts) - holdout_length
    train = ts[:training_size]
    val = ts[training_size:]

    all_train_series.append(train)
    all_val_series.append(val)
    all_future_covariates.append(future_covariates)

In [None]:
# Covariate lags 0..29, passed to the model as `lags_future_covariates`.
len_future_covariates = 30
future_covariates_lags = [lag for lag in range(len_future_covariates)]

In [None]:
# Number of series per training batch.
batch_size = 1

In [None]:
# Count the collected series and how many batches are needed to cover them
# (a partially filled final batch still counts as one batch).
num_samples = len(all_train_series)
full_batches, leftover = divmod(num_samples, batch_size)
num_batches = full_batches + (1 if leftover else 0)

In [None]:
# Gradient-boosted forecaster: uses the last 2 target values plus the selected
# future-covariate lags, and emits 30 steps per forecast chunk.
model = XGBModel(
    lags=2,
    lags_future_covariates=future_covariates_lags,
    output_chunk_length=30,
)

In [None]:
# Fit the global model once on every series and its matching covariates.
# Each call to `fit` retrains the underlying regressor from scratch, so
# fitting batch by batch would leave the model trained on the final batch only.
# NOTE(review): if a single fit exceeds available memory, consider capping the
# rows drawn per series via fit's `max_samples_per_ts` argument instead of
# looping over batches — confirm against the installed darts version.
model.fit(all_train_series, future_covariates=all_future_covariates)

print(f"Fitted on {len(all_train_series)} series.")

In [None]:
# Score the model on the held-out tail of every series.
# The accumulators are reset here so re-running this cell does not double count.
total_rmse = 0
total_rmsle = 0
num_models = 0

for train_series, val_series, series_covariates in zip(
    all_train_series, all_val_series, all_future_covariates
):
    # The model was trained with future covariates, so each forecast needs the
    # covariates that belong to the series being predicted.
    forecast = model.predict(
        n=len(val_series),
        series=train_series,
        future_covariates=series_covariates,
    )

    val = val_series.pd_series().tolist()
    # Clip negative forecasts to zero so log1p below is well defined.
    predictions = np.maximum(forecast.pd_series().tolist(), 0)

    rmse = np.sqrt(mean_squared_error(val, predictions))
    rmsle = np.sqrt(mean_squared_error(np.log1p(val), np.log1p(predictions)))

    total_rmse += rmse
    total_rmsle += rmsle
    num_models += 1

    print(f"\nRoot Mean Squared Error (RMSE): {rmse}")
    print(f"Root Mean Squared Logarithmic Error (RMSLE): {rmsle}")

In [None]:
# Mean of each metric over all scored series.
avg_rmse, avg_rmsle = total_rmse / num_models, total_rmsle / num_models

In [None]:
# Report the averaged metrics, one per line.
print(
    f"Average Root Mean Squared Error (RMSE) across all models: {avg_rmse}",
    f"Average Root Mean Squared Logarithmic Error (RMSLE) across all models: {avg_rmsle}",
    sep="\n",
)