In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from darts import TimeSeries
from darts.models import XGBModel
from sklearn.metrics import mean_squared_error, mean_squared_log_error

In [2]:
# Load the training table; the "date" column is parsed into datetimes on read.
train_csv_path = "../../Data/Kaggle/StoreSales/train_darts.csv"
df = pd.read_csv(train_csv_path, parse_dates=["date"])

In [3]:
# Preview the first five rows to sanity-check the loaded columns.
df.head(n=5)

Unnamed: 0,date,store_nbr,family,sales,onpromotion,typeholiday,dcoilwtico,city,state,typestores,cluster,day_of_week,day,month,year
0,2013-01-01,1,0,0.0,0,3,93.14,18,12,3,13,2,1,1,2013
1,2013-01-01,1,1,0.0,0,3,93.14,18,12,3,13,2,1,1,2013
2,2013-01-01,1,2,0.0,0,3,93.14,18,12,3,13,2,1,1,2013
3,2013-01-01,1,3,0.0,0,3,93.14,18,12,3,13,2,1,1,2013
4,2013-01-01,1,4,0.0,0,3,93.14,18,12,3,13,2,1,1,2013


In [4]:
# One row per distinct (store_nbr, family) pair; each pair is modelled as its own series.
series_key_columns = ["store_nbr", "family"]
unique_combinations = df.loc[:, series_key_columns].drop_duplicates()

In [5]:
# Running sums of the per-model errors and the model count, used later for the averages.
total_rmse = total_rmsle = num_models = 0

In [6]:
# Fitted models keyed by (store_nbr, family).
trained_models = dict()

In [7]:
# Only these stores get a model.
included_stores = {
    5, 8, 16, 19, 22, 25,
    33, 37, 41, 47, 51,
}
# Product-family codes that are left out of training.
excluded_families = {
    1, 4, 14, 17,
    19, 20, 31,
}

In [8]:
# Train one XGBoost forecaster per selected (store, family) series, score it on
# the last HOLDOUT_STEPS observations, and save the fitted model to disk.
#
# Reads:  df, unique_combinations, included_stores, excluded_families
# Writes: trained_models, total_rmse, total_rmsle, num_models, and one .pkl per model

HOLDOUT_STEPS = 30  # number of trailing observations held out for validation
MODEL_DIR = "SavedModels/XGBoost"

# Reset the running totals here so that re-running this cell on its own does
# not double-count models and skew the averages computed afterwards.
total_rmse = 0
total_rmsle = 0
num_models = 0

# Make sure the output folder exists before the first save, so a fresh checkout
# does not fail only after a model has already been trained.
Path(MODEL_DIR).mkdir(parents=True, exist_ok=True)

# Narrow the candidate pairs up front instead of testing every pair in the loop.
selected_combinations = unique_combinations[
    unique_combinations["store_nbr"].isin(included_stores)
    & ~unique_combinations["family"].isin(excluded_families)
]

for store_nbr, family in selected_combinations.itertuples(index=False, name=None):
    print(f"\nTraining model for store {store_nbr} and family {family}...")

    df_subset = df[(df["store_nbr"] == store_nbr) & (df["family"] == family)]

    # No time_col is passed, so the time axis comes from the frame's index, not
    # from the "date" column. With a lags-only model this should not matter.
    # NOTE(review): pass time_col="date" if date-based covariates/encoders are added.
    ts = TimeSeries.from_dataframe(df_subset, value_cols=["sales"])

    # Chronological split: everything except the final HOLDOUT_STEPS points trains the model.
    training_size = len(ts) - HOLDOUT_STEPS
    train = ts[:training_size]
    val = ts[training_size:]

    model = XGBModel(lags=2)
    model.fit(train)
    trained_models[(store_nbr, family)] = model

    predictions = model.predict(n=len(val))

    # Keep the TimeSeries objects intact and give the plain values their own names.
    actual_values = val.pd_series().tolist()
    # Clip negative forecasts to zero so log1p below stays well-defined.
    predicted_values = np.maximum(predictions.pd_series().tolist(), 0)

    rmse = np.sqrt(mean_squared_error(actual_values, predicted_values))
    # RMSLE computed as the RMSE of the log1p-transformed values.
    rmsle = np.sqrt(
        mean_squared_error(np.log1p(actual_values), np.log1p(predicted_values))
    )

    total_rmse += rmse
    total_rmsle += rmsle
    num_models += 1

    model.save(f"{MODEL_DIR}/XGB_S{store_nbr}P{family}.pkl")

    print(f"Root Mean Squared Error (RMSE): {rmse}")
    print(f"Root Mean Squared Logarithmic Error (RMSLE): {rmsle}")


Training model for store 16 and family 0...
Root Mean Squared Error (RMSE): 3.043767365201265
Root Mean Squared Logarithmic Error (RMSLE): 0.6141556098924779

Training model for store 16 and family 2...
Root Mean Squared Error (RMSE): 1.9985477827396958
Root Mean Squared Logarithmic Error (RMSLE): 0.7294992285096012

Training model for store 16 and family 3...
Root Mean Squared Error (RMSE): 415.9741894788598
Root Mean Squared Logarithmic Error (RMSLE): 0.3041269544067559

Training model for store 16 and family 5...
Root Mean Squared Error (RMSE): 56.4582044841987
Root Mean Squared Logarithmic Error (RMSLE): 0.3670454769742482

Training model for store 16 and family 6...
Root Mean Squared Error (RMSE): 4.219543669773333
Root Mean Squared Logarithmic Error (RMSLE): 0.7777147908764733

Training model for store 16 and family 7...
Root Mean Squared Error (RMSE): 584.7808738622896
Root Mean Squared Logarithmic Error (RMSLE): 0.5391219139495663

Training model for store 16 and family 8...
R

Root Mean Squared Error (RMSE): 65.3474530620624
Root Mean Squared Logarithmic Error (RMSLE): 0.3747250581879677

Training model for store 22 and family 6...
Root Mean Squared Error (RMSE): 10.063357534158538
Root Mean Squared Logarithmic Error (RMSLE): 0.7938381244393927

Training model for store 22 and family 7...
Root Mean Squared Error (RMSE): 371.31427830788493
Root Mean Squared Logarithmic Error (RMSLE): 0.4409388734877543

Training model for store 22 and family 8...
Root Mean Squared Error (RMSE): 77.25717363541864
Root Mean Squared Logarithmic Error (RMSLE): 0.2681380331174334

Training model for store 22 and family 9...
Root Mean Squared Error (RMSE): 38.14237060464998
Root Mean Squared Logarithmic Error (RMSLE): 0.2437169512000763

Training model for store 22 and family 10...
Root Mean Squared Error (RMSE): 35.334140176172966
Root Mean Squared Logarithmic Error (RMSLE): 0.6025480485609939

Training model for store 22 and family 11...
Root Mean Squared Error (RMSE): 317.481318

Root Mean Squared Error (RMSE): 9.510353383255287
Root Mean Squared Logarithmic Error (RMSLE): 0.8623494639720684

Training model for store 33 and family 7...
Root Mean Squared Error (RMSE): 297.5429624491121
Root Mean Squared Logarithmic Error (RMSLE): 0.28497798605721947

Training model for store 33 and family 8...
Root Mean Squared Error (RMSE): 117.46910896220908
Root Mean Squared Logarithmic Error (RMSLE): 0.28553764848705354

Training model for store 33 and family 9...
Root Mean Squared Error (RMSE): 47.23092422733807
Root Mean Squared Logarithmic Error (RMSLE): 0.25360158237661135

Training model for store 33 and family 10...
Root Mean Squared Error (RMSE): 225.90685459383204
Root Mean Squared Logarithmic Error (RMSLE): 0.7761084881486348

Training model for store 33 and family 11...
Root Mean Squared Error (RMSE): 54.37375797337288
Root Mean Squared Logarithmic Error (RMSLE): 0.6623032913137752

Training model for store 33 and family 12...
Root Mean Squared Error (RMSE): 695.92

Root Mean Squared Error (RMSE): 53.97792556620358
Root Mean Squared Logarithmic Error (RMSLE): 0.3923570927733235

Training model for store 41 and family 11...
Root Mean Squared Error (RMSE): 65.89585040621934
Root Mean Squared Logarithmic Error (RMSLE): 0.45520125404212125

Training model for store 41 and family 12...
Root Mean Squared Error (RMSE): 1575.5365121513028
Root Mean Squared Logarithmic Error (RMSLE): 0.4060223197614

Training model for store 41 and family 13...
Root Mean Squared Error (RMSE): 42.540273119272904
Root Mean Squared Logarithmic Error (RMSLE): 0.973030751774226

Training model for store 41 and family 15...
Root Mean Squared Error (RMSE): 12.148282842559828
Root Mean Squared Logarithmic Error (RMSLE): 0.4600183656972975

Training model for store 41 and family 16...
Root Mean Squared Error (RMSE): 28.84378479512778
Root Mean Squared Logarithmic Error (RMSLE): 0.635521851201794

Training model for store 41 and family 18...
Root Mean Squared Error (RMSE): 126.47474

Root Mean Squared Error (RMSE): 377.5636666164663
Root Mean Squared Logarithmic Error (RMSLE): 0.7524199630713434

Training model for store 5 and family 12...
Root Mean Squared Error (RMSE): 510.59178380616214
Root Mean Squared Logarithmic Error (RMSLE): 0.17100243814563632

Training model for store 5 and family 13...
Root Mean Squared Error (RMSE): 67.73515605518055
Root Mean Squared Logarithmic Error (RMSLE): 1.529917637581919

Training model for store 5 and family 15...
Root Mean Squared Error (RMSE): 11.886873568965363
Root Mean Squared Logarithmic Error (RMSLE): 0.408368975290627

Training model for store 5 and family 16...
Root Mean Squared Error (RMSE): 7.891101024697799
Root Mean Squared Logarithmic Error (RMSLE): 0.3415397289718022

Training model for store 5 and family 18...
Root Mean Squared Error (RMSE): 67.18641124017314
Root Mean Squared Logarithmic Error (RMSLE): 0.275924031651676

Training model for store 5 and family 21...
Root Mean Squared Error (RMSE): 5.990789618060

Root Mean Squared Error (RMSE): 50.760877833339514
Root Mean Squared Logarithmic Error (RMSLE): 0.5087405548175886

Training model for store 8 and family 18...
Root Mean Squared Error (RMSE): 83.42099468673766
Root Mean Squared Logarithmic Error (RMSLE): 0.23101442468655417

Training model for store 8 and family 21...
Root Mean Squared Error (RMSE): 11.18428738067642
Root Mean Squared Logarithmic Error (RMSLE): 0.4982407312495091

Training model for store 8 and family 22...
Root Mean Squared Error (RMSE): 91.22991783919416
Root Mean Squared Logarithmic Error (RMSLE): 0.6134380600254118

Training model for store 8 and family 23...
Root Mean Squared Error (RMSE): 10.470116609010086
Root Mean Squared Logarithmic Error (RMSLE): 0.5416235442843802

Training model for store 8 and family 24...
Root Mean Squared Error (RMSE): 228.45005060179815
Root Mean Squared Logarithmic Error (RMSLE): 0.3989768287565148

Training model for store 8 and family 25...
Root Mean Squared Error (RMSE): 89.0260013

In [9]:
# Average each accumulated error over the number of models that were trained.
avg_rmse, avg_rmsle = (
    error_sum / num_models for error_sum in (total_rmse, total_rmsle)
)

In [10]:
# Report the two averaged error metrics, one per line.
print(
    f"Average Root Mean Squared Error (RMSE) across all models: {avg_rmse}",
    f"Average Root Mean Squared Logarithmic Error (RMSLE) across all models: {avg_rmsle}",
    sep="\n",
)

Average Root Mean Squared Error (RMSE) across all models: 195.59242160391832
Average Root Mean Squared Logarithmic Error (RMSLE) across all models: 0.5333629225864212
