In [1]:
import pandas as pd
import numpy as np
from darts import TimeSeries
from darts.models import RandomForest
from sklearn.metrics import mean_squared_error, mean_squared_log_error

In [2]:
df = pd.read_csv("../../Data/Kaggle/StoreSales/train_darts.csv", parse_dates=["date"])

In [3]:
df.head()

Unnamed: 0,date,store_nbr,family,sales,onpromotion,typeholiday,dcoilwtico,city,state,typestores,cluster,day_of_week,day,month,year
0,2013-01-01,1,0,0.0,0,3,93.14,18,12,3,13,2,1,1,2013
1,2013-01-01,1,1,0.0,0,3,93.14,18,12,3,13,2,1,1,2013
2,2013-01-01,1,2,0.0,0,3,93.14,18,12,3,13,2,1,1,2013
3,2013-01-01,1,3,0.0,0,3,93.14,18,12,3,13,2,1,1,2013
4,2013-01-01,1,4,0.0,0,3,93.14,18,12,3,13,2,1,1,2013


In [4]:
unique_combinations = df[['store_nbr', 'family']].drop_duplicates()

In [5]:
total_rmse = 0
total_rmsle = 0
num_models = 0

In [6]:
trained_models = {}

In [7]:
included_stores = {5, 8, 16, 19, 22, 25, 33, 37, 41, 47, 51}
excluded_families = {1, 4, 14, 17, 19, 20, 31}

In [8]:
for index, row in unique_combinations.iterrows():
    store_nbr = row['store_nbr']
    family = row['family']
    
    if store_nbr in included_stores and family not in excluded_families:
    
        print(f"\nTraining model for store {store_nbr} and family {family}...")

        df_subset = df[(df['store_nbr'] == store_nbr) & (df['family'] == family)]

        ts = TimeSeries.from_dataframe(df_subset, value_cols=["sales"])

        training_size = int(len(ts) - 30)
        
        train = ts[:training_size]
        val = ts[training_size:]

        model = RandomForest(lags=2)
        model.fit(train)

        trained_models[(store_nbr, family)] = model

        predictions = model.predict(n=len(val))

        val = val.pd_series().tolist()
        predictions = predictions.pd_series().tolist()

        predictions = np.maximum(predictions, 0)

        rmse = np.sqrt(mean_squared_error(val, predictions))
        rmsle = np.sqrt(mean_squared_error(np.log1p(val), np.log1p(predictions)))

        total_rmse += rmse
        total_rmsle += rmsle
        num_models += 1
        
        model.save(f"SavedModels/RandomForest/RF_S{store_nbr}P{family}.pkl")

        print(f"Root Mean Squared Error (RMSE): {rmse}")
        print(f"Root Mean Squared Logarithmic Error (RMSLE): {rmsle}")


Training model for store 16 and family 0...
Root Mean Squared Error (RMSE): 2.8558369933723973
Root Mean Squared Logarithmic Error (RMSLE): 0.577224642456334

Training model for store 16 and family 2...
Root Mean Squared Error (RMSE): 1.9984948238108096
Root Mean Squared Logarithmic Error (RMSLE): 0.7294715318184355

Training model for store 16 and family 3...
Root Mean Squared Error (RMSE): 372.47882869142154
Root Mean Squared Logarithmic Error (RMSLE): 0.259786426100219

Training model for store 16 and family 5...
Root Mean Squared Error (RMSE): 72.73734675497022
Root Mean Squared Logarithmic Error (RMSLE): 0.43481936877880145

Training model for store 16 and family 6...
Root Mean Squared Error (RMSE): 5.06180058670016
Root Mean Squared Logarithmic Error (RMSLE): 0.91585165432302

Training model for store 16 and family 7...
Root Mean Squared Error (RMSE): 541.2414055927663
Root Mean Squared Logarithmic Error (RMSLE): 0.46109512156557675

Training model for store 16 and family 8...
R

Root Mean Squared Error (RMSE): 2.3438292310291735
Root Mean Squared Logarithmic Error (RMSLE): 0.6344324230697953

Training model for store 22 and family 3...
Root Mean Squared Error (RMSE): 659.2192931743831
Root Mean Squared Logarithmic Error (RMSLE): 0.4229376208700166

Training model for store 22 and family 5...
Root Mean Squared Error (RMSE): 65.0857239820784
Root Mean Squared Logarithmic Error (RMSLE): 0.3555408219340373

Training model for store 22 and family 6...
Root Mean Squared Error (RMSE): 9.55699988952658
Root Mean Squared Logarithmic Error (RMSLE): 0.8105596546945859

Training model for store 22 and family 7...
Root Mean Squared Error (RMSE): 360.0916646216627
Root Mean Squared Logarithmic Error (RMSLE): 0.4216621064091586

Training model for store 22 and family 8...
Root Mean Squared Error (RMSE): 70.08251705694536
Root Mean Squared Logarithmic Error (RMSLE): 0.22517120161070164

Training model for store 22 and family 9...
Root Mean Squared Error (RMSE): 27.11178035589

Root Mean Squared Error (RMSE): 1244.3519093702284
Root Mean Squared Logarithmic Error (RMSLE): 0.4983525473535027

Training model for store 33 and family 5...
Root Mean Squared Error (RMSE): 73.0225689459544
Root Mean Squared Logarithmic Error (RMSLE): 0.30384180395168225

Training model for store 33 and family 6...
Root Mean Squared Error (RMSE): 10.070967630681425
Root Mean Squared Logarithmic Error (RMSLE): 0.9682907686649082

Training model for store 33 and family 7...
Root Mean Squared Error (RMSE): 315.0072215412306
Root Mean Squared Logarithmic Error (RMSLE): 0.2897090370033804

Training model for store 33 and family 8...
Root Mean Squared Error (RMSE): 130.68044417444267
Root Mean Squared Logarithmic Error (RMSLE): 0.3126755742733172

Training model for store 33 and family 9...
Root Mean Squared Error (RMSE): 53.92740786311039
Root Mean Squared Logarithmic Error (RMSLE): 0.29309176018087063

Training model for store 33 and family 10...
Root Mean Squared Error (RMSE): 201.08350

Root Mean Squared Error (RMSE): 1426.6440013355352
Root Mean Squared Logarithmic Error (RMSLE): 0.5039095059751614

Training model for store 41 and family 5...
Root Mean Squared Error (RMSE): 127.94882232460468
Root Mean Squared Logarithmic Error (RMSLE): 0.2337644148198097

Training model for store 41 and family 6...
Root Mean Squared Error (RMSE): 7.234562423804142
Root Mean Squared Logarithmic Error (RMSLE): 0.7324786890405437

Training model for store 41 and family 7...
Root Mean Squared Error (RMSE): 667.0460907038834
Root Mean Squared Logarithmic Error (RMSLE): 0.4507762458367594

Training model for store 41 and family 8...
Root Mean Squared Error (RMSE): 194.51737429040455
Root Mean Squared Logarithmic Error (RMSLE): 0.26467407412522015

Training model for store 41 and family 9...
Root Mean Squared Error (RMSE): 70.40605545887158
Root Mean Squared Logarithmic Error (RMSLE): 0.29109117676561086

Training model for store 41 and family 10...
Root Mean Squared Error (RMSE): 47.64543

Root Mean Squared Error (RMSE): 477.01342392361806
Root Mean Squared Logarithmic Error (RMSLE): 0.2394039754461589

Training model for store 5 and family 5...
Root Mean Squared Error (RMSE): 74.96586772853696
Root Mean Squared Logarithmic Error (RMSLE): 0.20594575179594196

Training model for store 5 and family 6...
Root Mean Squared Error (RMSE): 11.223872465204789
Root Mean Squared Logarithmic Error (RMSLE): 0.4698031358718103

Training model for store 5 and family 7...
Root Mean Squared Error (RMSE): 244.25520077028725
Root Mean Squared Logarithmic Error (RMSLE): 0.2725526855707515

Training model for store 5 and family 8...
Root Mean Squared Error (RMSE): 114.1636230010989
Root Mean Squared Logarithmic Error (RMSLE): 0.19654655083330888

Training model for store 5 and family 9...
Root Mean Squared Error (RMSE): 61.087857189768826
Root Mean Squared Logarithmic Error (RMSLE): 0.2611763039606941

Training model for store 5 and family 10...
Root Mean Squared Error (RMSE): 40.4736789354

Root Mean Squared Error (RMSE): 1062.1814425354391
Root Mean Squared Logarithmic Error (RMSLE): 0.21958602357363183

Training model for store 8 and family 5...
Root Mean Squared Error (RMSE): 115.81656769286418
Root Mean Squared Logarithmic Error (RMSLE): 0.14492134768519688

Training model for store 8 and family 6...
Root Mean Squared Error (RMSE): 10.622065302242094
Root Mean Squared Logarithmic Error (RMSLE): 0.5018858377614553

Training model for store 8 and family 7...
Root Mean Squared Error (RMSE): 232.7525209673714
Root Mean Squared Logarithmic Error (RMSLE): 0.1943118861825564

Training model for store 8 and family 8...
Root Mean Squared Error (RMSE): 351.439421198378
Root Mean Squared Logarithmic Error (RMSLE): 0.24385833319177852

Training model for store 8 and family 9...
Root Mean Squared Error (RMSE): 69.45423293090249
Root Mean Squared Logarithmic Error (RMSLE): 0.2372776034696485

Training model for store 8 and family 10...
Root Mean Squared Error (RMSE): 103.5503927370

In [9]:
avg_rmse = total_rmse / num_models
avg_rmsle = total_rmsle / num_models

In [10]:
print(f"Average Root Mean Squared Error (RMSE) across all models: {avg_rmse}")
print(f"Average Root Mean Squared Logarithmic Error (RMSLE) across all models: {avg_rmsle}")

Average Root Mean Squared Error (RMSE) across all models: 179.20772421600785
Average Root Mean Squared Logarithmic Error (RMSLE) across all models: 0.499004359134454
