In [1]:
import pandas as pd
import numpy as np
from darts import TimeSeries
from darts.models import LightGBMModel
from sklearn.metrics import mean_squared_error, mean_squared_log_error
import matplotlib.pyplot as plt

  "ds": pd.date_range(start="1949-01-01", periods=len(AirPassengers), freq="M"),


In [2]:
df = pd.read_csv("train_darts_univariate.csv", parse_dates=["date"])

In [3]:
df.head()

Unnamed: 0,date,store_nbr,family,sales
0,2013-01-01,1,0,0.0
1,2013-01-01,1,1,0.0
2,2013-01-01,1,2,0.0
3,2013-01-01,1,3,0.0
4,2013-01-01,1,4,0.0


In [4]:
unique_combinations = df[['store_nbr', 'family']].drop_duplicates()

In [5]:
total_rmse = 0
total_rmsle = 0
num_models = 0

In [6]:
trained_models = {}

In [7]:
for index, row in unique_combinations.iterrows():
    store_nbr = row['store_nbr']
    family = row['family']
    
    print(f"\nTraining model for store {store_nbr} and family {family}...")
    
    df_subset = df[(df['store_nbr'] == store_nbr) & (df['family'] == family)]
    
    ts = TimeSeries.from_dataframe(df_subset, value_cols=["sales"])

    split_ratio = 0.8
    training_size = int(len(ts) * split_ratio)
    train = ts[:training_size]
    val = ts[training_size:]

    model = LightGBMModel(lags=30)
    model.fit(train)

    trained_models[(store_nbr, family)] = model

    predictions = model.predict(n=len(val))

    val = val.pd_series().tolist()
    predictions = predictions.pd_series().tolist()

    predictions = np.maximum(predictions, 0)

    rmse = np.sqrt(mean_squared_error(val, predictions))
    rmsle = np.sqrt(mean_squared_error(np.log1p(val), np.log1p(predictions)))

    total_rmse += rmse
    total_rmsle += rmsle
    num_models += 1

    print(f"Root Mean Squared Error (RMSE): {rmse}")
    print(f"Root Mean Squared Logarithmic Error (RMSLE): {rmsle}")

    plt.figure(figsize=(12, 6))
    plt.plot(val, label='Actual')
    plt.plot(predictions, label='Predictions')
    plt.title(f"Model for store {store_nbr} and family {family}")
    plt.legend()
    plt.savefig(f"plots/M09.4store{store_nbr}_family{family}.png")
    plt.close()


Training model for store 1 and family 0...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.002767 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 450
[LightGBM] [Info] Number of data points in the train set: 1317, number of used features: 30
[LightGBM] [Info] Start training from score 3.079727
Root Mean Squared Error (RMSE): 3.6169842363469056
Root Mean Squared Logarithmic Error (RMSLE): 0.7787457173856289

Training model for store 1 and family 1...
[LightGBM] [Info] Total Bins 0
[LightGBM] [Info] Number of data points in the train set: 1317, number of used features: 0
Root Mean Squared Error (RMSE): 0.0
Root Mean Squared Logarithmic Error (RMSLE): 0.0

Training model for store 1 and family 2...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001686 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 287
[LightGBM] 

In [8]:
avg_rmse = total_rmse / num_models
avg_rmsle = total_rmsle / num_models

In [9]:
print(f"Average Root Mean Squared Error (RMSE) across all models: {avg_rmse}")
print(f"Average Root Mean Squared Logarithmic Error (RMSLE) across all models: {avg_rmsle}")

Average Root Mean Squared Error (RMSE) across all models: 226.54231068700406
Average Root Mean Squared Logarithmic Error (RMSLE) across all models: 0.7310139638439718
