In [1]:
# tools
import numpy as np

# data
from mlforecast.utils import generate_daily_series

# models
from sklearn.linear_model import LinearRegression
from lightgbm import LGBMRegressor

# forecast
from mlforecast import MLForecast

# Generate sample data

In [9]:
# Build five synthetic daily series in long format (columns: unique_id, ds, y).
series = generate_daily_series(n_series=5)

# Peek at both ends of one series, then the overall shape and the number of
# observations per series. Each preview is followed by a blank line.
first_series = series.loc[series["unique_id"] == "id_0"]
for preview in (first_series.head(), first_series.tail(), series.shape):
    print(preview)
    print()
print(series["unique_id"].value_counts())

  unique_id         ds         y
0      id_0 2000-01-01  0.428973
1      id_0 2000-01-02  1.423626
2      id_0 2000-01-03  2.311782
3      id_0 2000-01-04  3.192191
4      id_0 2000-01-05  4.148767

    unique_id         ds         y
217      id_0 2000-08-05  0.259856
218      id_0 2000-08-06  1.000028
219      id_0 2000-08-07  2.155930
220      id_0 2000-08-08  3.212726
221      id_0 2000-08-09  4.442669

(1101, 3)

unique_id
id_4    373
id_3    242
id_0    222
id_2    167
id_1     97
Name: count, dtype: int64


# Define the forecaster

In [3]:
# Forecaster with two regressors, registered under the names "lr" and "lgbm".
# Features: the target lagged by 7 and 14 steps plus the day of week of each
# timestamp; the series are sampled daily (freq="D").
fcst = MLForecast(
    models=dict(
        lr=LinearRegression(),
        lgbm=LGBMRegressor(verbosity=-1),
    ),
    freq="D",
    lags=[7, 14],
    date_features=["dayofweek"],
)

# training

In [4]:
# Compute the configured features from `series` and train every model on them.
# NOTE(review): as_numpy=True presumably hands the feature matrix to the models
# as a numpy array rather than a dataframe - confirm against the mlforecast docs.
fcst.fit(series, as_numpy=True)

MLForecast(models=[lr, lgbm], freq=D, lag_features=['lag7', 'lag14'], date_features=['dayofweek'], num_threads=1)

# predicting

In [5]:
# Forecast one step ahead (h = 1); the result has one row per series and one
# prediction column per fitted model.
fcst.predict(h = 1)

Unnamed: 0,unique_id,ds,lr,lgbm
0,id_0,2000-08-10,5.268787,5.286476
1,id_1,2000-04-07,4.437316,4.13962
2,id_2,2000-06-16,3.246518,3.219793
3,id_3,2000-08-30,0.14486,0.298443
4,id_4,2001-01-08,2.211318,2.172097


# cross validation

In [7]:
# Backtest the forecaster on `series`: two validation windows, each with a
# two-step horizon. The result keeps the observed `y` next to each model's
# prediction, together with the `cutoff` timestamp of the window.
cv_res = fcst.cross_validation(
    df=series,
    n_windows=2,
    h=2,
    as_numpy=True,
)
cv_res

Unnamed: 0,unique_id,ds,cutoff,y,lr,lgbm
0,id_0,2000-08-06,2000-08-05,1.000028,1.161935,1.21301
1,id_0,2000-08-07,2000-08-05,2.15593,2.241249,2.308215
2,id_1,2000-04-03,2000-04-02,0.414801,0.164123,0.16786
3,id_1,2000-04-04,2000-04-02,1.189517,1.420101,1.331728
4,id_2,2000-06-12,2000-06-11,6.189163,6.174459,6.365293
5,id_2,2000-06-13,2000-06-11,0.050016,0.22913,0.219905
6,id_3,2000-08-26,2000-08-25,3.096359,3.331848,3.253805
7,id_3,2000-08-27,2000-08-25,4.459046,4.323154,4.274378
8,id_4,2001-01-04,2001-01-03,5.178157,5.24519,5.147514
9,id_4,2001-01-05,2001-01-03,6.133142,6.118392,6.324345


# Custom training with the `preprocess` method

In [15]:
# Same synthetic generator as before, this time requesting one static feature
# per series and the polars engine.
series_pl = generate_daily_series(
    n_series=5,
    n_static_features=1,
    engine="polars",
)
series_pl.head(2)

In [None]:
# Rebinds `fcst` to a new forecaster used only for feature engineering:
# `models` is left empty because the estimator is trained by hand in a later
# cell and attached through `fcst.models_`.
fcst = MLForecast(
    models=[],
    freq="1d",
    lags=[7, 14],
    date_features=["weekday"],
)

In [None]:
# Build the training set without fitting anything: return_X_y=True splits the
# result into features `X` and target `y`, and as_numpy=True is passed so the
# features can be sliced like an array below.
X, y = fcst.preprocess(
    series_pl,
    return_X_y=True,
    as_numpy=True,
)
X[:2]

In [None]:
# Feature names recorded by the forecaster during preprocessing; reused below
# as `feature_name` when fitting LightGBM on `X`.
fcst.ts.features_order_

In [None]:
# Train LightGBM directly on the preprocessed features. The feature names are
# supplied explicitly so that `static_0` and `weekday` can be declared
# categorical by name. `fit` returns the estimator itself, so `model` is the
# fitted regressor and the cell displays nothing.
model = LGBMRegressor(verbosity=-1).fit(
    X,
    y,
    feature_name=fcst.ts.features_order_,
    categorical_feature=["static_0", "weekday"],
)

In [None]:
# Attach the manually trained estimator to the forecaster under the name
# 'lgbm' so that `fcst.predict` can use it.
fcst.models_ = {'lgbm': model}

In [None]:
# Forecast one step ahead using the estimator assigned to `fcst.models_`.
fcst.predict(1)