In [2]:
# data
from mlforecast.utils import generate_daily_series

# forecast
from mlforecast import MLForecast

# feature transform
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# model and pipeline
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

# data

In [3]:
# Generate five daily series with mlforecast's helper; the displayed frame
# has one row per (unique_id, ds) pair and the target in column y.
series = generate_daily_series(n_series=5)
# Keep the frame as the cell's final expression so it is rendered as a table.
series

Unnamed: 0,unique_id,ds,y
0,id_0,2000-01-01,0.428973
1,id_0,2000-01-02,1.423626
2,id_0,2000-01-03,2.311782
3,id_0,2000-01-04,3.192191
4,id_0,2000-01-05,4.148767
...,...,...,...
1096,id_4,2001-01-03,4.058910
1097,id_4,2001-01-04,5.178157
1098,id_4,2001-01-05,6.133142
1099,id_4,2001-01-06,0.403709


# pipeline

## lag and date features

In [4]:
# Forecaster with no models attached: only its feature engineering is used
# in this cell (lag 1 of the target plus the day of week of each date).
fcst = MLForecast(models=[], freq="D", lags=[1], date_features=["dayofweek"])

# Ask for the feature matrix and the target as separate objects.
X, y = fcst.preprocess(series, return_X_y=True)

# One newline-separated print: feature preview, feature shape, target
# values, target length (same text as four consecutive print calls).
print(X.head(), X.shape, y, len(y), sep="\n")

       lag1  dayofweek
1  0.428973          6
2  1.423626          0
3  2.311782          1
4  3.192191          2
5  4.148767          3
(1096, 2)
[1.42362587 2.31178185 3.19219085 ... 6.13314199 0.40370926 1.0817794 ]
1096


## date feature one-hot-encoding

In [5]:
# One-hot encode the day-of-week column, dropping its first category; every
# other column (here lag1) is passed through untouched.
ohe = ColumnTransformer(
    transformers=[("encoder", OneHotEncoder(drop="first"), ["dayofweek"])],
    remainder="passthrough",
)
X_transformed = ohe.fit_transform(X)

# Inspect the result: the matrix itself, its type, its shape and the
# names of the output columns.
print(
    X_transformed,
    type(X_transformed),
    X_transformed.shape,
    ohe.get_feature_names_out(),
    sep="\n",
)

  (0, 5)	1.0
  (0, 6)	0.4289728088113784
  (1, 6)	1.4236258693920627
  (2, 0)	1.0
  (2, 6)	2.311781848392986
  (3, 1)	1.0
  (3, 6)	3.19219085364635
  (4, 2)	1.0
  (4, 6)	4.148767303272236
  (5, 3)	1.0
  (5, 6)	5.028356488658722
  (6, 4)	1.0
  (6, 6)	6.136328147290056
  (7, 5)	1.0
  (7, 6)	0.23883255866067493
  (8, 6)	1.4060843643877465
  (9, 0)	1.0
  (9, 6)	2.2399885861875286
  (10, 1)	1.0
  (10, 6)	3.1963923980504148
  (11, 2)	1.0
  (11, 6)	4.418039381768689
  (12, 3)	1.0
  (12, 6)	5.168698080208634
  (13, 4)	1.0
  :	:
  (1083, 0)	1.0
  (1083, 6)	2.078941935130301
  (1084, 1)	1.0
  (1084, 6)	3.393430013657649
  (1085, 2)	1.0
  (1085, 6)	4.486508316378672
  (1086, 3)	1.0
  (1086, 6)	5.031017910260789
  (1087, 4)	1.0
  (1087, 6)	6.050001338497742
  (1088, 5)	1.0
  (1088, 6)	0.15894893467048016
  (1089, 6)	1.2629754692951607
  (1090, 0)	1.0
  (1090, 6)	2.352135646965559
  (1091, 1)	1.0
  (1091, 6)	3.4777752742732244
  (1092, 2)	1.0
  (1092, 6)	4.058909869016852
  (1093, 3)	1.0
  (1093, 6

# training

In [6]:
# Fresh, unfitted column transformer: one-hot encode day of week with the
# first category dropped and pass the remaining columns (lag1) through.
ohe = ColumnTransformer(
    transformers=[("encoder", OneHotEncoder(drop="first"), ["dayofweek"])],
    remainder="passthrough",
)

# Chain the encoding step and a linear regression into a single estimator.
model = make_pipeline(ohe, LinearRegression())

# Forecaster that builds the lag/date features and trains the pipeline,
# registered under the name "ohe-lr".
fcst = MLForecast(
    models={"ohe-lr": model},
    freq="D",
    lags=[1],
    date_features=["dayofweek"],
)

# Fit on the whole panel; the call stays last so its result is displayed.
fcst.fit(series)

MLForecast(models=[ohe-lr], freq=D, lag_features=['lag1'], date_features=['dayofweek'], num_threads=1)

# forecasting

In [7]:
# Forecast a horizon of one step with the fitted forecaster; the result is
# displayed as the cell's value (one row per series in the output below).
fcst.predict(h=1)

Unnamed: 0,unique_id,ds,ohe-lr
0,id_0,2000-08-10,4.312748
1,id_1,2000-04-07,4.537019
2,id_2,2000-06-16,4.160505
3,id_3,2000-08-30,3.77704
4,id_4,2001-01-08,2.676933
