In [9]:
# data
from mlforecast.utils import generate_series, generate_prices_for_series

# features
from mlforecast.lag_transforms import ExpandingMean
from mlforecast.feature_engineering import transform_exog

# model
from sklearn.linear_model import LinearRegression

# forecast
from mlforecast import MLForecast

# data

In [33]:
# Synthetic target panel: 10 series, generated with `equal_ends=True`.
series = generate_series(n_series=10, equal_ends=True)
# Summary = first rows, shape, rows per series (blank line between each part).
print(series.head(), series.shape, series["unique_id"].value_counts(), sep="\n\n")
print("-" * 80)
# Matching price table built from the series. In the captured output each id
# has 7 more price rows than target rows (e.g. 416 vs 409 for id 7).
prices = generate_prices_for_series(series)
print(prices.head(), prices.shape, prices["unique_id"].value_counts(), sep="\n\n")

  unique_id         ds         y
0         0 2000-10-05  0.322947
1         0 2000-10-06  1.218794
2         0 2000-10-07  2.445887
3         0 2000-10-08  3.481831
4         0 2000-10-09  4.191721

(2376, 3)

unique_id
7    409
4    373
5    301
9    261
6    245
3    242
0    222
2    167
1     97
8     59
Name: count, dtype: int64
--------------------------------------------------------------------------------
          ds  unique_id     price
0 2000-10-05          0  0.548814
1 2000-10-06          0  0.715189
2 2000-10-07          0  0.602763
3 2000-10-08          0  0.544883
4 2000-10-09          0  0.423655

(2446, 3)

unique_id
7    416
4    380
5    308
9    268
6    252
3    249
0    229
2    174
1    104
8     66
Name: count, dtype: int64


# transformations

In [34]:
# Feature spec for the exogenous price column. In the captured output this
# yields `price_lag7` and `price_expanding_mean_lag1`, both NaN at the start
# of a series.
price_lags = [7]
price_lag_transforms = {1: [ExpandingMean()]}
transformed_prices = transform_exog(
    prices,
    lags=price_lags,
    lag_transforms=price_lag_transforms,
)

# First rows, shape, and rows per series, separated by blank lines.
print(
    transformed_prices.head(),
    transformed_prices.shape,
    transformed_prices["unique_id"].value_counts(),
    sep="\n\n",
)

          ds  unique_id     price  price_lag7  price_expanding_mean_lag1
0 2000-10-05          0  0.548814         NaN                        NaN
1 2000-10-06          0  0.715189         NaN                   0.548814
2 2000-10-07          0  0.602763         NaN                   0.632001
3 2000-10-08          0  0.544883         NaN                   0.622255
4 2000-10-09          0  0.423655         NaN                   0.602912

(2446, 5)

unique_id
7    416
4    380
5    308
9    268
6    252
3    249
0    229
2    174
1    104
8     66
Name: count, dtype: int64


In [35]:
# Attach the price features to the targets. The join is inner (the pandas
# default, spelled out here), so price rows without a matching target row are
# not carried over: the result has 2376 rows versus 2446 price rows.
series_with_prices = series.merge(
    transformed_prices,
    how="inner",
    on=["unique_id", "ds"],
)

# First rows, shape, and rows per series, separated by blank lines.
print(
    series_with_prices.head(),
    series_with_prices.shape,
    series_with_prices["unique_id"].value_counts(),
    sep="\n\n",
)

   unique_id         ds         y     price  price_lag7  \
0          0 2000-10-05  0.322947  0.548814         NaN   
1          0 2000-10-06  1.218794  0.715189         NaN   
2          0 2000-10-07  2.445887  0.602763         NaN   
3          0 2000-10-08  3.481831  0.544883         NaN   
4          0 2000-10-09  4.191721  0.423655         NaN   

   price_expanding_mean_lag1  
0                        NaN  
1                   0.548814  
2                   0.632001  
3                   0.622255  
4                   0.602912  

(2376, 6)

unique_id
7    409
4    373
5    301
9    261
6    245
3    242
0    222
2    167
1     97
8     59
Name: count, dtype: int64


# forecast

In [36]:
# Forecaster settings: one linear regression, daily frequency ("D"), the
# 1-step target lag and the day of week as features.
forecaster_settings = {
    "models": [LinearRegression()],
    "freq": "D",
    "lags": [1],
    "date_features": ["dayofweek"],
}
fcst = MLForecast(**forecaster_settings)

# features

In [38]:
# Preview the feature frame built from the merged data. Note that with
# `dropna=True` the displayed rows start at index 1 yet still have empty
# `price_lag7` values.
# NOTE(review): presumably `dropna` only considers the features MLForecast
# builds itself (here `lag1`), not pre-existing columns -- confirm in the docs.
features_preview = fcst.preprocess(
    series_with_prices,
    static_features=[],
    dropna=True,
)
features_preview.head()

Unnamed: 0,unique_id,ds,y,price,price_lag7,price_expanding_mean_lag1,lag1,dayofweek
1,0,2000-10-06,1.218794,0.715189,,0.548814,0.322947,4
2,0,2000-10-07,2.445887,0.602763,,0.632001,1.218794,5
3,0,2000-10-08,3.481831,0.544883,,0.622255,2.445887,6
4,0,2000-10-09,4.191721,0.423655,,0.602912,3.481831,0
5,0,2000-10-10,5.395863,0.645894,,0.567061,4.191721,1


In [41]:
# Remove every row that still contains a missing value (for example the
# leading `price_lag7` gaps visible in the earlier preview). The result is
# rebound to the same name rather than using `inplace=True`, so the DataFrame
# object produced by the merge cell is not mutated behind the reader's back,
# while later cells that read `series_with_prices` keep working unchanged.
series_with_prices = series_with_prices.dropna()

# Same preview as before; the displayed rows now have every feature populated.
fcst.preprocess(series_with_prices, dropna=True, static_features=[]).head()

Unnamed: 0,unique_id,ds,y,price,price_lag7,price_expanding_mean_lag1,lag1,dayofweek
8,0,2000-10-13,1.462798,0.963663,0.715189,0.60132,0.284022,4
9,0,2000-10-14,2.035518,0.383442,0.602763,0.64158,1.462798,5
10,0,2000-10-15,3.043565,0.791725,0.544883,0.615766,2.035518,6
11,0,2000-10-16,4.010109,0.528895,0.423655,0.631763,3.043565,0
12,0,2000-10-17,5.41631,0.568045,0.645894,0.62319,4.010109,1


# training

In [27]:
# Train the forecaster on the merged target + price frame.
# NOTE(review): `static_features=[]` presumably makes the price columns count
# as time-varying inputs, which would be why `predict` is later given `X_df`
# -- confirm against the mlforecast docs.
fcst.fit(
    series_with_prices,
    static_features=[],
)

MLForecast(models=[LinearRegression], freq=D, lag_features=['lag1'], date_features=['dayofweek'], num_threads=1)

# predicting

In [28]:
# Forecast a single step ahead for every series, passing the transformed price
# table as `X_df`.
one_step_forecast = fcst.predict(h=1, X_df=transformed_prices)
one_step_forecast.head()

Unnamed: 0,unique_id,ds,LinearRegression
0,0,2001-05-15,3.803967
1,1,2001-05-15,3.512489
2,2,2001-05-15,3.170019
3,3,2001-05-15,4.307121
4,4,2001-05-15,3.018758


In [29]:
from fastcore.test import test_fail


def _predict_eight_steps():
    """Request an 8-step forecast using the transformed price table as X_df."""
    return fcst.predict(8, X_df=transformed_prices)


# The 8-step request is expected to raise with this message.
# NOTE(review): presumably because the price table extends only 7 rows per
# series beyond the targets (e.g. 416 vs 409 rows) -- confirm.
test_fail(
    _predict_eight_steps,
    contains='Found missing inputs in X_df',
)