In [2]:
import pandas as pd

from mlforecast import MLForecast
from mlforecast.utils import generate_daily_series, generate_prices_for_series
from mlforecast.lag_transforms import ExpandingMean, RollingMean
from utilsforecast.plotting import plot_series
from utilsforecast.feature_engineering import fourier

from sklearn.linear_model import LinearRegression

# data

## 生成模拟数据

In [3]:
# Build a synthetic panel of 100 daily series that all end on the same date,
# carry two integer-coded (non-categorical) static features and include a trend.
# max_length is left at the library default (a cap of 300 was tried earlier).
series = generate_daily_series(
    n_series=100,
    n_static_features=2,
    static_as_categorical=False,
    with_trend=True,
    equal_ends=True,
)
# Give the second static feature a domain-flavoured name.
series = series.rename(columns={"static_1": "product_id"})

# Quick look: first rows, overall shape, and number of observations per series,
# each separated by a blank line.
print(
    series.head(),
    series.shape,
    series["unique_id"].value_counts(),
    sep="\n\n",
)

  unique_id         ds           y  static_0  product_id
0     id_00 2000-10-05   39.811983        79          45
1     id_00 2000-10-06  103.757103        79          45
2     id_00 2000-10-07  177.540923        79          45
3     id_00 2000-10-08  260.437170        79          45
4     id_00 2000-10-09  346.872764        79          45

(27003, 5)

id_46    499
id_35    498
id_72    492
id_34    481
id_83    480
        ... 
id_51     81
id_73     81
id_58     78
id_25     75
id_08     59
Name: unique_id, Length: 100, dtype: int64


## 添加傅里叶项模拟季节性

In [4]:
# Add Fourier terms for a weekly cycle (season_length=7) with k=2 harmonics,
# which yields the sin1_7/sin2_7/cos1_7/cos2_7 columns.
# `transformed_df` is the history plus those columns; `future_df` holds the same
# Fourier columns for the next h=7 days of every series.
transformed_df, future_df = fourier(series, freq="D", season_length=7, k=2, h=7)

In [5]:
# Inspect the training frame: first rows, shape, and rows per series,
# each separated by a blank line.
print(
    transformed_df.head(),
    transformed_df.shape,
    transformed_df["unique_id"].value_counts(),
    sep="\n\n",
)

  unique_id         ds           y  static_0  ...    sin1_7    sin2_7    cos1_7    cos2_7
0     id_00 2000-10-05   39.811983        79  ... -0.974927  0.433893 -0.222526 -0.900964
1     id_00 2000-10-06  103.757103        79  ... -0.781834 -0.974926  0.623486 -0.222530
2     id_00 2000-10-07  177.540923        79  ... -0.000004 -0.000009  1.000000  1.000000
3     id_00 2000-10-08  260.437170        79  ...  0.781829  0.974930  0.623493 -0.222513
4     id_00 2000-10-09  346.872764        79  ...  0.974929 -0.433877 -0.222517 -0.900972

[5 rows x 9 columns]

(27003, 9)

id_46    499
id_35    498
id_72    492
id_34    481
id_83    480
        ... 
id_51     81
id_73     81
id_58     78
id_25     75
id_08     59
Name: unique_id, Length: 100, dtype: int64


In [6]:
# Inspect the future exogenous frame: first rows, shape, and rows per series
# (one row per forecast step for each series), each separated by a blank line.
print(
    future_df.head(),
    future_df.shape,
    future_df["unique_id"].value_counts(),
    sep="\n\n",
)

  unique_id         ds    sin1_7    sin2_7    cos1_7    cos2_7
0     id_00 2001-05-15  0.433884 -0.781832 -0.900969  0.623489
1     id_00 2001-05-16 -0.433898  0.781851 -0.900962  0.623466
2     id_00 2001-05-17 -0.974928  0.433883 -0.222521 -0.900969
3     id_00 2001-05-18 -0.781821 -0.974935  0.623502 -0.222490
4     id_00 2001-05-19  0.000001  0.000002  1.000000  1.000000

(700, 6)

id_00    7
id_63    7
id_73    7
id_72    7
id_71    7
        ..
id_30    7
id_29    7
id_28    7
id_27    7
id_99    7
Name: unique_id, Length: 100, dtype: int64


# model training

In [9]:
# A plain linear regression with no lags or date features: the only regressors
# are the two static columns and the Fourier terms already in `transformed_df`.
fcst = MLForecast(freq="D", models=LinearRegression())

# Only `static_0` and `product_id` are declared static, so the remaining
# feature columns (the Fourier terms) are treated as time-varying and must be
# supplied for the forecast horizon at predict time.
# Left as a bare expression so the cell displays the fitted forecaster's repr.
fcst.fit(
    transformed_df,
    static_features=["static_0", "product_id"],
)

MLForecast(models=[LinearRegression], freq=D, lag_features=[], date_features=[], num_threads=1)

# model predicting

In [10]:
# Forecast 7 steps ahead for every series; `future_df` provides the Fourier
# feature values for those future dates.
# NOTE(review): `predicion` is a misspelling of "prediction"; the name is kept
# because other cells may reference it.
predicion = fcst.predict(h=7, X_df=future_df)

# First rows, shape, and forecast rows per series, each separated by a blank line.
print(
    predicion.head(),
    predicion.shape,
    predicion["unique_id"].value_counts(),
    sep="\n\n",
)

  unique_id         ds  LinearRegression
0     id_00 2001-05-15        329.941452
1     id_00 2001-05-16        321.069495
2     id_00 2001-05-17        328.576132
3     id_00 2001-05-18        348.461299
4     id_00 2001-05-19        358.108337

(700, 3)

id_00    7
id_63    7
id_73    7
id_72    7
id_71    7
        ..
id_30    7
id_29    7
id_28    7
id_27    7
id_99    7
Name: unique_id, Length: 100, dtype: int64
