In [11]:
import numpy as np
import pandas as pd
import operator

from mlforecast import MLForecast
from mlforecast.utils import generate_daily_series
from mlforecast.lag_transforms import ExpandingStd, RollingMean
from mlforecast.lag_transforms import Combine, Offset
from numba import njit
from window_ops.expanding import expanding_mean
from window_ops.shift import shift_array

# data

In [2]:
# Generate a demo dataset of 10 daily series, then show three summaries
# separated by blank lines: the first rows, the frame's shape, and the
# number of rows belonging to each series id.
data = generate_daily_series(n_series=10)
print(
    data.head(),
    data.shape,
    data["unique_id"].value_counts(),
    sep="\n\n",
)

  unique_id         ds         y
0      id_0 2000-01-01  0.322947
1      id_0 2000-01-02  1.218794
2      id_0 2000-01-03  2.445887
3      id_0 2000-01-04  3.481831
4      id_0 2000-01-05  4.191721

(2376, 3)

id_7    409
id_4    373
id_5    301
id_9    261
id_6    245
id_3    242
id_0    222
id_2    167
id_1     97
id_8     59
Name: unique_id, dtype: int64


# 内置 transformation

In [7]:
# Built-in lag transforms. Each key of `lag_transforms` is a lag and each
# value is the list of transforms computed on top of that lag; the resulting
# feature columns are named after both (e.g. `expanding_std_lag1`).
# No models are registered because only the preprocessing step is exercised.
fcst = MLForecast(
    models=[],
    freq="D",
    lag_transforms={
        1: [ExpandingStd()],
        7: [
            RollingMean(window_size=7, min_samples=1),
            RollingMean(window_size=14),
        ],
    },
)
# NOTE(review): the printed shape has fewer rows than `data`; presumably rows
# whose features cannot be computed yet are dropped -- confirm in the docs.
prep_data = fcst.preprocess(data)

# Lift the column display limit so every generated feature column is printed.
with pd.option_context("display.max_columns", None):
    print(
        prep_data.head(),
        prep_data.shape,
        prep_data["unique_id"].value_counts(),
        sep="\n\n",
    )

   unique_id         ds         y  expanding_std_lag1  \
20      id_0 2000-01-21  6.319961            1.956363   
21      id_0 2000-01-22  0.071677            2.028545   
22      id_0 2000-01-23  1.472334            2.095850   
23      id_0 2000-01-24  2.260924            2.077423   
24      id_0 2000-01-25  3.207331            2.038621   

    rolling_mean_lag7_window_size7_min_samples1  \
20                                     3.234486   
21                                     3.256055   
22                                     3.259842   
23                                     3.311851   
24                                     3.338590   

    rolling_mean_lag7_window_size14  
20                         3.283064  
21                         3.291068  
22                         3.310390  
23                         3.307083  
24                         3.289148  

(2176, 6)

id_7    389
id_4    353
id_5    281
id_9    241
id_6    225
id_3    222
id_0    202
id_2    147
id_1     77
id

In [12]:
# Combining transforms. `Combine` receives two transforms plus a binary
# operator; here it divides the 7-day rolling mean by the 14-day rolling mean
# (both on lag 1). The output column name joins both feature names with the
# operator name (`..._truediv_...`).
fcst = MLForecast(
    models=[],
    freq="D",
    lag_transforms={
        1: [
            RollingMean(window_size=7),
            RollingMean(window_size=14),
            Combine(
                RollingMean(window_size=7),
                RollingMean(window_size=14),
                operator.truediv,
            ),
        ],
    },
)
prep_data = fcst.preprocess(data)

# Lift the column display limit so every generated feature column is printed.
with pd.option_context("display.max_columns", None):
    print(
        prep_data.head(),
        prep_data.shape,
        prep_data["unique_id"].value_counts(),
        sep="\n\n",
    )

   unique_id         ds         y  rolling_mean_lag1_window_size7  \
14      id_0 2000-01-15  0.435006                        3.234486   
15      id_0 2000-01-16  1.489309                        3.256055   
16      id_0 2000-01-17  2.399579                        3.259842   
17      id_0 2000-01-18  3.230740                        3.311851   
18      id_0 2000-01-19  4.390265                        3.338590   

    rolling_mean_lag1_window_size14  \
14                         3.283064   
15                         3.291068   
16                         3.310390   
17                         3.307083   
18                         3.289148   

    rolling_mean_lag1_window_size7_truediv_rolling_mean_lag1_window_size14  
14                                           0.985204                       
15                                           0.989361                       
16                                           0.984730                       
17                                        

In [13]:
# Offsetting a transform inside a combination. `Offset(..., n=1)` under the
# lag-1 key yields the feature named `rolling_mean_lag2_window_size7`, so the
# combined column is the lag-1 rolling mean divided by the lag-2 rolling mean.
# The explicit lag-2 rolling mean is also requested so both can be compared
# side by side in the printed frame.
fcst = MLForecast(
    models=[],
    freq="D",
    lag_transforms={
        1: [
            RollingMean(window_size=7),
            Combine(
                RollingMean(window_size=7),
                Offset(RollingMean(window_size=7), n=1),
                operator.truediv,
            ),
        ],
        2: [RollingMean(window_size=7)],
    },
)
prep_data = fcst.preprocess(data)

# Lift the column display limit so every generated feature column is printed.
with pd.option_context("display.max_columns", None):
    print(
        prep_data.head(),
        prep_data.shape,
        prep_data["unique_id"].value_counts(),
        sep="\n\n",
    )

   unique_id         ds         y  rolling_mean_lag1_window_size7  \
8       id_0 2000-01-09  1.462798                        3.326081   
9       id_0 2000-01-10  2.035518                        3.360938   
10      id_0 2000-01-11  3.043565                        3.302314   
11      id_0 2000-01-12  4.010109                        3.239705   
12      id_0 2000-01-13  5.416310                        3.213760   

    rolling_mean_lag1_window_size7_truediv_rolling_mean_lag2_window_size7  \
8                                            0.998331                       
9                                            1.010480                       
10                                           0.982557                       
11                                           0.981041                       
12                                           0.991992                       

    rolling_mean_lag2_window_size7  
8                         3.331641  
9                         3.326081  
10         

# 基于 numba 的 transformation