In [1]:
"""
Auto Forecast:
    - large collections of univariate time series
    - ARIMA, ETS, Theta, CES
Exponential Smoothing:
    - data with no clear trend or seasonality
    - SES, Holt-Winters, SSO
Benchmark models:
    - Mean, Naive, Random Walk
Intermittent or Sparse models:
    - series with very few non-zero observations
    - CROSTON, ADIDA, IMAPA
Multiple Seasonalities:
    - signals with more than one clear seasonality
    - low-frequency data
    - MSTL
Theta Models:
    - deseasonalized time series
    - Theta, DynamicTheta
"""

"\nAuto Forecast:\n    - large collections of univariate time series\n    - ARIMA, ETS, Theta, CES\nExponential Smoothing:\n    - data with no clear trend or seasonality\n    - SES, Holt's Winters, SSO\nBenchmark models:\n    - Mean, Navie, Random Walk\nIntermittent or Sparse models:\n    - series with very few non-zero observations\n    - CROSTON, ADIDA, IMAPA\nMultiple Seasonalities:\n    - signals with more than one clear seasonality\n    - low-frequency data\n    - MSTL\nTheta Models:\n    - deseasonalized time series\n    - Theta, DynamicTheta\n"

In [2]:
# tools
import os

# Environment configuration must happen BEFORE statsforecast is imported:
# statsforecast pulls in numba, and numba is documented to read
# NUMBA_NUM_THREADS when it is first imported, so setting it afterwards
# would likely be ignored.
# NIXTLA_ID_AS_COL presumably makes `unique_id` a regular column in the
# forecast output instead of the index -- TODO confirm for the installed version.
os.environ["NIXTLA_ID_AS_COL"] = "1"
os.environ["NUMBA_NUM_THREADS"] = "1"

import pandas as pd  # noqa: E402
import matplotlib.pyplot as plt  # noqa: E402
import multiprocessing as mp  # noqa: E402

# forecast
from statsforecast import StatsForecast  # noqa: E402

# models
from statsforecast.models import (  # noqa: E402
    HoltWinters,
    CrostonClassic as Croston,
    HistoricAverage,
    DynamicOptimizedTheta as DOT,
    SeasonalNaive,
)

# data

In [3]:
def load_data(
    url: str = "https://datasets-nixtla.s3.amazonaws.com/m4-hourly.parquet",
    n_series: int = 10,
    n_obs: int = 7 * 24,
) -> pd.DataFrame:
    """Load a long-format time-series parquet file and keep a small subset.

    Args:
        url: Location of the parquet file (anything ``pd.read_parquet``
            accepts). Defaults to the M4 hourly dataset.
        n_series: Number of distinct ``unique_id`` values to keep, taken in
            order of first appearance in the file.
        n_obs: Number of trailing rows to keep per series. The default of
            ``7 * 24`` rows is one week when each row is one hour.

    Returns:
        A DataFrame with the same columns as the source file, restricted to
        the selected series and their last ``n_obs`` rows. The original row
        index is preserved.
    """
    # data read
    raw_df = pd.read_parquet(url)

    # data filter: first `n_series` ids, then the tail of each series
    keep_ids = raw_df["unique_id"].unique()[:n_series]
    subset_df = raw_df[raw_df["unique_id"].isin(keep_ids)]

    return subset_df.groupby("unique_id").tail(n_obs)


Y_df = load_data()

# Preview the first rows and the frame dimensions, each followed by a blank line.
for summary in (Y_df.head(), Y_df.shape):
    print(summary, "\n")

# Number of rows kept for every series id.
print(Y_df["unique_id"].value_counts())

    unique_id   ds      y
580        H1  581  587.0
581        H1  582  537.0
582        H1  583  492.0
583        H1  584  464.0
584        H1  585  443.0 

(1680, 3) 

unique_id
H1      168
H10     168
H100    168
H101    168
H102    168
H103    168
H104    168
H105    168
H106    168
H107    168
Name: count, dtype: int64


# EDA

In [4]:
# Visual sanity check: plot the loaded series in Y_df with the matplotlib engine.
# As the last expression in the cell, the returned object is what the notebook renders.
StatsForecast.plot(Y_df, engine="matplotlib")

# model training

In [None]:
# The data is hourly, so the dominant seasonal cycle is one day.
# A single constant keeps every seasonal model and the fallback consistent.
SEASON_LENGTH = 24

models = [
    # Previously left at the default season_length (1), which gives a
    # Holt-Winters model no seasonal cycle to fit on hourly data.
    HoltWinters(season_length=SEASON_LENGTH),
    Croston(),
    SeasonalNaive(season_length=SEASON_LENGTH),
    HistoricAverage(),
    DOT(season_length=SEASON_LENGTH),
]

sf = StatsForecast(
    models=models,
    # `ds` is an integer step counter in this dataset, so the frequency is 1.
    freq=1,
    # Used when a model fails to fit; it must use the same daily cycle as the
    # models it replaces (it was 7, which is not a period of hourly data).
    fallback_model=SeasonalNaive(season_length=SEASON_LENGTH),
    n_jobs=1,
)

# model evaluation

In [None]:
HORIZON = 48   # number of future steps to forecast
LEVELS = [90]  # prediction-interval confidence level(s), in percent

forecasts_df = sf.forecast(df=Y_df, h=HORIZON, level=LEVELS)

# `display` is provided by IPython inside notebooks. Show only the head so the
# cell output stays short instead of printing the whole frame.
display(forecasts_df.head())

# A figure returned by sf.plot is rendered automatically only when the call is
# the LAST expression of the cell, so the first plot is displayed explicitly.
display(sf.plot(Y_df, forecasts_df))

sf.plot(
    Y_df,
    forecasts_df,
    models=["HoltWinters", "DynamicOptimizedTheta"],
    unique_ids=["H10", "H105"],
    level=LEVELS,
)