In [1]:
import numpy as np
import pandas as pd
from autogluon.timeseries.metrics import TimeSeriesScorer
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor
from plotnine import *
from statsmodels.tsa.arima_process import ArmaProcess
from typing import Any, Dict, Type

Fill in the function below. Assume that
- The `target` column of `test_data` contains the data.
- `predictions` contains predictions for the last `prediction_length` observations in `test_data`.
- The next-to-last column of `predictions` contains lower bounds for prediction intervals and the last column contains upper bounds.

The function should return the coverage, i.e., the proportion of observations that fall into the corresponding prediction interval.

In [2]:
def help_calc_coverages_for_1_window(
        target: str,
        test_data: TimeSeriesDataFrame,
        prediction_length: int,
        predictions: TimeSeriesDataFrame
    ) -> float:
    actuals = test_data[target].iloc[-prediction_length:]
    are_lower_bounds_right = actuals >= predictions.iloc[:, -2]
    are_upper_bounds_right = actuals <= predictions.iloc[:, -1]
    coverage = (are_lower_bounds_right & are_upper_bounds_right).mean()
    return coverage

Fill in the function below. Assume that

- The `timestamp_col` column of `window_data` contains observation times and the `target` column contains observation values.
- The last `prediction_length` observations in `window_data` are test observations and the prior observations are training observations.
- Predictions are to be evaluated using `eval_metric`.
- Level-`ci_level` prediction intervals are needed. `ci_level` could be 0.95, for example.
- The training of any one model cannot take longer than `time_limit` seconds.
- The models to be used are specified in `hyperparameters`. For example, `hyperparameters` could equal `{"AutoARIMA": {}}`.

The function should compute the test set coverage for each model in `hyperparameters`. It should return a `DataFrame` with one row for each model. The `DataFrame`'s columns should be `test_start_time`, `test_end_time`, `model`, and `coverage`, in that order. Use `help_calc_coverages_for_1_window` to compute the coverage for each model.

In [3]:
def calc_coverages_for_1_window(
        window_data: pd.DataFrame,
        timestamp_col: str,
        target: str | None,
        prediction_length: int,
        eval_metric: str | TimeSeriesScorer | None,
        ci_level: float,
        time_limit: int | None,
        hyperparameters: Dict[str | Type, Any]
    ) -> float:
    test_start_time = window_data[timestamp_col].iloc[-prediction_length]
    test_end_time = window_data[timestamp_col].iloc[-1]

    window_data["item_id"] = 0
    window_data[timestamp_col] = window_data[timestamp_col].dt.tz_localize(None)
    window_data = TimeSeriesDataFrame(window_data, timestamp_column=timestamp_col)
    train_data, test_data = window_data.train_test_split(prediction_length)

    predictor = TimeSeriesPredictor(
        target=target,
        prediction_length=prediction_length,
        eval_metric=eval_metric,
        verbosity=0,
        quantile_levels=[(1 - ci_level) / 2, (1 + ci_level) / 2]
    )
    predictor.fit(train_data, time_limit=time_limit, hyperparameters=hyperparameters, enable_ensemble=False)

    coverages = []
    for model in hyperparameters:
        predictions = predictor.predict(train_data, model=model)
        coverage = help_calc_coverages_for_1_window(target, test_data, prediction_length, predictions)
        coverages.append((model, coverage))
    coverages = pd.DataFrame(coverages, columns=["model", "coverage"])
    coverages.insert(0, "test_start_time", test_start_time)
    coverages.insert(1, "test_end_time", test_end_time)

    return coverages

Fill in the function below. Assume that

- The `timestamp_col` column of `data` contains observation times and the `target` column contains observation values. Windows are to be carved out of `data`, with one or more time series models being trained and tested on each window.
- The first `train_size` observations in a window are training observations and the last `prediction_length` observations are test observations.
- The starting indices of consecutive windows differ by `stride`.
- Predictions are to be evaluated using `eval_metric`.
- Level-`ci_level` prediction intervals are needed. `ci_level` could be 0.95, for example.
- The training of any one model cannot take longer than `time_limit` seconds.
- The models to be used are specified in `hyperparameters`. For example, `hyperparameters` could equal `{"AutoARIMA": {}}`.

The function should compute the test set coverages for the models in `hyperparameters` on all of the windows. It should return a `DataFrame` with one row for each pair of window and model. The `DataFrame`'s columns should be `test_start_time`, `test_end_time`, `model`, and `coverage`, in that order. Use `calc_coverages_for_1_window` to compute the coverages for each window.

In [4]:
def calc_coverages(
        data: pd.DataFrame,
        train_size: int,
        prediction_length: int,
        stride: int,
        timestamp_col: str,
        target: str | None,
        eval_metric: str | TimeSeriesScorer | None,
        ci_level: float,
        time_limit: int | None,
        hyperparameters: Dict[str | Type, Any]
    ) -> pd.DataFrame:
    window_size = train_size + prediction_length
    window_start_indices = list(range(0, len(data) - window_size, stride))
    window_end_indices = [window_start_index + window_size - 1 for window_start_index in window_start_indices]

    results = []
    for window_start_index, window_end_index in zip(window_start_indices, window_end_indices):
        window_data = data[[timestamp_col, target]].iloc[window_start_index:window_end_index]
        window_coverages = calc_coverages_for_1_window(
            window_data, timestamp_col, target, prediction_length, eval_metric, ci_level, time_limit, hyperparameters
        )
        results.append(window_coverages)
    results = pd.concat(results, ignore_index=True)

    return results

When we write a function, we ought to test it to ensure that it works. When a function operates on data, one way to test it is to simulate data from a model and verify that the function returns what it should for the simulated data.

One of the simplest time series models is the AR(1) model, the autoregressive model of order 1. It is defined by the equation
$$
Y_t = \phi Y_{t - 1} + \epsilon_t, \ t \in \mathbb{Z},
$$
where the $\epsilon_t$'s are uncorrelated random variables with common mean zero and common variance $\sigma^2$, and $\epsilon_t$ is independent of $Y_{t - 1}, Y_{t - 2}, Y_{t - 3}, \ldots$. The $\epsilon_t$'s are called *innovations*. For a Gaussian AR(1) model, the innovations are $N(0, \sigma^2)$ random variables.

One desirable property of a time series model is *stationarity*. If the model is stationary, then the mean and variance of $Y_t$ don't depend on $t$. Also, the covariance between $Y_t$ and $Y_u$ depends on $t$ and $u$ only through $|t - u|$, so we can talk about *the* covariance at lag $\ell$, $\text{Cov}(Y_t, Y_{t + \ell})$, which doesn't depend on $t$. The covariances at the various lags are called *autocovariances*. It can be shown that the AR(1) model is stationary if and only if $|\phi| < 1$.

Define a function `simulate_ar1` that uses a given `Generator` instance to draw a sample of size $n$ from a Gaussian AR(1) model with coefficient $\phi$ and innovation standard deviation $\sigma$.
- Check whether $|\phi| < 1$ - raise a `ValueError` if it isn't.
- Use `statsmodels.tsa.arima_process.ArmaProcess` to create an object representing the time series.
- Use the object's `generate_sample` method to generate a sample of size $n$; make sure to use the `Generator` instance to do this.
- Return the sample in a `DataFrame` with two columns:
    - `timestamp`, which contains a sequence of times that starts at `Timestamp("2020-01-01 00:00:00")` and has a one-minute step size.
    - `target`, which contains the sample.

In [5]:
def simulate_ar1(phi: float, sigma: float, n: int, rng: np.random.Generator) -> pd.DataFrame:
    if np.abs(phi) >= 1:
        raise ValueError("The absolute value of phi must be less than one.")
    arma_process = ArmaProcess(ar=np.array([1, -0.5]), ma = np.array([1]))
    targets = arma_process.generate_sample(n, sigma, distrvs=lambda size: rng.standard_normal(size))
    start_time = pd.Timestamp("2020-01-01 00:00:00")
    timestamps = pd.date_range(start_time, periods=n, freq="min")
    df = pd.DataFrame({"timestamp": timestamps, "target": targets})
    return df

For a model of the form $Y = f(X) + \epsilon$, where $X$ and $\epsilon$ are independent, the *signal-to-noise ratio (SNR)* is defined as
$$
\frac{\text{Var}(f(X))}{\text{Var}(\epsilon)}.
$$
The fraction of the variance of $Y$ explained by the signal $f(X)$, which we'll call the FVE, is
$$
\frac{\text{Var}(f(X))}{\text{Var}(Y)} = \frac{\text{Var}(f(X))}{\text{Var}(f(X)) + \text{Var}(\epsilon)} = \frac{\text{SNR}}{\text{SNR} + 1}.
$$

For a stationary AR(1) model, derive expressions for the SNR and FVE. Then define a function `calc_phi_from_fve` that takes an FVE and returns the nonnegative $\phi$ that yields that FVE. The function should raise a `ValueError` if the FVE isn't in $[0, 1)$.

In [6]:
def calc_phi_from_fve(fve: float) -> float:
    if fve < 0 or fve >= 1:
        raise ValueError("fve must be in [0, 1).")
    phi = np.sqrt(fve)
    return phi

Once the functions have been filled in, run the code below to verify that the coverage functions work. Since `ci_level` equals 0.95, the coverage you get should be 0.95.

In [7]:
fve = 0.9
phi = calc_phi_from_fve(fve)
sigma = 1
n = 1000
rng = np.random.default_rng(12345)

window_data = simulate_ar1(phi, sigma, n, rng)
timestamp_col = "timestamp"
target = "target"
prediction_length = 100
eval_metric = "RMSE"
ci_level = 0.95
time_limit = 60
hyperparameters = {"AutoARIMA": {}}

calc_coverages_for_1_window(window_data, timestamp_col, target, prediction_length, eval_metric, ci_level, time_limit, hyperparameters)

Unnamed: 0,test_start_time,test_end_time,model,coverage
0,2020-01-01 15:00:00,2020-01-01 16:39:00,AutoARIMA,0.92
