# Time Series

Parametrize the problem and create a time series pipeline that extracts lagged features.

In [None]:
from evalml.pipelines import TimeSeriesRegressionPipeline
from evalml.problem_types import TimeSeriesProblem

# Problem configuration; the pipeline is constructed from it and the
# cross-validation splitter reads its gap and max_lag.
# NOTE(review): the exact meaning of gap / max_lag / n_periods_to_predict is
# defined by TimeSeriesProblem -- confirm against its documentation.
ts_problem = TimeSeriesProblem(
    gap=2,
    max_lag=4,
    n_periods_to_predict=1,
    estimator_type="regression",
    date_column="date",
)

class TsRegressionPipeline(TimeSeriesRegressionPipeline):
    """Time series regression pipeline: lagged feature extraction, then a random forest."""

    # Components are listed in the order given here: feature extractor first,
    # estimator last.
    component_graph = [
        "Lagged Feature Extractor",
        "Random Forest Regressor",
    ]

    
# Instantiate the pipeline with no component parameter overrides (empty dict)
# and attach the time series problem configuration.
pl = TsRegressionPipeline(
    parameters={},
    time_series_problem=ts_problem,
)

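Generate a synthetic target series from an autoregressive process of order 1, AR(1): each value is 0.2 times the previous value plus standard normal noise, $y_t = 0.2\,y_{t-1} + \varepsilon_t$ with $\varepsilon_t \sim \mathcal{N}(0, 1)$, starting from a previous value of 10.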

In [None]:
import pandas as pd
import numpy as np

# Fixed seed so the simulated series (and every score and plot derived from it)
# is identical after Restart Kernel -> Run All.
SEED = 0
rng = np.random.RandomState(SEED)

# One observation per calendar day in October 2020; X and y share this index.
dates = pd.date_range("2020-10-01", "2020-10-31")
n_obs = len(dates)

# Single feature column counting the observations 1..n_obs.
X = pd.DataFrame({"features": range(1, n_obs + 1)}, index=dates)

# Simulate an AR(1) process: y_t = 0.2 * y_{t-1} + noise_t, noise_t ~ N(0, 1),
# with the "previous" value initialised to 10. Values are written into a plain
# array so the loop never relies on label-based indexing of a Series.
values = np.empty(n_obs)
prev = 10
for i, noise in enumerate(rng.normal(0, 1, n_obs)):
    prev = 0.2 * prev + noise
    values[i] = prev

y = pd.Series(values, index=dates)

In [None]:
import plotly.express as px
# Plot the simulated target against its date index; the title lets the figure
# stand alone when the notebook is skimmed.
px.line(y, title="Simulated AR(1) target series")

In [None]:
from evalml.automl import TimeSeriesSplit

# Build the splitter from the problem's own gap and max_lag so the folds use
# the same settings as the pipeline.
ts_split = TimeSeriesSplit(gap=ts_problem.gap, max_lag=ts_problem.max_lag)

objective_name = "Root Mean Squared Error"

# Refit and score the pipeline on each fold. X_test / y_test keep the values
# from the final fold once the loop ends; the prediction cell further down
# reads them.
for i, (train_idx, test_idx) in enumerate(ts_split.split(X, y)):
    X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]
    pl.fit(X_train, y_train)
    # score() returns a mapping keyed by objective name; pull out the one requested.
    score = pl.score(X_test, y_test, objectives=[objective_name])[objective_name]
    print(f"Score on fold {i} is {score:.2f}")

In [None]:
# Drop the first max_lag rows of the last test fold before comparing.
# NOTE(review): presumably those rows lack complete lagged features -- confirm
# against the pipeline's behaviour. The offset is read from ts_problem rather
# than hard-coded so it follows the problem configuration.
n_skip = ts_problem.max_lag
predicted = pl.predict(X_test, y_test).iloc[n_skip:]
actual = y_test.iloc[n_skip:]

# Long-format frame: predictions stacked on top of targets, labelled by kind.
# Label counts come from the data so they stay correct for any fold size.
predictions = pd.DataFrame({
    "value": pd.concat([predicted, actual], ignore_index=True),
    "kind": ["prediction"] * len(predicted) + ["target"] * len(actual),
})

In [None]:
# Number the rows within each kind (1, 2, ...) so predictions and targets share
# an x-axis; derived from the frame instead of hard-coding the fold length.
step = (predictions.groupby("kind").cumcount() + 1).tolist()
px.line(predictions, x=step, y="value", color="kind")