In [None]:
import numpy as np
import pandas as pd
from ISLP import load_data
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Load the NYSE data set and pick the three series used as predictors.
NYSE = load_data('NYSE')
cols = ['DJ_return', 'log_volume', 'log_volatility']

# Standardize each series to zero mean / unit variance, keeping the original
# row index so lagged rows can be aligned with the target later on.
# NOTE(review): the scaler is fit on every row of NYSE, i.e. before the
# chronological train/test split performed further down in this file.
X = pd.DataFrame(
    data=StandardScaler().fit_transform(NYSE[cols]),
    index=NYSE.index,
    columns=cols,
)

# Build the lagged design matrix: for each lag k = 1..lags, shift all series
# down by k rows and tag the columns "<series>_lag<k>". Shifting leaves NaN in
# the first rows, which dropna() then removes.
lags = 5
X_lagged = pd.concat(
    [X.shift(k).add_suffix(f"_lag{k}") for k in range(1, lags + 1)],
    axis=1,
).dropna()

# Target: the standardized log_volume on the rows that survived dropna().
Y = X.loc[X_lagged.index, 'log_volume']

# Independent copy of the lagged matrix used as the "flattened" model input.
X_flat = X_lagged.copy()

In [None]:
# Chronological 80/20 split: shuffle=False keeps the row order, so the test
# set is the final 20% of the positions. The split is done on integer
# positions, which are then used to slice both predictors and target.
train_idx, test_idx = train_test_split(
    range(len(X_flat)),
    test_size=0.2,
    shuffle=False,
)

X_train = X_flat.iloc[train_idx]
X_test = X_flat.iloc[test_idx]

Y_train = Y.iloc[train_idx]
Y_test = Y.iloc[test_idx]

In [None]:
# Ordinary least squares on the flattened lag matrix; fit() returns the
# estimator itself, so construction and fitting can be chained.
lr_flat = LinearRegression().fit(X_train, Y_train)

# score() reports the coefficient of determination (R^2) on the held-out rows.
r2_flat = lr_flat.score(X_test, Y_test)
print(f"Test R^2 (flattened sequences): {r2_flat:.4f}")

Test R^2 (flattened sequences): 0.3912


In [4]:
# "Standard" linear AR fit. It is trained and scored on the same
# X_train / X_test matrices as the flattened model.
lr_standard = LinearRegression().fit(X_train, Y_train)

# Held-out R^2 for the AR model.
r2_standard = lr_standard.score(X_test, Y_test)
print(f"Test R^2 (standard AR): {r2_standard:.4f}")

Test R^2 (standard AR): 0.3912


Both the flattened-sequence approach and the standard linear AR model yield the same **test R² of 0.391**, indicating that each explains about 39% of the variance in the target variable **log_volume** on the held-out data. This equality is expected: for a linear model, flattening the lagged sequences does not change the information content, because both approaches regress on exactly the same lagged predictors (5 lags × 3 variables = 15 columns), so the two fits are the same least-squares problem. The sequence representation matters primarily for **RNNs and other sequence models**, which require a 3D input of **(samples, timesteps, features)**; flattening collapses that array into the 2D **(samples, timesteps × features)** matrix that linear regression (or a dense network) expects. The standard AR formulation is simpler, more interpretable, and sufficient for linear regression on lagged data. Thus, while both methods perform identically in this linear setting, the distinction between flattened and sequence-shaped inputs becomes relevant when extending to nonlinear or deep learning models.