In [3]:
import arch
import itertools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import seaborn as sns
import statsmodels.api as sm
import warnings
import yfinance as yf
from datetime import date, datetime, timedelta
from operator import itemgetter
from typing import List, Tuple

from etl.load_volatility import get_data_with_vix
from indicators.transform import build_features
from indicators.temporal_features import difference_features


sns.set_style("darkgrid")

In [4]:
def train_and_predict(features, target, num_test, regularise, prediction_colname):
    # Compute rolling predictions at time t, using only data up to t-1.
    predictions = []
    for i in range(num_test):
        test_index = -(num_test-i)
        
        # Train - IS Data.
        X_train = features.iloc[:test_index]
        y_train = target.iloc[:test_index]

        # Linear Regression vs. ElasticNet (GLMNet).
        model = sm.OLS(endog=y_train, exog=X_train)
        model = model.fit_regularized() if regularise else model.fit()

        # Test - OOS Data: note double [[]] to preserve batch_dim==1.
        X_test = features.iloc[[test_index]]
        y_test = target.iloc[[test_index]]

        prediction = model.predict(X_test)
        predictions.append(prediction)

    # Align predictions.
    pred_series = pd.concat(predictions, axis=0)
    pred_series.name = prediction_colname
    pred_df = pred_series.to_frame()
    
    return pred_df, model



def plot_results(df):
    fig = plt.figure()
    fig.set_size_inches(16, 4)

    warmup = 20
    plt.plot(df["target_price"].iloc[warmup:], label="target")
    plt.plot(df["pred"].iloc[warmup:], label="pred")
    plt.plot(df["pred_reg"].iloc[warmup:], label="pred_reg", alpha=0.5)
    plt.title("Prediction vs. Target")
    plt.legend()
    plt.show()

In [9]:
def main(ticker, vix, start_date, end_date, interval, do_difference):

    df = get_data_with_vix(ticker, vix=vix, start_date=start_date, end_date=end_date, interval=interval)
    
    # Perform any differencing of raw input features first s.t. downstream features are differenced by construction.
    if do_difference:
        df = df.pipe(difference_features, colnames=["price", "volume", "vix"])
    
    features, target = build_features(df, features_cols=["price", "volume", "vix"], add_lag_features={"price": [1, 2, 3, 7], "vix": [1, 2]}, add_yearly_features=True)

    # Need at least 1 data point to fit model.
    num_test = len(df) - 1
    pred_df, model = train_and_predict(features, target, num_test, regularise=False, prediction_colname="pred")
    pred_reg_df, model_reg = train_and_predict(features, target, num_test, regularise=True, prediction_colname="pred_reg")

    # Join predictions with input data on date index.
    output_df = df.join(pred_df)
    output_df = output_df.join(pred_reg_df)
    
    print("Final Model")
    model_df = pd.DataFrame.from_dict({"params": model.params, "pvalues": model.pvalues})
    reject_h0 = model_df.query("pvalues <= 0.05")
    fail_reject_h0 = model_df.query("pvalues > 0.05")
    print(model_df)
    print("-"*20)
    print(f"Reject H0:\n {reject_h0}")
    print("-"*20)
    print(f"Fail to reject H0:\n {fail_reject_h0}")

    plot_results(output_df)
    
    return model

# Compare results with and without differencing the raw features

In [10]:
ticker = "SPY"  # S&P 500 ETF
# ticker = "MES=F"  # Micro E-mini S&P 500 Futures
vix = "^VIX"
interval = "1d"
end_date = date.today()
start_date = datetime.strptime("2017-01-01", "%Y-%m-%d")


main(ticker=ticker, vix=vix, start_date=start_date, end_date=end_date, interval=interval, do_difference=False)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
Final Model
                       params        pvalues
const            8.815708e-01   8.177132e-02
price            9.857889e-01  9.993891e-117
volume          -6.946673e-09   6.211677e-02
vix              2.383950e-01   2.010156e-03
price-1         -3.118057e-02   5.704474e-01
price-2          4.041252e-02   3.988647e-01
price-3          5.409250e-03   8.503869e-01
price-7         -3.440312e-03   8.123558e-01
vix-1           -3.735239e-01   1.294946e-04
vix-2            1.757451e-01   1.929880e-02
cos_day_of_year  6.055286e-02   6.825308e-01
sin_day_of_year -6.537509e-02   6.588265e-01
--------------------
Reject H0:
          params        pvalues
price  0.985789  9.993891e-117
vix    0.238395   2.010156e-03
vix-1 -0.373524   1.294946e-04
vix-2  0.175745   1.929880e-02
--------------------
Fail to reject H0:
                        params   pval

KeyError: 'target_price'

<Figure size 1600x400 with 0 Axes>

In [None]:
main(ticker=ticker, vix=vix, start_date=start_date, end_date=end_date, interval=interval, do_difference=True)

# Other Plots

In [None]:
df = get_data_with_vix(ticker, vix="^VIX", start_date=start_date, end_date=end_date, interval=interval)

fig = plt.figure()
fig.set_size_inches(16, 4)

plt.plot(df["vix"], label="VIX")
plt.legend()
plt.show()