In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import date
import seaborn as sns
import random

import matplotlib.pyplot as plt

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.mixture import GaussianMixture


from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant

from arch import arch_model

import scipy.stats as stats
from scipy.stats import probplot, laplace, norm, t
from scipy.optimize import minimize

from numdifftools import Hessian

import statsmodels.api as sm
from statsmodels.nonparametric.kde import KDEUnivariate
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_process import ArmaProcess

import pymc as pm
import pytensor.tensor as pt
import arviz as az

import tensorflow as tf
from tensorflow import keras


#from tensorflow.keras.utils import plot_model


######################################
#from pmdarima import auto_arima
#from diptest import diptest

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.preprocessing import StandardScaler


class NN_RealizedVolatility:
    """One-hidden-layer feed-forward network that forecasts daily realized volatility.

    Typical usage::

        nn = NN_RealizedVolatility(df, nfuture=20)
        nn.realized_volatility()   # optional; computed on demand by fit()
        nn.fit().forecast()

    Parameters
    ----------
    df : pandas.DataFrame
        Price data with a ``'Close'`` column and an index exposing ``.date``
        (e.g. a ``DatetimeIndex``). A copy is stored; the caller's frame is
        not modified.
    nfuture : int
        Number of trailing daily observations held out as the test/forecast set.
    trading_days : int
        Annualization factor: plotted volatilities are multiplied by
        ``sqrt(trading_days)``.
    lags : int
        Number of lagged realized-volatility values used as input features.
    hidden_size : int
        Number of units in the hidden layer.
    epochs, lr, batch_size
        Training hyper-parameters passed to Keras (Adam learning rate ``lr``).
    """

    def __init__(self, df, nfuture, trading_days=252, lags=10, hidden_size=32, epochs=50,
                 lr=0.001, batch_size=32):
        self.df = df.copy()
        self.trading_days = trading_days
        self.lags = lags
        self.hidden_size = hidden_size
        self.epochs = epochs
        self.lr = lr
        self.batch_size = batch_size
        self.nfuture = nfuture

        self.scaler_X = StandardScaler()
        self.scaler_y = StandardScaler()
        self.model = None
        # Filled by realized_volatility(); prepare_data() computes it on demand.
        self.rolling_rv = None

    def realized_volatility(self):
        """Compute one realized-volatility value per calendar date.

        Adds a ``'log_return'`` column to ``self.df`` and stores the per-date
        series in ``self.rolling_rv``. For each date the value is the square
        root of the sum of squared, de-meaned log returns observed on that date.

        Log returns are taken over the whole series, so the first return of a
        date is measured against the last close of the previous date.
        """
        self.df['log_return'] = np.log(self.df['Close'] / self.df['Close'].shift(1))
        per_day = self.df['log_return'].groupby(self.df.index.date).apply(
            lambda x: np.sqrt(np.sum((x - x.mean())**2))
        )
        self.rolling_rv = per_day.dropna()

    def prepare_data(self):
        """Build lagged features, split train/test and standardize.

        Row ``t`` of the feature matrix holds ``y[t-1], ..., y[t-lags]``; the
        first ``lags`` observations are dropped because they lack a full
        history. The last ``nfuture`` rows form the test set. Both scalers are
        fitted on the training part only.

        Returns
        -------
        tuple
            ``(X_train, y_train, X_test, y_test)``, all standardized.

        Raises
        ------
        ValueError
            If ``lags`` or ``nfuture`` is smaller than 1, or if there are too
            few observations to leave at least one training sample.
        """
        if self.rolling_rv is None:
            self.realized_volatility()

        if self.lags < 1:
            raise ValueError("lags must be at least 1")
        # nfuture == 0 would make X[:-0] an empty training set.
        if self.nfuture < 1:
            raise ValueError("nfuture must be at least 1")

        y = self.rolling_rv.values
        n_samples = len(y) - self.lags
        if n_samples <= self.nfuture:
            raise ValueError(
                f"Not enough observations: {len(y)} realized-volatility values "
                f"cannot provide lags={self.lags} and nfuture={self.nfuture} "
                "with at least one training sample"
            )

        # Column i-1 is the series lagged by i, already trimmed to the rows
        # that have a complete lag history.
        X = np.column_stack(
            [y[self.lags - i:len(y) - i] for i in range(1, self.lags + 1)]
        )
        y = y[self.lags:]

        # Split: train until last nfuture, test = last nfuture
        X_train, X_test = X[:-self.nfuture], X[-self.nfuture:]
        y_train, y_test = y[:-self.nfuture], y[-self.nfuture:]

        # Scale using training statistics only.
        X_train = self.scaler_X.fit_transform(X_train)
        X_test = self.scaler_X.transform(X_test)
        y_train = self.scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()
        y_test = self.scaler_y.transform(y_test.reshape(-1, 1)).flatten()

        return X_train, y_train, X_test, y_test

    def build_model(self, input_dim):
        """Return a compiled 1-hidden-layer feed-forward network.

        Parameters
        ----------
        input_dim : int
            Number of input features.

        Returns
        -------
        A Keras ``Sequential`` model (ReLU hidden layer, linear scalar output)
        compiled with Adam and mean-squared-error loss.
        """
        model = models.Sequential([
            # Explicit Input layer instead of `input_shape=` on the first Dense.
            layers.Input(shape=(input_dim,)),
            layers.Dense(self.hidden_size, activation='relu'),
            layers.Dense(1)
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.lr),
                      loss='mse')
        return model

    def fit(self):
        """Train the network on the training split.

        Stores the scaled splits in ``X_train``, ``y_train``, ``X_test`` and
        ``y_test`` and the trained model in ``model``.

        Returns
        -------
        NN_RealizedVolatility
            ``self``, so calls can be chained.
        """
        X_train, y_train, X_test, y_test = self.prepare_data()
        self.model = self.build_model(X_train.shape[1])
        self.model.fit(
            X_train, y_train,
            epochs=self.epochs,
            batch_size=self.batch_size,
            verbose=1
        )
        self.X_test, self.y_test = X_test, y_test
        self.X_train, self.y_train = X_train, y_train
        return self

    def _unscale_target(self, values):
        """Map standardized target values back to realized-volatility units."""
        return self.scaler_y.inverse_transform(values.reshape(-1, 1)).flatten()

    def forecast(self):
        """Predict the last ``nfuture`` realized volatilities, report errors and plot.

        Prints the mean absolute and root-mean-square *relative* error,
        ``(forecast - actual) / actual``, then draws two panels: annualized
        realized volatility with the in-sample fit and the forecast, and the
        relative forecast error in percent.

        Raises
        ------
        RuntimeError
            If called before ``fit()``.
        """
        if self.model is None:
            raise RuntimeError("Model is not trained; call fit() before forecast().")

        # Predictions are already volatilities (the target is rolling_rv),
        # so no square root is applied after inverse scaling.
        fits = self._unscale_target(self.model.predict(self.X_train).flatten())
        preds = self._unscale_target(self.model.predict(self.X_test).flatten())
        actual = self._unscale_target(self.y_test)

        forecast_index = self.rolling_rv.index[-self.nfuture:]
        # Training targets start after the first `lags` observations.
        fit_index = self.rolling_rv.index[self.lags:self.lags + len(fits)]

        # Forecast error
        forecast_error = (preds - actual) / actual
        mae = round(np.mean(np.abs(forecast_error)), 4)
        rmse = round(np.sqrt(np.mean(forecast_error**2)), 4)
        print(f"MAE: {mae}, RMSE: {rmse}")

        # Plotting
        annualize = np.sqrt(self.trading_days)
        fig, axs = plt.subplots(2, 1, figsize=(14, 10), sharex=True)
        # Show the forecast window plus up to 21 earlier observations,
        # clamped so short series do not index out of range.
        start_pos = max(len(self.rolling_rv) - self.nfuture - 21, 0)
        x_limits = [self.rolling_rv.index[start_pos], self.rolling_rv.index[-1]]

        # Historical + Forecast
        axs[0].plot(self.rolling_rv * annualize, color='black', label='Realized Vol')
        axs[0].plot(fit_index, fits * annualize, color='orange', linestyle='--', label='NN Fit')
        axs[0].plot(forecast_index, preds * annualize,
                    color='blue', linestyle='--', label='NN Forecasted Vol')
        axs[0].axvline(x=forecast_index[0], color='gray', linestyle='--', linewidth=1.5)
        axs[0].set_title(f'Historical RV and Forecast (NN)\nMAE={mae}, RMSE={rmse}')
        axs[0].set_ylabel('Annualized Volatility')
        axs[0].legend()
        axs[0].grid(True)
        axs[0].set_xlim(x_limits)
        axs[0].set_ylim(0, np.amax(preds * annualize) * 1.5)

        # Forecast error (sign convention: forecast minus actual)
        axs[1].bar(forecast_index, forecast_error * 100, color='purple')
        axs[1].axvline(x=forecast_index[0], color='gray', linestyle='--', linewidth=1.5)
        axs[1].axhline(y=0, color='gray', linestyle='--', linewidth=1.5)
        axs[1].set_title('Forecast Error: Forecasted NN − Actual RV')
        axs[1].set_xlabel('Date')
        axs[1].set_ylabel('Relative Volatility Difference (%)')
        axs[1].grid(True)
        axs[1].set_xlim(x_limits)

        plt.tight_layout()
        plt.show()

