In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import date
import seaborn as sns
import random

import matplotlib.pyplot as plt

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.mixture import GaussianMixture


from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.stats.diagnostic import acorr_ljungbox

import scipy.stats as stats
from scipy.stats import probplot, laplace, norm
from scipy.stats import t as studentt
from scipy.stats import norm, chi2, skew, kurtosis, binomtest
from scipy.stats import shapiro, anderson, kstest

import statsmodels.api as sm
from statsmodels.nonparametric.kde import KDEUnivariate
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_process import ArmaProcess

import pymc as pm
import pytensor.tensor as pt
import arviz as az

import tensorflow as tf
from tensorflow import keras


#from tensorflow.keras.utils import plot_model


######################################
#from pmdarima import auto_arima
#from diptest import diptest

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm, chi2, binomtest

class AnalyticalVaRBacktester:
    """Rolling-window parametric VaR / CVaR estimator with a simple backtest.

    Typical usage::

        bt = AnalyticalVaRBacktester(prices_df)
        bt.compute_var()                 # needs a 'LogReturn' column
        bt.compute_future_log_return()   # needs a 'Close' column
        results = bt.backtest()
        bt.plot_var()

    All risk measures are expressed as return thresholds (negative numbers
    for losses) so they can be compared directly with the realised
    ``horizon``-day log return.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame; it is copied, the caller's object is never modified.
        ``compute_var`` reads ``'LogReturn'`` and
        ``compute_future_log_return`` reads ``'Close'``.
    alpha : float
        Tail probability (0.05 -> 95% confidence).
    window : int
        Number of most recent returns used for each estimate.
    horizon : int
        Holding period, in rows, of the risk measure and the realised return.
    var_type : {'var', 'cvar'}
        Measure that ``backtest`` / ``plot_var`` compare against returns.
    dist_type : {'norm', 't'}
        Distribution assumed for the returns.
    t_df : float
        Degrees of freedom used when ``dist_type == 't'``; must exceed 2 so
        that the variance exists.

    Raises
    ------
    ValueError
        If any argument is outside the ranges described above.
    """

    # Mean Shapiro-Wilk p-value below which returns are treated as non-normal
    # and the Cornish-Fisher ("modified") VaR is backtested instead of the
    # plain normal VaR. This is a heuristic carried over from the original
    # implementation.
    NORMALITY_P_THRESHOLD = 0.05

    _VAR_TYPES = ('var', 'cvar')
    _DIST_TYPES = ('norm', 't')

    def __init__(self, data, alpha=0.05, window=21, horizon=10, var_type='var',
                 dist_type='norm', t_df=5):
        if not 0 < alpha < 1:
            raise ValueError(f"alpha must be in (0, 1), got {alpha!r}")
        if window < 3:
            # scipy.stats.shapiro needs at least three observations.
            raise ValueError(f"window must be >= 3, got {window!r}")
        if horizon < 1:
            raise ValueError(f"horizon must be >= 1, got {horizon!r}")
        if var_type not in self._VAR_TYPES:
            raise ValueError(
                f"var_type must be one of {self._VAR_TYPES}, got {var_type!r}")
        if dist_type not in self._DIST_TYPES:
            raise ValueError(
                f"dist_type must be one of {self._DIST_TYPES}, got {dist_type!r}")
        if t_df <= 2:
            raise ValueError(f"t_df must be > 2, got {t_df!r}")

        self.data = data.copy()
        self.alpha = alpha
        self.window = window
        self.horizon = horizon
        self.var_type = var_type
        self.dist_type = dist_type
        self.t_df = t_df

    # ------------------------------------------------------------------
    # Estimation
    # ------------------------------------------------------------------
    def compute_var(self):
        """Estimate VaR, modified VaR and CVaR on a rolling window.

        For every row ``t >= window`` the ``window`` returns strictly before
        ``t`` are used, so the estimate at ``t`` does not look at row ``t``
        itself. Rows without a full window of non-missing returns stay NaN.

        Adds the columns ``'VaR {h}D'``, ``'ModVaR {h}D'``, ``'CVaR {h}D'``,
        ``'Shapiro_p'`` and ``'KS_p'`` to ``self.data``.
        """
        estimators = {
            'norm': self._normal_measures,
            't': self._student_t_measures,
        }
        try:
            estimate = estimators[self.dist_type]
        except KeyError:
            raise ValueError(
                f"dist_type must be one of {self._DIST_TYPES}, "
                f"got {self.dist_type!r}") from None

        returns = self.data['LogReturn']
        n_rows = len(self.data)
        # Pre-filled with NaN so every output has exactly one value per row,
        # whatever path a given window takes (and even if n_rows < window).
        out = {key: np.full(n_rows, np.nan)
               for key in ('var', 'modvar', 'cvar', 'shapiro_p', 'ks_p')}

        for t in range(self.window, n_rows):
            sample = returns.iloc[t - self.window:t].dropna()
            if len(sample) < self.window:
                continue  # incomplete window: leave NaN

            _, out['shapiro_p'][t] = shapiro(sample)
            (out['var'][t], out['modvar'][t],
             out['cvar'][t], out['ks_p'][t]) = estimate(sample)

        h = self.horizon
        self.data[f'VaR {h}D'] = out['var']
        self.data[f'ModVaR {h}D'] = out['modvar']
        self.data[f'CVaR {h}D'] = out['cvar']
        self.data['Shapiro_p'] = out['shapiro_p']
        self.data['KS_p'] = out['ks_p']

    def _horizon_moments(self, sample):
        """Return (mean, std) of the sample scaled to the holding period."""
        # Under i.i.d. returns the mean grows linearly with the horizon and
        # the standard deviation with its square root.
        mu_h = sample.mean() * self.horizon
        sigma_h = sample.std(ddof=1) * np.sqrt(self.horizon)
        return mu_h, sigma_h

    def _normal_measures(self, sample):
        """Return (VaR, Cornish-Fisher VaR, CVaR, KS p-value) under normality."""
        mu_h, sigma_h = self._horizon_moments(sample)
        skewness = sample.skew()
        # pandas already returns *excess* kurtosis (normal == 0), so it enters
        # the Cornish-Fisher expansion directly, without subtracting 3.
        excess_kurt = sample.kurtosis()

        _, ks_p = kstest(sample, 'norm',
                         args=(sample.mean(), sample.std(ddof=1)))

        z = norm.ppf(self.alpha)
        # NOTE(review): skewness / kurtosis are the one-period sample values;
        # they are not rescaled to the horizon.
        z_cf = (
            z
            + (z**2 - 1) * skewness / 6
            + (z**3 - 3 * z) * excess_kurt / 24
            - (2 * z**3 - 5 * z) * (skewness**2) / 36
        )
        var = mu_h + z * sigma_h
        modvar = mu_h + z_cf * sigma_h
        cvar = mu_h - sigma_h * norm.pdf(z) / self.alpha
        return var, modvar, cvar, ks_p

    def _student_t_measures(self, sample):
        """Return (VaR, NaN, CVaR, KS p-value) under a Student-t assumption."""
        mu_h, sigma_h = self._horizon_moments(sample)
        nu = self.t_df
        # A standard t variable has variance nu / (nu - 2); rescale so the
        # fitted distribution has the sample's standard deviation.
        to_unit_var = np.sqrt((nu - 2) / nu)

        _, ks_p = kstest(
            sample, 't',
            args=(nu, sample.mean(), sample.std(ddof=1) * to_unit_var))

        q = studentt.ppf(self.alpha, nu)
        scale_h = sigma_h * to_unit_var
        var = mu_h + q * scale_h
        # Closed-form lower-tail expected shortfall of a Student-t variable.
        cvar = mu_h - scale_h * (studentt.pdf(q, nu) / self.alpha) * (
            (nu + q**2) / (nu - 1))
        # No Cornish-Fisher adjustment is defined for the t case.
        return var, np.nan, cvar, ks_p

    def compute_future_log_return(self):
        """Add ``'Ret {h}D'``: the log return from row ``t`` to ``t + h``.

        The last ``horizon`` rows are NaN because their future close is not
        available.
        """
        col = f'Ret {self.horizon}D'
        trailing = np.log(self.data['Close'] / self.data['Close'].shift(self.horizon))
        # Shift back so that row t holds the return realised over (t, t + h].
        self.data[col] = trailing.shift(-self.horizon)

    # ------------------------------------------------------------------
    # Shared helpers for backtest / plot
    # ------------------------------------------------------------------
    def _require_columns(self, *columns):
        """Raise KeyError naming any of *columns* absent from ``self.data``."""
        missing = [c for c in columns if c not in self.data.columns]
        if missing:
            raise KeyError(
                f"Missing column(s) {missing}; run compute_var() and "
                f"compute_future_log_return() first.")

    def _select_var_col(self):
        """Return the name of the risk-measure column to compare to returns."""
        h = self.horizon
        if self.var_type == 'cvar':
            return f'CVaR {h}D'
        if self.var_type != 'var':
            raise ValueError(
                f"var_type must be one of {self._VAR_TYPES}, "
                f"got {self.var_type!r}")
        if self.dist_type == 'norm':
            self._require_columns('Shapiro_p')
            # Modified VaR only exists for the normal case; for 't' that
            # column is all NaN and must never be selected.
            if self.data['Shapiro_p'].mean() < self.NORMALITY_P_THRESHOLD:
                return f'ModVaR {h}D'
        return f'VaR {h}D'

    def _breach_flags(self, ret_col, var_col):
        """Return boolean Series ``(is_breach, is_valid)`` for the two columns.

        A row is valid when both values are present; a breach is a valid row
        whose realised return is below the risk threshold.
        """
        is_valid = self.data[[ret_col, var_col]].notna().all(axis=1)
        is_breach = (self.data[ret_col] < self.data[var_col]) & is_valid
        return is_breach, is_valid

    # ------------------------------------------------------------------
    # Backtest
    # ------------------------------------------------------------------
    def backtest(self):
        """Count breaches and run Kupiec and binomial coverage tests.

        Adds a ``'Breach'`` column (True / False, NaN where either input is
        missing), prints the summary and the first 35 rows of the data, and
        returns the one-row summary ``DataFrame``.

        Kupiec statistics are NaN when there are no usable observations or
        when the observed breach ratio is exactly 0 or 1.
        """
        ret_col = f'Ret {self.horizon}D'
        var_col = self._select_var_col()
        self._require_columns(ret_col, var_col)

        is_breach, is_valid = self._breach_flags(ret_col, var_col)
        self.data['Breach'] = (self.data[ret_col] < self.data[var_col]).where(is_valid)

        num_breaches = int(is_breach.sum())
        total_obs = int(is_valid.sum())
        breach_ratio = num_breaches / total_obs if total_obs else np.nan
        expected_breaches = total_obs * self.alpha

        # Longest run of consecutive breaches: label each run of equal values,
        # then take the largest running count inside any run of ones.
        flags = is_breach.astype(int)
        run_id = (flags != flags.shift()).cumsum()
        max_cont_breaches = int(flags.groupby(run_id).cumsum().max()) if len(flags) else 0

        # P(breach at t | breach at t-1)
        cond_prob_breaches = 0
        prev_breach = flags.shift(1) == 1
        n_prev = int(prev_breach.sum())
        if num_breaches > 1 and n_prev > 0:
            consecutive = int((prev_breach & (flags == 1)).sum())
            cond_prob_breaches = consecutive / n_prev

        # Kupiec proportion-of-failures test, evaluated in log space so the
        # likelihoods cannot underflow to zero for long samples.
        p0 = self.alpha
        if total_obs == 0 or num_breaches in (0, total_obs):
            LR_pof = np.nan
            p_value = np.nan
        else:
            n_ok = total_obs - num_breaches
            loglik_null = n_ok * np.log(1 - p0) + num_breaches * np.log(p0)
            loglik_obs = (n_ok * np.log(1 - breach_ratio)
                          + num_breaches * np.log(breach_ratio))
            LR_pof = -2 * (loglik_null - loglik_obs)
            p_value = chi2.sf(LR_pof, df=1)

        # Exact binomial test (undefined without observations).
        if total_obs > 0:
            binom_p_value = binomtest(
                num_breaches, total_obs, p0, alternative='two-sided').pvalue
        else:
            binom_p_value = np.nan

        results = pd.DataFrame({
            "Actual Breaches": [num_breaches],
            "Expected Breaches": [expected_breaches],
            "Breach Ratio": [breach_ratio],
            "Number of Continuous Breaches": [max_cont_breaches],
            "Conditional Probability of Breaches": [cond_prob_breaches],
            "kupiec_LR": [LR_pof],
            "kupiec_p_value": [p_value],
            "binomial_test_p_value": [binom_p_value]
        })

        print(results)
        print ('---------------------------------------------------------------------------------')
        print(self.data.head(35))
        return results

    # ------------------------------------------------------------------
    # Plot
    # ------------------------------------------------------------------
    def plot_var(self):
        """Plot the selected risk measure, realised returns and breaches.

        Uses the same column selection and breach definition as
        ``backtest`` and does not require ``backtest`` to have been run.
        """
        ret_col = f'Ret {self.horizon}D'
        var_col = self._select_var_col()
        self._require_columns(ret_col, var_col)
        breaches, _ = self._breach_flags(ret_col, var_col)

        measure = var_col.split(' ')[0]  # 'VaR', 'ModVaR' or 'CVaR'
        # ':g' avoids int() truncating values such as 94.99999999999999.
        conf_pct = f'{(1 - self.alpha) * 100:g}'

        sns.set_style("whitegrid")
        fig, ax = plt.subplots(1, 1, figsize=(12, 6))

        ax.plot(self.data.index, self.data[var_col], color='orange', linewidth=1,
                label=f'{conf_pct}%/{self.horizon}D Analytical {measure}')
        ax.plot(self.data.index, self.data[ret_col], color='blue', linewidth=2, alpha=0.5,
                label=f'Actual {self.horizon}D Return')

        ax.scatter(
            self.data.index[breaches],
            self.data.loc[breaches, ret_col],
            color='red', marker='X', s=30, label='Breaches', zorder=3
        )

        ax.axhline(y=0, color='black', linestyle='--')
        ax.set_title(f'Backtesting of {conf_pct}%/{self.horizon}D Analytical {measure}')
        ax.set_ylabel(f'{self.horizon}D log return')
        ax.legend()
        plt.show()
