In [None]:
from datetime import date
import random
import time
import yfinance as yf
import pandas as pd

import seaborn as sns

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

from numpy.fft import fft, ifft, fftshift
import numpy as np
from numpy import log, sqrt, exp


from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.mixture import GaussianMixture


from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.stats.diagnostic import acorr_ljungbox

import scipy.stats as stats
from scipy.stats import probplot, laplace, norm, t, poisson
from scipy.linalg import solve_banded
from scipy.optimize import minimize, differential_evolution
from scipy.integrate import quad
from scipy.special import roots_laguerre
from scipy.interpolate import interp1d
from scipy.sparse import diags, kron, identity, csr_matrix
from scipy.sparse.linalg import spsolve

import statsmodels.api as sm
from statsmodels.nonparametric.kde import KDEUnivariate
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_process import ArmaProcess

#import pymc as pm
#import arviz as az

from tensorflow import keras
#from tensorflow.keras.utils import plot_model

#import pyswarms as ps

######################################
#from pmdarima import auto_arima
#from diptest import diptest

In [None]:
class Bayesian_PP:
    """Derive prior parameter estimates for GBM / Merton models from prices.

    Both public methods take a price history, compute daily log returns,
    fit a normal distribution to them and store annualised drift and
    volatility in ``mu_prior`` / ``sigma_prior``.  ``log_return_Merton``
    additionally estimates a yearly jump intensity and the mean / standard
    deviation of the jump sizes.

    The ``*_samples`` attributes are only initialised here; nothing in this
    class fills them.
    """

    # Number of return periods per year used to annualise daily estimates.
    TRADING_DAYS = 252

    def __init__(self):
        # Input data and the log-return series derived from it.
        self.log_returns = None
        self.df = None

        # Point estimates produced by log_return_GBM / log_return_Merton.
        self.mu_prior = None
        self.sigma_prior = None
        self.mu_j_prior = None
        self.sigma_j_prior = None
        self.lamda_j_prior = None

        # Placeholders for sampled parameters (not populated in this class).
        self.mu_samples = None
        self.sigma_samples = None
        self.mu_j_samples = None
        self.sigma_j_samples = None
        self.lamda_j_samples = None

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    def _set_log_returns(self, df):
        """Compute log returns from ``df`` and store them on the instance.

        For a DataFrame the result is also written to ``df['log_return']``
        (the historical side effect of this class).  An already present
        ``log_return`` column is ignored when locating the price column, so
        the same frame can be passed to several methods in a row.

        Raises:
            ValueError: if a DataFrame does not hold exactly one price column.
        """
        self.df = df
        is_frame = isinstance(df, pd.DataFrame)
        if is_frame:
            price_frame = df.drop(columns="log_return", errors="ignore")
            if price_frame.shape[1] != 1:
                raise ValueError(
                    "df must contain exactly one price column "
                    f"(found {price_frame.shape[1]})"
                )
            prices = price_frame.iloc[:, 0]
        else:
            prices = pd.Series(df)

        returns = np.log(prices / prices.shift(1)).rename("log_return")
        if is_frame:
            df["log_return"] = returns

        # The first observation has no predecessor and is dropped here.
        self.log_returns = returns.dropna()
        return self.log_returns

    @staticmethod
    def _iqr_filter(series, k):
        """Return the entries of ``series`` inside [Q1 - k*IQR, Q3 + k*IQR]."""
        q1 = series.quantile(0.25)
        q3 = series.quantile(0.75)
        spread = q3 - q1
        keep = (series >= q1 - k * spread) & (series <= q3 + k * spread)
        return series[keep]

    def _estimate_diffusion(self, sample):
        """Fit a normal to ``sample``, print a summary and store the priors.

        Returns:
            (mu, sigma): the daily drift and volatility.  ``mu`` is the fitted
            mean plus half the fitted variance.
        """
        mean_log, sigma = stats.norm.fit(sample)
        mu = mean_log + 0.5 * sigma ** 2

        mu_annual = mu * self.TRADING_DAYS
        sigma_annual = sigma * np.sqrt(self.TRADING_DAYS)

        print(f"Daily mu (sigma) = {round(mu * 100, 2)}% ± {round(sigma * 100, 2)}%")
        print(f"Annual mu (sigma) = {round(mu_annual * 100, 2)}% ± {round(sigma_annual * 100, 2)}%")

        self.mu_prior = mu_annual
        self.sigma_prior = sigma_annual
        return mu, sigma

    def _plot_return_diagnostics(self, sample, mu, sigma):
        """Draw the time series, histogram + Gaussian fit and Q-Q plot.

        ``sample`` is the array the normal was fitted to; the time series and
        the x-range of the density curve use the full ``self.log_returns``.
        """
        low, high = mu - sigma, mu + sigma
        # The legend values now match the position of the drawn lines.
        low_label = f'μ - 1σ = {low:.4f}'
        high_label = f'μ + 1σ = {high:.4f}'

        fig, axs = plt.subplots(3, 1, figsize=(12, 12), constrained_layout=True)

        # Panel 1: time series
        axs[0].plot(self.log_returns, label='Log Returns')
        axs[0].axhline(low, color='b', linestyle='--', label=low_label)
        axs[0].axhline(high, color='b', linestyle='--', label=high_label)
        axs[0].set_title('Daily Log Returns (Time Series)', fontsize=14)
        axs[0].set_xlabel('Date')
        axs[0].set_ylabel('Log Return')
        axs[0].legend()
        axs[0].grid(True)

        # Panel 2: histogram with the fitted Gaussian density
        grid = np.linspace(self.log_returns.min(), self.log_returns.max(), 1000)
        axs[1].hist(sample, bins=50, density=True, alpha=0.6, edgecolor='k', label='Histogram')
        axs[1].plot(grid, stats.norm.pdf(grid, mu, sigma), 'r-',
                    label=f'Gaussian Fit\nμ={mu:.4f}, σ={sigma:.4f}')
        axs[1].axvline(low, color='b', linestyle='--', label=low_label)
        axs[1].axvline(high, color='b', linestyle='--', label=high_label)
        axs[1].set_title('Log Returns Distribution & Gaussian Fit', fontsize=14)
        axs[1].set_xlabel('Log Return')
        axs[1].set_ylabel('Density')
        axs[1].legend()
        axs[1].grid(True)

        # Panel 3: Q-Q plot against the normal distribution
        stats.probplot(sample, dist="norm", plot=axs[2])
        axs[2].get_lines()[1].set_color('red')  # line of best fit
        axs[2].set_title('Q-Q Plot of Log Returns', fontsize=14)
        axs[2].grid(True)

        plt.show()

    @staticmethod
    def _yearly_jump_counts(jump_flags):
        """Sum a 0/1 series per calendar year; years without rows count 0.

        Implemented with a group-by instead of ``resample('YE')`` because the
        'YE' alias is only understood by recent pandas releases.
        """
        years = jump_flags.index.year
        counts = jump_flags.groupby(np.asarray(years)).sum()
        every_year = range(int(years.min()), int(years.max()) + 1)
        return counts.reindex(every_year, fill_value=0)

    @staticmethod
    def _plot_jump_count_fit(counts, lambda_j):
        """Plot observed yearly jump counts against a Poisson(lambda_j) fit."""
        probs = np.linspace(0.01, 0.99, len(counts))
        empirical_q = np.quantile(counts, probs)
        theoretical_q = stats.poisson.ppf(probs, mu=lambda_j)

        fig, axs = plt.subplots(1, 2, figsize=(14, 5))

        # Histogram with the Poisson PMF on top
        axs[0].hist(counts, bins=range(int(max(counts)) + 2),
                    density=True, alpha=0.6, edgecolor='k', label='Observed')
        support = np.arange(0, max(counts) + 1)
        axs[0].plot(support, stats.poisson.pmf(support, mu=lambda_j), 'ro-',
                    label=f'Poisson PMF (λ={lambda_j:.2f})')
        axs[0].set_title("Poisson Fit to Yearly Jump Counts")
        axs[0].set_xlabel("Jump Count per Year")
        axs[0].set_ylabel("Probability")
        axs[0].legend()
        axs[0].grid(True)

        # Q-Q plot with the y = x reference line
        top = max(theoretical_q.max(), counts.max())
        axs[1].plot(theoretical_q, empirical_q, 'bo', label='Empirical vs Poisson')
        axs[1].plot([0, top], [0, top], 'r--', label='Ideal Fit (y = x)')
        axs[1].set_title("Q-Q Plot: Empirical vs Poisson Quantiles")
        axs[1].set_xlabel("Theoretical Quantiles (Poisson)")
        axs[1].set_ylabel("Empirical Quantiles (Observed)")
        axs[1].legend()
        axs[1].grid(True)

        plt.tight_layout()
        plt.show()

    @staticmethod
    def _plot_jump_size_fit(sizes, mu_J, sigma_J):
        """Plot the jump-size histogram with its normal fit and a Q-Q plot."""
        fig, axs = plt.subplots(1, 2, figsize=(14, 6))

        axs[0].hist(sizes, bins=20, density=True, alpha=0.6, color='skyblue', edgecolor='black')
        xmin, xmax = axs[0].get_xlim()
        grid = np.linspace(xmin, xmax, 100)
        axs[0].plot(grid, stats.norm.pdf(grid, mu_J, sigma_J), 'r', linewidth=2)
        axs[0].set_title("Histogram of Jump Sizes with Fitted Normal Curve")
        axs[0].set_xlabel("Jump Size")
        axs[0].set_ylabel("Density")
        axs[0].grid(True)
        # Annotate the fitted parameters in the top-right corner.
        textstr = f"$\\mu_J$ = {mu_J:.4f}\n$\\sigma_J$ = {sigma_J:.4f}"
        axs[0].text(0.95, 0.95, textstr,
                    transform=axs[0].transAxes,
                    fontsize=12,
                    verticalalignment='top',
                    horizontalalignment='right',
                    bbox=dict(boxstyle="round", facecolor="white", edgecolor="gray", alpha=0.8))

        stats.probplot(sizes, dist="norm", sparams=(mu_J, sigma_J), plot=axs[1])
        axs[1].set_title("Q-Q Plot vs Fitted Normal")
        axs[1].grid(True)

        plt.tight_layout()
        plt.show()

    # ------------------------------------------------------------------
    # Public estimators
    # ------------------------------------------------------------------
    def log_return_GBM(self, df, *, show_plots=True):
        """Estimate annualised drift and volatility from all log returns.

        Args:
            df: price history; a DataFrame with exactly one price column
                (it receives a ``log_return`` column) or a price Series.
            show_plots: when False the diagnostic figure is skipped.

        Returns:
            ``(mu_prior, sigma_prior)`` - the annualised drift and volatility,
            also stored on the instance.
        """
        sample = self._set_log_returns(df).values
        mu, sigma = self._estimate_diffusion(sample)

        if show_plots:
            self._plot_return_diagnostics(sample, mu, sigma)

        return self.mu_prior, self.sigma_prior

    def log_return_Merton(self, df, *, show_plots=True):
        """Estimate diffusion and jump parameters from log returns.

        Steps, as implemented:
          1. Drift / volatility are fitted on returns inside the 1.5*IQR
             fences, so extreme moves do not inflate the estimate.
          2. A return counts as a jump when it is more than three daily
             sigmas away from the daily drift.
          3. Jumps are counted per calendar year; lambda is the mean of the
             yearly counts that do not exceed their 75th percentile.
          4. Jump sizes inside the 2.5*IQR fences are fitted with a normal
             distribution to obtain mu_J and sigma_J.

        Args:
            df: price history; a DataFrame with exactly one price column
                (it receives a ``log_return`` column) or a price Series.
                The index must expose ``.year`` (e.g. a DatetimeIndex).
            show_plots: when False all diagnostic figures are skipped.

        Returns:
            ``(mu_prior, sigma_prior, mu_j_prior, sigma_j_prior,
            lamda_j_prior)``, all of which are also stored on the instance.
        """
        returns = self._set_log_returns(df)

        # 1) diffusion part, fitted on the outlier-trimmed returns
        sample = self._iqr_filter(returns, 1.5).values
        mu, sigma = self._estimate_diffusion(sample)
        if show_plots:
            self._plot_return_diagnostics(sample, mu, sigma)

        # 2) flag jumps on the full (untrimmed) return series
        threshold = 3 * sigma
        is_jump = np.abs(returns - mu) > threshold

        # 3) yearly jump intensity
        yearly_counts = self._yearly_jump_counts(is_jump.astype(int))
        # Not a symmetric IQR fence: counts are kept between the minimum and
        # the 75th percentile, i.e. the busiest years are excluded.
        floor = yearly_counts.quantile(0.0)
        ceiling = yearly_counts.quantile(0.75)
        kept_counts = yearly_counts[(yearly_counts >= floor) & (yearly_counts <= ceiling)].values
        lambda_j = kept_counts.mean()
        if show_plots:
            self._plot_jump_count_fit(kept_counts, lambda_j)

        # 4) jump-size distribution
        jump_sizes = returns.loc[is_jump].dropna()
        kept_sizes = self._iqr_filter(jump_sizes, 2.5).values
        mu_J, sigma_J = stats.norm.fit(kept_sizes)
        if show_plots:
            self._plot_jump_size_fit(kept_sizes, mu_J, sigma_J)

        self.mu_j_prior = mu_J
        self.sigma_j_prior = sigma_J
        self.lamda_j_prior = lambda_j

        return self.mu_prior, self.sigma_prior, self.mu_j_prior, self.sigma_j_prior, self.lamda_j_prior