In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import date
import seaborn as sns
import random

import matplotlib.pyplot as plt

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.mixture import GaussianMixture


from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.stats.diagnostic import acorr_ljungbox

import scipy.stats as stats
from scipy.stats import probplot, laplace, norm, t


import statsmodels.api as sm
from statsmodels.nonparametric.kde import KDEUnivariate
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_process import ArmaProcess

import pymc as pm
import pytensor.tensor as pt
import arviz as az

import tensorflow as tf
from tensorflow import keras


#from tensorflow.keras.utils import plot_model


######################################
#from pmdarima import auto_arima
#from diptest import diptest

In [None]:
class prediction_analysis:
    """Plot helpers comparing predicted and actual ``Close`` prices.

    The instance keeps a reference to the price frame, the predictions for
    its trailing rows, and the number of trailing rows that are treated as
    the test set (the boundary is drawn as the "Train/Test Split" line).
    """

    def __init__(self, df, y_test_pred, nfuture):
        """Store the inputs without copying or validating them.

        Args:
            df: DataFrame with a ``'Close'`` column; its index is used as the
                x-axis of every plot.
            y_test_pred: Predictions assigned to the last ``nfuture`` rows of
                ``df`` by :meth:`plot_predictions`.
            nfuture: Number of trailing rows of ``df`` forming the test set.
        """
        self.df = df
        self.y_pred = y_test_pred
        self.nfuture = nfuture

    def _split_time(self):
        """Return the index label of the first of the last ``nfuture`` rows."""
        return self.df.iloc[-self.nfuture].name

    def _percent_error_frame(self):
        """Return a copy of the last ``nfuture`` rows with prediction errors.

        Adds ``'Predicted_Close'`` (from ``self.y_pred``) and
        ``'Prediction_Change_%'`` (signed error relative to ``'Close'``, in
        percent).  Rows holding a NaN in *any* column are dropped, as in the
        original implementation.  ``self.df`` itself is not modified.
        """
        df_plot = self.df.iloc[-self.nfuture:].copy()
        df_plot['Predicted_Close'] = self.y_pred
        df_plot['Prediction_Change_%'] = (
            100 * (df_plot['Predicted_Close'] - df_plot['Close']) / df_plot['Close']
        )
        return df_plot.dropna()

    @staticmethod
    def _mixture_pdf(x_vals, weights, means, stds):
        """Evaluate a 1-D Gaussian mixture density on ``x_vals``.

        Args:
            x_vals: Points at which to evaluate the density.
            weights: Mixture weight of each component.
            means: Mean of each component.
            stds: Standard deviation of each component.

        Returns:
            Float array shaped like ``x_vals`` with the weighted sum of the
            component normal densities.
        """
        x_vals = np.asarray(x_vals, dtype=float)
        pdf = np.zeros_like(x_vals)
        for weight, mean, std in zip(weights, means, stds):
            pdf += weight * norm.pdf(x_vals, loc=mean, scale=std)
        return pdf

    def ml_ts_plot(self, y_total_pred, npast):
        """Plot actual and predicted ``Close`` over time in one figure.

        Args:
            y_total_pred: Predictions assigned to the rows of ``self.df``
                from position ``npast`` onwards.
            npast: Number of leading rows of ``self.df`` to leave out.
        """
        df_plot = self.df.iloc[npast:].copy()
        df_plot['Predicted_Close'] = y_total_pred

        plt.figure(figsize=(12, 6))

        # --- Time Series Plot: Actual vs Predicted ---
        plt.plot(df_plot.index, df_plot['Close'], label='Actual Close', color='blue')
        plt.plot(df_plot.index, df_plot['Predicted_Close'], label='Predicted Close',
                 linestyle='--', color='orange')
        plt.axvline(x=self._split_time(), color='black', linestyle=':',
                    label='Train/Test Split')

        plt.title('Actual vs Predicted Close Prices')
        plt.xlabel('Date')
        plt.ylabel('Price')
        plt.legend()
        plt.grid(True)

        plt.tight_layout()
        plt.show()

    def plot_predictions(self, *, n_components=2, interval_level=0.95,
                         bins=30, error_ylim=(-20, 20)):
        """Plot test-set percentage errors and fit distributions to them.

        Draws two stacked subplots: a scatter of the percentage error per
        index label, and a density histogram of those errors overlaid with a
        fitted Student's t density and a Gaussian-mixture density.  The
        fitted interval and mixture parameters are printed afterwards.

        With the default arguments the output matches the previous
        hard-coded behaviour.

        Args:
            n_components: Number of Gaussian mixture components to fit.
            interval_level: Probability mass of the central interval of the
                fitted t-distribution (labelled "CI" in the plot).
            bins: Number of histogram bins.
            error_ylim: ``(low, high)`` y-limits of the scatter plot, or
                ``None`` to let matplotlib autoscale so that errors outside
                the fixed window are not hidden.
        """
        df_plot = self._percent_error_frame()

        test_preds = df_plot['Prediction_Change_%'].values
        # Pad the evaluation grid by 5 percentage points on each side.
        x_vals = np.linspace(test_preds.min() - 5, test_preds.max() + 5, 1000)

        # Fit Student's t-distribution
        df_t, loc_t, scale_t = t.fit(test_preds)
        pdf_t = t.pdf(x_vals, df_t, loc=loc_t, scale=scale_t)
        ci_lower_t, ci_upper_t = t.interval(interval_level, df_t, loc=loc_t, scale=scale_t)

        # Fit GMM (sklearn expects a 2-D sample matrix, hence the reshape)
        gmm = GaussianMixture(n_components=n_components, random_state=42)
        gmm.fit(test_preds.reshape(-1, 1))
        gmm_means = gmm.means_.flatten()
        gmm_stds = np.sqrt(gmm.covariances_).flatten()
        gmm_weights = gmm.weights_.flatten()
        pdf_gmm = self._mixture_pdf(x_vals, gmm_weights, gmm_means, gmm_stds)

        # Create subplots
        fig, axs = plt.subplots(nrows=2, figsize=(14, 10))
        fig.subplots_adjust(hspace=0.4)
        ax_err, ax_hist = axs

        # --- Subplot 1: Error vs Actual ---
        ax_err.scatter(df_plot.index, df_plot['Prediction_Change_%'], alpha=0.6, color='green')
        ax_err.axhline(y=0, color='red', linestyle='--', label='No Error')
        ax_err.axvline(x=self._split_time(), color='black', linestyle=':',
                       label='Train/Test Split')

        ax_err.set_title('Percentage Prediction Error vs Actual Price')
        ax_err.set_xlabel('Date')
        ax_err.set_ylabel('Prediction_Change_%')
        ax_err.legend()
        ax_err.grid(True)
        if error_ylim is not None:
            ax_err.set_ylim(*error_ylim)

        # --- Subplot 2: Histogram + Distribution Fits ---
        sns.histplot(test_preds, bins=bins, kde=False, stat="density", color='gray',
                     alpha=0.4, ax=ax_hist, label='Histogram')

        # Student's t-distribution
        ax_hist.plot(x_vals, pdf_t, color='green', label=f"Student's t PDF (df={df_t:.2f})")
        ax_hist.axvline(loc_t, color='green', linestyle='--', label='t Mean')
        ax_hist.axvline(ci_lower_t, color='green', linestyle='-.',
                        label=f't {interval_level:.0%} CI')
        ax_hist.axvline(ci_upper_t, color='green', linestyle='-.')

        # GMM PDF
        ax_hist.plot(x_vals, pdf_gmm, label=f'GMM PDF ({n_components} components)',
                     color='purple')
        for k, mean in enumerate(gmm_means, start=1):
            ax_hist.axvline(mean, color='purple', linestyle='--',
                            label=f'GMM Mean {k}: {mean:.2f}')

        ax_hist.set_title("Fit of Student’s t-Distribution and GMM to Predicted % Changes")
        ax_hist.set_xlabel("Predicted Change (%)")
        ax_hist.set_ylabel("Density")
        ax_hist.legend()
        ax_hist.grid(True)

        plt.tight_layout()
        plt.show()

        # Print optional info
        print(f"Student's t-distribution {interval_level:.0%} CI: "
              f"[{ci_lower_t:.2f}, {ci_upper_t:.2f}]")
        print(f"GMM means: {gmm_means}, std devs: {gmm_stds}")

