In [None]:
from datetime import date
import random
import time
import yfinance as yf
import pandas as pd

import seaborn as sns

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

from numpy.fft import fft, ifft, fftshift
import numpy as np
from numpy import log, sqrt, exp


from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.mixture import GaussianMixture
from sklearn.linear_model import LinearRegression

from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.stats.diagnostic import het_breuschpagan
from statsmodels.graphics.gofplots import qqplot

import scipy.stats as stats
from scipy.stats import probplot, laplace, norm, t, poisson
from scipy.linalg import solve_banded
from scipy.optimize import minimize, differential_evolution
from scipy.integrate import quad
from scipy.special import roots_laguerre
from scipy.interpolate import interp1d
from scipy.sparse import diags, kron, identity, csr_matrix
from scipy.sparse.linalg import spsolve
from scipy.stats import multivariate_normal, kstest

import statsmodels.api as sm
from statsmodels.nonparametric.kde import KDEUnivariate
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_process import ArmaProcess

import pymc as pm
import arviz as az

#import aesara.tensor as at

from tensorflow import keras
#from tensorflow.keras.utils import plot_model

#import pyswarms as ps

######################################
#from pmdarima import auto_arima
#from diptest import diptest



In [None]:
class FactorModels:
    """Single-index regressions of portfolio asset returns on a market index.

    Two public entry points are provided:

    * ``Sharpe``: regresses raw returns,  R_i = alpha + beta * R_m + error
    * ``CAPM``:   regresses excess returns,
      R_i - r_f = alpha + beta * (R_m - r_f) + error, and additionally draws
      the Security Market Line.

    Both fit one regression per asset column, show a 2x3 grid of diagnostic
    plots for each asset, and return a DataFrame with one row per asset.

    Parameters
    ----------
    rfr : float
        Risk-free rate. It is divided by ``trading_days`` to obtain a
        per-period rate, i.e. it is treated as an annual figure.
    log_return_pf : pandas.DataFrame
        Asset returns, one column per asset.
    log_return_index : pandas.DataFrame
        Market index returns; only the first column is used.
    """

    # Estimators accepted by Sharpe() and CAPM().
    _FIT_TYPES = ("OLS", "GLS")

    def __init__(self, rfr, log_return_pf, log_return_index):
        self.rfr = rfr
        self.trading_days = 252  # Annualize factor
        self.lam = 2  # Placeholder for other models like Black-Litterman
        self.fit_type = None  # Not assigned by any method of this class

        self.log_return_pf = log_return_pf
        self.log_return_index = log_return_index

    #########################################################################################
    # Private helpers shared by Sharpe() and CAPM()
    #########################################################################################
    @classmethod
    def _check_fit_type(cls, fit_type):
        """Raise ValueError unless *fit_type* is one of the supported estimators."""
        if fit_type not in cls._FIT_TYPES:
            raise ValueError("fit_type must be 'OLS' or 'GLS'")

    @staticmethod
    def _fit(y, X, fit_type):
        """Fit ``y ~ X`` with the requested estimator and return the results object."""
        if fit_type == "GLS":
            # The GLS error covariance is a single scalar: the variance of the
            # OLS residuals. NOTE(review): per the statsmodels docs a scalar
            # sigma means a constant diagonal covariance, so the coefficient
            # estimates should coincide with OLS - confirm this is intended.
            resid_ols = sm.OLS(y, X).fit().resid
            sigma2 = np.var(resid_ols)
            return sm.GLS(y, X, sigma=sigma2).fit()
        return sm.OLS(y, X).fit()

    @staticmethod
    def _summarize(result, index_col):
        """Collect alpha/beta estimates, 95% intervals, p-values and R2 in a dict."""
        conf_int = result.conf_int()
        alpha_ci = conf_int.loc["const"].tolist()
        beta_ci = conf_int.loc[index_col].tolist()
        return {
            "alpha": result.params["const"],
            "alpha_ci_lower": alpha_ci[0],
            "alpha_ci_upper": alpha_ci[1],
            "alpha_pval": result.pvalues["const"],
            "beta": result.params[index_col],
            "beta_ci_lower": beta_ci[0],
            "beta_ci_upper": beta_ci[1],
            "beta_pval": result.pvalues[index_col],
            "R2": result.rsquared,
            "Adj_R2": result.rsquared_adj,
        }

    @staticmethod
    def _plot_diagnostics(col, market, y, result, X, x_label, y_label):
        """Show the 2x3 diagnostic figure for one fitted asset regression.

        Panels: regression line with 95% mean confidence band, residual ACF,
        residuals vs fitted, residual histogram, QQ plot, Breusch-Pagan test.
        """
        pred_summary = result.get_prediction(X).summary_frame(alpha=0.05)
        fitted = result.fittedvalues
        residuals = result.resid

        # fill_between joins points in the order given, so the band (and the
        # line) must be drawn along increasing x; the raw time-ordered returns
        # would produce a self-intersecting polygon.
        market_values = market.to_numpy()
        order = np.argsort(market_values)
        x_sorted = market_values[order]

        fig, axes = plt.subplots(2, 3, figsize=(18, 10))

        # Plot 1: Regression Line + Confidence Interval
        ax = axes[0, 0]
        sns.scatterplot(x=market, y=y, ax=ax, label='Data Points', alpha=0.7)
        ax.plot(x_sorted, fitted.to_numpy()[order], color='black', linestyle='--',
                label='Best Fit Line')
        ax.fill_between(
            x_sorted,
            pred_summary["mean_ci_lower"].to_numpy()[order],
            pred_summary["mean_ci_upper"].to_numpy()[order],
            color='gray', alpha=0.3, label='95% CI'
        )
        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)
        ax.set_title(f"Regression Line & 95% CI - {col}")
        ax.legend()
        ax.grid(True)

        # Plot 2: ACF of Residuals
        plot_acf(residuals, lags=20, ax=axes[0, 1])
        axes[0, 1].set_title(f"Autocorrelation of Residuals - {col}")

        # Plot 3: Residuals vs Fitted
        ax = axes[0, 2]
        ax.scatter(fitted, residuals, alpha=0.6)
        ax.axhline(0, color='red', linestyle='--')
        ax.set_xlabel("Fitted Values")
        ax.set_ylabel("Residuals")
        ax.set_title("Residuals vs Fitted")
        ax.grid(True)

        # Plot 4: Histogram of Residuals
        ax = axes[1, 0]
        sns.histplot(residuals, kde=True, ax=ax, bins=30, color="skyblue")
        ax.set_title("Histogram of Residuals")
        ax.set_xlabel("Residual")
        ax.grid(True)

        # Plot 5: QQ Plot
        qqplot(residuals, line='s', ax=axes[1, 1])
        axes[1, 1].set_title("QQ Plot of Residuals")

        # Plot 6: Breusch-Pagan Test Output, rendered as text in the last panel
        bp_test = het_breuschpagan(residuals, result.model.exog)
        bp_labels = ['LM Statistic', 'LM p-value', 'F-Statistic', 'F p-value']
        bp_text = '\n'.join(
            f"{key}: {val:.4f}" for key, val in zip(bp_labels, bp_test)
        )
        axes[1, 2].axis('off')
        axes[1, 2].text(0, 0.5, f"Breusch-Pagan Test for Heteroscedasticity:\n\n{bp_text}",
                        fontsize=12, verticalalignment='center')

        plt.tight_layout()
        plt.show()

    def _run_regressions(self, asset_returns, market_returns, index_col,
                         fit_type, x_label, y_label):
        """Fit, plot and summarise one regression per column of *asset_returns*.

        Returns a DataFrame indexed by asset name (empty if there are no assets).
        """
        # The design matrix is the same for every asset, so build it once.
        X = sm.add_constant(market_returns)
        market = market_returns[index_col]

        results = {}
        for col in asset_returns.columns:
            y = asset_returns[col]
            result = self._fit(y, X, fit_type)
            self._plot_diagnostics(col, market, y, result, X, x_label, y_label)
            results[col] = self._summarize(result, index_col)

        return pd.DataFrame(results).T  # Transpose to get asset names as index

    @staticmethod
    def _sml_coordinates(alphas, betas, market_excess_mean, rf_daily, trading_days):
        """Return annualised SML plot coordinates.

        Returns ``(asset_returns, sml_betas, sml_returns)`` where both return
        arrays are total (risk-free + excess) returns scaled by *trading_days*,
        so an asset with alpha == 0 lies exactly on the line.
        """
        alphas = np.asarray(alphas, dtype=float)
        betas = np.asarray(betas, dtype=float)

        asset_returns = (rf_daily + alphas + betas * market_excess_mean) * trading_days

        # Extend the line a little beyond the plotted betas on both sides.
        sml_betas = np.linspace(min(-0.2, betas.min() - 0.1), betas.max() + 0.1, 100)
        sml_returns = (rf_daily + sml_betas * market_excess_mean) * trading_days
        return asset_returns, sml_betas, sml_returns

    #########################################################################################
    def Sharpe(self, fit_type):
        """
        Estimate alpha and beta using Sharpe one-factor model:
        R_i  = alpha + beta * (R_m) + error

        Parameters
        ----------
        fit_type : str
            "OLS" or "GLS".

        Returns
        -------
        pandas.DataFrame
            One row per asset with alpha, beta, their 95% confidence bounds
            and p-values, R2 and adjusted R2.

        Raises
        ------
        ValueError
            If *fit_type* is neither "OLS" nor "GLS".
        """
        self._check_fit_type(fit_type)

        # Ensure index alignment and drop NaNs
        data = pd.concat([self.log_return_pf, self.log_return_index], axis=1).dropna()

        # Extract index column name
        index_col = self.log_return_index.columns[0]

        # Raw returns: no risk-free rate is subtracted in this model
        asset_returns = data[self.log_return_pf.columns]
        market_returns = data[[index_col]]  # Keep as DataFrame

        return self._run_regressions(
            asset_returns, market_returns, index_col, fit_type,
            x_label="Market Return", y_label="Asset Return",
        )

    #########################################################################################
    def CAPM(self, fit_type):
        """
        Estimate alpha and beta using the CAPM regression on excess returns:
        R_i - r_f = alpha + beta * (R_m - r_f) + error

        After the per-asset regressions, the Security Market Line is plotted
        with every asset placed at its fitted annualised expected return.

        Parameters
        ----------
        fit_type : str
            "OLS" or "GLS".

        Returns
        -------
        pandas.DataFrame
            One row per asset with alpha, beta, their 95% confidence bounds
            and p-values, R2 and adjusted R2.

        Raises
        ------
        ValueError
            If *fit_type* is neither "OLS" nor "GLS".
        """
        self._check_fit_type(fit_type)

        # Ensure index alignment and drop NaNs
        data = pd.concat([self.log_return_pf, self.log_return_index], axis=1).dropna()
        r_f_daily = self.rfr / self.trading_days

        # Extract index column name
        index_col = self.log_return_index.columns[0]

        # Excess returns
        excess_pf = data[self.log_return_pf.columns] - r_f_daily
        excess_mkt = data[[index_col]] - r_f_daily  # Keep as DataFrame

        results_df = self._run_regressions(
            excess_pf, excess_mkt, index_col, fit_type,
            x_label="Market Excess Return", y_label="Asset Excess Return",
        )

        # Nothing to place on the SML when the portfolio has no assets.
        if results_df.empty:
            return results_df

        # --- Plot Security Market Line after loop ---
        beta_np = results_df['beta'].to_numpy(dtype=float)
        alpha_np = results_df['alpha'].to_numpy(dtype=float)
        market_excess_mean = excess_mkt[index_col].mean()

        expected_returns, sml_betas, sml_returns = self._sml_coordinates(
            alpha_np, beta_np, market_excess_mean, r_f_daily, self.trading_days
        )

        plt.figure(figsize=(10, 6))
        sns.scatterplot(x=beta_np, y=expected_returns, s=80, color='blue', label='Assets')

        # SML Line
        plt.plot(sml_betas, sml_returns, 'r--', label='Security Market Line (SML)')

        # Asset Labels, anchored at the same annualised coordinates as the markers
        for ticker, x, y in zip(results_df.index, beta_np, expected_returns):
            plt.text(x, y, str(ticker), fontsize=9, ha='right', va='bottom')

        plt.xlabel('Beta')
        plt.ylabel('Expected Return')
        plt.title('Security Market Line (CAPM)')
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()

        return results_df