In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import date
import seaborn as sns
import random

import matplotlib.pyplot as plt

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.mixture import GaussianMixture


from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.stats.diagnostic import acorr_ljungbox

import scipy.stats as stats
from scipy.stats import probplot, laplace, norm, t


import statsmodels.api as sm
from statsmodels.nonparametric.kde import KDEUnivariate
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_process import ArmaProcess

import pymc as pm
import pytensor.tensor as pt
import arviz as az

import tensorflow as tf
from tensorflow import keras


#from tensorflow.keras.utils import plot_model


######################################
#from pmdarima import auto_arima
#from diptest import diptest

In [None]:
class StockPredictor:
    """Drive MCMC, machine-learning and time-series forecasts for one ticker.

    ``data_initiation()`` must be called before any ``run_*`` method, because
    ``self.df`` is ``None`` until then.  ``run_mcmc`` and ``run_ml`` estimate
    their parameters from ``self.df[column].iloc[:-nfuture]``, i.e. everything
    except the last ``nfuture`` rows.

    NOTE(review): ``nfuture`` is assumed to be positive; with ``nfuture == 0``
    the ``[:-nfuture]`` slices used below are empty.
    """

    # Model names accepted by run_mcmc.
    _MCMC_MODEL_TYPES = ('GBM', 'BinomialTree', 'Merton')

    def __init__(self, ticker, start, end, column, nfuture=252):
        """Store the run configuration; no data is fetched here.

        Args:
            ticker: Passed to ``StockData`` by ``data_initiation``.
            start: Passed to ``StockData`` by ``data_initiation``.
            end: Passed to ``StockData`` by ``data_initiation``.
            column: Name of the ``self.df`` column that is modelled.
            nfuture: Number of trailing rows held out / steps forecast.
        """
        self.ticker = ticker
        self.start = start
        self.end = end
        # Not assigned by any method visible in this class.
        self.model_type = None
        # Most recently built model; set by the run_* methods.
        self.model = None
        self.df = None
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        self.nfuture = nfuture
        self.column = column
        # Forwarded to Stochastic_prices_bayesian in run_mcmc.
        # NOTE(review): presumably an annual risk-free rate -- confirm.
        self.rfr = 0.07
        self.risk_neutral = False

    def data_initiation(self):
        """Load the data for this ticker into ``self.df`` via ``StockData``."""
        self.df = StockData(self.ticker, self.start, self.end, self.column).fetch_data()

    def run_mcmc(self, nsample, nburn, nchain, model_type):
        """Fit a Bayesian price model, simulate future paths and plot them.

        Args:
            nsample: Passed to the simulator and to the Bayesian fit.
            nburn: Passed to the Bayesian fit.
            nchain: Passed to the Bayesian fit.
            model_type: One of ``'GBM'``, ``'BinomialTree'`` or ``'Merton'``.

        Raises:
            ValueError: If ``model_type`` is not one of the names above.
        """
        # Reject unknown names before doing any work.  Previously they fell
        # through every branch and failed later with a NameError on
        # ``simulated_prices``; run_ml already raises ValueError for this.
        if model_type not in self._MCMC_MODEL_TYPES:
            raise ValueError("Unsupported model type")

        prices = self.df[self.column]
        self.model = Stochastic_prices_bayesian(
            prices,
            self.nfuture,
            prices.iloc[-self.nfuture],
            nsample,
            self.rfr,
            self.risk_neutral,
        )

        bayesian = Bayesian_PP()
        train_prices = prices.iloc[:-self.nfuture]

        if model_type == 'Merton':  # lognormal + poisson distribution
            # The returned prior estimates are not used here.  The call is
            # kept because the fit below receives no data argument.
            # NOTE(review): presumably it stores the returns on ``bayesian``.
            bayesian.log_return_Merton(train_prices)
            (
                mu_samples,
                sigma_samples,
                mu_j_samples,
                sigma_j_samples,
                lambda_j_samples,
            ) = bayesian.fit_bayesian_model_Merton(nsample, nburn, nchain)
            simulated_prices = self.model.simulate_future_prices_Merton(
                mu_samples, sigma_samples, mu_j_samples, sigma_j_samples, lambda_j_samples
            )
        else:  # 'GBM' and 'BinomialTree' share the same fit (lognormal distribution)
            # Same remark as above: return value unused, call kept.
            bayesian.log_return_GBM(train_prices)
            mu_samples, sigma_samples = bayesian.fit_bayesian_model_GBM(nsample, nburn, nchain)
            if model_type == 'GBM':
                simulate = self.model.simulate_future_prices_GBM
            else:
                simulate = self.model.simulate_future_prices_btree
            simulated_prices = simulate(mu_samples, sigma_samples)

        self.model.plot_monte_carlo(simulated_prices, len(self.df) - self.nfuture)
        mean_path = np.mean(simulated_prices, axis=0)
        # The first element of the averaged path is dropped before comparison.
        # NOTE(review): presumably it is the start price -- confirm.
        prediction_analysis(self.df, mean_path.flatten()[1:], self.nfuture).plot_predictions()

    def run_ml(self, npast, model_type):
        """Train a regression model and forecast ``nfuture`` steps with it.

        Args:
            npast: Passed to ``features_ml`` and ``FutureDataPoint``; also the
                number of trailing training predictions used to seed the
                forecast.
            model_type: ``'RidgeModel'``, ``'RandomForestModel'`` or
                ``'SimpleNNModel'``.

        Raises:
            ValueError: If ``model_type`` is not one of the names above.
        """
        if model_type == 'RidgeModel':
            self.model = RidgeModel()
        elif model_type == 'RandomForestModel':
            self.model = RandomForestModel()
        elif model_type == 'SimpleNNModel':
            # NOTE(review): the ``+ 4`` presumably matches extra feature
            # columns produced by features_ml -- confirm.
            self.model = SimpleNNModel(sequence_length=npast + 4)
        else:
            raise ValueError("Unsupported model type")

        # Prepare BSM column
        bayesian = Bayesian_PP()
        mu_prior, sigma_prior = bayesian.log_return_GBM(
            self.df[self.column].iloc[:-self.nfuture]
        )  # lognormal distribution
        X, y = features_ml(
            self.df, npast, self.column, mu_prior, sigma_prior,
            param=self.nfuture, split_type='index',
        ).features()

        # Train split: everything except the last nfuture rows.  The held-out
        # rows are not sliced here because nothing below reads them.
        split_index = int(X.shape[0] - self.nfuture)
        X_train = X[:split_index]
        y_train = y[:split_index]

        # train
        X_train_scaled = self.scaler.fit_transform(X_train)
        self.model.train(X_train_scaled, y_train, list(X.columns))

        # predict: in-sample first, then nfuture steps seeded with the last
        # training row and the last npast in-sample predictions.
        y_train_pred = self.model.predict(X_train_scaled)
        y_test_pred = FutureDataPoint(
            X_train.iloc[[-1]],
            y_train_pred[-npast:],
            self.model,
            self.scaler,
            npast,
            self.column,
            mu_prior,
            sigma_prior,
        ).predict_future_steps(self.nfuture)

        # analysis
        y_pred = np.concatenate((y_train_pred.flatten(), y_test_pred.flatten()))
        prediction_analysis(self.df, y_pred.flatten(), self.nfuture).ml_ts_plot(y_pred, npast)
        prediction_analysis(self.df, y_test_pred.flatten(), self.nfuture).plot_predictions()

    def run_ts(self, order, seasonal_order, model_type):
        """Fit a time-series model and plot its ``nfuture``-step forecast.

        Args:
            order: Passed to ``SARIMAModel.fit_predict``.
            seasonal_order: Passed to ``SARIMAModel.fit_predict``.
            model_type: Only ``'SARIMAModel'`` is supported.

        Raises:
            ValueError: If ``model_type`` is not ``'SARIMAModel'``.
        """
        # Previously any other name left self.model unset (AttributeError) or,
        # worse, silently reused the model from an earlier run_* call.
        if model_type != 'SARIMAModel':
            raise ValueError("Unsupported model type")

        prices = self.df[self.column]
        self.model = SARIMAModel(prices)

        # Preprocess the data / stationarity check.  The return value is not
        # used here; the call is kept in case it reports or stores something.
        self.model.log_difference(ndiff=1)

        # fit and forecast
        y_test_pred = self.model.fit_predict(
            prices, self.nfuture, order, seasonal_order, ndiff=1, logg=True
        )
        prediction_analysis(self.df, y_test_pred.values, self.nfuture).plot_predictions()



