In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import date
import seaborn as sns
import random

import matplotlib.pyplot as plt

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.mixture import GaussianMixture


from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.stats.diagnostic import acorr_ljungbox

import scipy.stats as stats
from scipy.stats import probplot, laplace, norm, t


import statsmodels.api as sm
from statsmodels.nonparametric.kde import KDEUnivariate
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_process import ArmaProcess

import pymc as pm
import pytensor.tensor as pt
import arviz as az

import tensorflow as tf
from tensorflow import keras


#from tensorflow.keras.utils import plot_model


######################################
#from pmdarima import auto_arima
#from diptest import diptest

In [None]:
class SimpleNNModel:
    """Small fully connected Keras regressor.

    The network maps a flat vector of ``sequence_length`` inputs through
    three ReLU hidden layers (128, 64 and 32 units) to a single output unit,
    and is compiled with the Adam optimizer and mean-squared-error loss.

    NOTE(review): the output unit also uses ReLU, so predictions can never be
    negative. Confirm that the (possibly scaled) target is non-negative.
    """

    def __init__(self, sequence_length):
        """Build and compile the network, then print its layer summary.

        Args:
            sequence_length: number of input features per sample.
        """
        net_input = keras.Input(shape=(sequence_length,))

        # Stack the hidden layers from widest to narrowest.
        hidden = net_input
        for width in (128, 64, 32):
            hidden = keras.layers.Dense(width, activation='relu')(hidden)
        net_output = keras.layers.Dense(1, activation='relu')(hidden)

        network = keras.Model(inputs=net_input, outputs=net_output)
        network.compile(optimizer='adam', loss='mse')
        network.summary()
        self.model = network

    def train(self, X_train, y_train, column, epochs=100, batch_size=32):
        """Fit the network, holding out 20% of the data for validation.

        Args:
            X_train: training inputs, passed to ``fit`` unchanged (no reshaping).
            y_train: training targets.
            column: accepted but not used by this method.
            epochs: number of training epochs.
            batch_size: mini-batch size.
        """
        fit_options = dict(epochs=epochs, batch_size=batch_size, validation_split=0.2)
        self.model.fit(X_train, y_train, **fit_options)

    def predict(self, x_test):
        """Return the network's predictions for ``x_test``."""
        predictions = self.model.predict(x_test)
        return predictions

In [None]:
class RandomForestModel:
    """Thin wrapper around ``RandomForestRegressor`` with a fixed configuration.

    The forest uses 100 trees and ``random_state=42`` so repeated fits on the
    same data give the same model.
    """

    def __init__(self):
        """Create the (unfitted) forest."""
        forest = RandomForestRegressor(random_state=42, n_estimators=100)
        self.model = forest

    def train(self, x_train, y_train, columns):
        """Fit the forest on the training data.

        Args:
            x_train: training feature matrix.
            y_train: training targets.
            columns: accepted but not used by this method.
        """
        self.model.fit(x_train, y_train)

    def predict(self, x_test):
        """Return the forest's predictions for ``x_test``."""
        predictions = self.model.predict(x_test)
        return predictions

In [None]:
class RidgeModel:
    """Ridge regression wrapper that reports coefficients and training metrics.

    ``train`` fits a default ``Ridge()`` estimator, draws a bar chart of the
    fitted coefficients and prints in-sample error metrics.
    """

    def __init__(self):
        """Create the (unfitted) Ridge estimator with default settings."""
        self.model = Ridge()

    def train(self, x_train, y_train, columns):
        """Fit the model, plot its coefficients and print training metrics.

        Args:
            x_train: training feature matrix.
            y_train: training targets.
            columns: feature names, one per column of ``x_train``; used as the
                bar labels of the coefficient chart.
        """
        self.model.fit(x_train, y_train)

        # coef_ has shape (n_targets, n_features) when y_train is 2-D (for
        # example a single-column array); flatten it so the single-target case
        # always lines up with the feature names.
        coefficients = np.ravel(self.model.coef_)

        # Create summary DataFrame
        summary = pd.DataFrame({
            'Feature': list(columns),
            'Coefficient': coefficients
        })
        plt.figure(figsize=(8, 4))
        plt.bar(summary['Feature'], summary['Coefficient'])
        plt.title('Feature Coefficients')
        plt.xlabel('Feature')
        plt.ylabel('Coefficient')
        plt.xticks(rotation=45)
        plt.tight_layout()
        # Render now so later pyplot calls in the same cell start a fresh
        # figure instead of drawing on top of this chart.
        plt.show()

        y_train_p = self.model.predict(x_train)
        # Metrics
        mse = mean_squared_error(y_train, y_train_p)                       # MSE: mean((y - ŷ)^2)
        rmse = np.sqrt(mse)                                                # RMSE: sqrt(MSE)
        mae = mean_absolute_error(y_train, y_train_p)                      # MAE: mean(|y - ŷ|)
        # NOTE(review): MAPE divides by y, so it becomes huge when targets are
        # at or near zero (e.g. min-max scaled targets) — confirm it is meaningful here.
        mape = mean_absolute_percentage_error(y_train, y_train_p) * 100    # MAPE: mean(|(y - ŷ)/y|) * 100
        r2 = r2_score(y_train, y_train_p)                                  # R2: 1 - SS_res / SS_tot

        print("Training Metrics:")
        print(f"  - MSE  = {mse:.4f}")
        print(f"  - RMSE = {rmse:.4f}")
        print(f"  - MAE  = {mae:.4f}")
        print(f"  - MAPE = {mape:.2f}%")
        # R2 is a unitless score (at most 1.0), not a percentage.
        print(f"  - R2 score = {r2:.4f}")

    def predict(self, x_test):
        """Return the fitted model's predictions for ``x_test``."""
        return self.model.predict(x_test)

In [None]:
class features_ml:
    """Build a lag/calendar feature table from a date-indexed DataFrame.

    ``features()`` adds ``year``/``month``/``day`` columns taken from the
    index, a standard-normal ``Noise`` column, and ``lag1``..``lag{npast}``
    columns holding shifted copies of the target column. Rows containing any
    NaN (which includes the first ``npast`` rows) are dropped.

    Args:
        df: input frame; copied, so the caller's frame is never modified.
            Its index must expose ``.year``/``.month``/``.day`` (e.g. a
            ``DatetimeIndex``).
        npast: number of lag columns to create.
        column: name of the target column in ``df``.
        mu, sigma, param, split_type: stored on the instance; not read by any
            method of this class.
        random_state: optional seed for the ``Noise`` column. ``None``
            (default) draws from NumPy's global random state, so an earlier
            ``np.random.seed`` call still applies.

    Attributes:
        df: the working frame; replaced by the feature table after
            ``features()`` runs.
        X: feature matrix (all columns except the target), or ``None`` before
            ``features()`` is called.
        y: target series, or ``None`` before ``features()`` is called.
        Y: alias of ``y``, kept because earlier versions declared ``Y``.
    """

    def __init__(self, df, npast, column, mu, sigma, param = 252, split_type='index', random_state=None):
        self.df = df.copy()
        self.npast = npast
        self.split_type = split_type
        self.param = param
        self.column = column
        self.mu = mu
        self.sigma = sigma
        self.random_state = random_state
        self.X = None
        self.y = None
        self.Y = None

    def features(self):
        """Create the feature table and return ``(X, y)``.

        The lags are computed from ``self.df``, which this method replaces
        with the NaN-trimmed feature table. Calling it a second time on the
        same instance therefore recomputes the lags on the already trimmed
        frame and drops another ``npast`` rows; use a new instance instead.

        Returns:
            tuple: ``(X, y)`` where ``X`` is the feature DataFrame and ``y``
            is the target Series, both indexed like the retained rows.
        """
        frame = self.df.copy()

        # Earlier versions used a temporary "date" column and dropped it
        # afterwards, which also removed any pre-existing "date" column;
        # keep that so the feature matrix stays free of raw datetimes.
        frame = frame.drop(columns=["date"], errors="ignore")

        # Calendar features straight from the index.
        stamps = frame.index
        frame["year"] = stamps.year
        frame["month"] = stamps.month
        frame["day"] = stamps.day

        # Pure noise feature. (A geometric-Brownian-motion noise path driven
        # by mu/sigma was previously sketched here but is disabled.)
        n_rows = frame.shape[0]
        if self.random_state is None:
            noise = np.random.normal(0, 1, n_rows)
        else:
            noise = np.random.default_rng(self.random_state).normal(0, 1, n_rows)
        frame["Noise"] = noise

        # lag{k} holds the target value k rows earlier.
        for lag in range(1, self.npast + 1):
            frame[f'lag{lag}'] = frame[self.column].shift(lag)

        frame = frame.dropna()
        self.df = frame

        self.y = frame[self.column].copy()
        self.Y = self.y
        self.X = frame.drop(columns=self.column).copy()
        self.X.info()
        return self.X, self.y

In [None]:
class FutureDataPoint:
    """Roll a fitted one-step model forward over future business days.

    Each step moves the single feature row to the next business day, rebuilds
    its calendar, noise and lag features, scales it, predicts the next value
    and feeds that prediction back into the lag history.

    Args:
        X_train_last: one-row DataFrame of features (copied). Its index entry
            is the date the forecast starts after, and its columns are
            expected to match what ``scaler``/``model`` were fitted on.
        y_train_pred_npast: the most recent target values/predictions, in
            chronological order (oldest first, newest last), at least
            ``npast`` long. Copied into a 1-D float array.
            NOTE(review): confirm callers pass this oldest-first
            (e.g. ``y_pred[-npast:]``); the lag features read it that way.
        model: fitted estimator exposing ``predict``.
        scaler: fitted transformer exposing ``transform``.
        npast: number of lag features (``lag1``..``lag{npast}``).
        column, mu, sigma: stored on the instance; not read by any method of
            this class.
    """

    def __init__(self, X_train_last, y_train_pred_npast , model, scaler, npast, column, mu, sigma):
        self.X_new = X_train_last.copy()
        # Private 1-D float copy: positional indexing is unambiguous (no
        # label lookups as with a Series) and float predictions written back
        # into the buffer are never truncated by an integer dtype.
        self.y_train_pred_npast = np.array(y_train_pred_npast, dtype=float).ravel()
        self.model = model
        self.scaler = scaler
        self.npast = npast
        self.column = column
        self.mu = mu
        self.sigma = sigma

    def predict_future_steps(self, nfuture):
        """Predict ``nfuture`` consecutive business days.

        Args:
            nfuture: number of steps to forecast.

        Returns:
            numpy.ndarray: the per-step outputs of ``model.predict`` stacked
            along a new first axis (length ``nfuture``).
        """
        predictions = []
        for _ in range(nfuture):
            # Move the single feature row to the next business day.
            next_index = self.X_new.index[0] + pd.tseries.offsets.BDay(1)
            self.X_new.index = pd.DatetimeIndex([next_index])

            predictions.append(self.generate_datapoint_predict())

        return np.array(predictions)

    def generate_datapoint_predict(self):
        """Rebuild the feature row for the current date and predict one step.

        Returns:
            The raw output of ``model.predict`` for the single row.
        """
        # Earlier versions used a temporary "date" column and dropped it
        # afterwards, which also removed any pre-existing "date" column.
        self.X_new = self.X_new.drop(columns=["date"], errors="ignore")

        # Calendar features straight from the (already advanced) index.
        stamp = self.X_new.index
        self.X_new["year"] = stamp.year
        self.X_new["month"] = stamp.month
        self.X_new["day"] = stamp.day

        # Fresh standard-normal noise feature. (A geometric-Brownian-motion
        # variant driven by mu/sigma was previously sketched here but is
        # disabled.)
        self.X_new['Noise'] = np.random.normal(0, 1, 1)

        # Lag features: lag1 is the newest value in the history buffer.
        for i in range(1, self.npast + 1):
            self.X_new[f'lag{i}'] = self.y_train_pred_npast[-i]
        self.X_new = self.X_new.dropna()

        # Scale the new data point
        X_new_scaled = self.scaler.transform(self.X_new)

        # Predict next value
        y_next = self.model.predict(X_new_scaled)

        # Slide the history window: drop the oldest value and append the new
        # prediction at the end, the same end the lag features read from.
        # (Inserting at index 0 instead would leave lag1 pointing at a stale
        # value on the following step.)
        if self.y_train_pred_npast.size:
            history = np.roll(self.y_train_pred_npast, -1)
            history[-1] = np.asarray(y_next).ravel()[0]
            self.y_train_pred_npast = history
        return y_next