In [None]:
from datetime import date
import random
import time
import yfinance as yf
import pandas as pd

import seaborn as sns

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

from numpy.fft import fft, ifft, fftshift
import numpy as np
from numpy import log, sqrt, exp


from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.mixture import GaussianMixture


from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.stats.diagnostic import acorr_ljungbox

import scipy.stats as stats
from scipy.stats import probplot, laplace, norm, t, poisson
from scipy.linalg import solve_banded
from scipy.optimize import minimize, differential_evolution
from scipy.integrate import quad
from scipy.special import roots_laguerre
from scipy.interpolate import interp1d
from scipy.sparse import diags, kron, identity, csr_matrix
from scipy.sparse.linalg import spsolve

import statsmodels.api as sm
from statsmodels.nonparametric.kde import KDEUnivariate
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_process import ArmaProcess

#import pymc as pm
#import arviz as az

from tensorflow import keras
#from tensorflow.keras.utils import plot_model

#import pyswarms as ps

######################################
#from pmdarima import auto_arima
#from diptest import diptest

In [None]:
class StockData:
    def __init__(self, ticker, start, end, column):
        self.ticker = ticker
        self.start = start
        self.end = end
        self.df = None
        self.column=column

    def fetch_data_HFD(self):
        """Load intraday closes from the HFD workbook, window them and plot them.

        Reads the ``Date``, ``minute`` and ``Close`` columns of ``Sheet1``,
        joins ``Date`` and ``minute`` into one datetime index, keeps the rows
        whose timestamp lies in ``[self.start, self.end]`` (no upper bound
        when ``self.end`` is ``None``), drops missing values, stores a copy on
        ``self.df_hfd``, prints a summary and shows a line plot.

        Returns:
            The one-column (``Close``) DataFrame stored on ``self.df_hfd``.
        """
        start = pd.to_datetime(self.start)
        end = pd.to_datetime(self.end) if self.end is not None else None

        # Load data
        filepath = '/content/drive/My Drive/LEARN/Finance/quantitative finance/Volatility-models/HFD_SPY.xlsx' #Courtesy CQF - Fitch
        sheet = 'Sheet1'

        df = pd.read_excel(filepath, sheet_name=sheet, usecols=['Date', 'minute', 'Close'])
        # Date and minute live in separate columns; glue them into one timestamp.
        df['Date'] = pd.to_datetime(df['Date'].astype(str) + ' ' + df['minute'].astype(str))

        df = df.set_index('Date')[['Close']]

        in_window = df.index >= start
        if end is not None:
            # The upper bound used to be parsed and then ignored; apply it so
            # that self.end actually limits the returned window.
            in_window &= df.index <= end
        df = df[in_window].dropna()
        self.df_hfd = df.copy()

        df.info()
        print(df.head(5))

        # Plot intraday data
        plt.figure(figsize=(14, 6))
        plt.plot(df.index, df['Close'], label='Close Price', color='blue')
        plt.title('Intraday Close Price Data')
        plt.xlabel('Datetime')
        plt.ylabel('Close Price')
        plt.grid(True)
        plt.legend()
        plt.tight_layout()
        plt.show()

        return self.df_hfd

    def daily_log_return_HFD(self):

        daily_close = self.df_hfd['Close'].resample('1D').last().dropna()
        # Calculate daily log returns
        daily_log_return = np.log(daily_close / daily_close.shift(1)).dropna()

        return daily_log_return

#######################################################################################################

    def daily_log_return(self):
        daily_log_return = np.log(self.df['Close'] / self.df['Close'].shift(1)).dropna()
        return daily_log_return

    def fetch_data(self):
        """Load daily prices from the local workbook, plot them, return one column.

        Reads sheet ``SPY_last_year``, indexes the frame by its ``Price``
        column, keeps rows whose index is ``>= self.start``, sorts by index,
        drops rows with missing values and stores the result on ``self.df``.
        It then plots every column except ``Volume`` on the left axis and
        ``Volume`` on a secondary axis.

        Returns:
            A copy of ``self.df`` restricted to the single column named by
            ``self.column``.
        """
        # Live alternative (disabled): yf.download(self.ticker, start=self.start,
        # end=self.end), then drop column level 1, rename the columns to
        # ['Close', 'High', 'Low', 'Open', 'Volume'] and name the index 'Date'.

        # Read the XLSX export.
        filepath = '/content/drive/My Drive/LEARN/Finance/quantitative finance/Volatility-models/SPY_last_year.xlsx'
        sheet = 'SPY_last_year'
        # NOTE(review): rows 1 and 2 are presumably the extra 'Ticker'/'Date'
        # header rows of a yfinance export -- confirm against the workbook.
        df = pd.read_excel(filepath, sheet_name=sheet, skiprows=[1, 2])

        df.set_index('Price', inplace=True)

        # Keep only rows on/after the requested start. This filtered frame
        # used to be overwritten by an unfiltered copy right away, which made
        # self.start a no-op.
        self.df = df[df.index >= self.start].copy()
        print(self.df.head())
        print(self.df.tail())

        self.df = self.df.sort_index()
        self.df = self.df.dropna()
        self.df.info()

        # Prices on the left axis, volume on a secondary right axis.
        ax = self.df.drop(columns=["Volume"]).plot(figsize=(12, 6), title=f"{self.ticker} Stock Prices")
        ax.set_ylabel("Stock Price")
        ax2 = ax.twinx()
        ax2.set_ylabel("Volume")
        ax2.plot(self.df.index, self.df["Volume"], color="gray", alpha=0.5, linestyle="dashed", label="Volume")
        ax2.legend(loc="upper left")
        plt.show()

        # Keep only the first header level (unchanged for a flat header).
        self.df.columns = self.df.columns.get_level_values(0)
        new_df = self.df[[self.column]].copy()
        new_df.info()

        return new_df

    def download_option_data(self):
        """Load the saved option chain and return its columns plus a tidy frame.

        Reads sheet ``SPY_options_chain`` of the local workbook, converts the
        ``expiration`` column to datetimes and extracts the ``Type``,
        ``lastPrice``, ``strike``, ``impliedVolatility`` and ``expiration``
        columns in row order.

        Returns:
            tuple: ``(expirations, strikes, option_types, obs_prices, records)``.
            The first four are NumPy arrays built from the ``expiration``,
            ``strike``, ``Type`` and ``lastPrice`` columns. ``records`` is a
            DataFrame with columns ``expirations``, ``option_type``,
            ``strike``, ``obs_price`` and ``IV``.
        """
        # Live alternative (disabled): stock = yf.Ticker(self.ticker); for each
        # expiration in stock.options, take stock.option_chain(expiration).calls
        # and .puts sorted by 'strike', tagging the rows 'call' / 'put'.

        # Read the Excel workbook and make sure expirations are datetimes.
        filepath = '/content/drive/My Drive/LEARN/Finance/quantitative finance/Volatility-models/SPY_options_chain.xlsx'
        dff = pd.read_excel(filepath, sheet_name='SPY_options_chain')
        dff['expiration'] = pd.to_datetime(dff['expiration'])
        # info() prints its report itself and returns None, so do not print() it.
        dff.info()

        # Pull whole columns at once instead of five .iloc lookups per row.
        option_type = dff['Type'].tolist()
        obs_price = dff['lastPrice'].tolist()
        strike = dff['strike'].tolist()
        IV = dff['impliedVolatility'].tolist()
        expirations_values = dff['expiration'].tolist()

        records = pd.DataFrame({
            'expirations': pd.Series(expirations_values),
            'option_type': pd.Series(option_type, dtype='str'),
            'strike': pd.Series(strike, dtype='float'),
            'obs_price': pd.Series(obs_price, dtype='float'),
            'IV': pd.Series(IV, dtype='float')
        })

        print(records)
        records.info()
        return np.array(expirations_values), np.array(strike), np.array(option_type), np.array(obs_price), records
