In [None]:
from datetime import date
import random
import time
import yfinance as yf
import pandas as pd

import seaborn as sns

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

from numpy.fft import fft, ifft, fftshift
import numpy as np
from numpy import log, sqrt, exp


from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.mixture import GaussianMixture


from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.stats.diagnostic import acorr_ljungbox
import scipy.stats as stats
from scipy.stats import probplot, laplace, norm, t, poisson
from scipy.linalg import solve_banded
from scipy.optimize import minimize, differential_evolution
from scipy.integrate import quad
from scipy.special import roots_laguerre
from scipy.interpolate import interp1d
from scipy.sparse import diags, kron, identity, csr_matrix
from scipy.sparse.linalg import spsolve

import statsmodels.api as sm
from statsmodels.nonparametric.kde import KDEUnivariate
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_process import ArmaProcess

#import pymc as pm
#import arviz as az

from tensorflow import keras
#from tensorflow.keras.utils import plot_model

#import pyswarms as ps

######################################
#from pmdarima import auto_arima
#from diptest import diptest

In [None]:
class OptionPredictor:
    def __init__(self, ticker, start, end, column):

        self.ticker = ticker
        self.start = start
        self.end = end
        self.column=column
        self.rfr = 0.07 - 0.03 #annualized risk free interest rate - annualized dividend yield
        self.risk_neutral = True

        self.nfuture = None
        self.npath = None
        self.model_type = None
        self.df=None
        self.strike = None
        self.obs_price = None
        self.option_type = None
        self.business_days = None
        self.exercise_days = None

    def data_initiation(self):
        """Load price history and the option chain, then derive day counts.

        Populates ``df``, ``strike``, ``option_type``, ``obs_price``,
        ``records``, ``business_days`` and ``exercise_days`` on the instance.
        """
        source = StockData(self.ticker, self.start, self.end, self.column)
        self.df = source.fetch_data()

        (
            raw_expirations,
            self.strike,
            self.option_type,
            self.obs_price,
            self.records,
        ) = source.download_option_data()

        # Day counts are measured from the last row of the price history.
        last_observation = pd.to_datetime(self.df.index[-1])
        expiry_dates = pd.to_datetime(raw_expirations)

        day_counts = []
        for expiry in expiry_dates:
            day_counts.append(
                np.busday_count(last_observation.date(), expiry.date())
            )
        self.business_days = np.array(day_counts)

        # Placeholder exercise dates: for each expiration, the dates two and
        # three calendar days earlier, giving one row of two entries each.
        two_days = pd.Timedelta(days=2)
        three_days = pd.Timedelta(days=3)
        self.exercise_days = np.array(
            [[expiry - two_days, expiry - three_days] for expiry in expiry_dates]
        )

    def parameters_BSM(self):
        """Estimate the two GBM parameters from the configured price column.

        Returns:
            The pair returned by ``Bayesian_PP.log_return_GBM`` for the full
            price series; callers treat it as ``(mu_prior, sigma_prior)``.
        """
        estimate = Bayesian_PP().log_return_GBM
        mu, sigma = estimate(self.df[self.column].iloc[:])
        return mu, sigma

    def parameters_Merton(self):
        """Estimate the five Merton parameters from the configured price column.

        Returns:
            The five values returned by ``Bayesian_PP.log_return_Merton`` for
            the full price series; callers treat them as ``(mu_prior,
            sigma_prior, mu_j_prior, sigma_j_prior, lamda_j_prior)``.
        """
        estimate = Bayesian_PP().log_return_Merton
        mu, sigma, jump_mu, jump_sigma, jump_lamda = estimate(
            self.df[self.column].iloc[:]
        )
        return mu, sigma, jump_mu, jump_sigma, jump_lamda

    def parameters_Heston(self):
        """Calibrate the Heston parameters with ``heston_pso_minimizer``.

        Builds a ``loss_function`` from the loaded price data and option chain
        and asks it to search inside a fixed box of lower/upper bounds. The
        result is printed and returned.

        Returns:
            Tuple ``(rho, kappa, sigma_sigma, theta, inst_vol)`` as returned by
            the optimizer.
        """
        # NOTE(review): this estimate used to seed a hand-written initial guess
        # that was never passed to the optimizer. The dead guess was removed;
        # the call is kept in case the estimator has side effects. Confirm and
        # drop it if it has none.
        self.parameters_BSM()

        # Search box; presumably ordered like the returned tuple
        # (rho, kappa, sigma_sigma, theta, inst_vol) -- TODO confirm.
        lower_bounds = np.array([-1, 1, 0.01, 0.01, 0.0001])
        upper_bounds = np.array([0, 10, 0.1, 0.1, 0.01])
        bounds = (lower_bounds, upper_bounds)

        # NOTE(review): in the visible code self.npath is only assigned by
        # run_mcmc/run_btree, so it can still be None when this is reached from
        # the other run_* methods. Confirm loss_function tolerates that.
        loss_optimizer = loss_function(self.df, self.column, self.business_days, self.npath, self.rfr, self.risk_neutral, self.strike, self.option_type, self.obs_price)
        # NOTE(review): the meaning of the second argument (20) is defined by
        # heston_pso_minimizer and is not visible here.
        rho, kappa, sigma_sigma, theta, inst_vol = loss_optimizer.heston_pso_minimizer(bounds, 20)

        print ('rho', 'kappa', 'sigma_sigma', 'theta', 'inst_vol')
        print (rho, kappa, sigma_sigma, theta, inst_vol)

        return rho, kappa, sigma_sigma, theta, inst_vol

    def run_mcmc(self, npath, option_name, model_type, exercise_type):
        self.npath = npath
        records=self.records.copy()
        cal_price_vanilla =  np.zeros(len(self.business_days))
        cal_price_asian =  np.zeros(len(self.business_days))
        greeks = np.zeros((len(self.business_days), 5))
        maturity_years = self.business_days / 252

        if model_type == 'btree':
            mu_prior, sigma_prior = self.parameters_BSM()
        elif model_type == 'BSM':
            mu_prior, sigma_prior = self.parameters_BSM()
        elif model_type == 'Merton':
            mu_prior, sigma_prior, mu_j_prior, sigma_j_prior, lamda_j_prior = self.parameters_Merton()
        elif model_type == 'Heston':
            rho, kappa, sigma_sigma, sigma_prior, inst_vol = self.parameters_Heston()

        for i in range(len(self.business_days)):

            if i == 0 or self.business_days[i-1] != self.business_days[i]:
                self.model = Stochastic_prices_MC (self.df[self.column], self.business_days[i], self.df[self.column].iloc[-1], self.npath, self.rfr, self.risk_neutral)

                if model_type == 'btree':
                    args = (mu_prior, sigma_prior)
                    simulated_prices = self.model.simulate_future_prices_btree(*args)
                elif model_type == 'BSM':
                    args = (mu_prior, sigma_prior)
                    simulated_prices = self.model.simulate_future_prices_BSM(*args)
                elif model_type == 'Merton':
                    args = (mu_prior, sigma_prior, mu_j_prior, sigma_j_prior, lamda_j_prior)
                    simulated_prices = self.model.simulate_future_prices_Merton(*args)
                elif model_type == 'Heston':
                    args = (self.rfr, sigma_prior, rho, kappa, sigma_sigma, inst_vol)
                    simulated_prices, volatility = self.model.simulate_future_prices_Heston(*args)

                if self.business_days[i] < 50:
                    self.model.plot_simulated_price_paths(simulated_prices, self.npath)

            cal_price_vanilla[i], option_price = self.model.price_vanilla_options(simulated_prices, self.strike[i], self.option_type[i], exercise_type, self.exercise_days[i])

            if option_name == 'asian-geom':
                cal_price_asian[i], option_price = self.model.price_asian_options(simulated_prices, self.strike[i], self.option_type[i], exercise_type, self.exercise_days[i])

        records['Maturity'] = maturity_years
        records['Maturity'] = (records['Maturity'] * 100).astype(int) / 100.0
        records['cal_price_vanilla'] = cal_price_vanilla
        records['cal_price_asian'] = cal_price_asian
        #records['delta'] = greeks[:,0]
        #records['gamma'] = greeks[:,1]
        #records['theta'] = greeks[:,2]
        #records['vega'] = greeks[:,3]
        #records['rho'] = greeks[:,4]

        print (records)
        #analysis = OptionAnalysis()
        #analysis.plot_pricing_error_vs_maturity(records)

    def run_btree(self, option_name, model_type, exercise_type):
        self.npath = 100 #this variable is dummy for exact btree since it depends on the maturity
        records=self.records.copy()
        cal_price_vanilla =  np.zeros(len(self.business_days))
        cal_price_asian =  np.zeros(len(self.business_days))
        greeks = np.zeros((len(self.business_days), 5))
        maturity_years = self.business_days / 252

        mu_prior, sigma_prior = self.parameters_BSM()

        for i in range(len(self.business_days)):
            if i == 0 or self.business_days[i-1] != self.business_days[i]:
                self.model = exact_binomial_tree (self.df[self.column], self.business_days[i], self.df[self.column].iloc[-1], self.npath, self.rfr, self.risk_neutral)
                if model_type == 'CRR':
                    simulated_prices = self.model.simulate_future_prices_CRR(mu_prior, sigma_prior)

            cal_price_vanilla[i], option_price = self.model.price_vanilla_options(simulated_prices, self.strike[i], self.option_type[i], exercise_type, self.exercise_days[i])

            if option_name == 'asian-geom':
                cal_price_asian[i], option_price  = self.model.price_asian_options(simulated_prices, self.strike[i], self.option_type[i], exercise_type, self.exercise_days[i])

        records['Maturity'] = maturity_years
        records['Maturity'] = (records['Maturity'] * 100).astype(int) / 100.0
        records['cal_price_vanilla'] = cal_price_vanilla
        records['cal_price_asian'] = cal_price_asian
        #records['delta'] = greeks[:,0]
        #records['gamma'] = greeks[:,1]
        #records['theta'] = greeks[:,2]
        #records['vega'] = greeks[:,3]
        #records['rho'] = greeks[:,4]

        print (records)
        #analysis = OptionAnalysis()
        #analysis.plot_pricing_error_vs_maturity(records)

    def run_sde_closed(self, option_name, model_type):
        records=self.records.copy()
        maturity_years = self.business_days / 252
        cal_price_closed =  np.zeros(len(maturity_years))
        cal_price_asian =  np.zeros(len(maturity_years))
        greeks = np.zeros((len(self.business_days), 5))

        if model_type == 'BSM':
            mu_prior, sigma_prior = self.parameters_BSM()
        elif model_type == 'Merton':
            mu_prior, sigma_prior, mu_j_prior, sigma_j_prior, lamda_j_prior = self.parameters_Merton()
        elif model_type == 'Heston':
            rho, kappa, sigma_sigma, theta, inst_vol = self.parameters_Heston()

        for i in range(len(maturity_years)):
            if model_type == 'BSM':
                self.model = bsm_options_value(self.df[self.column].iloc[-1], self.strike[i], maturity_years[i], self.rfr, sigma_prior, self.option_type[i])
                cal_price_closed[i] , greeks[i,0],  greeks[i,1], greeks[i,2],  greeks[i,3],  greeks[i,4] = self.model.analytical()

                if option_name == 'asian-geom':
                    cal_price_asian[i] = self.model.analytical_asian()

            elif model_type == 'Merton':
                self.model = merton_options_value(self.df[self.column].iloc[-1], self.strike[i], maturity_years[i], self.rfr, sigma_prior, mu_j_prior, sigma_j_prior, lamda_j_prior, self.option_type[i])
                cal_price_closed[i], greeks[i,0],  greeks[i,1], greeks[i,2],  greeks[i,3],  greeks[i,4] = self.model.semi_analytical()

                if option_name == 'asian-geom':
                    cal_price_asian[i] = self.model.analytical_asian()

            elif model_type == 'Heston':
                self.model = heston_options_value(self.df[self.column].iloc[-1], self.strike[i], maturity_years[i], self.rfr, rho, kappa, sigma_sigma, self.rfr, theta, inst_vol, self.option_type[i])
                cal_price_closed[i] = self.model.heston_p1p2()

                if option_name == 'asian-geom':
                    cal_price_asian[i] = self.model.analytical_asian()

        records['Maturity'] = maturity_years
        records['Maturity'] = (records['Maturity'] * 100).astype(int) / 100.0
        records['cal_price_closed'] = cal_price_closed
        records['cal_price_asian'] = cal_price_asian
        #records['delta'] = greeks[:,0]
        #records['gamma'] = greeks[:,1]
        #records['theta'] = greeks[:,2]
        #records['vega'] = greeks[:,3]
        #records['rho'] = greeks[:,4]

        print (records)
        #analysis = OptionAnalysis()
        #analysis.plot_pricing_error_vs_maturity(records)


    def run_sde_numerical_pde(self, option_name, model_type, exercise_type):
        records=self.records.copy()
        maturity_years = self.business_days / 252

        cal_price_closed =  np.zeros(len(maturity_years))
        cal_price_asian =  np.zeros(len(maturity_years))

        greeks = np.zeros((len(self.business_days), 5))

        if model_type == 'BSM':
            mu_prior, sigma_prior = self.parameters_BSM()
        elif model_type == 'Merton':
            mu_prior, sigma_prior, mu_j_prior, sigma_j_prior, lamda_j_prior = self.parameters_Merton()
        elif model_type == 'Heston':
            rho, kappa, sigma_sigma, theta, inst_vol = self.parameters_Heston()


        print ('Crank-Nicholson PDE--------------------------------------------------')

        for i in range(len(maturity_years)):
            if model_type == 'BSM':
                self.model = bsm_options_value(self.df[self.column].iloc[-1], self.strike[i], maturity_years[i], self.rfr, sigma_prior, self.option_type[i])
                cal_price_closed[i] , *_ = self.model.analytical()

                if option_name == 'asian-geom':
                    cal_price_asian[i], out_S, out_vol, out_V, out_S_T, out_vol_T , out_option_price_at_ST = self.model.crank_nicolson_fd_asian(exercise_type, self.exercise_days[i])
                    if  i == 0:
                        self.model.plot_option_value_contour(out_S, out_vol, out_V, out_S_T, out_vol_T , out_option_price_at_ST, ax=None)

            elif model_type == 'Merton':
                self.model = merton_options_value(self.df[self.column].iloc[-1], self.strike[i], maturity_years[i], self.rfr, sigma_prior, mu_j_prior, sigma_j_prior, lamda_j_prior, self.option_type[i])
                cal_price_closed[i], *_ = self.model.semi_analytical()

                if option_name == 'asian-geom':
                    cal_price_asian[i], out_S, out_vol, out_V, out_S_T, out_vol_T , out_option_price_at_ST = self.model.analytical_asian()
                    if  i == 0:
                        self.model.plot_option_value(out_S, out_vol, out_V, out_S_T, out_vol_T , out_option_price_at_ST, ax=None)

            elif model_type == 'Heston':
                fig = None
                self.model = heston_options_value(self.df[self.column].iloc[-1], self.strike[i], maturity_years[i], self.rfr, rho, kappa, sigma_sigma, self.rfr, theta, inst_vol, self.option_type[i])
                cal_price_closed[i] = self.model.heston_p1p2()

                if option_name == 'asian-geom':
                    cal_price_asian[i], out_S, out_vol, out_V, out_S_T, out_vol_T , out_option_price_at_ST = self.model.analytical_asian()
                    if  i == 0:
                        self.model.plot_option_value(out_S, out_vol, out_V, out_S_T, out_vol_T , out_option_price_at_ST, ax=None)

        records['Maturity'] = maturity_years
        records['Maturity'] = (records['Maturity'] * 100).astype(int) / 100.0

        records['cal_price_closed'] = cal_price_closed
        records['cal_price_cn_asian'] = cal_price_asian

        print (records)
        #analysis = OptionAnalysis()
        #analysis.plot_pricing_error_vs_maturity(records)


    def run_sde_numerical_cf(self, option_name, model_type):
        records=self.records.copy()
        maturity_years = self.business_days / 252
        cal_price_closed =  np.zeros(len(maturity_years))

        cal_price_asian =  np.zeros(len(maturity_years))

        greeks = np.zeros((len(self.business_days), 5))

        if model_type == 'BSM':
            mu_prior, sigma_prior = self.parameters_BSM()
        elif model_type == 'Merton':
            mu_prior, sigma_prior, mu_j_prior, sigma_j_prior, lamda_j_prior = self.parameters_Merton()
        elif model_type == 'Heston':
            rho, kappa, sigma_sigma, theta, inst_vol = self.parameters_Heston()


        print ('Characteristic function - Fourier cosine expansion - COS FFT --------------------------------------------------')
        for i in range(len(maturity_years)):
            if model_type == 'BSM':
                self.model = bsm_options_value(self.df[self.column].iloc[-1], self.strike[i], maturity_years[i], self.rfr, sigma_prior, self.option_type[i])
                cal_price_closed[i] , *_ = self.model.analytical()

                if option_name == 'asian-geom':
                    cal_price_asian [i] = self.model.fft_cos_asian()


            elif model_type == 'Merton':
                self.model = merton_options_value(self.df[self.column].iloc[-1], self.strike[i], maturity_years[i], self.rfr, sigma_prior, mu_j_prior, sigma_j_prior, lamda_j_prior, self.option_type[i])
                cal_price_closed[i], *_ = self.model.semi_analytical()

                if option_name == 'asian-geom':
                    cal_price_asian [i] = self.model.fft_cos()

            elif model_type == 'Heston':
                self.model = heston_options_value(self.df[self.column].iloc[-1], self.strike[i], maturity_years[i], self.rfr, rho, kappa, sigma_sigma, self.rfr, theta, inst_vol, self.option_type[i])
                cal_price_closed[i] = self.model.heston_p1p2()

                if option_name == 'asian-geom':
                    cal_price_asian [i] = self.model.fft_cos()

        records['Maturity'] = maturity_years
        records['Maturity'] = (records['Maturity'] * 100).astype(int) / 100.0

        records['cal_price_closed'] = cal_price_closed
        records['cal_price_COS_asian'] = cal_price_asian

        print (records)
        #analysis = OptionAnalysis()
        #analysis.plot_pricing_error_vs_maturity(self.records)