In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import date
import seaborn as sns
import plotly.graph_objects as go
import random

import matplotlib.pyplot as plt
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import plotly.graph_objects as go

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.mixture import GaussianMixture


from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.regression.linear_model import OLS
from statsmodels.tools import add_constant

from arch import arch_model

import scipy.stats as stats
from scipy.stats import probplot, laplace, norm, t
from scipy.optimize import minimize
from scipy.stats import norm
from scipy.optimize import brentq
from scipy.interpolate import griddata, Rbf


from numdifftools import Hessian

import statsmodels.api as sm
from statsmodels.nonparametric.kde import KDEUnivariate
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima_process import ArmaProcess

import pymc as pm
import pytensor.tensor as pt
import arviz as az

import tensorflow as tf
from tensorflow import keras


#from tensorflow.keras.utils import plot_model


######################################
#from pmdarima import auto_arima
#from diptest import diptest

In [None]:
class ImpliedVolatility:
    """Implied- and local-volatility utilities for a set of option quotes.

    Each quote is described by the parallel arrays ``strikes``,
    ``expirations``, ``option_type`` and ``obs_prices``.  Implied
    volatilities are obtained by inverting the Black-Scholes-Merton price
    (no dividends) with a bracketing root finder; local volatilities are
    derived either from a price surface (Dupire) or from the term structure
    of implied total variance.
    """

    def __init__(self, df, log_return_series, strikes, expirations, option_type, obs_prices, r=0.01):
        """
        df: historical price dataframe (datetime indexed); the last 'Close'
            value is used as the spot price
        log_return_series: pd.Series of log returns (stored, not used by
            the methods of this class)
        strikes: array-like strikes of options
        expirations: array-like time to maturity in years
        option_type: 'call' or 'put'; either one value per option or a
            single string applied to every option
        obs_prices: observed option prices, same length as strikes
        r: risk-free rate (annual)

        Raises ValueError if the per-option arrays do not share one shape.
        """
        self.df = df
        self.log_return = log_return_series
        self.S0 = df['Close'].iloc[-1]  # current spot price
        self.strikes = np.array(strikes)
        self.expirations = np.array(expirations)
        self.obs_prices = np.array(obs_prices)

        kinds = np.array(option_type)
        if kinds.ndim == 0:
            # A bare 'call'/'put' would become a 0-d array that cannot be
            # indexed per option; repeat it for every quote instead.
            kinds = np.full(self.strikes.shape, kinds.item())
        self.option_type = kinds

        if not (self.strikes.shape == self.expirations.shape
                == self.option_type.shape == self.obs_prices.shape):
            raise ValueError(
                "strikes, expirations, option_type and obs_prices must have the same length"
            )

        self.r = r
        self.implied_vols = np.zeros_like(self.obs_prices, dtype=float)

    # ---------------- Dupire helper ----------------
    def _dupire_from_surface(self, C_surface, K_grid, T_grid):
        """Apply Dupire's formula to a price surface sampled on a meshgrid.

        The grids are expected in ``np.meshgrid(K_vals, T_vals)`` layout:
        strikes vary along axis 1 and expirations along axis 0.  Derivatives
        use the actual grid coordinates, so the spacing may be non-uniform.
        Points where the formula is undefined (zero curvature, NaN prices)
        are returned as NaN; negative local variances are clipped to zero.
        """
        K_axis = K_grid[0, :]
        T_axis = T_grid[:, 0]

        dC_dT = np.gradient(C_surface, T_axis, axis=0)
        dC_dK = np.gradient(C_surface, K_axis, axis=1)
        d2C_dK2 = np.gradient(dC_dK, K_axis, axis=1)

        numer = dC_dT + self.r * K_grid * dC_dK
        denom = 0.5 * K_grid**2 * d2C_dK2

        # Division by a vanishing curvature is expected on flat regions of
        # the surface; silence the warning and mark the result as undefined.
        with np.errstate(divide='ignore', invalid='ignore'):
            sigma_loc2 = numer / denom
        sigma_loc2 = np.where(np.isfinite(sigma_loc2), sigma_loc2, np.nan)
        sigma_loc2 = np.clip(sigma_loc2, 0, None)  # avoid negatives

        return np.sqrt(sigma_loc2)

    def local_vol_dupire(self, K_grid, T_grid):
        """
        Compute Dupire local volatility directly from option prices, using
        cubic scattered-data interpolation of the quotes.

        When the quotes mix calls and puts only the calls are used, so that
        a single consistent price surface is differentiated; if there are
        no calls at all, every quote is used.

        K_grid, T_grid: 2D meshgrid of strikes and expirations
        Returns: local volatility surface (same shape as K_grid); NaN where
                 the interpolation or the Dupire ratio is undefined
        """
        is_call = self.option_type == "call"
        mask = is_call if is_call.any() else np.ones(is_call.shape, dtype=bool)

        # Interpolate option price surface on grid
        points = np.column_stack([self.strikes[mask], self.expirations[mask]])
        values = self.obs_prices[mask]
        C_surface = griddata(points, values, (K_grid, T_grid), method='cubic')

        return self._dupire_from_surface(C_surface, K_grid, T_grid)

    def _bsm_price(self, sigma, index):
        """Black-Scholes-Merton formula for a single option using self variables"""
        S = self.S0
        K = self.strikes[index]
        T = self.expirations[index]
        is_call = self.option_type[index] == 'call'

        # Expired option: intrinsic value.
        if T <= 0:
            return max(0.0, S - K) if is_call else max(0.0, K - S)

        # Zero volatility: the d1/d2 terms would divide by zero, so return
        # the deterministic limit (intrinsic value against the discounted strike).
        if sigma <= 0:
            forward_intrinsic = S - K * np.exp(-self.r * T)
            return max(0.0, forward_intrinsic) if is_call else max(0.0, -forward_intrinsic)

        vol_sqrt_t = sigma * np.sqrt(T)
        d1 = (np.log(S / K) + (self.r + 0.5 * sigma**2) * T) / vol_sqrt_t
        d2 = d1 - vol_sqrt_t

        if is_call:
            return S * norm.cdf(d1) - K * np.exp(-self.r * T) * norm.cdf(d2)
        return K * np.exp(-self.r * T) * norm.cdf(-d2) - S * norm.cdf(-d1)

    # ---------------- Implied volatility ----------------
    def _implied_vol(self, index):
        """
        Find implied volatility for option at given index using self variables.

        Searches sigma in [1e-6, 5.0].  Returns NaN when the price is not
        finite, the option has no time left (the price no longer depends on
        sigma), or no root exists inside the bracket.
        """
        price = self.obs_prices[index]
        if not np.isfinite(price) or not self.expirations[index] > 0:
            return np.nan
        try:
            return brentq(lambda sigma: self._bsm_price(sigma, index) - price,
                          1e-6, 5.0)
        except (ValueError, RuntimeError):
            # ValueError: root not bracketed / NaN objective;
            # RuntimeError: the solver did not converge.
            return np.nan

    def compute_implied_vols(self):
        """
        Compute implied volatilities for all options using self attributes.

        Stores the result in ``self.implied_vols`` (NaN where no implied
        volatility could be found), discards any cached time-only local
        volatility curve, and returns the array.
        """
        self.implied_vols = np.array(
            [self._implied_vol(i) for i in range(len(self.obs_prices))],
            dtype=float,
        )
        # The cached curve was derived from the previous implied vols.
        self.__dict__.pop('local_vol_time', None)
        return self.implied_vols

    # ---------------- Plot ----------------
    def plot_vol_smile(self, T_fixed=None):
        """
        Plot volatility smile for a given expiration using self variables.

        T_fixed: expiration (years) to plot; defaults to the smallest
                 expiration present.  Matching is tolerance-based so a value
                 that differs only by floating-point noise is still found.
        """
        if T_fixed is None:
            T_fixed = np.unique(self.expirations)[0]
        mask = np.isclose(self.expirations, T_fixed)
        plt.figure(figsize=(8,5))
        plt.scatter(self.strikes[mask], self.implied_vols[mask], s=20, c='blue', marker='o')
        plt.xlabel('Strike')
        plt.ylabel('Implied Volatility')
        plt.title(f'Volatility Smile (T={T_fixed:.2f} years)')
        plt.grid(True)
        plt.show()

    def plot_maturity(self):
        """Plot IV vs expiration for the most populous strike"""
        # Find the strike that appears most frequently
        unique, counts = np.unique(self.strikes, return_counts=True)
        most_populous_strike = unique[np.argmax(counts)]

        # Mask for that strike
        mask = self.strikes == most_populous_strike

        plt.figure(figsize=(8,5))
        plt.scatter(self.expirations[mask], self.implied_vols[mask], s = 20, c='green', marker = 'o')
        plt.xlabel('Expiration (Years)')
        plt.ylabel('Implied Volatility')
        plt.title(f'IV vs Maturity for Strike = {most_populous_strike}')
        plt.grid(True)
        plt.show()

    def plot_vol_surface(self):
        """Interactive volatility surface (3D scatter of the quotes) using plotly"""
        # Loop names avoid `t`, which the module also imports from scipy.stats.
        hover_text = [
            f'Strike: {strike}<br>Exp: {tau:.2f}<br>IV: {vol:.2f}'
            for strike, tau, vol in zip(self.strikes, self.expirations, self.implied_vols)
        ]

        fig = go.Figure(data=[go.Scatter3d(
            x=self.strikes,
            y=self.expirations,
            z=self.implied_vols,
            mode='markers',
            marker=dict(
                size=5,
                color=self.implied_vols,       # color by implied vol
                colorscale='Viridis',
                opacity=0.8,
                colorbar=dict(title='Implied Vol')
            ),
            text=hover_text,
            hoverinfo='text'
        )])

        fig.update_layout(
            scene=dict(
                xaxis_title='Strike',
                yaxis_title='Expiration (Years)',
                zaxis_title='Implied Volatility'
            ),
            title='Interactive Implied Volatility Surface',
            width=900,
            height=700
        )
        fig.show()

    # ---------------- Local volatility from call prices ----------------
    def dupire_local_vol(self, K_grid, T_grid):
        """
        Compute Dupire local volatility from the call quotes only, using a
        smoothed multiquadric RBF fit of the call price surface.

        K_grid, T_grid: 2D meshgrid of strikes and expirations
        Returns: local volatility surface (same shape as K_grid); NaN where
                 the Dupire ratio is undefined
        """
        mask = self.option_type == "call"

        # Build RBF interpolator
        rbf_call = Rbf(
            self.strikes[mask],
            self.expirations[mask],
            self.obs_prices[mask],
            function='multiquadric',
            smooth=0.1
        )
        C_surface = rbf_call(K_grid, T_grid)

        return self._dupire_from_surface(C_surface, K_grid, T_grid)

    # ---------------- Plotting ----------------
    def dupire_plot_local_vol(self, K_vals, T_vals, method='iv'):
        """
        Plot the Dupire local volatility surface on the grid K_vals x T_vals.

        K_vals, T_vals: 1D arrays of strikes and expirations
        method: currently ignored (the surface always comes from call
                prices); kept so existing calls keep working
        """
        K_grid, T_grid = np.meshgrid(K_vals, T_vals)
        sigma_loc = self.dupire_local_vol(K_grid, T_grid)
        title = 'Dupire Local Volatility (from Call Prices)'

        fig = go.Figure(data=[go.Surface(
            x=K_grid, y=T_grid, z=sigma_loc,
            colorscale='Viridis', colorbar=dict(title='Local Vol')
        )])

        fig.update_layout(
            scene=dict(
                xaxis_title='Strike',
                yaxis_title='Expiration (Years)',
                zaxis_title='Local Volatility'
            ),
            title=title
        )
        fig.show()

    # ---------------- Local volatility: time-only model ----------------
    def time_local_vol(self):
        """
        Compute local volatility as a function of time only,
        assuming implied volatility is strike-independent.

        Uses total variance derivative:
            sigma_loc^2(T) = d/dT [ T * sigma_imp(T)^2 ]

        The implied vol of a maturity is the mean of its finite implied
        vols; maturities without any finite implied vol are skipped.

        Returns: (T_fine, sigma_loc), 200 evenly spaced maturities and the
                 local volatility at each.  The result is cached on the
                 instance until compute_implied_vols() is called again.
        Raises ValueError if fewer than two maturities have a usable
        implied volatility.
        """
        # If already computed, return cached values
        if hasattr(self, 'local_vol_time'):
            return self.local_vol_time

        from scipy.interpolate import UnivariateSpline

        # Step 1: average the usable implied vols of each maturity
        maturities = []
        mean_ivs = []
        for T in np.unique(self.expirations):
            ivs = self.implied_vols[self.expirations == T]
            ivs = ivs[np.isfinite(ivs)]
            if ivs.size:
                maturities.append(T)
                mean_ivs.append(ivs.mean())

        if len(maturities) < 2:
            raise ValueError(
                "time_local_vol needs finite implied volatilities for at least two maturities"
            )
        maturities = np.array(maturities)
        mean_ivs = np.array(mean_ivs)

        # Step 2: compute total variance
        total_var = maturities * mean_ivs**2

        # Step 3: smooth total variance to reduce noise.  The spline needs
        # more data points than its degree, so lower the degree when only
        # two or three maturities are available.
        degree = min(3, len(maturities) - 1)
        spline = UnivariateSpline(maturities, total_var, k=degree, s=1e-6)

        # Step 4: derivative = local variance
        T_fine = np.linspace(maturities.min(), maturities.max(), 200)
        wprime = spline.derivative()(T_fine)
        wprime = np.clip(wprime, 0, None)   # enforce positivity
        sigma_loc = np.sqrt(wprime)

        # Save for later use
        self.local_vol_time = (T_fine, sigma_loc)

        return T_fine, sigma_loc

    def time_plot_local_vol(self):
        """Plot the time-only local volatility curve"""
        T_fine, sigma_loc = self.time_local_vol()

        plt.figure(figsize=(8,5))
        plt.plot(T_fine, sigma_loc, 'b-', lw=2)
        plt.xlabel("Maturity (Years)")
        plt.ylabel("Local Volatility σ_loc(t)")
        plt.title("Time-dependent Local Volatility (Dupire special case)")
        plt.grid(True)
        plt.show()