In [23]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from scipy.stats import norm

# === Step 1: Stock Price Retrieval ===
def get_stock_data(ticker, start_date, end_date):
    """
    Download OHLCV price history for one ticker via yfinance.

    Parameters:
    ticker (str): Stock ticker symbol passed to ``yf.download``.
    start_date, end_date: Date range passed through as ``start`` / ``end``.

    Returns:
    pd.DataFrame: Columns 'Date', 'Open', 'High', 'Low', 'Close', 'Volume'
    with a single-level header, one row per downloaded bar.
    """
    df = yf.download(ticker, start=start_date, end=end_date)

    # yfinance can return a two-level (Price, Ticker) header even for a single
    # ticker. Left as-is, df['Close'] would be a one-column DataFrame and
    # row['Close'] a Series, which breaks scalar arithmetic downstream.
    # Only flatten when there is exactly one ticker, so field names stay unique.
    if isinstance(df.columns, pd.MultiIndex) and df.columns.get_level_values(-1).nunique() == 1:
        df.columns = df.columns.get_level_values(0)

    # Move the date index into a regular 'Date' column (no in-place mutation).
    df = df.reset_index()
    return df[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']]


# === Step 2: Synthetic Option Generator (Black-Scholes) ===
def black_scholes_price(S, K, T, r, sigma, option_type='call'):
    """
    Price a European option with the Black-Scholes formula (scalar inputs).

    Parameters:
    S (float): Spot price of the underlying.
    K (float): Strike price.
    T (float): Time to expiry in years.
    r (float): Continuously compounded risk-free rate.
    sigma (float): Annualized volatility.
    option_type (str): 'call' for a call; any other value is priced as a put.

    Returns:
    float: Option premium. At or past expiry (T <= 0) this is the intrinsic
    value. With non-positive volatility and T > 0 it is the deterministic
    limit of the formula, i.e. intrinsic value against the discounted strike.
    """
    is_call = option_type == 'call'

    # At or past expiry: plain intrinsic value.
    if T <= 0:
        return max(S - K, 0) if is_call else max(K - S, 0)

    discounted_strike = K * np.exp(-r * T)

    # Zero-volatility limit. The strike must still be discounted here,
    # otherwise the price jumps as sigma -> 0 (an ATM call with r > 0 is
    # worth S - K*exp(-rT) > 0, not 0). Negative sigma is treated as zero.
    if sigma <= 0:
        return max(S - discounted_strike, 0) if is_call else max(discounted_strike - S, 0)

    vol_sqrt_t = sigma * np.sqrt(T)
    d1 = (np.log(S / K) + (r + 0.5 * sigma**2) * T) / vol_sqrt_t
    d2 = d1 - vol_sqrt_t
    if is_call:
        return S * norm.cdf(d1) - discounted_strike * norm.cdf(d2)
    return discounted_strike * norm.cdf(-d2) - S * norm.cdf(-d1)


# === Step 3: Generate Synthetic Historical Options ===
def simulate_historical_options(stock_df, strike_offsets=(-0.05, 0, 0.05), dtes=(7, 14), r=0.02, vol_window=20):
    """
    Generate synthetic option contracts for training purposes.

    For every row that has a full trailing window of closes, a call and a put
    are priced with ``black_scholes_price`` for each strike offset and each
    days-to-expiry value, then labelled by the realized payoff at expiry.

    Parameters:
    stock_df (pd.DataFrame): Price history with 'Date' and 'Close' columns;
        expected to hold one row per date. Rows are processed in date order.
    strike_offsets (iterable of float): Strike as a fraction away from spot
        (0 is at-the-money).
    dtes (iterable of int): Calendar days to expiry. A contract is only
        emitted when date + dte is itself present in stock_df['Date'].
    r (float): Risk-free rate passed to the pricer.
    vol_window (int): Number of closes (current row included) used for the
        annualized historical volatility. Values below 3 leave fewer than two
        returns, so no volatility can be computed and no rows are produced.

    Returns:
    pd.DataFrame: One row per synthetic contract with keys 'date', 'expiry',
    'spot', 'strike', 'option_type', 'premium', 'close_at_expiry', 'profit',
    'label', 'volatility', 'dte', 'offset_pct'. Empty if nothing qualifies.
    """
    synthetic_options = []
    if stock_df.empty:
        return pd.DataFrame(synthetic_options)

    # Chronological order makes "the last vol_window closes" a positional
    # slice instead of a full-frame filter on every row.
    history = stock_df.sort_values('Date', kind='stable').reset_index(drop=True)
    dates = history['Date']
    closes = history['Close']

    # Date -> close lookup built once; the first row wins on a repeated date.
    close_by_date = history.drop_duplicates('Date').set_index('Date')['Close']

    first_full_window = max(vol_window - 1, 0)
    for pos in range(first_full_window, len(history)):
        date = dates.iloc[pos]
        spot = closes.iloc[pos]

        window_closes = closes.iloc[pos - vol_window + 1:pos + 1]
        daily_returns = window_closes.pct_change().dropna()
        sigma = daily_returns.std() * np.sqrt(252)  # annualized
        # NaN volatility (too few returns) would otherwise yield NaN premiums
        # that are silently labelled 0, so skip it together with zero vol.
        if not np.isfinite(sigma) or sigma <= 0:
            continue

        for offset in strike_offsets:
            strike = round(spot * (1 + offset), 2)
            for dte in dtes:
                expiry_date = date + timedelta(days=dte)
                if expiry_date not in close_by_date.index:
                    continue

                T = dte / 365
                # The expiry close is the same for the call and the put.
                close_at_expiry = close_by_date.loc[expiry_date]

                for option_type in ('call', 'put'):
                    premium = black_scholes_price(S=spot, K=strike, T=T, r=r, sigma=sigma, option_type=option_type)
                    if option_type == 'call':
                        intrinsic = max(close_at_expiry - strike, 0)
                    else:
                        intrinsic = max(strike - close_at_expiry, 0)
                    profit = intrinsic - premium

                    synthetic_options.append({
                        'date': date,
                        'expiry': expiry_date,
                        'spot': spot,
                        'strike': strike,
                        'option_type': option_type,
                        'premium': premium,
                        'close_at_expiry': close_at_expiry,
                        'profit': profit,
                        'label': int(profit > 0),
                        'volatility': sigma,
                        'dte': dte,
                        'offset_pct': offset
                    })

    return pd.DataFrame(synthetic_options)


# === Step 4: Forward-Looking Real Options Chain ===
from datetime import datetime

def get_live_option_chain(ticker):
    """
    Fetches real options from yfinance for all expiries from today onward.
    This is used for live inference only (not historical training).

    Parameters:
    ticker (str): Stock ticker symbol.

    Returns:
    pd.DataFrame: Calls and puts for every expiry dated today or later,
    concatenated, with added 'option_type' ('call' / 'put') and 'expiry'
    (the 'YYYY-MM-DD' string) columns. Empty if the ticker lists no expiries
    or no chain could be retrieved. Per-expiry retrieval errors are printed
    and that expiry is skipped.
    """
    stock = yf.Ticker(ticker)
    all_expiries = stock.options

    if not all_expiries:
        print(f"No option expiries available for {ticker}")
        return pd.DataFrame()

    # Compare calendar dates, not datetimes: an expiry string parses to
    # midnight, which is earlier than "now" for the whole of the expiry day,
    # so a datetime comparison would drop contracts expiring today.
    today = datetime.today().date()

    # Filter to include only expiries dated today or later
    filtered_expiries = [
        expiry for expiry in all_expiries
        if datetime.strptime(expiry, "%Y-%m-%d").date() >= today
    ]

    options_data_list = []
    for expiry in filtered_expiries:
        try:
            chain = stock.option_chain(expiry)
            for label, df in [('call', chain.calls), ('put', chain.puts)]:
                if df.empty:
                    continue
                # Copy so the tagging columns do not touch yfinance's frame.
                df = df.copy()
                df['option_type'] = label
                df['expiry'] = expiry
                options_data_list.append(df)
        except Exception as e:
            # Best effort: report the failed expiry and keep the others.
            print(f"Error retrieving options for {ticker} on {expiry}: {e}")

    if not options_data_list:
        return pd.DataFrame()
    return pd.concat(options_data_list, ignore_index=True)


In [27]:
# Pull the live AAPL option chain (network call through yfinance) and print
# the last five rows as a quick sanity check of the combined frame.
AAPL = get_live_option_chain('AAPL')
print(AAPL.tail())


           contractSymbol             lastTradeDate  strike  lastPrice  \
2071  AAPL271217P00320000 2025-04-15 18:22:21+00:00   320.0     118.77   
2072  AAPL271217P00325000 2025-04-23 14:38:36+00:00   325.0     117.90   
2073  AAPL271217P00330000 2025-04-16 19:54:23+00:00   330.0     122.34   
2074  AAPL271217P00370000 2025-04-15 16:23:18+00:00   370.0     166.14   
2075  AAPL271217P00400000 2025-04-15 17:54:04+00:00   400.0     198.07   

         bid    ask     change  percentChange  volume  openInterest  \
2071  109.85  114.0   0.000000       0.000000   171.0             2   
2072  114.00  119.0   0.000000       0.000000     2.0             1   
2073  119.65  124.0 -12.980011      -9.592086     1.0             0   
2074  159.00  164.0   0.000000       0.000000     NaN             0   
2075  189.00  194.0   0.000000       0.000000     NaN             0   

      impliedVolatility  inTheMoney contractSize currency option_type  \
2071           0.181038        True      REGULAR      U

In [17]:
# Download ^VIX history for the 2023 date range (network call through yfinance).
# The recorded output shows the result has a two-level (Price, Ticker) column header.
vix = yf.download("^VIX", start="2023-01-01", end="2023-12-31")
# Bare last expression: the notebook renders the frame.
vix

[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,^VIX,^VIX,^VIX,^VIX,^VIX
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2023-01-03,22.900000,23.76,22.730000,23.090000,0
2023-01-04,22.010000,23.27,21.940001,22.930000,0
2023-01-05,22.459999,22.92,21.969999,22.200001,0
2023-01-06,21.129999,22.90,21.000000,22.690001,0
2023-01-09,21.969999,21.98,21.270000,21.750000,0
...,...,...,...,...,...
2023-12-22,13.030000,13.96,13.000000,13.720000,0
2023-12-26,12.990000,13.80,12.960000,13.770000,0
2023-12-27,12.430000,13.04,12.370000,13.020000,0
2023-12-28,12.470000,12.65,12.380000,12.440000,0


In [19]:
from arch import arch_model
# NOTE(review): `df` is not assigned at cell level in any visible cell, and the
# recorded run fails with KeyError: 'Close'. This cell depends on leftover
# kernel state; presumably it should read a price-history frame with a 'Close'
# column (e.g. the result of get_stock_data) - confirm and define it explicitly.
# Returns are scaled by 100, i.e. expressed in percent.
returns = df['Close'].pct_change().dropna() * 100
# GARCH volatility model with p=1, q=1 fitted on the percent returns.
model = arch_model(returns, vol='GARCH', p=1, q=1)
# disp='off' is passed to fit(); presumably this silences optimizer output - confirm.
res = model.fit(disp='off')
# Request a 5-step forecast and keep only the last row of its variance table.
vol_forecast = res.forecast(horizon=5).variance[-1:]


KeyError: 'Close'