In [1]:
# Vol Surface Constructor

# 1/ Collect Data from YF
# 2/ Process and clean data
# 3/ Calculate IV
# 4/ Construct Vol Surface

In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
import plotly.express as px


In [3]:
# Download the full option chain (calls and puts, every listed expiry) for one
# ticker from Yahoo Finance and stack it into a single DataFrame, `options_df`.

# Define the ticker symbol (e.g., SPY)
ticker = 'SPY'

# Get the data for the ticker
stock_data = yf.Ticker(ticker)

# Fetch expiration dates (as exposed by the ticker's `options` attribute)
expiration_dates = stock_data.options

# Fail early with a readable message instead of pandas' "No objects to
# concatenate" error further down.
if not expiration_dates:
    raise ValueError(f"No option expirations returned for ticker {ticker!r}")

# One DataFrame per (expiry, option type); concatenated once after the loop
option_data = []

for exp in expiration_dates:
    # Fetch the option chain for this expiration date
    option_chain = stock_data.option_chain(exp)

    # `assign` returns new frames tagged with expiry and option type, so the
    # objects handed back by yfinance are never mutated in place.
    option_data.append(option_chain.calls.assign(expiration_date=exp, type='call'))
    option_data.append(option_chain.puts.assign(expiration_date=exp, type='put'))

# Combine the data for all expiration dates into a single DataFrame.
# pd.concat already returns a DataFrame, so no extra wrapping is needed.
options_df = pd.concat(option_data, ignore_index=True)

# Check the resulting DataFrame
options_df


Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency,expiration_date,type
0,SPY241231C00350000,2024-12-31 14:30:08+00:00,350.0,238.13,237.23,240.15,-4.769989,-1.963767,9.0,14,5.162113,True,REGULAR,USD,2024-12-31,call
1,SPY241231C00355000,2024-11-12 15:48:53+00:00,355.0,244.82,250.01,252.99,0.000000,0.000000,45.0,42,9.437992,True,REGULAR,USD,2024-12-31,call
2,SPY241231C00360000,2024-12-19 20:43:08+00:00,360.0,229.00,227.25,230.15,0.000000,0.000000,3.0,0,4.916019,True,REGULAR,USD,2024-12-31,call
3,SPY241231C00365000,2024-12-19 20:46:26+00:00,365.0,223.51,223.04,225.04,0.000000,0.000000,98.0,0,3.585939,True,REGULAR,USD,2024-12-31,call
4,SPY241231C00370000,2024-12-20 17:37:39+00:00,370.0,225.52,217.31,220.09,0.000000,0.000000,8.0,1,4.637700,True,REGULAR,USD,2024-12-31,call
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7687,SPY270115P00745000,2024-12-26 19:33:13+00:00,745.0,143.80,153.64,158.50,0.000000,0.000000,1.0,2,0.105020,True,REGULAR,USD,2027-01-15,put
7688,SPY270115P00750000,2024-12-24 17:18:36+00:00,750.0,150.02,158.51,164.00,0.000000,0.000000,,0,0.111657,True,REGULAR,USD,2027-01-15,put
7689,SPY270115P00870000,2024-10-10 13:30:05+00:00,870.0,292.17,269.50,274.29,0.000000,0.000000,,0,0.000010,True,REGULAR,USD,2027-01-15,put
7690,SPY270115P00900000,2024-12-16 14:51:59+00:00,900.0,294.67,309.00,313.95,0.000000,0.000000,1.0,0,0.171105,True,REGULAR,USD,2027-01-15,put


In [4]:
# Data Cleaning
#
# Turns the raw option chain in `options_df` into the filtered frame used for
# plotting: parses expiry dates, drops incomplete / duplicate rows and unused
# columns, adds `normalised_price` (strike / spot) and `time_to_maturity`
# (calendar days), then keeps only near-the-money, short-dated, liquid contracts.
# The whole pipeline is one chain on fresh frames, so no column is ever assigned
# onto a filtered slice (avoids SettingWithCopyWarning) and nothing is mutated
# in place.

# Filter thresholds (tune per ticker)
MONEYNESS_RANGE = (0.8, 1.2)     # inclusive bounds on strike / spot
MAX_DAYS_TO_MATURITY = 180       # filter out long-dated options
MIN_OPEN_INTEREST = 1000         # may need to change this for different tickers

# Columns that are not needed downstream
UNUSED_COLUMNS = ["lastTradeDate", "change", "percentChange", "inTheMoney", "contractSize", "currency"]

# Latest close, used to normalise strikes
price_history = yf.Ticker(ticker).history(period="1d")
if price_history.empty:
    raise ValueError(f"No price history returned for {ticker!r}; cannot normalise strikes")
current_price = price_history["Close"].iloc[-1]

# Midnight today: differences against it are whole calendar days, so the result
# does not depend on the time of day the cell is run (a raw "now" timestamp makes
# `.dt.days` floor next-day expiries to 0, which the `> 0` filter then removes).
today = pd.Timestamp.today().normalize()

options_df = (
    options_df
    # Convert expiration date to datetime
    .assign(expiration_date=lambda df: pd.to_datetime(df["expiration_date"]))
    # Remove rows with a null in any column (done before the column drop,
    # so nulls in soon-to-be-dropped columns also remove the row)
    .dropna()
    # Drop duplicates
    .drop_duplicates(subset="contractSymbol")
    # Drop columns
    .drop(columns=UNUSED_COLUMNS, errors="ignore")
    # Normalise strikes and compute time to maturity in days
    .assign(
        normalised_price=lambda df: df["strike"] / current_price,
        time_to_maturity=lambda df: (df["expiration_date"] - today).dt.days,
    )
    # Keep near-the-money, unexpired short-dated, actively held contracts
    .loc[
        lambda df: df["normalised_price"].between(*MONEYNESS_RANGE)
        & (df["time_to_maturity"] > 0)
        & (df["time_to_maturity"] <= MAX_DAYS_TO_MATURITY)
        & (df["openInterest"] > MIN_OPEN_INTEREST)
    ]
)


In [5]:
import plotly.express as px


def plot_iv_scatter(df, title):
    """Build a 3D scatter of implied volatility for one option type.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``normalised_price``, ``time_to_maturity``
        and ``impliedVolatility``.
    title : str
        Figure title.

    Returns
    -------
    plotly.graph_objects.Figure
        The figure; it is not displayed here, call ``.show()`` on it.
    """
    fig = px.scatter_3d(
        df,
        x="normalised_price",       # x-axis (strike / spot)
        y="time_to_maturity",       # y-axis (days to expiry)
        z="impliedVolatility",      # z-axis (IV)
        color="impliedVolatility",  # colour encodes IV as well
        title=title,
    )
    # Small markers keep dense regions of the chain readable
    fig.update_traces(marker=dict(size=3))
    return fig


# Filter calls and puts
calls_df = options_df[options_df['type'] == 'call']
puts_df = options_df[options_df['type'] == 'put']

# Plot for calls
fig_calls = plot_iv_scatter(calls_df, "Call Options - Implied Volatility Surface")
fig_calls.show()

# Figure for puts: built with identical styling but not displayed by default;
# call fig_puts.show() to render it.
fig_puts = plot_iv_scatter(puts_df, "Put Options - Implied Volatility Surface")



In [11]:
import numpy as np
import pandas as pd
from scipy.interpolate import griddata
import plotly.graph_objects as go
from scipy.ndimage import gaussian_filter

# Interpolates the call implied volatilities in `calls_df` (columns
# 'normalised_price', 'time_to_maturity', 'impliedVolatility') onto a regular
# grid, smooths the result and renders it as a 3D surface.

GRID_SIZE = 50        # grid points per axis
SMOOTHING_SIGMA = 2   # Gaussian smoothing width, in grid cells

if calls_df.empty:
    raise ValueError("calls_df is empty; relax the cleaning filters before building the surface")

# Scattered sample locations and their implied volatilities
points = calls_df[['normalised_price', 'time_to_maturity']].to_numpy(dtype=float)
values = calls_df['impliedVolatility'].to_numpy(dtype=float)

# Create a grid of normalised strikes and times to maturity (for interpolation)
strike_grid = np.linspace(points[:, 0].min(), points[:, 0].max(), GRID_SIZE)
maturity_grid = np.linspace(points[:, 1].min(), points[:, 1].max(), GRID_SIZE)

# Create a meshgrid of normalised strike and time to maturity
X, Y = np.meshgrid(strike_grid, maturity_grid)

# Piecewise-cubic interpolation onto the grid. rescale=True maps both axes to the
# unit square first, because strike/spot (~0.8-1.2) and days (~1-180) differ by
# about two orders of magnitude and would otherwise give a badly shaped
# triangulation.
Z = griddata(points, values, (X, Y), method='cubic', rescale=True)

# Cubic griddata yields NaN outside the convex hull of the data, and
# gaussian_filter would spread each NaN over its whole kernel footprint.
# Fill the gaps with nearest-neighbour values for the smoothing pass only,
# then mask them out again so no extrapolated region is drawn.
outside_hull = np.isnan(Z)
if outside_hull.any():
    Z_nearest = griddata(points, values, (X, Y), method='nearest', rescale=True)
    Z_filled = np.where(outside_hull, Z_nearest, Z)
else:
    Z_filled = Z

Z_smooth = gaussian_filter(Z_filled, sigma=SMOOTHING_SIGMA)
Z_smooth[outside_hull] = np.nan

# Create the 3D surface plot using plotly.graph_objects
fig = go.Figure(data=[go.Surface(z=Z_smooth, x=X, y=Y)])

# Add title and labels (x is strike divided by spot, not the raw strike)
fig.update_layout(
    title="Implied Volatility Surface (Cubic Spline)",
    scene=dict(
        xaxis_title='Normalised Strike (strike / spot)',
        yaxis_title='Time to Maturity (days)',
        zaxis_title='Implied Volatility'
    )
)

# Show the plot once; a second pio.show(fig) would render the same figure again
fig.show()



In [7]:
# Notes

# how to deal with outliers in my scatter plot