In [None]:
# Vol Surface Constructor

# 1/ Collect data from Yahoo Finance (yfinance)
# 2/ Process and clean data
# 3/ Calculate IV
# 4/ Construct Vol Surface

In [1]:
import yfinance as yf
import pandas as pd

In [2]:
# Download the full option chain (every listed expiration) for one ticker and
# stack it into two DataFrames: `calls_df` and `puts_df`. Each row is tagged
# with the expiration it came from in an `expiration_date` column.

# Define the ticker symbol (e.g., SPY)
ticker = 'SPY'

# Get the data for the ticker
stock_data = yf.Ticker(ticker)

# Fetch expiration dates
expiration_dates = stock_data.options

# Fail early with a message that names the ticker; otherwise pd.concat below
# raises a generic "No objects to concatenate" ValueError.
if not expiration_dates:
    raise ValueError(f"No option expirations returned for ticker {ticker!r}")

# Per-expiration frames, concatenated once after the loop
calls_data = []
puts_data = []

# Loop through all expiration dates and collect the data
for exp in expiration_dates:
    # Fetch the option chain for each expiration date
    option_chain = stock_data.option_chain(exp)

    # .assign returns a new frame, so the objects handed back by yfinance are
    # not modified in place when the expiration column is added.
    calls = option_chain.calls.assign(expiration_date=exp)
    puts = option_chain.puts.assign(expiration_date=exp)

    # Append the data to respective lists
    calls_data.append(calls)
    puts_data.append(puts)

# Combine the data for all expiration dates into final DataFrames.
# pd.concat already returns a DataFrame; ignore_index gives a fresh 0..n-1 index.
calls_df = pd.concat(calls_data, ignore_index=True)
puts_df = pd.concat(puts_data, ignore_index=True)

# Check the resulting DataFrames
calls_df

Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency,expiration_date
0,SPY241231C00350000,2024-12-20 20:47:09+00:00,350.0,242.90,0.00,0.00,0.0,0.0,31.0,0,0.000010,True,REGULAR,USD,2024-12-31
1,SPY241231C00355000,2024-11-12 15:48:53+00:00,355.0,244.82,250.01,252.99,0.0,0.0,45.0,42,9.547123,True,REGULAR,USD,2024-12-31
2,SPY241231C00360000,2024-12-19 20:43:08+00:00,360.0,229.00,0.00,0.00,0.0,0.0,3.0,0,0.000010,True,REGULAR,USD,2024-12-31
3,SPY241231C00365000,2024-12-19 20:46:26+00:00,365.0,223.51,0.00,0.00,0.0,0.0,98.0,0,0.000010,True,REGULAR,USD,2024-12-31
4,SPY241231C00370000,2024-12-20 17:37:39+00:00,370.0,225.52,0.00,0.00,0.0,0.0,8.0,0,0.000010,True,REGULAR,USD,2024-12-31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4017,SPY270115C00890000,2024-12-30 16:55:12+00:00,890.0,0.95,0.00,0.00,0.0,0.0,20.0,0,0.062509,False,REGULAR,USD,2027-01-15
4018,SPY270115C00895000,2024-12-30 20:29:30+00:00,895.0,1.08,0.00,0.00,0.0,0.0,39.0,0,0.062509,False,REGULAR,USD,2027-01-15
4019,SPY270115C00900000,2024-12-30 20:55:48+00:00,900.0,0.89,0.00,0.00,0.0,0.0,190.0,0,0.062509,False,REGULAR,USD,2027-01-15
4020,SPY270115C00905000,2024-12-30 19:37:12+00:00,905.0,0.81,0.00,0.00,0.0,0.0,40.0,0,0.062509,False,REGULAR,USD,2027-01-15


In [39]:
# Data Cleaning

def _clean_option_frame(frame):
    """Return a cleaned copy of one option-chain DataFrame.

    Steps, in order:
      1. parse ``expiration_date`` into datetime values,
      2. drop every row that has a null in any column,
      3. keep only the first row for each ``contractSymbol``.

    The input frame is not modified, and surviving rows keep their original
    index labels (the index is not reset).

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain ``expiration_date`` and ``contractSymbol`` columns.

    Returns
    -------
    pandas.DataFrame
        The cleaned frame.
    """
    return (
        frame
        # Convert expiration date to datetime
        .assign(expiration_date=pd.to_datetime(frame["expiration_date"]))
        # Remove rows with null values in any column
        .dropna()
        # Drop duplicate contracts (first occurrence wins)
        .drop_duplicates(subset="contractSymbol")
    )


# The same pipeline is applied to both sides of the chain. The names are
# rebound on purpose so existing references to calls_df / puts_df keep working.
calls_df = _clean_option_frame(calls_df)
puts_df = _clean_option_frame(puts_df)




3897

<bound method Series.drop_duplicates of 0       SPY241231C00350000
1       SPY241231C00355000
2       SPY241231C00360000
3       SPY241231C00365000
4       SPY241231C00370000
               ...        
4017    SPY270115C00890000
4018    SPY270115C00895000
4019    SPY270115C00900000
4020    SPY270115C00905000
4021    SPY270115C00910000
Name: contractSymbol, Length: 3897, dtype: object>