The code below downloads **S&P 500** (SPX) index options data.

In [1]:
from yahoo_fin import options
import pandas as pd
from datetime import datetime

import requests
from requests.exceptions import ConnectionError

In [2]:
# Fetch the list of expiration dates that yahoo_fin reports for "^SPX" options and display it.
spx_dates = options.get_expiration_dates("^SPX")
spx_dates

['July 10, 2023',
 'July 11, 2023',
 'July 12, 2023',
 'July 13, 2023',
 'July 14, 2023',
 'July 17, 2023',
 'July 18, 2023',
 'July 19, 2023',
 'July 20, 2023',
 'July 21, 2023',
 'July 24, 2023',
 'July 25, 2023',
 'July 26, 2023',
 'July 27, 2023',
 'July 28, 2023',
 'July 31, 2023',
 'August 1, 2023',
 'August 2, 2023',
 'August 3, 2023',
 'August 4, 2023',
 'August 7, 2023',
 'August 8, 2023',
 'August 11, 2023',
 'August 18, 2023',
 'August 25, 2023',
 'August 31, 2023',
 'September 15, 2023',
 'September 29, 2023',
 'October 20, 2023',
 'October 31, 2023',
 'November 17, 2023',
 'November 30, 2023',
 'December 15, 2023',
 'December 29, 2023',
 'January 19, 2024',
 'February 16, 2024',
 'March 15, 2024',
 'March 28, 2024',
 'April 19, 2024',
 'May 17, 2024',
 'June 21, 2024',
 'June 28, 2024',
 'July 19, 2024',
 'September 20, 2024',
 'December 20, 2024',
 'June 20, 2025',
 'December 19, 2025',
 'March 20, 2026',
 'December 18, 2026',
 'December 17, 2027',
 'December 15, 2028']

In [3]:
# Hand-picked subset of the expirations listed above: 15 dates between
# July 2023 and December 2024, kept in chronological order.
dates = [
    'July 14, 2023',
    'August 18, 2023',
    'September 15, 2023',
    'October 20, 2023',
    'November 17, 2023',
    'December 15, 2023',
    'January 19, 2024',
    'February 16, 2024',
    'March 15, 2024',
    'April 19, 2024',
    'May 17, 2024',
    'June 21, 2024',
    'July 19, 2024',
    'September 20, 2024',
    'December 20, 2024',
]

In [4]:
# Number of expiration dates selected for download.
len(dates)

15

In [5]:
# Download the call chain for every selected expiration and keep each one in
# `spx_calls`, keyed by its expiration-date string.
symbol = "^SPX"
max_attempts = 3
spx_calls = {}

for expiry in dates:
    # The attempt counter is restarted for every expiration. A single counter
    # shared by all dates would let failures accumulate, and once it passed
    # max_attempts every remaining date would be skipped without any request.
    for attempt in range(1, max_attempts + 1):
        try:
            calls = options.get_calls(symbol, expiry)
        except (ConnectionError, requests.Timeout):
            print(f"Attempt {attempt} failed. Retrying...")
        else:
            # Tag every row with its expiration so the per-date tables can be
            # concatenated later without losing that information.
            calls['Expiration'] = expiry
            spx_calls[expiry] = calls
            break
    else:
        # Reached only when no attempt succeeded (the loop never hit `break`).
        print("Max number of attempts reached. Unable to retrieve data.")

In the code above, the options data is downloaded and stored in the dictionary `spx_calls` (keyed by expiration date), with the expiration date added to each table as an extra column.

In [6]:
# Inspect the calls table stored for dates[14], the last of the 15 selected expirations.
spx_calls[dates[14]]

Unnamed: 0,Contract Name,Last Trade Date,Strike,Last Price,Bid,Ask,Change,% Change,Volume,Open Interest,Implied Volatility,Expiration
0,SPX241220C00200000,2023-07-07 12:49PM EDT,200.0,4168.90,4130.90,4150.70,0.0,-,5,2158,0.00%,"December 20, 2024"
1,SPX241220C00400000,2023-06-29 3:10PM EDT,400.0,3942.27,3946.00,3965.80,0.0,-,4,2808,0.00%,"December 20, 2024"
2,SPX241220C00600000,2023-07-03 9:46AM EDT,600.0,3815.90,3761.30,3781.10,0.0,-,2,2542,0.00%,"December 20, 2024"
3,SPX241220C00800000,2023-06-29 3:08PM EDT,800.0,3573.90,3576.90,3596.60,0.0,-,2,3001,0.00%,"December 20, 2024"
4,SPX241220C01000000,2023-07-06 3:43PM EDT,1000.0,3437.85,3392.60,3412.30,0.0,-,1,8409,55.74%,"December 20, 2024"
...,...,...,...,...,...,...,...,...,...,...,...,...
68,SPX241220C08400000,2023-06-23 1:37PM EDT,8400.0,0.12,0.05,0.40,0.0,-,10,239,17.52%,"December 20, 2024"
69,SPX241220C08600000,2023-06-21 10:23AM EDT,8600.0,0.20,0.00,0.20,0.0,-,1,242,17.12%,"December 20, 2024"
70,SPX241220C08800000,2023-07-05 11:07AM EDT,8800.0,0.10,0.00,0.35,0.0,-,135,4391,18.43%,"December 20, 2024"
71,SPX241220C09000000,2023-07-05 11:07AM EDT,9000.0,0.10,0.00,0.10,0.0,-,120,3903,17.27%,"December 20, 2024"


In [7]:
# Inspect the calls table stored for dates[3] ('October 20, 2023').
spx_calls[dates[3]]

Unnamed: 0,Contract Name,Last Trade Date,Strike,Last Price,Bid,Ask,Change,% Change,Volume,Open Interest,Implied Volatility,Expiration
0,SPXW231020C00200000,2023-07-07 10:18AM EDT,200.0,4197.59,4192.1,4196.10,0.00,-,2,167,0.00%,"October 20, 2023"
1,SPXW231020C00400000,2023-06-28 10:22AM EDT,400.0,3961.29,3994.6,3998.60,0.00,-,3,61,0.00%,"October 20, 2023"
2,SPXW231020C00600000,2023-06-27 2:20PM EDT,600.0,3774.80,3798.5,3802.50,0.00,-,6,40,0.00%,"October 20, 2023"
3,SPX231020C00800000,2023-05-23 11:53AM EDT,800.0,3379.00,0.0,0.00,0.00,-,2,0,0.00%,"October 20, 2023"
4,SPX231020C01000000,2023-06-20 3:30PM EDT,1000.0,3392.20,3404.1,3409.40,0.00,-,2500,2583,100.39%,"October 20, 2023"
...,...,...,...,...,...,...,...,...,...,...,...,...
222,SPXW231020C06000000,2023-06-09 11:29AM EDT,6000.0,0.05,0.0,0.15,0.00,-,10,90,18.90%,"October 20, 2023"
223,SPX231020C06200000,2023-07-10 12:00PM EDT,6200.0,0.07,0.0,0.10,-0.01,-12.50%,2,250,20.04%,"October 20, 2023"
224,SPX231020C06400000,2023-06-06 3:13PM EDT,6400.0,0.06,0.0,0.10,0.00,-,8,382,21.73%,"October 20, 2023"
225,SPXW231020C06600000,2023-05-25 10:39AM EDT,6600.0,0.07,0.0,0.10,0.00,-,-,2,23.34%,"October 20, 2023"


So the data is stored in such a way that the options expiring on the i-th selected date, `dates[i]`, are available as a DataFrame under `spx_calls[dates[i]]`.

In [8]:
# Gather the per-expiration DataFrames into a list, in the same order as `dates`.
dfs = [spx_calls[expiry] for expiry in dates]

    

In [9]:
# A bare `len(dfs)` in the middle of a cell is evaluated and then thrown away
# (only the last expression of a cell is displayed), so print it explicitly.
print(len(dfs))
# Preview the table for the fourth expiration.
dfs[3]

Unnamed: 0,Contract Name,Last Trade Date,Strike,Last Price,Bid,Ask,Change,% Change,Volume,Open Interest,Implied Volatility,Expiration
0,SPXW231020C00200000,2023-07-07 10:18AM EDT,200.0,4197.59,4192.1,4196.10,0.00,-,2,167,0.00%,"October 20, 2023"
1,SPXW231020C00400000,2023-06-28 10:22AM EDT,400.0,3961.29,3994.6,3998.60,0.00,-,3,61,0.00%,"October 20, 2023"
2,SPXW231020C00600000,2023-06-27 2:20PM EDT,600.0,3774.80,3798.5,3802.50,0.00,-,6,40,0.00%,"October 20, 2023"
3,SPX231020C00800000,2023-05-23 11:53AM EDT,800.0,3379.00,0.0,0.00,0.00,-,2,0,0.00%,"October 20, 2023"
4,SPX231020C01000000,2023-06-20 3:30PM EDT,1000.0,3392.20,3404.1,3409.40,0.00,-,2500,2583,100.39%,"October 20, 2023"
...,...,...,...,...,...,...,...,...,...,...,...,...
222,SPXW231020C06000000,2023-06-09 11:29AM EDT,6000.0,0.05,0.0,0.15,0.00,-,10,90,18.90%,"October 20, 2023"
223,SPX231020C06200000,2023-07-10 12:00PM EDT,6200.0,0.07,0.0,0.10,-0.01,-12.50%,2,250,20.04%,"October 20, 2023"
224,SPX231020C06400000,2023-06-06 3:13PM EDT,6400.0,0.06,0.0,0.10,0.00,-,8,382,21.73%,"October 20, 2023"
225,SPXW231020C06600000,2023-05-25 10:39AM EDT,6600.0,0.07,0.0,0.10,0.00,-,-,2,23.34%,"October 20, 2023"


In [11]:
# Stack the per-expiration tables row-wise into one DataFrame and renumber the
# rows from 0 so the index is unique across expirations.
df = pd.concat(dfs, ignore_index=True)
df.head()

Unnamed: 0,Contract Name,Last Trade Date,Strike,Last Price,Bid,Ask,Change,% Change,Volume,Open Interest,Implied Volatility,Expiration
0,SPXW230714C01200000,2023-07-10 10:32AM EDT,1200.0,3205.33,3203.8,3207.4,-30.19,-0.93%,2,2255,427.05%,"July 14, 2023"
1,SPXW230714C02000000,2023-06-30 3:45PM EDT,2000.0,2455.75,2404.5,2407.9,0.0,-,5,10,232.32%,"July 14, 2023"
2,SPXW230714C02800000,2023-06-26 12:37PM EDT,2800.0,1544.75,1605.4,1608.9,0.0,-,30,5,156.32%,"July 14, 2023"
3,SPXW230714C02900000,2023-06-14 2:31PM EDT,2900.0,1454.6,1506.3,1509.8,0.0,-,-,15,154.54%,"July 14, 2023"
4,SPXW230714C03000000,2023-06-09 9:38AM EDT,3000.0,1314.73,1402.3,1408.8,0.0,-,-,1,148.79%,"July 14, 2023"


In [12]:
# Total number of call contracts across all selected expirations.
len(df)

2206

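One final transformation needs to be performed: converting the 'Expiration' column from a date string into the time left from today, expressed as a fraction of a year (the code below divides the number of days to expiry by 252).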

In [13]:
# Look up the integer position of the 'Expiration' column.
df.columns.get_loc('Expiration')

11

In [14]:
# Worked example: turn one 'Expiration' value (a string such as "July 14, 2023")
# into a time-to-expiration, i.e. the days between today and the expiration
# date divided by 252.

# Address the column by name rather than by a hard-coded position, so the
# example keeps working if columns are added or reordered.
expiry_str = df['Expiration'].iloc[10]
print(expiry_str,type(expiry_str))
expiry_date = datetime.strptime(expiry_str,"%B %d, %Y").date()
print(expiry_date,type(expiry_date))

# `today` is reused by the cell that converts the whole column.
today = datetime.today().date()
print(today,type(today))

diff = (expiry_date - today)
print(diff,type(diff))
# NOTE(review): diff.days counts *calendar* days, while 252 is the usual number
# of *trading* days in a year. Dividing by 365 (or counting business days)
# would give a consistent year fraction. Left as-is so the values match the
# rest of the notebook -- confirm the intended convention.
delta = diff.days/252
delta

July 14, 2023 <class 'str'>
2023-07-14 <class 'datetime.date'>
2023-07-10 <class 'datetime.date'>
4 days, 0:00:00 <class 'datetime.timedelta'>


0.015873015873015872

In [15]:
# Replace the 'Expiration' date strings with the time to expiration
# (days from `today` until expiry, divided by 252) for every row at once.
#
# - The column is addressed by name instead of positional index 11.
# - The conversion is vectorised, so the column ends up as float64 rather than
#   an object column filled in row by row.
# - The guard makes the cell safe to re-run: once the column is numeric there
#   are no date strings left to parse.
#
# NOTE(review): this divides *calendar* days by 252 (a trading-day count).
# Kept unchanged so existing downstream values keep their meaning -- confirm
# whether 365 or a business-day count was intended.
if not pd.api.types.is_numeric_dtype(df['Expiration']):
    expiry_dates = pd.to_datetime(df['Expiration'], format="%B %d, %Y")
    df['Expiration'] = (expiry_dates - pd.Timestamp(today)).dt.days / 252

In [16]:
# Preview the table: 'Expiration' now holds a numeric time-to-expiration instead of a date string.
df.head()

Unnamed: 0,Contract Name,Last Trade Date,Strike,Last Price,Bid,Ask,Change,% Change,Volume,Open Interest,Implied Volatility,Expiration
0,SPXW230714C01200000,2023-07-10 10:32AM EDT,1200.0,3205.33,3203.8,3207.4,-30.19,-0.93%,2,2255,427.05%,0.015873
1,SPXW230714C02000000,2023-06-30 3:45PM EDT,2000.0,2455.75,2404.5,2407.9,0.0,-,5,10,232.32%,0.015873
2,SPXW230714C02800000,2023-06-26 12:37PM EDT,2800.0,1544.75,1605.4,1608.9,0.0,-,30,5,156.32%,0.015873
3,SPXW230714C02900000,2023-06-14 2:31PM EDT,2900.0,1454.6,1506.3,1509.8,0.0,-,-,15,154.54%,0.015873
4,SPXW230714C03000000,2023-06-09 9:38AM EDT,3000.0,1314.73,1402.3,1408.8,0.0,-,-,1,148.79%,0.015873


In [17]:
# Take the first row's value from column 1 ('Last Trade Date' in the table
# above), keep only its leading "YYYY-MM-DD" part and parse that into a date.
a = df.iat[0, 1][:10]
b = datetime.fromisoformat(a).date()

In [18]:
# Display the date parsed from the first row's last-trade timestamp.
b

datetime.date(2023, 7, 10)

In [21]:
# Get the S&P 500 index price so it can be added to the dataframe as the spot price.

import yfinance as yf

# Create a ticker object for the S&P 500 index (symbol '^GSPC').
ticker = yf.Ticker('^GSPC')

# Alternative, left disabled: request an explicit date range instead of a period.
# data = ticker.history(start='yyyy-mm-dd', end='yyyy-mm-dd')

# # Extract the closing price for the specific date
# closing_price = data['Close'].iloc[0]

In [27]:
# Request the index price history for period='2d' and display it; the 'Close'
# column of this table is the source of the spot price used below.
data = ticker.history(period='2d')
data

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-07-07 00:00:00-04:00,4404.540039,4440.390137,4397.399902,4398.950195,3630480000,0.0,0.0
2023-07-10 00:00:00-04:00,4394.22998,4412.600098,4389.919922,4401.080078,1228044000,0.0,0.0


In [28]:
# Use the close of the first row of `data` (the earlier of the two sessions) as
# the spot price for every contract. This is the row the previously hard-coded
# 4398.950195 was copied from; reading it from `data` keeps the value in sync
# with the download whenever the notebook is re-run.
df['Spot Price'] = data['Close'].iloc[0]

In [29]:
# Preview the table with the new 'Spot Price' column.
df.head()

Unnamed: 0,Contract Name,Last Trade Date,Strike,Last Price,Bid,Ask,Change,% Change,Volume,Open Interest,Implied Volatility,Expiration,Spot Price
0,SPXW230714C01200000,2023-07-10 10:32AM EDT,1200.0,3205.33,3203.8,3207.4,-30.19,-0.93%,2,2255,427.05%,0.015873,4398.950195
1,SPXW230714C02000000,2023-06-30 3:45PM EDT,2000.0,2455.75,2404.5,2407.9,0.0,-,5,10,232.32%,0.015873,4398.950195
2,SPXW230714C02800000,2023-06-26 12:37PM EDT,2800.0,1544.75,1605.4,1608.9,0.0,-,30,5,156.32%,0.015873,4398.950195
3,SPXW230714C02900000,2023-06-14 2:31PM EDT,2900.0,1454.6,1506.3,1509.8,0.0,-,-,15,154.54%,0.015873,4398.950195
4,SPXW230714C03000000,2023-06-09 9:38AM EDT,3000.0,1314.73,1402.3,1408.8,0.0,-,-,1,148.79%,0.015873,4398.950195


In [30]:
# Keep only the out-of-the-money (OTM) calls, i.e. the contracts whose strike
# is above the spot price, and report how many there are.
# (A mid-cell `df_otm.head()` was dropped: only the last expression of a cell
# is displayed, so its result was computed and discarded.)
df_otm = df[df['Strike'] > df['Spot Price']]
len(df_otm)

817

In [32]:
# Preview the first OTM calls.
df_otm.head()

Unnamed: 0,Contract Name,Last Trade Date,Strike,Last Price,Bid,Ask,Change,% Change,Volume,Open Interest,Implied Volatility,Expiration,Spot Price
107,SPXW230714C04400000,2023-07-10 1:45PM EDT,4400.0,29.7,29.5,29.7,-0.8,-2.62%,1469,3110,12.99%,0.015873,4398.950195
108,SPXW230714C04405000,2023-07-10 1:35PM EDT,4405.0,26.5,26.5,26.8,-1.6,-5.69%,456,321,12.84%,0.015873,4398.950195
109,SPXW230714C04410000,2023-07-10 1:44PM EDT,4410.0,23.8,23.7,23.9,-1.2,-4.80%,1118,441,12.61%,0.015873,4398.950195
110,SPXW230714C04415000,2023-07-10 1:30PM EDT,4415.0,21.7,21.7,21.9,-1.05,-4.62%,260,1087,12.75%,0.015873,4398.950195
111,SPXW230714C04420000,2023-07-10 1:41PM EDT,4420.0,18.72,19.0,19.2,-1.64,-8.06%,496,747,12.47%,0.015873,4398.950195


In [34]:
# Save the OTM calls to "OTM_Calls.csv" (a relative path). to_csv is called with its
# defaults, so the DataFrame index is written as the first, unnamed column.
df_otm.to_csv("OTM_Calls.csv")