# Simple Data Analysis of options

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
import plotly.graph_objs as go
import plotly.express as px
from matplotlib import pyplot as plt

In [None]:
# Yahoo Finance handle for the "^SPX" symbol; every later cell reads from it.
ticker = yf.Ticker("^SPX")
# Expiration identifiers; they are passed to ticker.option_chain() and used as dict keys below.
exps = ticker.options
# Reference spot level: the 'regularMarketPreviousClose' field, used to normalise strikes (k/s).
s0 = ticker.info['regularMarketPreviousClose']
# Datetime version of the expirations, used as plot axis values and as a DataFrame index.
dt_exps = pd.to_datetime(exps)

In [None]:
# Number of expirations returned for the ticker.
len(exps)

54

In [None]:
# Fetch each expiration's option chain once and store the call and put
# tables separately, keyed by the expiration string.
call_dict, put_dict = {}, {}
for date in exps:
  opt = ticker.option_chain(date)
  call_dict[date], put_dict[date] = opt.calls, opt.puts

In [None]:
# "Liquidity" is measured as the number of rows (listed contracts) in each
# expiration's table, in the same order as `exps`.
call_liquidity = [call_dict[date].shape[0] for date in exps]
put_liquidity = [put_dict[date].shape[0] for date in exps]

In [None]:
# One row per expiration date, one column per option type.
liquidity_df = pd.DataFrame(
    {"call_liquidity": call_liquidity, "put_liquidity": put_liquidity},
    index=dt_exps,
)

In [None]:
# Expiration date whose call table has the most rows.
most_liquid = liquidity_df["call_liquidity"].idxmax()
print(f"Most liquid calls: {most_liquid}")

Most liquid calls: 2024-06-28 00:00:00


In [None]:
# Chart title and axis titles for the liquidity comparison.
layout = go.Layout(
    title='Liquidity of Calls and Puts',
    xaxis={'title': 'Expiration Dates'},
    yaxis={'title': 'Liquidity'},
)

# One line per option type, drawn over every expiration.
trace1 = go.Scatter(name='calls', mode='lines', x=dt_exps, y=call_liquidity)
trace2 = go.Scatter(name='puts', mode='lines', x=dt_exps, y=put_liquidity)

# Assemble and render the chart.
fig = go.Figure(layout=layout, data=[trace1, trace2])
fig.show()

In [None]:
# Same comparison as the full chart, restricted to the first 20 expirations.
layout = go.Layout(
    title='Liquidity of Calls and Puts',
    xaxis={'title': 'Expiration Dates'},
    yaxis={'title': 'Liquidity'},
)

# One line per option type over the truncated expiration range.
trace1 = go.Scatter(name='calls', mode='lines', x=dt_exps[:20], y=call_liquidity[:20])
trace2 = go.Scatter(name='puts', mode='lines', x=dt_exps[:20], y=put_liquidity[:20])

# Assemble and render the chart.
fig = go.Figure(layout=layout, data=[trace1, trace2])
fig.show()

Measuring liquidity by the number of listed contracts per expiration, we can see:
- The weekly expirations are relatively more liquid.
- The nearest monthly expiration is the most liquid.
- The puts are more liquid than the calls.

## Volatility Smile

In [None]:
# Expiration date whose call table has the most rows; used for the smile below.
most_liquid = liquidity_df["call_liquidity"].idxmax()

In [None]:
def get_vol_smile(date):
  """Plot the implied-volatility smile for one expiration and price the call-put combo.

  Args:
    date: Expiration to analyse. It must provide ``strftime`` (for example a
      ``pandas.Timestamp``) and format to a ``%Y-%m-%d`` key that exists in
      both ``call_dict`` and ``put_dict``.

  Returns:
    A new DataFrame with one row per strike that is listed for both calls
    and puts, and the columns ``strike``, ``combo_ask``, ``combo_bid`` and
    ``combo_mid``. The combo is long one call and short one put.

  Raises:
    KeyError: If the formatted date is not a key of ``call_dict`` or
      ``put_dict``.

  Side effects:
    Adds ``k/s`` and ``spread`` columns in place to the tables stored in
    ``call_dict`` and ``put_dict``, and displays a Plotly figure.
  """
  str_date = date.strftime("%Y-%m-%d")
  calls = call_dict[str_date]
  puts = put_dict[str_date]

  # Enrich the cached tables in place on purpose: a later cell reads the
  # 'k/s' column straight from call_dict.
  for chain in (calls, puts):
    chain['k/s'] = chain['strike'] / s0
    chain['spread'] = chain['ask'] - chain['bid']

  trace1 = go.Scatter(x=calls['k/s'], y=calls['impliedVolatility'], mode='lines', name='calls')
  # NOTE(review): the put curve is plotted with its sign flipped, so it is
  # drawn below zero - presumably to keep the two curves apart; confirm.
  trace2 = go.Scatter(x=puts['k/s'], y=-puts['impliedVolatility'], mode='lines', name='puts')

  # NOTE(review): the x values are strike / s0, while the axis title says
  # 'K/F'; the label is left unchanged here - confirm which one is intended.
  layout = go.Layout(title='Volatility Smile for Calls and puts', xaxis=dict(title='K/F'), yaxis=dict(title='Implied Vol'))

  fig = go.Figure(data=[trace1, trace2], layout=layout)
  fig.show()

  # Keep only strikes quoted on both sides so every row is a complete combo.
  total = calls.merge(puts, on='strike', suffixes=('_call', '_put'), how="inner")
  # Buying the combo: pay the call ask, receive the put bid.
  total['combo_ask'] = total['ask_call'] - total['bid_put']
  # Selling the combo: receive the call bid, pay the put ask.
  total['combo_bid'] = total['bid_call'] - total['ask_put']

  # Take an explicit copy so that adding 'combo_mid' writes to an independent
  # frame instead of a slice of `total` (avoids SettingWithCopyWarning).
  total_new = total[['strike', 'combo_ask', 'combo_bid']].copy()
  total_new['combo_mid'] = (total_new['combo_ask'] + total_new['combo_bid']) / 2
  return total_new

In [None]:
# Plot the smile for the most liquid expiration and keep its combo price table.
most_liquid_exmaple = get_vol_smile(most_liquid)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Typically, the volatility smile for an index is higher at low strikes (here, lower K/F) than at high strikes.

What actually matters is **the price**. The price determines the implied vol (through the BS model), which means that for options with a higher price we expect a higher implied volatility.

Back to the financial intuition: investors tend to protect themselves against losses by using options, and they typically need protection against decreases rather than increases in the index. Hence the demand for low-strike options is greater, which gives a higher price and therefore a higher implied volatility. The effect is asymmetric: high-strike options do not see the same demand, which is why we obtain a different pattern for them.

In [None]:
# Dict key (YYYY-MM-DD string) of the most liquid expiration.
strdate = most_liquid.strftime("%Y-%m-%d")
# The "k/s" column is added to this table by get_vol_smile(), so that call must have run first.
strikes = call_dict[strdate]["k/s"]

In [None]:
# Display the enriched call table for the most liquid expiration.
# NOTE(review): the recorded output also shows a log_strike column that no
# visible cell creates - presumably left over from an earlier run; confirm.
call_dict[strdate]

Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency,k/s,spread,log_strike
0,SPXW240628C01000000,2024-05-22 16:10:12+00:00,1000.0,4318.45,4308.0,4315.3,0.0,0.0,26.0,543,3.408327,True,REGULAR,USD,0.189272,7.3,-1.664570
1,SPXW240628C01200000,2024-05-23 13:35:03+00:00,1200.0,4127.10,4108.8,4116.0,0.0,0.0,1.0,128,3.059206,True,REGULAR,USD,0.227126,7.2,-1.482248
2,SPXW240628C01400000,2024-05-31 13:34:09+00:00,1400.0,3845.74,3909.5,3916.8,0.0,0.0,1.0,153,2.768008,True,REGULAR,USD,0.264981,7.3,-1.328098
3,SPXW240628C01600000,2024-05-31 13:41:18+00:00,1600.0,3657.27,3710.3,3717.6,0.0,0.0,1.0,3,2.518864,True,REGULAR,USD,0.302835,7.3,-1.194566
4,SPXW240628C01800000,2024-04-01 15:50:01+00:00,1800.0,3448.73,3284.8,3297.1,0.0,0.0,1.0,30,0.000010,True,REGULAR,USD,0.340690,12.3,-1.076783
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,SPXW240628C06150000,2024-06-04 19:21:51+00:00,6150.0,0.10,0.0,0.0,0.0,0.0,81.0,258,0.125009,False,REGULAR,USD,1.164023,0.0,0.151882
361,SPXW240628C06200000,2024-05-22 13:49:32+00:00,6200.0,0.07,0.0,0.0,0.0,0.0,2.0,141,0.125009,False,REGULAR,USD,1.173487,0.0,0.159979
362,SPXW240628C06250000,2024-05-22 15:59:28+00:00,6250.0,0.07,0.0,0.1,0.0,0.0,,32,0.212898,False,REGULAR,USD,1.182950,0.1,0.168012
363,SPXW240628C06300000,2024-05-21 16:58:37+00:00,6300.0,0.07,0.0,0.1,0.0,0.0,146.0,214,0.222176,False,REGULAR,USD,1.192414,0.1,0.175980


In [None]:
from scipy.interpolate import interp1d

In [None]:
# Define a common set of strike prices
common_strikes = np.linspace(0.2, 1.4, 100)

X = np.array(dt_exps[:20])
Y = common_strikes
Z = np.zeros((len(X), len(common_strikes)))

# Interpolate implied volatilities onto the common set of strike prices
for i, date in enumerate(exps[:20]):
    df = call_dict[date]
    df['k/s'] = df['strike'] / s0
    interp_func = interp1d(df['k/s'], df['impliedVolatility'], kind='linear', fill_value='extrapolate')
    Z[i, :] = interp_func(common_strikes)

# Creating the surface plot
fig = go.Figure(data=[go.Surface(z=Z, x=X, y=Y)])

# Updating the layout
fig.update_layout(
    title='Volatility Surface',
    scene=dict(
        xaxis_title='Date',
        yaxis_title='s/k',
        zaxis_title='Implied Volatility'
    )
)

# Display the figure
fig.show()

In [None]:
# Display the combo bid / ask / mid table returned by get_vol_smile().
most_liquid_exmaple

Unnamed: 0,strike,combo_ask,combo_bid,combo_mid
0,1000.0,4315.3,4307.90,4311.600
1,1200.0,4116.0,4108.60,4112.300
2,1400.0,3916.8,3909.50,3913.150
3,1600.0,3717.6,3710.25,3713.925
4,1800.0,3297.1,3284.70,3290.900
...,...,...,...,...
334,6150.0,-816.1,-822.30,-819.200
335,6200.0,-865.9,-872.10,-869.000
336,6250.0,-915.6,-921.90,-918.750
337,6300.0,-965.4,-971.50,-968.450


For the combo, we have the *Put-Call Parity*:
$$
C-P = DF(F-K)
$$
where $DF$ is the discount factor. For the LHS, we are long a call and short a put with the same time to maturity and strike, which amounts to a synthetic long forward on the index (a long combo).

Since the combo is long a call and short a put, its bid and ask are
- Bid: the price at which we can sell the combo. We sell the call at $C_{bid}$ and buy the put at $P_{ask}$, so we receive $C_{bid} - P_{ask}$.
- Ask: the price at which we can buy the combo. We buy the call at $C_{ask}$ and sell the put at $P_{bid}$, so we pay $C_{ask} - P_{bid}$.

Then the ask and bid of the combo becomes:
$$
Combo_{bid} = C_{bid} - P_{ask},\quad Combo_{ask} = C_{ask} - P_{bid}
$$

In [None]:
# Implied forward per strike: strike plus the combo mid price (put-call parity with the discount factor taken as 1).
most_liquid_exmaple["forward_price"] = most_liquid_exmaple["strike"] + most_liquid_exmaple["combo_mid"]

In [None]:
# Summary statistics of the implied forward across strikes.
most_liquid_exmaple["forward_price"].describe()

count     339.000000
mean     5190.245206
std       314.968783
min      3574.800000
25%      5258.225000
50%      5325.950000
75%      5328.075000
max      5562.025000
Name: forward_price, dtype: float64

In [None]:
# Chart title and axis titles for the implied forward per strike.
layout = go.Layout(
    title='Strike Forward Relationship',
    xaxis={'title': 'Strike'},
    yaxis={'title': 'Forward Price'},
)
trace = go.Scatter(
    mode='lines',
    x=most_liquid_exmaple['strike'],
    y=most_liquid_exmaple['forward_price'],
)

# Assemble and render the chart.
fig = go.Figure(layout=layout, data=[trace])
fig.show()

In [None]:
# Chart title and axis titles for the combo mid price per strike.
layout = go.Layout(
    title='Strike-Combo Price Relationship',
    xaxis={'title': 'Strike'},
    yaxis={'title': 'Mid Price of Combo'},
)
trace = go.Scatter(
    mode='lines',
    x=most_liquid_exmaple['strike'],
    y=most_liquid_exmaple['combo_mid'],
)

# Assemble and render the chart.
fig = go.Figure(layout=layout, data=[trace])
fig.show()

We can see it meets our expectation. For simplicity, we set $DF = 1$.
$$
Combo_{mid} = C - P = DF(F-K) \Rightarrow Combo_{mid} + K = F \approx \text {Constant for now}
$$

### Further Steps

Now we have the curve of the volatility smile. We need to find the window for a "flat volatility zone". For each smile curve, it is easy to cut off both ends with a threshold (on both the low-strike and the high-strike side, although the low-strike side is more significant).

But we need to consider the size as well. Still working on it...

## Vol-Spot Dynamic

For the dynamics, we need historical option price series. For each option with a given strike and time to maturity, we have a time series. We index it by $i$, which denotes one option (in practice we can track it with its option ID). For each $i$, we observe
$$
vol_{i,t-n},\ \ldots,\ vol_{i,t} \quad|\quad S_{t-n},\ \ldots,\ S_{t}
$$
Then we take the first differences of both time series,
$$
\Delta vol_{i,t-n+1},\ \ldots,\ \Delta vol_{i,t} \quad|\quad \Delta \ln S_{t-n+1},\ \ldots,\ \Delta \ln S_{t}
$$
Here we can apply a regression to each option to see the vol-spot dynamics.
$$
\Delta vol_{i,t}(K,T) = \alpha_i + \beta_i(T) \Delta \ln S_{t} + \varepsilon_i
$$
Then for the coefficients $\alpha_i$ and $\beta_i$, we have statistics such as the mean and the standard deviation.

Since it's also like a grid value for each strike and time to maturity, we can use a surface like the volatility surface to see the dynamic and find some patterns.

This is where the **"Skew Stickiness Ratio (SSR)"** comes in.

We define the ATM volatility skew, where $k$ is the log-moneyness (so $k=0$ is at the money):
$$
\psi(T) = \partial_k vol(k,T)|_{k=0}
$$
Actually from the regression above, we have $\beta(T)$, which is the estimation of the change in implied volatility conditional on a change in the spot.
$$
\beta(T) = \bar{\beta_i}(T)\rightarrow \mathbb{E}[\Delta vol(T)|\frac{\Delta S}{S}] = \mathbb{E}[\Delta vol(T)|\Delta \ln S]
$$
Here $\beta(T)$ is the average of the coefficients for the options with the same time to maturity. For example, 1 month, 3 months etc.

Then we have the skew stickiness ratio
$$
\mathcal{R}(T) = \frac{\beta(T)}{\psi(T)}
$$
We have an explicit calculation method for $\beta(T)$ via regression. For $\psi(T)$, we can use a numerical method: it comes directly from our volatility smile, as the slope of the curve at the money. The ATM options are liquid, and they will lie in the "flat window".

The regression is like
$$
\Delta vol(T) \sim \Delta \ln S(T) \times\psi(T)
$$
They are all calculated day by day.

<font color = "red"> **Question** </font>

Suppose we are calculating SSR for 1M ATM options, how do we choose the 1M options day by day?

I think it means that for each trading day we need to find the options whose maturity is nearest to 1 month. For example, "2024-06-04"→"2024-07-04", "2024-06-06"→"2024-07-06". Does our conclusion about liquidity matter in this case?

Or it means we always choose SPX options instead of SPXW?