In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


# Creating a training data set using implied volatality, strike price and underlying asset price

use for doc : https://www.codearmo.com/blog/implied-volatility-european-call-python

In [2]:
import numpy as np
from scipy.stats import norm

N_prime = norm.pdf
N = norm.cdf


def black_scholes_call(S, K, T, r, sigma):
    '''

    :param S: Asset price
    :param K: Strike price
    :param T: Time to maturity
    :param r: risk-free rate (treasury bills)
    :param sigma: volatility
    :return: call price
    '''

    ###standard black-scholes formula
    d1 = (np.log(S / K) + (r + sigma ** 2 / 2) * T) / (sigma * np.sqrt(T))
    d2 = d1 - sigma * np.sqrt(T)

    call = S * N(d1) -  N(d2)* K * np.exp(-r * T)
    return call

def vega(S, K, T, r, sigma):
    '''

    :param S: Asset price
    :param K: Strike price
    :param T: Time to Maturity
    :param r: risk-free rate (treasury bills)
    :param sigma: volatility
    :return: partial derivative w.r.t volatility
    '''

    ### calculating d1 from black scholes
    d1 = (np.log(S / K) + (r + sigma ** 2 / 2) * T) / sigma * np.sqrt(T)

    #see hull derivatives chapter on greeks for reference
    vega = S * N_prime(d1) * np.sqrt(T)
    return vega



def implied_volatility_call(C, S, K, T, r, tol=0.0001,
                            max_iterations=100):
    '''

    :param C: Observed call price
    :param S: Asset price
    :param K: Strike Price
    :param T: Time to Maturity
    :param r: riskfree rate
    :param tol: error tolerance in result
    :param max_iterations: max iterations to update vol
    :return: implied volatility in percent
    '''


    ### assigning initial volatility estimate for input in Newton_rap procedure
    sigma = 0.3
    
    for i in range(max_iterations):

        ### calculate difference between blackscholes price and market price with
        ### iteratively updated volality estimate
        diff = black_scholes_call(S, K, T, r, sigma) - C

        ###break if difference is less than specified tolerance level
        if abs(diff) < tol:
            print(f'found on {i}th iteration')
            print(f'difference is equal to {diff}')
            break

        ### use newton rapshon to update the estimate
        sigma = sigma - diff / vega(S, K, T, r, sigma)

    return sigma

In [3]:
import csv
with open('bank_50_option_365_CE.csv', 'r') as infile, open('SVM_train.csv', 'w', newline='') as outfile:
    reader = csv.reader(infile)
    writer = csv.writer(outfile)
    delete_column = -1  # index of the column you want to delete
    
    for row in reader:
        if row[delete_column] == '-':
            del row[delete_column]
        writer.writerow(row)

In [4]:
option_df = pd.read_csv('SVM_train.csv')

In [5]:
option_df

Unnamed: 0,Symbol,Date,Expiry,Option Type,Strike Price,Open,High,Low,Close,LTP,Settle Price,No. of contracts,Turnover in Lacs,Premium Turnover in Lacs,Open Int,Change in OI,Underlying Value
0,NIFTY,22-Dec-2022,29-Mar-2023,CE,21000,31.90,32.90,25.10,27.85,27.60,27.85,1274,13395.93,18.93,270300,270300,18127.35
1,NIFTY,22-Dec-2022,29-Mar-2023,CE,22000,13.95,16.40,12.05,12.60,12.25,12.60,837,9212.38,5.38,84650,84650,18127.35
2,NIFTY,22-Dec-2022,29-Mar-2023,CE,23000,10.15,10.15,7.70,9.40,9.40,12.90,19,218.58,0.08,17850,17850,18127.35
3,NIFTY,22-Dec-2022,29-Mar-2023,CE,24000,0.00,0.00,0.00,6.85,6.85,3.95,0,0.00,0.00,50,50,18127.35
4,NIFTY,22-Dec-2022,29-Mar-2023,CE,18000,903.45,903.50,780.00,797.05,789.10,797.05,2003,18842.68,815.68,474750,474750,18127.35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5111,NIFTY,24-Mar-2023,29-Mar-2023,CE,16900,234.00,257.45,129.20,140.65,133.00,140.65,397943,3400663.40,38045.05,1646550,1045300,16945.05
5112,NIFTY,24-Mar-2023,29-Mar-2023,CE,16950,201.40,218.70,102.55,113.60,106.45,113.60,340873,2914599.78,25701.10,1129750,997250,16945.05
5113,NIFTY,24-Mar-2023,29-Mar-2023,CE,17000,159.90,182.25,79.10,88.95,83.00,88.95,2267633,19422969.04,148088.54,7990700,5288900,16945.05
5114,NIFTY,24-Mar-2023,29-Mar-2023,CE,17050,138.35,148.60,58.00,68.65,63.20,68.65,1619854,13897769.39,88514.04,2264950,1892650,16945.05


In [10]:
Strike_price = list(option_df['Strike Price'])
Strike_price = [float(x) for x in Strike_price]
Stock_price = list(option_df['Underlying Value'])
Time_expiry = []
start_time = list(option_df['Date'])
end_time = list(option_df['Expiry'])
# option_price = list(option_df['LTP'])

start_time = [x.replace("-", ":") for x in start_time]
end_time = [x.replace("-", ":") for x in end_time]

dict_month = {
    "Jan":"01",
    "Feb":"02",
    "Mar":"03",
    "Apr":"04",
    "May":"05",
    "Jun":"06",
    "Jul":"07",
    "Aug":"08",
    "Sep":"09",
    "Oct":"10",
    "Nov":"11",
    "Dec":"12",
}
for x in start_time:
    z = x.split(":")
    y = z[0] + ":" + dict_month[z[1]] + ":" + z[2]
    start_time[start_time.index(x)] = y
for x in end_time:
    z = x.split(":")
    y = z[0] + ":" + dict_month[z[1]] + ":" + z[2]
    end_time[end_time.index(x)] = y

from datetime import datetime
start_time = [datetime.strptime(x, '%d:%m:%Y').date()
 for x in start_time]
end_time = [datetime.strptime(x, '%d:%m:%Y').date()
 for x in end_time]

for x, y in zip(end_time, start_time):
    tmp = x - y
    Time_expiry.append(tmp.days)

Time_expiry = [x/365.0 for x in Time_expiry]
Interst = [10/100.0] * len(Strike_price)



In [11]:
print (len(Strike_price), Strike_price)
print (len(Stock_price), Stock_price)
print (len(Time_expiry), Time_expiry)
print (len(option_price), option_price)


5116 [21000.0, 22000.0, 23000.0, 24000.0, 18000.0, 19000.0, 17000.0, 11000.0, 12000.0, 13000.0, 14000.0, 15000.0, 16000.0, 20000.0, 21000.0, 22000.0, 23000.0, 24000.0, 18000.0, 19000.0, 17000.0, 11000.0, 12000.0, 13000.0, 14000.0, 15000.0, 16000.0, 20000.0, 21000.0, 22000.0, 23000.0, 24000.0, 18000.0, 19000.0, 17000.0, 11000.0, 12000.0, 13000.0, 14000.0, 15000.0, 16000.0, 20000.0, 21000.0, 22000.0, 23000.0, 24000.0, 18000.0, 19000.0, 17000.0, 11000.0, 12000.0, 13000.0, 14000.0, 15000.0, 16000.0, 20000.0, 21000.0, 22000.0, 23000.0, 24000.0, 18000.0, 19000.0, 17000.0, 11000.0, 12000.0, 13000.0, 14000.0, 15000.0, 16000.0, 20000.0, 21000.0, 22000.0, 23000.0, 24000.0, 18000.0, 19000.0, 20000.0, 11000.0, 12000.0, 13000.0, 14000.0, 15000.0, 16000.0, 17000.0, 17100.0, 17150.0, 17200.0, 17250.0, 17300.0, 17350.0, 17400.0, 17450.0, 21000.0, 22000.0, 23000.0, 24000.0, 17500.0, 17550.0, 17600.0, 17650.0, 17700.0, 17750.0, 17800.0, 17850.0, 17900.0, 17950.0, 18000.0, 18050.0, 18100.0, 18150.0, 1820

In [12]:
IV = []
for C, S, k, T, r in zip(option_price, Stock_price, Strike_price, Time_expiry, Interst):
    iv = str(implied_volatility_call(C, S, k, T, r))
    iv = iv[:4]
    IV.append(float(iv))
    

found on 39th iteration
difference is equal to 7.509676597550197e-05
found on 73th iteration
difference is equal to 9.455519858647676e-05
found on 98th iteration
difference is equal to 8.981507179406378e-05
found on 8th iteration
difference is equal to 6.932644862445159e-05
found on 5th iteration
difference is equal to 2.2038761017029174e-05
found on 18th iteration
difference is equal to 6.16472587324779e-05
found on 52th iteration
difference is equal to 9.90705530057312e-05
found on 81th iteration
difference is equal to 8.985756875290463e-05
found on 5th iteration
difference is equal to 1.253521372746036e-05
found on 8th iteration
difference is equal to 3.0055860946731627e-05
found on 20th iteration
difference is equal to 8.343477747985162e-05


  d1 = (np.log(S / K) + (r + sigma ** 2 / 2) * T) / (sigma * np.sqrt(T))
  d1 = (np.log(S / K) + (r + sigma ** 2 / 2) * T) / sigma * np.sqrt(T)
  sigma = sigma - diff / vega(S, K, T, r, sigma)
  d1 = (np.log(S / K) + (r + sigma ** 2 / 2) * T) / (sigma * np.sqrt(T))
  d1 = (np.log(S / K) + (r + sigma ** 2 / 2) * T) / sigma * np.sqrt(T)


found on 25th iteration
difference is equal to 6.866860053378332e-05
found on 56th iteration
difference is equal to 9.45116547654834e-05
found on 81th iteration
difference is equal to 8.664562431093259e-05
found on 7th iteration
difference is equal to 3.274671360031789e-05
found on 6th iteration
difference is equal to 8.483663259539753e-05
found on 24th iteration
difference is equal to 6.141062940656639e-05
found on 54th iteration
difference is equal to 9.972430734705995e-05
found on 98th iteration
difference is equal to 9.561751434183918e-05
found on 8th iteration
difference is equal to 8.735218830224767e-05
found on 5th iteration
difference is equal to 8.712755516171455e-05
found on 21th iteration
difference is equal to 6.351967654438795e-05
found on 55th iteration
difference is equal to 8.513851366842573e-05
found on 8th iteration
difference is equal to 6.956937977520283e-05
found on 6th iteration
difference is equal to 8.342750334122684e-06
found on 21th iteration
difference is equ

  sigma = sigma - diff / vega(S, K, T, r, sigma)


found on 15th iteration
difference is equal to -7.16338640813774e-05
found on 83th iteration
difference is equal to -8.353922203241382e-05
found on 91th iteration
difference is equal to 9.183445394000955e-05
found on 57th iteration
difference is equal to 8.922273182188079e-05
found on 65th iteration
difference is equal to 8.710836476666373e-05
found on 77th iteration
difference is equal to 8.924372956453652e-05
found on 22th iteration
difference is equal to 6.839022617022295e-05
found on 29th iteration
difference is equal to 8.4300504886059e-05
found on 38th iteration
difference is equal to 7.603516721665926e-05
found on 48th iteration
difference is equal to 9.172917955346804e-05
found on 7th iteration
difference is equal to 2.680514080566354e-05
found on 9th iteration
difference is equal to 8.019498081779375e-05
found on 12th iteration
difference is equal to 8.199037511147367e-05
found on 16th iteration
difference is equal to 9.888006870895083e-05
found on 5th iteration
difference is 

In [13]:
f = open("SVM_train_02.csv", "w")

header = ["Strike", "Stock", "option_price", "IV"]

writer = csv.writer(f)

writer.writerow(header)


30

In [14]:
for strike, stock, option, iv in zip(Strike_price, Stock_price, option_price, IV):
    if (str(iv) != "nan"):
        if (str(iv) != "-inf"):
            ls = [strike, stock, option, iv]
            writer.writerow(ls)


In [15]:
f.close()

## Now lets built SVR

In [16]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [17]:
dataset = pd.read_csv('SVM_train_02.csv')

In [18]:
dataset

Unnamed: 0,Strike,Stock,option_price,IV
0,21000.0,18127.35,27.60,0.14
1,22000.0,18127.35,12.25,0.15
2,23000.0,18127.35,9.40,0.18
3,24000.0,18127.35,6.85,0.20
4,18000.0,18127.35,789.10,0.11
...,...,...,...,...
4239,16900.0,16945.05,133.00,0.12
4240,16950.0,16945.05,106.45,0.12
4241,17000.0,16945.05,83.00,0.12
4242,17050.0,16945.05,63.20,0.12


In [19]:
X = dataset[dataset.columns[dataset.columns.isin(['Strike','Stock'])]]
Y = dataset[dataset.columns[dataset.columns.isin(['IV'])]]

In [20]:
X

Unnamed: 0,Strike,Stock
0,21000.0,18127.35
1,22000.0,18127.35
2,23000.0,18127.35
3,24000.0,18127.35
4,18000.0,18127.35
...,...,...
4239,16900.0,16945.05
4240,16950.0,16945.05
4241,17000.0,16945.05
4242,17050.0,16945.05


In [21]:
Y

Unnamed: 0,IV
0,0.14
1,0.15
2,0.18
3,0.20
4,0.11
...,...
4239,0.12
4240,0.12
4241,0.12
4242,0.12


In [22]:

from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X = sc_X.fit_transform(X)
y = sc_y.fit_transform(Y)

# Removing NAN

In [23]:
import numpy as np
from sklearn.impute import SimpleImputer
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
imp.fit(X)
X = imp.transform(X)

In [26]:
from sklearn.svm import SVR
regressor = SVR(kernel = 'rbf')
regressor.fit(X, y)

  y = column_or_1d(y, warn=True)


# Testing

In [25]:
strike_price = 16400
stock_price = 16951.70
IV_pred = regressor.pred(strike_price, stock_price)

AttributeError: 'SVR' object has no attribute 'pred'

In [None]:
pred = regressor.pred(16650, 16951.70, )
actual = 334