In [None]:
import pandas as pd
import numpy as np
import sys
sys.path.insert(1,"/core/github/cryptoderiv-quant-lib")

from cdcqr.analytics.derivatives.vol_fitting.smoothing.utils import cumsum_getTEvents, fit_fun2
from scipy.interpolate import interp1d

from cryptoderiv_quantlib import VolModels
from cryptoderiv_quantlib import VolCurve, VolSurface, StickyOptions

from cdcqr.analytics.derivatives.vol_fitting.smoothing import kalman_filter as kf
from numpy.linalg import inv

In [None]:
from importlib import reload

In [None]:
# Name of the vol model to look up; kept as a constant so it can be changed in one place.
MODEL_TO_TEST = "parabolic_linear6"
# Resolve the model through the library's VolModels.vol_model lookup.
# NOTE(review): `model` is not referenced again in the visible cells — confirm it is still needed.
model = VolModels.vol_model(MODEL_TO_TEST)

In [None]:
df0 =  pd.read_pickle('/core/data/bt_spikeDec_MAR22.pkl')

In [None]:
df0.head(2).T

In [None]:
df0

In [None]:
df = pd.read_pickle('/core/data/fitted_vol_1.pkl')

### 1) use vol_atm --> find rolling mean and std --> apply cusum

In [None]:
# Look-back window (in rows) used by the rolling statistics in the cells below.
lbw = 50

# Quick sanity check of the loaded frame: dimensions first, then the column index.
print(df.shape, df.columns, sep="\n")

In [None]:
# Rolling mean and standard deviation of the ATM local vol over the last `lbw` rows,
# followed by the z-score of the current value against those rolling statistics.
atm_vol = df['localvol_atm']
atm_window = atm_vol.rolling(lbw)
df['ma_localvol_atm'] = atm_window.mean()
df['rstd_localvol_atm'] = atm_window.std()
df['zscore_localvol_atm'] = atm_vol.sub(df['ma_localvol_atm']).div(df['rstd_localvol_atm'])

### 1.1) Test a range of thresholds for the CUSUM filter (`cumsum_getTEvents`)

In [None]:
# Sweep the CUSUM threshold and record, for each value, the share of usable rows
# that the filter flags as events.
zscore_atm = df['zscore_localvol_atm']

# Denominator: rows that actually carry a z-score. A rolling window of `lbw` rows
# leaves the first `lbw - 1` rows NaN (not `lbw`), and any other NaN rows are
# excluded as well, so count the valid values instead of using shape[0] - lbw.
n = int(zscore_atm.notna().sum())

# The loop variable is deliberately not called `h`, so re-running this cell does
# not overwrite the threshold chosen further down.
h2events_ratio = {
    threshold: len(cumsum_getTEvents(zscore_atm, threshold)) / n
    for threshold in np.linspace(0.5, 5, 100)
}

In [None]:
# Turn the {threshold: ratio} mapping into a one-column frame indexed by threshold
# and plot the event ratio against the threshold.
df_h2events_ratio = (
    pd.Series(h2events_ratio, name='event_ratio')
    .rename_axis('cumsum_threshold')
    .to_frame()
)
df_h2events_ratio.plot()

#### 1.2) take h = 3 for example

In [None]:
# CUSUM threshold used for the rest of the notebook.
h = 3
# Read the event ratio at that threshold off the sweep above. The lookup uses `h`
# itself (not a repeated literal) so the displayed ratio always matches the
# threshold in use; interpolation is needed because `h` need not lie on the grid.
interp1d(df_h2events_ratio.index, df_h2events_ratio['event_ratio'])([h])[0]

In [None]:
# Run the CUSUM filter on the ATM-vol z-score and flag the rows it returns.
cumsum_event_index = cumsum_getTEvents(df['zscore_localvol_atm'], h)

# Build the flag column as one boolean array and assign it in a single step.
# The previous `df['cumsum_event'].iloc[0] = True` was chained assignment, which
# pandas does not guarantee to write through (and never does under copy-on-write).
event_mask = np.array(df.index.isin(cumsum_event_index), dtype=bool)
if event_mask.size:
    # The first row is always treated as an event so that forward-filling the
    # curve from event rows has a starting value.
    event_mask[0] = True
df['cumsum_event'] = event_mask

In [None]:
# Keep the local curve only on event rows, then carry the most recent event's
# curve forward to every following row until the next event.
df['cumsum_curve'] = df['localcurve'].where(df['cumsum_event']).ffill()

#### 1.3) Apply the fit-error metric to check whether the CUSUM filter with h = 3 performs well

In [None]:
# Evaluate fit_fun2 on each row's held curve and that row's quote frame, then
# average the returned values row by row.
# NOTE(review): the row-wise mean presumes fit_fun2 returns several values per row — confirm.
per_row_fit = df[['cumsum_curve', 'df']].apply(
    lambda row: fit_fun2(row['cumsum_curve'], row['df']),
    axis=1,
)
error = per_row_fit.mean(axis=1)

In [None]:
df.shape

In [None]:
display(error.mean())

### 2) At each time t, use the time series of the mid IV of each strike, miv = (biv + aiv) / 2 --> find its rolling mean and std --> aggregate the z-values --> apply CUSUM (or some variation of this)

In [None]:
df0 = pd.read_pickle('/core/data/vol_smoothing_base_1.pkl')

In [None]:
display(df0.head(2))
display(df.head(2))

In [None]:
# Attach bid/ask IV quotes to the fitted-vol rows by timestamp (`t` on the left,
# `tm` on the right), keeping every fitted-vol row, and then collapse back to a
# single row per `t` by taking the first entry of each column within the group.
quote_cols = df0[['tm', 'aiv', 'biv']]
merged = df.merge(quote_cols, left_on='t', right_on='tm', how='left')
df1 = merged.groupby('t').first().reset_index()

In [None]:
# Mid implied vol: the average of the bid and ask implied vols.
df1['miv'] = (df1['biv'] + df1['aiv']) / 2

In [None]:
# Rolling mean and standard deviation of the mid IV over the last `lbw` rows,
# followed by the z-score of the current mid IV against them.
mid_iv = df1['miv']
mid_iv_window = mid_iv.rolling(lbw)
df1['ma_miv'] = mid_iv_window.mean()
df1['rstd_miv'] = mid_iv_window.std()
df1['zscore_miv'] = mid_iv.sub(df1['ma_miv']).div(df1['rstd_miv'])

In [None]:
# Run the CUSUM filter on the mid-IV z-score and flag the rows it returns.
cumsum_event_index = cumsum_getTEvents(df1['zscore_miv'], h)

# Build the flag column as one boolean array and assign it in a single step.
# The previous `df1['cumsum_event'].iloc[0] = True` was chained assignment, which
# pandas does not guarantee to write through (and never does under copy-on-write).
event_mask = np.array(df1.index.isin(cumsum_event_index), dtype=bool)
if event_mask.size:
    # The first row is always treated as an event so that forward-filling the
    # curve from event rows has a starting value.
    event_mask[0] = True
df1['cumsum_event'] = event_mask

In [None]:
# Keep the local curve only on event rows, then carry the most recent event's
# curve forward to every following row until the next event.
df1['cumsum_curve'] = df1['localcurve'].where(df1['cumsum_event']).ffill()

#### Apply the fit-error metric to check whether the CUSUM filter with h = 3 performs well

In [None]:
# Evaluate fit_fun2 on each row's held curve and that row's quote frame, then
# average the returned values row by row.
# NOTE(review): the row-wise mean presumes fit_fun2 returns several values per row — confirm.
per_row_fit = df1[['cumsum_curve', 'df']].apply(
    lambda row: fit_fun2(row['cumsum_curve'], row['df']),
    axis=1,
)
error = per_row_fit.mean(axis=1)

In [None]:
display(error.mean())

### 3) at each time t, use the spread = aiv - biv as a measure of std (maybe spread = 2 std) --> use miv as the mean --> z value --> aggregate --> apply cusum

In [None]:
df2 = df1.copy()

# Bid/ask IV spread used as the scale for the deviation of miv from its rolling mean.
# It is ask minus bid (as stated in the section heading), so the scale is positive
# for ordinary quotes and the normalised value keeps the sign of (miv - ma_miv);
# the previous bid-minus-ask form flipped that sign.
iv_spread = df2['aiv'] - df2['biv']

# A zero spread would give +/-inf and a crossed (negative) spread would flip the
# sign again, so those rows are left as NaN rather than fed to the filter.
df2['iv_range'] = iv_spread.where(iv_spread > 0)
df2['normalized_miv'] = (df2['miv'] - df2['ma_miv']) / df2['iv_range']
# try more methods

In [None]:
# Run the CUSUM filter on the spread-normalised mid IV and flag the rows it returns.
cumsum_event_index = cumsum_getTEvents(df2['normalized_miv'], h)

# Build the flag column as one boolean array and assign it in a single step.
# The previous `df2['cumsum_event'].iloc[0] = True` was chained assignment, which
# pandas does not guarantee to write through (and never does under copy-on-write).
event_mask = np.array(df2.index.isin(cumsum_event_index), dtype=bool)
if event_mask.size:
    # The first row is always treated as an event so that forward-filling the
    # curve from event rows has a starting value.
    event_mask[0] = True
df2['cumsum_event'] = event_mask

In [None]:
# Keep the local curve only on event rows, then carry the most recent event's
# curve forward to every following row until the next event.
df2['cumsum_curve'] = df2['localcurve'].where(df2['cumsum_event']).ffill()

#### Apply the fit-error metric to check whether the CUSUM filter with h = 3 performs well
     * plot w.r.t time
     * confluence

In [None]:
# Evaluate fit_fun2 on each row's held curve and that row's quote frame, then
# average the returned values row by row.
# NOTE(review): the row-wise mean presumes fit_fun2 returns several values per row — confirm.
per_row_fit2 = df2[['cumsum_curve', 'df']].apply(
    lambda row: fit_fun2(row['cumsum_curve'], row['df']),
    axis=1,
)
error2 = per_row_fit2.mean(axis=1)

In [None]:
error2.plot()

In [None]:
error2.mean()

## Kalman filter smoothing

In [None]:
reload(kf)

In [None]:
# Show the filter's signature and docstring. `help()` is plain Python, so the cell
# also works when the notebook is exported to a script (the `obj?` form is
# IPython-only syntax).
help(kf.simple_kalman_filter)

In [None]:
# Initial filter state: a leading 1 followed by the first local curve's skew,
# convexity and wing parameters, in this fixed order.
pre_params = df['localcurve'].iloc[0].params
state_keys = ['skew_left', 'skew_right', 'conv_left', 'conv_right', 'rightwing', 'leftwing']
pre_x = np.array([1] + [pre_params[key] for key in state_keys])
pre_x

In [None]:
# Quote frame stored in the second row of `df`. Selected by position so it agrees
# with the positional `.iloc` access used in the Kalman loop below even when
# `df` does not have a default 0..n-1 index.
# NOTE: this rebinds `df1`, which earlier held the merged bid/ask frame.
df1 = df['df'].iloc[1]

In [None]:
# Left / right thresholds on `logKFtau` used when building the observation matrix.
kleft, kright = -1.5, 1.5

In [None]:
# Value of the `s` column on the first row of the quote frame
# (presumably the underlying level — TODO confirm).
S = df1['s'].iat[0]

# Interpolate the `m` column over the frame's index and evaluate it at S.
atm_lookup = interp1d(df1.index, df1['m'])
volATM = atm_lookup([S])[0]

In [None]:
df1.head(5)

In [None]:
# observations
z = df1['m'].values

In [None]:
# Transformation (observation) matrix: one row per quote, one column per state entry.
# The helper columns col0..col6 are written into `df1` itself and then read back.
k = df1['logKFtau']
left_side = (k < 0).astype(int)
right_side = (k > 0).astype(int)

# NOTE(review): col1-col4 are a 0/1 side indicator times the constants kleft/kright
# (or their squares), so they do not vary with logKFtau within a side — confirm
# this matches the intended model parametrisation.
df1['col0'] = volATM
df1['col1'] = left_side * kleft
df1['col2'] = right_side * kright
df1['col3'] = left_side * kleft**2
df1['col4'] = right_side * kright**2
# Wing terms: distance beyond the right / left threshold, zero inside it.
df1['col5'] = (k > kright).astype(int) * (k - kright)
df1['col6'] = (k < kleft).astype(int) * (k - kleft)

h_mat = df1[[f'col{j}' for j in range(7)]].values

In [None]:
kalman_vc = VolCurve()

In [None]:
# Parameters of the first local curve. Selected by position so it works whatever
# index `df` carries (`[0]` on a Series is a label lookup).
df['localcurve'].iloc[0].params

In [None]:
df_parames = df[['localskew_left','localskew_right','localconv_left','localconv_right','localrightwing','localleftwing']]#['localcurve'][10].params

In [None]:
df_parames.describe()

In [None]:
# Pick up any edits made to the kalman_filter module since it was imported.
reload(kf)

# The same sigma is passed for both the third and the fifth argument of the filter.
sigma = 0.001

# Show the prior state, then run one filter step on the observations `z`.
display(pre_x)
kf_params = kf.simple_kalman_filter(z, pre_x, sigma, h_mat, sigma)

In [None]:
# Order of the curve parameters inside the filter state (after the leading constant 1).
_KF_STATE_KEYS = ['skew_left', 'skew_right', 'conv_left', 'conv_right', 'rightwing', 'leftwing']


def _kf_state_from_params(params):
    """Build the filter state ``[1, skew_left, skew_right, conv_left, conv_right, rightwing, leftwing]``."""
    return np.array([1] + [params[key] for key in _KF_STATE_KEYS])


def _kf_observation_matrix(quotes, vol_atm, kleft, kright):
    """Build the (n_quotes, 7) observation matrix for one quote frame.

    The columns are the same as the notebook's col0..col6, but they are assembled
    in a separate array so no helper columns are added to ``quotes``.
    """
    k = quotes['logKFtau']
    left_side = (k < 0).astype(int)
    right_side = (k > 0).astype(int)
    return np.column_stack([
        np.full(len(quotes), vol_atm),
        left_side * kleft,
        right_side * kright,
        left_side * kleft ** 2,
        right_side * kright ** 2,
        # Wing terms: distance beyond the right / left threshold, zero inside it.
        (k > kright).astype(int) * (k - kright),
        (k < kleft).astype(int) * (k - kleft),
    ])


def kalman_smooth_curves(frame, sigma, verbose=True):
    """Run the Kalman filter row by row and return one curve per row of ``frame``.

    The first curve is the row's own local curve. Every later curve is built from
    the filter output for that row, using the previous filtered curve's parameters
    as the prior state and as the source of ``kleft`` / ``kright``.

    Parameters
    ----------
    frame : DataFrame with a ``localcurve`` column (curve objects) and a ``df``
        column (per-row quote frames with ``s``, ``m`` and ``logKFtau``).
    sigma : value passed as both the third and the fifth argument of
        ``kf.simple_kalman_filter``.
    verbose : print the row counter, prior state and filter output for each row.

    Returns
    -------
    list of curves, the same length as ``frame`` (empty for an empty frame).
    """
    if frame.shape[0] == 0:
        return []

    # Seed with the first local curve. (The earlier code read `localcurvei` here,
    # a name that was never defined.)
    localcurve0 = frame['localcurve'].iloc[0]
    curves = [localcurve0]

    for i in range(1, frame.shape[0]):
        if verbose:
            print('---------------------------', i, '---------------------------')

        quotes = frame['df'].iloc[i]
        pre_params = curves[-1].params

        # Value of the `m` column interpolated at the first `s` value of the frame.
        S = quotes['s'].iloc[0]
        volATM = interp1d(quotes.index, quotes['m'])([S])[0]
        h_mat = _kf_observation_matrix(quotes, volATM, pre_params['kleft'], pre_params['kright'])

        pre_x = _kf_state_from_params(pre_params)
        z = quotes['m'].values
        kf_params = kf.simple_kalman_filter(z, pre_x, sigma, h_mat, sigma)

        if verbose:
            print(pre_x)
            print(kf_params)

        lc_i = frame['localcurve'].iloc[i]
        # Work on a copy so that, if `.params` exposes the curve's own dict, the
        # local fit stored in `frame` is not overwritten by the filtered values.
        kfc_param_dict = dict(lc_i.params)
        for position, key in enumerate(_KF_STATE_KEYS, start=1):
            kfc_param_dict[key] = kf_params[position]
        curves.append(VolCurve(lc_i.expiry_time_ms, kfc_param_dict))

    return curves


sigma = 0.001 # using spread information

# Assign the whole column at once; writing element by element through
# `df['kfcurve'].iloc[i] = ...` is chained assignment and is not guaranteed to
# reach `df` (it never does under pandas copy-on-write).
df['kfcurve'] = kalman_smooth_curves(df, sigma)

In [None]:
df

#### apply metric to check if the Kalman filter with sigma=0.001 is good or not?
     * plot w.r.t time
     * confluence

In [None]:
# Evaluate fit_fun2 on each row's Kalman-filtered curve and that row's quote frame,
# then average the returned values row by row.
# NOTE(review): the row-wise mean presumes fit_fun2 returns several values per row — confirm.
per_row_fit3 = df[['kfcurve', 'df']].apply(
    lambda row: fit_fun2(row['kfcurve'], row['df']),
    axis=1,
)
error3 = per_row_fit3.mean(axis=1)

In [None]:
error3.plot()

In [None]:
error3.mean()

In [None]:
# Summary statistics of the Kalman-filter fit error (`discribe` was a typo and raised AttributeError).
error3.describe()