## Z-Score

In [25]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller
import plotly.graph_objects as go
from sklearn import linear_model

def adf_test(df, critical_value):
    """Run an Augmented Dickey-Fuller test and report the verdict.

    Parameters
    ----------
    df : array-like
        The series handed unchanged to ``adfuller``. Despite the name, the
        caller in this notebook passes a single spread series.
    critical_value : float
        Threshold the p-value is compared against (it plays the role of a
        significance level, e.g. 0.05).

    Returns
    -------
    bool
        ``True`` when the p-value is strictly below ``critical_value``,
        ``False`` otherwise.

    Side effects
    ------------
    Prints the p-value followed by a one-line stationarity verdict.
    """
    # Element 1 of the adfuller result is treated as the p-value.
    p_value = adfuller(df)[1]
    is_stationary = bool(p_value < critical_value)

    verdict = (
        "TimeSeries Data is Stationary"
        if is_stationary
        else "TimeSeries Data is Non-Stationary"
    )

    print('P-Value of Spread: {}'.format(p_value))
    print(verdict)
    return is_stationary

def signal_level(spread, z_score):
    """Return the level lying ``z_score`` standard deviations from the mean.

    Parameters
    ----------
    spread : object exposing ``.mean()`` and ``.std()``
        The spread series; a pandas Series in this notebook, for which
        ``.std()`` is the sample standard deviation.
    z_score : float
        Signed number of standard deviations; positive values give a level
        above the mean, negative values a level below it.

    Returns
    -------
    float
        ``z_score * std + mean`` of ``spread``.
    """
    return z_score * spread.std() + spread.mean()

# Load the ETF table from Excel; the first column of the sheet becomes the row index.
df = pd.read_excel(io="../Data/df_etf.xlsx", index_col=0)

# Work on an independent copy so the freshly loaded frame is left untouched.
df_copy = df.copy(deep=True)

In [5]:
# Labels of every column whose name contains the substring 'KODEX', in column order.
condition_kodex = [column for column in df_copy.columns if 'KODEX' in column]

# Keep all rows but only the KODEX columns.
df_copy_kodex = df_copy.loc[:, condition_kodex]

In [28]:
# Pair under study and the sample window used for the fit.
etf_1 = 'KODEX 코스피'
etf_2 = 'KODEX 코스닥150'

start_date = '2021-01-01'
end_date = '2021-08-01'

# Slice the window for the two ETFs and drop rows where either one is missing.
df_copy_kodex_sample = df_copy_kodex.loc[start_date:end_date,[etf_1,etf_2]].dropna()

# Regress etf_1 on etf_2 through the origin (no intercept); the single slope
# serves as the hedge ratio of the pair.
model = linear_model.LinearRegression(fit_intercept=False)
model.fit(df_copy_kodex_sample[[etf_2]], df_copy_kodex_sample[[etf_1]])

# The target was passed as a one-column frame, so `coef_` is an array rather
# than a scalar. Extract the single slope explicitly instead of relying on a
# length-1 array broadcasting against the Series.
hedge_ratio = float(model.coef_.ravel()[0])

spread = df_copy_kodex_sample[etf_1] - hedge_ratio*df_copy_kodex_sample[etf_2]

# adf_test prints the p-value and verdict itself; the outer print shows the
# boolean it returns.
# NOTE(review): the hedge ratio is estimated on the same sample, so a plain ADF
# p-value on this residual is likely optimistic; consider an Engle-Granger
# cointegration test (statsmodels `coint`) - confirm with the analysis owner.
print(adf_test(spread,critical_value=0.05))

# Constant series for the horizontal reference lines. The mean is computed once
# rather than once per row.
n_points = len(spread)
spread_mean = spread.mean()
data_mean = [spread_mean] * n_points


### upper level & lower level

# Number of standard deviations from the mean at which the signal bands sit.
# The misspelt name `z_scroe` is kept because other cells may reference it.
z_scroe = 1.6

upper_level = signal_level(spread=spread,z_score=z_scroe)
lower_level = signal_level(spread=spread, z_score=-z_scroe)

data_upper_level = [upper_level] * n_points
data_lower_level = [lower_level] * n_points

# One line trace for the spread plus three flat reference lines.
trace1 = go.Scatter(x=spread.index,
                   y=spread,
                   mode='lines',
                   name='Spread')
trace2 = go.Scatter(x=spread.index,
                    y=data_mean,
                    mode='lines',
                    name='Spread Mean')
trace3 = go.Scatter(x=spread.index,
                    y= data_upper_level,
                    mode='lines',
                    name='Upper Level')
trace4 = go.Scatter(x=spread.index,
                    y= data_lower_level,
                    mode='lines',
                    name='Lower Level')
layout = go.Layout(title = 'Spread of {} & {}'.format(etf_1, etf_2))
fig = go.Figure(data = [trace1, trace2,trace3,trace4], layout = layout)
# Axis titles let the figure stand on its own when the notebook is skimmed.
# The x axis is assumed to be dates because the index is sliced with date strings.
fig.update_layout(template = 'plotly_dark',
                 width = 1000,
                 height = 600,
                 xaxis_title = 'Date',
                 yaxis_title = 'Spread')
fig.show()

P-Value of Spread: 0.015095139812408447
TimeSeries Data is Stationary
True
