In [9]:
import pandas as pd
from statsmodels.tsa.stattools import adfuller
import plotly.graph_objects as go
from sklearn import linear_model

# Load the ETF data and select the KODEX ETFs

# The first spreadsheet column becomes the index; work on a copy so the
# frame as loaded stays untouched.
df = pd.read_excel('../Data/df_etf.xlsx', index_col = 0)
df_copy = df.copy()

# Column labels that contain the substring 'KODEX'
condition_kodex = [column for column in df_copy.columns if 'KODEX' in column]

df_copy_kodex = df_copy.loc[:, condition_kodex]

def adf_test(df,critical_value):
    """Run an Augmented Dickey-Fuller test and report whether the series is stationary.

    Parameters
    ----------
    df : array-like
        The series handed to ``adfuller``.
    critical_value : float
        Threshold the ADF p-value is compared against (it plays the role of
        a significance level, e.g. 0.05).

    Returns
    -------
    bool
        True when the p-value is strictly below ``critical_value``, otherwise
        False. The p-value and the verdict are also printed.
    """
    # Element 1 of adfuller's result is used as the p-value.
    p_value = adfuller(df)[1]
    is_stationary = p_value < critical_value

    if is_stationary:
        verdict = "TimeSeries Data is Stationary"
    else:
        verdict = "TimeSeries Data is Non-Stationary"

    print('P-Value of Spread: {}'.format(p_value))
    print(verdict)
    return bool(is_stationary)

In [12]:
# Three-ETF spread: regress etf_1 on etf_2 and etf_3 (no intercept), form the
# residual spread, test it with adf_test, and plot it against its mean.
etf_1 = 'KODEX 코스피'
etf_2 = 'KODEX 반도체'
etf_3 = 'KODEX 2차전지산업'

start_date = '2020-01-01'
end_date = '2020-06-30'

# Keep only the rows in the window where all three ETFs have a value.
df_copy_kodex_sample = df_copy_kodex.loc[start_date:end_date,[etf_1, etf_2, etf_3]].dropna()

X = df_copy_kodex_sample[[etf_2, etf_3]]
y = df_copy_kodex_sample[[etf_1]]

# Regression through the origin: only the two slope coefficients are fitted.
model = linear_model.LinearRegression(fit_intercept = False)

model.fit(X, y)

# y is a one-column frame, so coef_ is 2-D; row 0 holds one coefficient per
# regressor, in the column order of X (etf_2, etf_3).
hedge_ratio_2, hedge_ratio_3 = model.coef_[0]

spread = df_copy_kodex_sample[etf_1] - hedge_ratio_2 * df_copy_kodex_sample[etf_2] - hedge_ratio_3 * df_copy_kodex_sample[etf_3]

adf_test(df = spread, critical_value = 0.05)

# Compute the mean once and repeat it, instead of re-evaluating
# spread.mean() for every row of the series.
spread_mean = spread.mean()
data_mean = [spread_mean] * spread.shape[0]

trace1 = go.Scatter(x = spread.index,
                   y = spread,
                   mode = 'lines',
                   name = 'Spread')

# Flat reference line at the sample mean of the spread.
trace2 = go.Scatter(x = spread.index,
                   y = data_mean,
                   mode = 'lines',
                   name = 'Spread Mean')
layout = go.Layout(title = 'Spread of {} & ({} & {})'.format(etf_1, etf_2, etf_3))
fig = go.Figure(data = [trace1, trace2], layout = layout)
fig.update_layout(template = 'plotly_white',
                 width = 1000,
                 height = 600)
fig.show()

P-Value of Spread: 0.026626956102771347
TimeSeries Data is Stationary


In [13]:
# Two-ETF spread: regress etf_1 on etf_2 (no intercept), form the residual
# spread, test it with adf_test, and plot it against its mean.
etf_1 = 'KODEX 코스피'
etf_2 = 'KODEX 반도체'

start_date = '2020-01-01'
end_date = '2020-06-30'

# Select only the two ETFs this cell analyses. Pulling in a third column
# would make the cell depend on a variable it never defines and would let
# dropna() discard rows because of a series that is not part of the pair.
df_copy_kodex_sample = df_copy_kodex.loc[start_date:end_date,[etf_1, etf_2]].dropna()

X = df_copy_kodex_sample[[etf_2]]
y = df_copy_kodex_sample[[etf_1]]

# Create Model (regression through the origin: a single slope coefficient)
model = linear_model.LinearRegression(fit_intercept = False)

# Fitting
model.fit(X = X,y = y)

# y is a one-column frame, so coef_ is 2-D; [0][0] is the single slope.
hedge_ratio = model.coef_[0][0]

spread = df_copy_kodex_sample[etf_1] - hedge_ratio * df_copy_kodex_sample[etf_2]
adf_test(df = spread, critical_value = 0.05)

# Compute the mean once and repeat it, instead of re-evaluating
# spread.mean() for every row of the series.
spread_mean = spread.mean()
data_mean = [spread_mean] * spread.shape[0]

trace1 = go.Scatter(x = spread.index,
                   y = spread,
                   mode ='lines',
                   name = 'Spread')
# Flat reference line at the sample mean of the spread.
trace2 = go.Scatter(x = spread.index,
                   y = data_mean,
                   mode = 'lines',
                   name = 'Spread Mean')
layout = go.Layout(title = 'Spread of {} & {}'.format(etf_1, etf_2),
                  yaxis = {'tickformat':','})
fig = go.Figure(data= [trace1, trace2], layout = layout)
fig.update_layout(template = 'plotly_dark',
                 width = 1000,
                 height = 600)
fig.show()


P-Value of Spread: 0.04074857677179322
TimeSeries Data is Stationary
