About Stationarity

## Stationary Example

- Stationary(정상성)란 시간이 지나도 평균과 분산이 일정한 것을 의미한다.

In [29]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go

In [30]:
def norm_dist_data(mu, sigma):
    """Draw a sample from a normal distribution via NumPy's global RNG.

    Parameters
    ----------
    mu
        Mean of the distribution, forwarded as ``loc``.
    sigma
        Standard deviation of the distribution, forwarded as ``scale``.

    Returns
    -------
    Whatever ``np.random.normal(mu, sigma)`` returns; no ``size`` is passed,
    so scalar arguments yield a single float.
    """
    sample = np.random.normal(loc=mu, scale=sigma)
    return sample

In [31]:
# Generate data from a normal distribution whose mean and variance are constant.

# Fixed seed so that Restart & Run All reproduces the same samples and figure.
np.random.seed(42)

N = 1000
mu = 0
sigma = 5

data_list = [norm_dist_data(mu, sigma) for _ in range(N)]
data_index = list(range(N))
# Overall sample mean, repeated N times so it plots as a horizontal line.
data_mean = np.sum(data_list) / len(data_list)
data_mean_list = [data_mean] * N

df = pd.DataFrame({'data': data_list}, index=data_index)
# 30-sample moving average; rows before the window fills are NaN.
df['Rolling mean'] = df['data'].rolling(window=30).mean()

# Raw samples.
trace1 = go.Scatter(x=data_index,
                    y=data_list,
                    mode='lines',
                    name='Normal Dist Data')

# Global average of all samples (flat line).
trace2 = go.Scatter(x=data_index,
                    y=data_mean_list,
                    mode='lines',
                    name='Average')

# Rolling mean: for this series it should hover around the global average.
trace3 = go.Scatter(x=df.index,
                    y=df['Rolling mean'],
                    mode='lines',
                    name='Rolling Mean')

layout = go.Layout(title='Normal Dist Data')

fig = go.Figure(data=[trace1, trace2, trace3], layout=layout)
fig.show()

## Non-Stationary Data
- Non-Stationary(비정상성)란 시간이 지나면서 평균과 분산이 일정하지 않은 것을 의미한다.
- Trend(추세) 혹은 Seasonality(계절성)가 내포되어 있는 경우 시간이 지나면서 평균이나 분산이 변한다.

In [32]:
# Generate data whose mean increases with the time step, so the series is
# not stationary.

# Fixed seed so that Restart & Run All reproduces the same samples and figure.
np.random.seed(42)

N = 100
mu = 0
sigma = 5

# The mean passed to the sampler is mu + i, i.e. it grows by 1 per step.
data_list = [norm_dist_data(mu + i, sigma) for i in range(N)]
data_index = list(range(N))
# Overall sample mean, repeated N times so it plots as a horizontal line.
data_mean = np.sum(data_list) / len(data_list)
data_mean_list = [data_mean] * N

df = pd.DataFrame({'data': data_list}, index=data_index)
# 30-sample moving average; rows before the window fills are NaN.
df['Rolling mean'] = df['data'].rolling(window=30).mean()

# Raw samples with the upward trend.
trace1 = go.Scatter(x=data_index,
                    y=data_list,
                    mode='lines',
                    name='Normal Dist Data with Trend')

# Global average of all samples (flat line).
trace2 = go.Scatter(x=data_index,
                    y=data_mean_list,
                    mode='lines',
                    name='Average')

# Rolling mean: follows the trend instead of staying near the global average.
trace3 = go.Scatter(x=df.index,
                    y=df['Rolling mean'],
                    mode='lines',
                    name='Rolling Mean')

layout = go.Layout(title='Non-Stationary Data (Increasing Mean)')

fig = go.Figure(data=[trace1, trace2, trace3], layout=layout)
fig.show()

## KOSPI Data

In [33]:
# Load the ETF table from the Excel workbook; the first column becomes the row index.
df = pd.read_excel(
    '../Data/df_etf.xlsx',
    index_col=0,
)

In [34]:
# Work on a copy so the frame bound to `df` stays untouched.
df_copy = df.copy()

start_date = '2020-01-01'
end_date = '2021-01-01'

# Label-based slice between the two dates (both endpoints inclusive), keeping
# only the 'KODEX 200' column. The explicit .copy() makes kodex_200 an
# independent frame, so the column added below is written to kodex_200 itself
# regardless of whether pandas returned a view or a copy from .loc.
kodex_200 = df_copy.loc[start_date:end_date, ['KODEX 200']].copy()
# 30-row moving average; rows before the window fills are NaN.
kodex_200['Rolling 30 Mean'] = kodex_200['KODEX 200'].rolling(window=30).mean()

# Price series.
trace1 = go.Scatter(x=kodex_200.index,
                    y=kodex_200['KODEX 200'],
                    mode='lines',
                    name='Close Price')

# Moving average of the price series.
trace2 = go.Scatter(x=kodex_200.index,
                    y=kodex_200['Rolling 30 Mean'],
                    mode='lines',
                    name='Rolling 30 Mean Price')

layout = go.Layout(title='KODEX 200')

fig = go.Figure(data=[trace1, trace2], layout=layout)
fig.show()

In [35]:
# First-order difference: each row minus the previous row (first row is NaN).
kodex_200['Diff(1)'] = kodex_200['KODEX 200'].diff(periods=1)
# 30-row moving average of the differenced series.
kodex_200['Diff(1) Rolling Mean'] = kodex_200['Diff(1)'].rolling(window=30).mean()
# Display the frame as the cell output.
kodex_200

Unnamed: 0_level_0,KODEX 200,Rolling 30 Mean,Diff(1),Diff(1) Rolling Mean
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-02,28213,,,
2020-01-03,28209,,-4.0,
2020-01-06,27993,,-216.0,
2020-01-07,28275,,282.0,
2020-01-08,28095,,-180.0,
...,...,...,...,...
2020-12-23,36310,34931.900000,509.0,130.800000
2020-12-24,37033,35091.966667,723.0,160.066667
2020-12-28,37220,35247.066667,187.0,155.100000
2020-12-29,37852,35400.100000,632.0,153.033333


In [36]:
# Differenced series.
trace1 = go.Scatter(
    x=kodex_200.index,
    y=kodex_200['Diff(1)'],
    mode='lines',
    name='KODEX 200 Diff(1)',
)
# Moving average of the differenced series.
trace2 = go.Scatter(
    x=kodex_200.index,
    y=kodex_200['Diff(1) Rolling Mean'],
    mode='lines',
    name='KODEX 200 Diff(1) Rolling 30 Mean',
)

layout = go.Layout(title='KODEX 200 Diff(1)')

# Both traces share one figure.
fig = go.Figure(data=[trace1, trace2], layout=layout)
fig.show()