# imports

In [1]:
import pandas as pd
import numpy as np; np.random.seed(0)
import matplotlib.pyplot as plt; plt.rcParams['font.family'] = 'Malgun Gothic'
import seaborn as sns; sns.set_theme(font='Malgun Gothic')

import warnings; warnings.filterwarnings(action='ignore')
pd.set_option('display.max_rows', 100, 'display.max_columns', 100, 'max_colwidth', None)

# read

In [127]:
kospi  = pd.read_csv('./input/kospi.csv')  # KOSPI index
nasdaq = pd.read_csv('./input/nasdaq.csv') # NASDAQ index
sp500  = pd.read_csv('./input/sp500.csv')  # S&P 500 index

cd     = pd.read_csv('./input/cd.csv')     # daily CD interest rate
exc    = pd.read_csv('./input/exc.csv')    # daily KRW/USD exchange rate
agg    = pd.read_csv('./input/agg.csv')    # AGG, US government bonds

# 자산별 학습 데이터 가공

In [3]:
def makeTrain(df, start_date='2003-07-28', window=30):
    """Build the training frame of rolling features and the volatility target.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Date' and 'Close' columns. The caller's frame is not
        modified.
    start_date : str, default '2003-07-28'
        Only rows whose 'Date' is strictly greater than this value are kept.
    window : int, default 30
        Number of rows used for the rolling mean / standard deviation and the
        momentum difference, and also the forward shift used for the target.

    Returns
    -------
    pandas.DataFrame
        Columns 'Date', 'Close', 'MA<window>', 'Mom<window>', 'STD<window>'
        and 'target_STD<window>' (i.e. 'MA30', ... with the default), with
        every row containing a NaN removed and the index reset to 0..n-1.
    """
    # .copy() gives this function its own data; without it the column
    # assignments below write to a slice of the caller's frame, which pandas
    # reports as SettingWithCopyWarning and may silently not apply.
    train = df.loc[df['Date'] > start_date, ['Date', 'Close']].copy()

    close = train['Close']
    rolling_close = close.rolling(window=window)

    # Rolling mean of the close
    train[f'MA{window}'] = rolling_close.mean()

    # Momentum: close minus the close `window` rows earlier
    train[f'Mom{window}'] = close.diff(window)

    # Population standard deviation (ddof=0) over the trailing window
    train[f'STD{window}'] = rolling_close.std(ddof=0)

    # Target: the trailing standard deviation observed `window` rows later
    train[f'target_STD{window}'] = train[f'STD{window}'].shift(-window)

    # The warm-up rows at the start and the last `window` rows have NaNs
    return train.dropna(axis=0, how='any').reset_index(drop=True)

# create window

In [4]:
def createWindow(data, seqeuence_length=30, seqeuence_stride=1):
    """Slice ``data`` into overlapping windows of consecutive items.

    Every window holds ``seqeuence_length + seqeuence_stride`` consecutive
    items and consecutive windows start one item apart. (The parameter
    spellings are kept as-is because they are part of the call signature.)

    Parameters
    ----------
    data : sequence
        Anything supporting ``len(data)`` and ``data[a:b]`` slicing.
    seqeuence_length : int, default 30
        First part of the window size.
    seqeuence_stride : int, default 1
        Added to ``seqeuence_length`` to give the total window size; it does
        not change the step between windows, which is always 1.

    Returns
    -------
    list
        One slice of ``data`` per window, in order. Empty when ``data`` is
        shorter than a single window.
    """
    window_size = seqeuence_length + seqeuence_stride

    # A sequence of length n contains n - window_size + 1 full windows. The
    # previous bound (n - window_size) left out the window that ends on the
    # last item.
    n_windows = max(len(data) - window_size + 1, 0)

    return [data[start: start + window_size] for start in range(n_windows)]

# 정규화

In [5]:
# On hold for now: not yet decided whether this should be applied to every column.

def normalizeWindow(data):
    """Rescale each window relative to its own first element.

    Every value ``v`` in a window ``w`` is replaced by
    ``float(v) / float(w[0]) - 1``, so the first entry of each window
    becomes 0.

    Parameters
    ----------
    data : iterable of indexable sequences
        The windows to rescale.

    Returns
    -------
    numpy.ndarray
        The rescaled windows, built with ``np.array`` from nested lists.
    """
    return np.array([
        [float(value) / float(window[0]) - 1 for value in window]
        for window in data
    ])

# 자산별 변동성

In [6]:
from keras.models import Sequential
from keras.layers import Dense, LSTM

def fitSequence(data, train_size=3600):
    """Fit a one-layer LSTM regressor on windowed data and return the fit history.

    The first ``train_size`` entries along axis 0 are used for training and
    the remaining entries as validation data. Within each entry, everything
    except the last position along axis 1 is the input and the last position
    is the target.

    Parameters
    ----------
    data : array indexable as ``data[a:b, c]``
        NOTE(review): the hard-coded ``input_shape=(30, 5)`` suggests ``data``
        is expected to be shaped (n_windows, 31, 5) - confirm against the caller.
    train_size : int, default 3600
        Number of leading entries used for training.

    Returns
    -------
    The object returned by ``model.fit``. The model itself is not returned.
    """
    
    # Split along axis 0 (train / validation) and axis 1 (inputs / target).
    X_train = data[:train_size, :-1]
    X_test  = data[train_size:, :-1]
    y_train = data[:train_size, -1]
    y_test  = data[train_size:, -1]
    
    model = Sequential()
    
    # NOTE(review): with return_sequences=True the LSTM emits one output per
    # time step, so Dense(1) below is applied per step, while y_train holds a
    # single position per window - check that the output and target shapes
    # actually agree.
    model.add(LSTM(units=30, return_sequences=True, input_shape=(30, 5)))
    model.add(Dense(units=1))
    
    model.compile(loss='mse', optimizer='adam')
    
    # The held-out tail is passed as validation data for every epoch.
    history = model.fit(
        X_train, y_train, 
        validation_data=(X_test, y_test), 
        batch_size=50, epochs=50
    )
    
    return history

# 벤치마크 포트폴리오

In [156]:
# Benchmark allocation: fraction of the portfolio held in each asset.
kospi_weight  = .25
nasdaq_weight = .35
sp500_weight  = .40
cd_weight     = .00

# Parse the dates in place so every frame joins on the same datetime key.
for df in [kospi, nasdaq, sp500, cd]:
    df['Date'] = pd.to_datetime(df['Date'])

# Start from a calendar-day spine and left-join each asset onto it. Every
# value column is renamed before merging so the result does not depend on
# pandas' automatic _x/_y suffixes for the three 'Close' columns. Days missing
# from any source are dropped, leaving only dates where all four assets have
# a value (the original index labels are kept, so they contain gaps).
benchmark_portfolio = (
    pd.DataFrame({'Date': pd.date_range(start='2003-07-29', end='2021-11-05')})
    .merge(kospi[['Date', 'Close']].rename(columns={'Close': 'kospi'}), how='left', on='Date')
    .merge(nasdaq[['Date', 'Close']].rename(columns={'Close': 'nasdaq'}), how='left', on='Date')
    .merge(sp500[['Date', 'Close']].rename(columns={'Close': 'sp500'}), how='left', on='Date')
    .merge(cd[['Date', 'CD_interest']].rename(columns={'CD_interest': 'cd'}), how='left', on='Date')
    .dropna(axis=0, how='any')
)
assert not benchmark_portfolio.empty, 'no date has data for all four assets'

# Row-over-row relative change of each series.
# NOTE(review): for 'cd' this is the relative change of the CD rate itself,
# not the return earned by holding CDs; it has no effect while cd_weight is 0.
benchmark_portfolio['kospi_change']  = benchmark_portfolio['kospi'].pct_change()
benchmark_portfolio['nasdaq_change'] = benchmark_portfolio['nasdaq'].pct_change()
benchmark_portfolio['sp500_change']  = benchmark_portfolio['sp500'].pct_change()
benchmark_portfolio['cd_change']     = benchmark_portfolio['cd'].pct_change()

# Rebase every series to 1000 on the first remaining date. The base values are
# read from the data instead of being typed in as constants.
base_row = benchmark_portfolio.iloc[0]
benchmark_portfolio['kospi_base']  = benchmark_portfolio['kospi'] / base_row['kospi'] * 1000
benchmark_portfolio['nasdaq_base'] = benchmark_portfolio['nasdaq'] / base_row['nasdaq'] * 1000
benchmark_portfolio['sp500_base']  = benchmark_portfolio['sp500'] / base_row['sp500'] * 1000
benchmark_portfolio['cd_base']     = benchmark_portfolio['cd'] / base_row['cd'] * 1000

# Portfolio change = sum of (weight * asset change)
benchmark_portfolio['benchmark_change'] = (
    benchmark_portfolio['kospi_change'] * kospi_weight
    + benchmark_portfolio['nasdaq_change'] * nasdaq_weight
    + benchmark_portfolio['sp500_change'] * sp500_weight
    + benchmark_portfolio['cd_change'] * cd_weight
)

# The first row has no previous row, so its change is NaN; set it to 0.
# Address it by position: after dropna the first label is not guaranteed to
# be 0, and .loc[0, ...] on a missing label would append a new row instead.
benchmark_portfolio.loc[benchmark_portfolio.index[0], 'benchmark_change'] = 0

# Cumulative growth of one unit invested on the first date.
benchmark_portfolio['bench_cumprod'] = (1 + benchmark_portfolio['benchmark_change']).cumprod()

benchmark_portfolio

Unnamed: 0,Date,kospi,nasdaq,sp500,cd,kospi_change,nasdaq_change,sp500_change,cd_change,kospi_base,nasdaq_base,sp500_base,cd_base,benchmark_change,bench_cumprod
0,2003-07-29,722.33,8.55,989.28,3.98,,,,,1000.000000,1000.000000,1000.000000,1000.000000,0.000000,1.000000
1,2003-07-30,714.15,8.60,987.49,3.97,-0.011324,0.005848,-0.001809,-0.002513,988.675536,1005.847953,998.190603,997.487437,-0.001508,0.998492
2,2003-07-31,713.52,8.60,990.31,3.96,-0.000882,0.000000,0.002856,-0.002519,987.803359,1005.847953,1001.041161,994.974874,0.000922,0.999412
3,2003-08-01,727.26,8.50,980.15,3.95,0.019257,-0.011628,-0.010259,-0.002525,1006.825135,994.152047,990.771066,992.462312,-0.003359,0.996055
6,2003-08-04,718.54,8.25,982.82,3.94,-0.011990,-0.029412,0.002724,-0.002532,994.753091,964.912281,993.469998,989.949749,-0.012202,0.983901
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6670,2021-11-01,2978.94,209.90,4613.67,1.12,0.002781,0.000143,0.001800,0.000000,4124.070716,24549.707602,4663.664483,281.407035,0.001465,10.872104
6671,2021-11-02,3013.49,211.22,4630.65,1.12,0.011598,0.006289,0.003680,0.000000,4171.902039,24704.093567,4680.828481,281.407035,0.006573,10.943563
6672,2021-11-03,2975.71,209.48,4660.57,1.12,-0.012537,-0.008238,0.006461,0.000000,4119.599075,24500.584795,4711.072699,281.407035,-0.003433,10.905994
6673,2021-11-04,2983.22,212.27,4680.06,1.13,0.002524,0.013319,0.004182,0.008929,4129.995985,24826.900585,4730.773896,283.919598,0.006965,10.981957


In [None]:
# Year-end closing price of an asset

def getYearlyClose(df, start_year=2008, end_year=2018):
    """Collect, for each year, the last row dated before that year.

    For every ``year`` in ``range(start_year, end_year)`` the last row of
    ``df`` whose 'Date' compares less than ``str(year)`` is taken, i.e. the
    final row of the preceding year when ``df`` is sorted by date.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Date' and 'Close' columns. Not modified.
    start_year : int, default 2008
        First year label (inclusive).
    end_year : int, default 2018
        Last year label (exclusive).

    Returns
    -------
    pandas.DataFrame
        The selected rows with their original index labels. Columns are
        'Date', 'Close', 'MA30', 'Mom30', 'STD30', 'target_STD30' (NaN where
        ``df`` lacks them), then any other columns of ``df``, then
        'pct_change' holding the row-over-row relative change of 'Close'.
    """
    expected = ['Date', 'Close', 'MA30', 'Mom30', 'STD30', 'target_STD30']

    # Gather the rows first and concatenate once. Growing a frame inside the
    # loop from an empty object-dtype seed is quadratic and can turn 'Close'
    # into an object column.
    picks = [df[df['Date'] < str(year)].tail(1) for year in range(start_year, end_year)]
    picks = [row for row in picks if not row.empty]

    # With nothing selected, fall back to an empty slice that keeps df's
    # columns and dtypes.
    result = pd.concat(picks) if picks else df.iloc[:0]

    # Keep the historical column layout: the six expected columns first, then
    # whatever else df carries.
    extra = [column for column in result.columns if column not in expected]
    result = result.reindex(columns=expected + extra)

    result['pct_change'] = result['Close'].pct_change()

    return result

In [None]:
# Yearly closing rows for each of the three equity indices, in the same order
# as the names on the left.
kospi_yearly_close, nasdaq_yearly_close, sp500_yearly_close = map(
    getYearlyClose, (kospi, nasdaq, sp500)
)

In [None]:
# Split of the risky allocation across the three equity indices (sums to 1).
kospi_weight, nasdaq_weight, sp500_weight = .25, .40, .35

# One row per year: share held in risky assets and the complementary safe
# share, followed by each index's share of the whole portfolio
# (risky share * index weight). The '*_weigth' spelling is the actual column
# key and is kept unchanged.
glide_path = pd.DataFrame(
    {
        'year': list(map(str, range(2008, 2018))),
        'risk_weight': [1., 1., 1., .95, .88, .83, .75, .68, .61, .55],
        'safe_weight': [0., 0., 0., .05, .12, .17, .25, .32, .39, .45],
    }
).assign(
    kospi_weigth=lambda frame: frame['risk_weight'] * kospi_weight,
    nasdaq_weigth=lambda frame: frame['risk_weight'] * nasdaq_weight,
    sp500_weigth=lambda frame: frame['risk_weight'] * sp500_weight,
)

In [None]:
glide_path



In [None]:
# Yearly table: year label, risky share, safe share, plus a constant weight
# per equity index. The Korean column keys are runtime strings and are kept
# unchanged.
gp = pd.DataFrame(
    {
        '연도': list(map(str, range(2008, 2018))),
        '위험': [1., 1., 1., .95, .88, .83, .75, .68, .61, .55],
        '비위험': [0, 0, 0, .05, .12, .17, .25, .32, .39, .45],
    }
).assign(kospi_weight=.25, nasdaq_weight=.40, sp500_weight=.35)

# Placeholders for the per-index amounts; they start as NaN and are filled in
# row by row later.
gp = gp.assign(**dict.fromkeys(
    [
        'kospi_asset', 'nasdaq_asset', 'sp500_asset',
        'kospi_last_asset', 'nasdaq_last_asset', 'sp500_last_asset',
    ],
    np.nan,
))

# Seed the first row with a total of 10000 split by the index weights.
gp.loc[0, ['kospi_asset', 'nasdaq_asset', 'sp500_asset']] = [10000 * .25, 10000 * .40, 10000 * .35]

In [None]:
gp

In [None]:
# Column positions in gp, for reference:
# kospi_weight = 3, kospi_asset = 6, kospi_last_asset = 9, kospi_pct = 12

# The loop below reads gp['<index>_pct'], but no earlier cell creates those
# columns, so a fresh Restart & Run All stops here with a KeyError. Fill them
# from the yearly closes when they are absent; existing columns are left alone.
# NOTE(review): this assumes the yearly 'pct_change' series (one row per year
# 2008-2017, same length as gp) is the intended source - confirm.
for pct_col, yearly_close in (
    ('kospi_pct', kospi_yearly_close),
    ('nasdaq_pct', nasdaq_yearly_close),
    ('sp500_pct', sp500_yearly_close),
):
    if pct_col not in gp.columns:
        gp[pct_col] = yearly_close['pct_change'].to_numpy(dtype=float)

# Walk forward one row at a time: grow each holding by that row's change, then
# redistribute the combined total according to the row's index weights.
for i in range(len(gp) - 1):

    gp.loc[i + 1, 'kospi_last_asset'] = gp.loc[i, 'kospi_asset'] * (1 + gp.loc[i + 1, 'kospi_pct'])
    gp.loc[i + 1, 'nasdaq_last_asset'] = gp.loc[i, 'nasdaq_asset'] * (1 + gp.loc[i + 1, 'nasdaq_pct'])
    gp.loc[i + 1, 'sp500_last_asset'] = gp.loc[i, 'sp500_asset'] * (1 + gp.loc[i + 1, 'sp500_pct'])

    total = gp.loc[i + 1, 'kospi_last_asset'] + gp.loc[i + 1, 'nasdaq_last_asset'] + gp.loc[i + 1, 'sp500_last_asset']

    gp.loc[i + 1, 'kospi_asset']  = total * gp.loc[i + 1, 'kospi_weight']
    gp.loc[i + 1, 'nasdaq_asset'] = total * gp.loc[i + 1, 'nasdaq_weight']
    gp.loc[i + 1, 'sp500_asset']  = total * gp.loc[i + 1, 'sp500_weight']

In [None]:
gp

In [None]:
# Last KOSPI row whose Date compares less than '2008'.
kospi[kospi['Date'] < '2008'].tail(1)

In [None]:
# Preview the first 100 rows of the CD table.
cd.head(100)

In [None]:
window = 30

# Overlapping slices of kospi_temp, each window + 1 items long and starting
# one position after the previous one.
result_kospi = [
    kospi_temp[start : start + window + 1]
    for start in range(kospi_temp.shape[0] - window - 1)
]

In [None]:
window = 30

# Overlapping slices of the S&P 500 series, each window + 1 items long.
# This cell previously sliced kospi_temp (copy-paste from the KOSPI cell),
# which made result_sp500 an exact duplicate of result_kospi.
# NOTE(review): sp500_temp has to be prepared the same way as kospi_temp;
# neither is defined in the cells visible here - confirm the name.
result_sp500 = []

for index in range(sp500_temp.shape[0] - window - 1):
    result_sp500.append(sp500_temp[index : index + window + 1])

In [None]:
# Shape of the first window in each list, and the number of KOSPI windows.
result_kospi[0].shape, result_sp500[0].shape, len(result_kospi)

In [None]:
# Express every value of a window as its relative change from the window's
# first value, then stack all windows into one array.
normalize_kospi = np.array([
    [(float(price) / float(window_kospi[0])) - 1 for price in window_kospi]
    for window_kospi in result_kospi
])

In [None]:
# Shape of the whole normalized KOSPI array and of a single window.
normalize_kospi.shape, normalize_kospi[0].shape

In [None]:
# Express every value of a window as its relative change from the window's
# first value, then stack all windows into one array.
normalize_sp500 = np.array([
    [(float(price) / float(window_sp500[0])) - 1 for price in window_sp500]
    for window_sp500 in result_sp500
])

In [None]:
# Shape of the whole normalized S&P 500 array and of a single window.
normalize_sp500.shape, normalize_sp500[0].shape

In [None]:
# The first 552 normalized windows form the training set; their order is
# shuffled in place.
# NOTE(review): normalize_kospi is built with np.array, so this basic slice is
# presumably a view and the shuffle also reorders those rows of
# normalize_kospi itself; re-running the cell shuffles again - confirm this
# is intended.
kospi_train = normalize_kospi[:552, :]
np.random.shuffle(kospi_train)

# Inputs: every column of a window except the last one.
X_train = kospi_train[:, :-1]
X_train.shape

In [None]:
# Append a trailing axis of length 1 so the array is 3-D: (rows, columns, 1).
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_train.shape

In [None]:
# Target: the last column of each training window.
y_train = kospi_train[:, -1]

In [None]:
# Hold-out windows: everything after the first 552 rows. All columns are kept
# here so that the single [:, :-1] below drops only the target column and
# X_test has the same number of steps as X_train.
# This cell previously removed the last column twice (leaving X_test one step
# short of X_train) and stored the result in kospi_train, overwriting the
# training windows that the y_train cell reads.
kospi_test = normalize_kospi[552:, :]

X_test = kospi_test[:, :-1]
X_test.shape

In [None]:
# Append a trailing axis of length 1 so the array is 3-D: (rows, columns, 1).
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
X_test.shape

In [None]:
# Hold-out target: the last column of every window after the first 552.
y_test = normalize_kospi[552:, -1]
y_test.shape

In [None]:
# Compare the input shapes of the training and hold-out sets.
X_train.shape, X_test.shape

In [None]:
from keras.models import Sequential
# Import only the layers this cell uses; a wildcard import would pull every
# public name of keras.layers into the notebook namespace and hide where
# Dense and LSTM come from.
from keras.layers import Dense, LSTM

# Two stacked LSTM layers followed by a single linear output unit. The first
# LSTM returns its full output sequence so the second can consume it; the
# second returns only its final output.
model = Sequential()

model.add(LSTM(units=30, return_sequences=True, input_shape=(30, 1)))
model.add(LSTM(units=15, return_sequences=False))
model.add(Dense(units=1, activation='linear')) # gives better results than relu here; the reason is not understood yet

model.compile(loss='mse', optimizer='rmsprop') # performs about the same as adam
model.summary()

In [None]:
# Train for 50 epochs in batches of 10, passing the hold-out set as
# validation data; keep the object returned by fit in `history`.
history = model.fit(
    X_train, y_train, 
    validation_data=(X_test, y_test), 
    batch_size=10, 
    epochs=50
)

In [None]:
# Predict on the hold-out inputs and draw the predictions over the true
# targets on a single axes.
pred = model.predict(X_test)

fig, ax = plt.subplots(facecolor='white', figsize=(20, 10))
ax.plot(y_test, label='True')
ax.plot(pred, label='Prediction')
ax.legend()
plt.show()

# 위험자산 포트폴리오 변동성