<a href="https://colab.research.google.com/github/yeonghun00/finance-machine-learning-notes/blob/main/yield%20curve%20prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3144622

예측값: 
- 1M, 5Y, 30Y 만기 수익률

예측 변수:
- 1M, 3M, 1Y, 2Y, 5Y, 7Y, 10Y, 30Y 만기 수익률
- 공공, 외국정부, 연방준비금 부채비율
- 10년 국채 금리 대비 Baa 등급 부채 기업 스프레드

In [115]:
import requests
from io import StringIO
import numpy as np
import pandas as pd
from datetime import datetime

def get_fred(id, start_date='2000-01-01'):
  dic = {}
  if type(id) == str:
    id = [id]
  for i in id:
    url = 'https://fred.stlouisfed.org/graph/fredgraph.csv?id=' + i + '&revision_date=' + datetime.today().strftime('%Y-%m-%d') + '&nd=1901-01-01'
    r = requests.get(url)
    s = str(r.content,'utf-8')
    data = StringIO(s) 
    df = pd.read_csv(data)
    df['DATE'] = pd.to_datetime(df['DATE'], format='%Y-%m-%d')
    df[i] = [np.nan if x=='.' else float(x) for x in df[i]]
    df = df.set_index('DATE')
    df = df.rename(columns={i: "data"})
    df = df.ffill()
    dic[i] = df[df.index > start_date]
  return pd.concat([dic[x] for x in dic], axis=1)

In [116]:
tickers = ['DGS1MO', 'DGS3MO', 'DGS1', 'DGS2', 'DGS5', 'DGS7', 'DGS10', 'DGS30', 
           'TREAST', 'FYGFDPUN', 'FDHBFIN', 'GFDEBTN', 'BAA10Y']
data = get_fred(tickers)
data.columns = tickers

In [127]:
data

Unnamed: 0_level_0,DGS1MO,DGS3MO,DGS1,DGS2,DGS5,DGS7,DGS10,DGS30,TREAST,FYGFDPUN,FDHBFIN,GFDEBTN,BAA10Y
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2000-01-03,,5.48,6.09,6.38,6.50,6.65,6.58,6.61,,,,,1.69
2000-01-04,,5.43,6.00,6.30,6.40,6.56,6.49,6.53,,,,,1.72
2000-01-05,,5.44,6.05,6.38,6.51,6.68,6.62,6.64,,,,,1.67
2000-01-06,,5.41,6.03,6.35,6.46,6.63,6.57,6.58,,,,,1.67
2000-01-07,,5.38,6.00,6.31,6.42,6.58,6.52,6.55,,,,,1.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-12-13,3.89,4.35,4.64,4.22,3.66,3.60,3.51,3.53,,,,,1.91
2022-12-14,3.91,4.33,4.64,4.23,3.64,3.59,3.49,3.52,5513754.0,,,,1.95
2022-12-15,3.95,4.34,4.65,4.23,3.62,3.56,3.44,3.48,,,,,1.97
2022-12-16,3.94,4.31,4.61,4.17,3.61,3.58,3.48,3.53,,,,,1.98


In [128]:
return_period = 5

In [131]:
data[['DGS1MO', 'DGS5', 'DGS30']]

Unnamed: 0_level_0,DGS1MO,DGS5,DGS30
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2000-01-03,,6.50,6.61
2000-01-04,,6.40,6.53
2000-01-05,,6.51,6.64
2000-01-06,,6.46,6.58
2000-01-07,,6.42,6.55
...,...,...,...
2022-12-13,3.89,3.66,3.53
2022-12-14,3.91,3.64,3.52
2022-12-15,3.95,3.62,3.48
2022-12-16,3.94,3.61,3.53


In [134]:
data['DGS1MO']

DATE
2000-01-03     NaN
2000-01-04     NaN
2000-01-05     NaN
2000-01-06     NaN
2000-01-07     NaN
              ... 
2022-12-13    3.89
2022-12-14    3.91
2022-12-15    3.95
2022-12-16    3.94
2022-12-19    3.95
Name: DGS1MO, Length: 6017, dtype: float64

In [138]:
Y = np.log(data['DGS1MO']).diff(return_period).shift(-return_period)
Y.name = Y.name+'_pred'


In [139]:
Y

DATE
2000-01-03   NaN
2000-01-04   NaN
2000-01-05   NaN
2000-01-06   NaN
2000-01-07   NaN
              ..
2022-12-13   NaN
2022-12-14   NaN
2022-12-15   NaN
2022-12-16   NaN
2022-12-19   NaN
Name: DGS1MO_pred, Length: 6017, dtype: float64

In [126]:
Y = np.log(data['DGS1MO']).diff(return_period).shift(-return_period)
Y.name = Y.name[-1]+'_pred'

X1 = np.log(stk_data.loc[:, ('Adj Close', ('GOOGL', 'IBM'))]).diff(return_period)
X1.columns = X1.columns.droplevel()
X2 = np.log(ccy_data).diff(return_period)
X3 = np.log(idx_data).diff(return_period)

X4 = pd.concat([np.log(stk_data.loc[:, ('Adj Close', 'MSFT')]).diff(i) for i in [return_period, return_period*3, return_period*6, return_period*12]], axis=1).dropna()
X4.columns = ['MSFT_DT', 'MSFT_3DT', 'MSFT_6DT', 'MSFT_12DT']

X = pd.concat([X1, X2, X3, X4], axis=1)

dataset = pd.concat([Y, X], axis=1).dropna().iloc[::return_period, :]
Y = dataset.loc[:, Y.name]
X = dataset.loc[:, X.columns]

TypeError: ignored