In [1]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import statsmodels.api as sm

In [2]:
def _read_indexed(path):
    """Read a CSV file, using its first column as the row index."""
    return pd.read_csv(path, index_col=0)


# Left-hand-side (dependent-variable) portfolios for the regressions.
sector = _read_indexed('sector.csv')
# Price data.
price = _read_indexed('price.csv')
# Monthly returns per stock.
monthly_return = _read_indexed('monthly_return.csv')

# Market data; later cells read its 'Rm-Rf' and 'Rf' columns.
market_return = _read_indexed('market_excess_return.csv')

# Firm characteristics used for the factor sorts.
op = _read_indexed('op.csv')      # profitability
pbr = _read_indexed('pbr.csv')    # labelled "b/m" by the original author
size = _read_indexed('size.csv')  # size (used for the median split and cap weights)
inv = _read_indexed('inv.csv')    # investment


In [10]:
# Scale every sector value by 1/100 (presumably percent -> decimal; confirm against sector.csv).
# Not idempotent: running this cell twice rescales the data twice.
sector = sector.mul(0.01)

In [3]:
# Subtract 1 from every entry (presumably the CSV stores gross returns 1 + r; confirm).
# Not idempotent: running this cell twice subtracts 1 twice.
monthly_return = monthly_return.sub(1)

In [4]:
def _extreme_leg_returns(members, factor_row, caps, period_returns):
    """Cap-weighted returns of the top and bottom factor buckets of one size group.

    Parameters
    ----------
    members : index of the company labels belonging to the size group.
    factor_row : Series of factor values for the period, indexed by company.
    caps : Series of market caps for the period (NaNs already dropped).
    period_returns : Series of returns for the period, indexed by company.

    Returns
    -------
    (high_return, low_return) for the companies strictly above the 70th
    percentile and strictly below the 30th percentile of the factor.
    """
    # Companies without a factor value take no part in the sort.
    scores = factor_row[members].dropna()
    upper_cut = scores.quantile(0.7)
    lower_cut = scores.quantile(0.3)

    leg_returns = []
    for names in (scores[scores > upper_cut].index, scores[scores < lower_cut].index):
        # Market-cap weights normalised to sum to one within the bucket.
        weights = caps[names] / caps[names].sum()
        leg_returns.append(period_returns[names] @ weights)
    return leg_returns[0], leg_returns[1]


def get_factor_premium(i : int, Factor : pd.DataFrame):
    """Compute the factor and size premiums for one period from a 2 x (high/low) sort.

    Reads the module-level ``monthly_return`` and ``size`` frames.

    Parameters
    ----------
    i : positional row number used with ``.iloc`` on ``monthly_return``,
        ``size`` and ``Factor`` alike.
    Factor : frame of the sorting characteristic, one column per company.

    Returns
    -------
    (factor_premium, size_premium)
        factor_premium: mean of the two high-factor buckets minus mean of the
        two low-factor buckets.
        size_premium: mean of the two small-cap buckets minus mean of the two
        large-cap buckets (only the high/low buckets enter; the middle 40% is
        not used).

    Notes
    -----
    All comparisons are strict, so a company exactly at the size median or at
    a quantile breakpoint is left out of every bucket.
    NOTE(review): caps, factor values and returns are all taken from the same
    row ``i``; confirm the input files are already lagged as intended.
    """
    period_returns = monthly_return.iloc[i, :]

    # Universe: companies with a return this period, split at the median size.
    traded = period_returns.dropna().index
    caps_traded = size.iloc[i][traded]
    cutoff = caps_traded.median()

    large_names = caps_traded[caps_traded > cutoff].index   # large-cap group
    small_names = caps_traded[caps_traded < cutoff].index   # small-cap group

    factor_row = Factor.iloc[i, :]
    caps = size.iloc[i, :].dropna()

    # Within each size group: top / bottom 30% by factor, cap-weighted.
    large_high, large_low = _extreme_leg_returns(large_names, factor_row, caps, period_returns)
    small_high, small_low = _extreme_leg_returns(small_names, factor_row, caps, period_returns)

    # High-minus-low on the factor, small-minus-big on size.
    factor_premium = ((large_high + small_high)/2) - ((large_low + small_low)/2)
    size_premium = (small_high + small_low)/2 - (large_high + large_low)/2
    return factor_premium, size_premium

In [5]:
size_pm_df = pd.DataFrame(columns = ['SMB_value', 'SMB_profit', 'SMB_inv'], index = monthly_return.index)

HML = []
RMW = []
CMA = []


for i in range(len(monthly_return)):
    vf, sf = get_factor_premium(i, pbr)
    HML.append(vf)
    size_pm_df.iloc[i]['SMB_value'] = sf
for i in range(len(monthly_return)):
    qp, sp = get_factor_premium(i, op)
    RMW.append(qp)
    size_pm_df.iloc[i]['SMB_profit'] = sp
for i in range(len(monthly_return)):
    mp, sp = get_factor_premium(i, inv)
    CMA.append(mp)
    size_pm_df.iloc[i]['SMB_inv'] = sp


PREMIUM = pd.DataFrame({'HML' : HML, 
                        'CMA' : RMW,
                        'UMD' : CMA,
                        'SMB' : size_pm_df.mean(axis = 'columns').values
                        }, index = monthly_return.index)

In [6]:
PREMIUM['Rm-Rf'] = market_return['Rm-Rf']
start_date = '2000-01-31'
sector = sector.loc[start_date:]
PREMIUM = PREMIUM.loc[start_date:]
market_return = market_return.loc[start_date:]
rf = market_return['Rf']
rf.index = pd.to_datetime(rf.index)
sector.index = pd.to_datetime(sector.index)
sector_return = sector.sub(rf, axis = 0)
PREMIUM.index = pd.to_datetime(PREMIUM.index)
exposure = pd.DataFrame(index = ['const', 'HML', 'CMA', 'UMD', 'SMB', 'Rm-Rf'], columns = sector_return.columns).T

In [65]:
# Trial regression of one sector's excess return on the factor premiums.
Y = sector_return['경기관련소비재']
X = sm.add_constant(PREMIUM)   # prepend the intercept column

model = sm.OLS(Y, X).fit()
print(model.params)
# NOTE(review): the original comment here expressed surprise at this output; the reason is not recorded.

const    0.000839
HML     -0.000313
CMA      0.000790
UMD     -0.018047
SMB      0.004330
Rm-Rf    0.916761
dtype: float64


In [75]:
# Regress every sector's excess return on the factor premiums and store the
# fitted coefficients as that sector's row in `exposure`.
#
# The design matrix is built once, here, from PREMIUM. The loop previously
# reused an `X` left over from an earlier exploratory cell and re-applied
# add_constant on every iteration, so it only worked if that cell had run.
X = sm.add_constant(PREMIUM)

for column in sector_return.columns:
    print(column)
    Y = sector_return[column]
    model = sm.OLS(Y, X).fit()

    # Row assignment aligns model.params to the exposure columns by label.
    exposure.loc[column] = model.params


에너지
소재
산업재
경기관련소비재
필수소비재
건강관리
금융
IT
커뮤니케이션서비스
유틸리티


In [77]:
# Persist the sector-by-factor coefficient table.
exposure.to_csv(path_or_buf='Factor_Exposure.csv')