In [60]:
import pandas as pd
import numpy as np
import os

import statsmodels.api as sm

from sklearn.decomposition import PCA
from sklearn import preprocessing

def minmaxscaler_for_train(factor, std = 3):
    """Min-max scale every column of ``factor`` and record the fit statistics.

    Parameters
    ----------
    factor : pandas.DataFrame
        Numeric panel with one column per factor.
    std : float, default 3
        Half-width, in standard deviations, of the band stored in the
        ``'up'`` / ``'down'`` rows of the statistics table
        (``mean + std * sd`` and ``mean - std * sd``). The band is only
        recorded; the scaled values are not clipped to it.

    Returns
    -------
    factor_normal : pandas.DataFrame
        ``(factor - min) / (max - min)`` per column, float dtype, same index
        and columns as ``factor``. A column whose max equals its min is
        scaled to 0 instead of producing 0/0.
    factorInfor : pandas.DataFrame
        Rows ``'mean', 'std', 'up', 'down', 'min', 'max'`` by factor column;
        pass it to ``minmaxscaler_for_test`` to reuse the same bounds.
    """
    factor_mean = factor.mean()
    factor_std = factor.std()
    factor_min = factor.min()
    factor_max = factor.max()

    # Build the statistics table in one shot so it keeps a float dtype
    # (filling an empty frame row by row leaves it as object dtype).
    factorInfor = pd.DataFrame({
        'mean': factor_mean,
        'std': factor_std,
        'up': factor_mean + std * factor_std,
        'down': factor_mean - std * factor_std,
        'min': factor_min,
        'max': factor_max,
    }).T.reindex(columns=factor.columns)

    # Guard against constant columns: a zero range would give 0/0 = NaN.
    span = factor_max - factor_min
    span = span.mask(span == 0, 1.0)

    # Series are aligned on the column labels, so this scales column-wise.
    factor_normal = (factor - factor_min) / span

    return factor_normal, factorInfor

def minmaxscaler_for_test(factor, factorInfor):
    """Scale ``factor`` with min/max bounds that were fitted on other data.

    Parameters
    ----------
    factor : pandas.DataFrame
        Numeric panel to transform, one column per factor.
    factorInfor : pandas.DataFrame
        Statistics table with at least ``'min'`` and ``'max'`` rows and a
        column for every column of ``factor`` (as returned by
        ``minmaxscaler_for_train``).

    Returns
    -------
    pandas.DataFrame
        ``(factor - min) / (max - min)`` per column, float dtype. Values are
        not clipped, so data outside the fitted range maps outside [0, 1].
        A column whose fitted max equals its min is shifted by the min only.

    Raises
    ------
    KeyError
        If ``factorInfor`` lacks a column present in ``factor``.
    """
    # Select exactly the columns being transformed and coerce to float so an
    # object-dtype statistics table still yields a numeric result.
    factor_min = factorInfor.loc['min', factor.columns].astype(float)
    factor_max = factorInfor.loc['max', factor.columns].astype(float)

    # Guard against a zero fitted range, which would divide by zero.
    span = factor_max - factor_min
    span = span.mask(span == 0, 1.0)

    factor_normal = (factor - factor_min) / span

    return factor_normal


In [61]:
# Sample window used to slice the return series, and the data directories
# (relative to the notebook's working directory).
begin_date, end_date = pd.Timestamp("1980-01-30"), pd.Timestamp("2009-12-01")
datapath, rawpath = "../data", "../data/raw"

In [62]:
# Load the log excess returns, index them by parsed date, and keep only the
# rows inside [begin_date, end_date].
LogExcessReturn = (
    pd.read_csv(os.path.join(datapath, "log_excess_return.csv"))
    .set_index("Date")
)
LogExcessReturn.index = pd.to_datetime(LogExcessReturn.index)
LogExcessReturn = LogExcessReturn.loc[begin_date:end_date]

In [63]:
# Show the date index of the return panel after the window slice.
LogExcessReturn.index

DatetimeIndex(['1980-01-31', '1980-02-29', '1980-03-31', '1980-04-30',
               '1980-05-31', '1980-06-30', '1980-07-31', '1980-08-31',
               '1980-09-30', '1980-10-31',
               ...
               '2009-02-28', '2009-03-31', '2009-04-30', '2009-05-31',
               '2009-06-30', '2009-07-31', '2009-08-31', '2009-09-30',
               '2009-10-31', '2009-11-30'],
              dtype='datetime64[ns]', name='Date', length=359, freq=None)

In [64]:
# Show which return columns were loaded.
LogExcessReturn.columns

Index(['yr2', 'yr3', 'yr4', 'yr5'], dtype='object')

In [65]:
# Load the directly observed factors and index them by parsed date.
dfFactor = (
    pd.read_csv(os.path.join(datapath, "dfFactor.csv"))
    .set_index("Date")
)
dfFactor.index = pd.to_datetime(dfFactor.index)

# Replace column labels containing spaces or symbols with plain identifiers.
dfFactor = dfFactor.rename(columns={
    "Amihud liquidity": "AmihudLiq",
    "Mkt-RF": "MktRF",
    "BWSENT^": "BWSENTcoth",
})
dfFactor.columns

Index(['AmihudLiq', 'AggLiq', 'InnovLiq', 'TradedLiq', 'CLI', 'MktRF', 'SMB',
       'HML', 'RMW', 'CMA', 'RF', 'BWSENTcoth', 'BWSENT', 'RV', 'BV', 'RJ',
       'CP'],
      dtype='object')

In [66]:
# Load the PCA-derived factors and index them by parsed date.
pcaFactor = (
    pd.read_csv(os.path.join(datapath, "pcaFactor.csv"))
    .set_index("Date")
)
pcaFactor.index = pd.to_datetime(pcaFactor.index)

# Replace column labels containing punctuation with plain identifiers.
pcaFactor = pcaFactor.rename(columns={
    "Size/BM": "SizeBM",
    "Prices,Yield,Dividends(PYD)": "PYD",
    "RiskFactors(RiF)": "RiF",
})

In [67]:
# Show the date index of the PCA factor panel.
pcaFactor.index

DatetimeIndex(['1980-01-31', '1980-02-29', '1980-03-31', '1980-04-30',
               '1980-05-31', '1980-06-30', '1980-07-31', '1980-08-31',
               '1980-09-30', '1980-10-31',
               ...
               '2009-02-28', '2009-03-31', '2009-04-30', '2009-05-31',
               '2009-06-30', '2009-07-31', '2009-08-31', '2009-09-30',
               '2009-10-31', '2009-11-30'],
              dtype='datetime64[ns]', name='Date', length=359, freq=None)

In [68]:
# Show the PCA factor column names.
pcaFactor.columns

Index(['CLI_allPca', 'tech_class0', 'tech_class1', 'tech_class2',
       'tech_class3', 'tech_class4', 'tech_class5', 'tech_class6',
       'tech_class7', 'tech_class8', 'tech_class9', 'output_and_income',
       'labor_market', 'housing', 'consumption', 'money', 'bond_ex', 'price',
       'stock', 'PYD', 'RiF', 'Industries', 'SizeBM'],
      dtype='object')

In [69]:
# Initial dependent variable: the 'yr2' return column. The regression cells
# further down reassign Y before fitting.
Y = LogExcessReturn['yr2']

In [70]:
# Put the observed and PCA factors side by side, then min-max scale every
# column; the fit statistics returned alongside are not needed here.
Factor = pd.concat(objs=[dfFactor, pcaFactor], axis="columns")
Factor_normal, _ = minmaxscaler_for_train(factor=Factor)

In [71]:
# Show the columns of the combined, scaled factor panel.
Factor_normal.columns

Index(['AmihudLiq', 'AggLiq', 'InnovLiq', 'TradedLiq', 'CLI', 'MktRF', 'SMB',
       'HML', 'RMW', 'CMA', 'RF', 'BWSENTcoth', 'BWSENT', 'RV', 'BV', 'RJ',
       'CP', 'CLI_allPca', 'tech_class0', 'tech_class1', 'tech_class2',
       'tech_class3', 'tech_class4', 'tech_class5', 'tech_class6',
       'tech_class7', 'tech_class8', 'tech_class9', 'output_and_income',
       'labor_market', 'housing', 'consumption', 'money', 'bond_ex', 'price',
       'stock', 'PYD', 'RiF', 'Industries', 'SizeBM'],
      dtype='object')

In [72]:
# Cross-maturity average return, used as the dependent variable of the
# "Restricted Model" first-stage regressions. The maturity columns are named
# explicitly so the average never picks up other columns of the frame
# (including this 'mean' column itself when the cell is re-run).
LogExcessReturn['mean'] = LogExcessReturn[['yr2', 'yr3', 'yr4', 'yr5']].mean(axis=1)

## 因子列表

* 流动性：(1)AmihudLiquidity； (2)PastorandStambaughLiq

* 领先指标：（1）CompositeLeadingIndicators of USA（只有美国）；（2）CompositeLeadingIndicators_8Country（8个国家PCA，描述国际CLI情况）

* Fama五因子：F-F_Research_Data_5_Factors_2x3

* 投资者情绪：Baker and Wurgler (2006, 2007)

* 波动率跳跃：Wright J H, Zhou H

* 技术指标：10类200个技术因子，各类进行PCA，最后得到10个因子，代表10类

* CP因子：CP文章

* 宏观因子：8类132个宏观变量，各类进行PCA，最后得到8个因子，代表8类，Ludvigson and Ng (2009)

* 金融指标：4类147个金融变量，各类进行PCA，最后得到4个因子，代表4类，Ludvigson and Ng (2007)

* LogExcessReturn：CP文章，超额收益

In [73]:
# Sanity check: confirm LogExcessReturn is a DataFrame.
type(LogExcessReturn)

pandas.core.frame.DataFrame

In [74]:
# Empty frame on dfFactor's date index; each "Restricted Model" cell below
# adds one combined-factor column to it.
GOODFactor = pd.DataFrame(index = dfFactor.index)

## 流动性

### Unrestricted Model

In [75]:
# Unrestricted model: for every maturity, regress the return on the four
# scaled liquidity factors jointly (the design matrix has no constant column).
dfFactor_normal, _ = minmaxscaler_for_train(Factor)
name = ['AmihudLiq', 'AggLiq', 'InnovLiq', 'TradedLiq']

for i in (2, 3, 4, 5):
    Y = LogExcessReturn[f'yr{i}']
    model = sm.OLS(endog=Y, exog=dfFactor_normal[name]).fit()
    result = model.summary()
    # Summary followed by the same blank-line separator as before.
    print(result, end="\n" * 4)

                            OLS Regression Results                            
Dep. Variable:                    yr2   R-squared:                       0.180
Model:                            OLS   Adj. R-squared:                  0.170
Method:                 Least Squares   F-statistic:                     19.44
Date:                Sat, 08 Dec 2018   Prob (F-statistic):           1.78e-14
Time:                        19:10:02   Log-Likelihood:                -737.30
No. Observations:                 359   AIC:                             1483.
Df Residuals:                     355   BIC:                             1498.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
AmihudLiq     -1.5496      1.500     -1.033      0.3

In [76]:
# Refit the yr5 regression on its own so the summary renders as a rich table.
# Relies on `name` and `dfFactor_normal` from the cell above.
Y = LogExcessReturn['yr5']
model = sm.OLS(endog=Y, exog=dfFactor_normal[name]).fit()
result = model.summary()
result

0,1,2,3
Dep. Variable:,yr5,R-squared:,0.113
Model:,OLS,Adj. R-squared:,0.103
Method:,Least Squares,F-statistic:,11.3
Date:,"Sat, 08 Dec 2018",Prob (F-statistic):,1.22e-08
Time:,19:10:02,Log-Likelihood:,-1164.0
No. Observations:,359,AIC:,2336.0
Df Residuals:,355,BIC:,2352.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
AmihudLiq,-3.9649,4.923,-0.805,0.421,-13.648,5.718
AggLiq,-2.7686,5.324,-0.520,0.603,-13.240,7.703
InnovLiq,7.6183,6.037,1.262,0.208,-4.255,19.492
TradedLiq,-1.0771,2.667,-0.404,0.687,-6.323,4.169

0,1,2,3
Omnibus:,6.402,Durbin-Watson:,0.166
Prob(Omnibus):,0.041,Jarque-Bera (JB):,6.22
Skew:,-0.285,Prob(JB):,0.0446
Kurtosis:,3.302,Cond. No.,23.2


In [77]:
# R-squared of the most recently fitted model (the yr5 fit when cells run in order).
model.rsquared

0.11292736275712212

### Restricted Model

In [78]:
dfFactor_normal, _ = minmaxscaler_for_train(Factor)
name = ['AmihudLiq', 'AggLiq', 'InnovLiq', 'TradedLiq']

# Stage 1: regress the cross-maturity mean return on the liquidity factors.
Y = LogExcessReturn['mean']
model = sm.OLS(endog=Y, exog=dfFactor_normal[name]).fit()
result = model.summary()
print(result, end="\n" * 4)

# Collapse the group into one factor: the stage-1 coefficient-weighted sum.
GOODFactor['Liq_total_minmax'] = dfFactor_normal[name].mul(model.params).sum(axis=1)

# Stage 2: regress each maturity on that single combined factor.
for i in (2, 3, 4, 5):
    Y = LogExcessReturn[f'yr{i}']
    model = sm.OLS(endog=Y, exog=GOODFactor['Liq_total_minmax']).fit()
    result = model.summary()
    print(result, end="\n" * 4)

                            OLS Regression Results                            
Dep. Variable:                   mean   R-squared:                       0.136
Model:                            OLS   Adj. R-squared:                  0.126
Method:                 Least Squares   F-statistic:                     13.96
Date:                Sat, 08 Dec 2018   Prob (F-statistic):           1.39e-10
Time:                        19:10:02   Log-Likelihood:                -1020.2
No. Observations:                 359   AIC:                             2048.
Df Residuals:                     355   BIC:                             2064.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
AmihudLiq     -2.8099      3.298     -0.852      0.3

In [93]:
def get_coef_table(model, i):
    """Summarise the first coefficient of a fitted model as a one-row table.

    Parameters
    ----------
    model : fitted results object
        Must expose ``params``, ``bse``, ``pvalues``, ``conf_int()`` and
        ``rsquared`` (the attributes read below).
    i : int
        Maturity in years; the row is labelled ``'yr<i>'``.

    Returns
    -------
    pandas.DataFrame
        One row with the coefficient name, estimate, standard error, p-value,
        95% confidence bounds and R-squared, all rounded to two decimals.
    """
    coef_name = model.params.index[0]
    # Column 0 holds the lower bound and column 1 the upper bound.
    bounds = model.conf_int(alpha=0.05, cols=None)

    row = {
        'coef Name': coef_name,
        'coef.': round(model.params[coef_name], 2),
        'std err': round(model.bse[coef_name], 2),
        'pvalue': round(model.pvalues[coef_name], 2),
        '[ 0.025': round(bounds[0][coef_name], 2),
        '0.975 ]': round(bounds[1][coef_name], 2),
        'R-sqaured': round(model.rsquared, 2),
    }
    return pd.DataFrame([row], index=[f'yr{i}'])
#df = get_coef_table(model,2)
#pd.DataFrame().append(df)


In [94]:
# One coefficient row per maturity for the combined liquidity factor.
# Rows are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copied the frame on every iteration.
coef_tables = []
for i in range(2, 6):
    Y = LogExcessReturn['yr' + str(i)]
    model = sm.OLS(Y, GOODFactor['Liq_total_minmax']).fit()
    df = get_coef_table(model, i)
    coef_tables.append(df)
result = pd.concat(coef_tables)
result

Unnamed: 0,coef Name,coef.,std err,pvalue,[ 0.025,0.975 ],R-sqaured
yr2,Liq_total_minmax,0.54,0.06,0.0,0.42,0.66,0.18
yr3,Liq_total_minmax,0.91,0.12,0.0,0.68,1.13,0.15
yr4,Liq_total_minmax,1.21,0.16,0.0,0.9,1.53,0.14
yr5,Liq_total_minmax,1.34,0.2,0.0,0.95,1.73,0.11


In [95]:
# Standard error of the coefficient in the most recently fitted model.
# (The former `type(model)` line was dropped: only a cell's last expression
# is displayed, so its value was discarded.)
model.bse

Liq_total_minmax    0.19897
dtype: float64

## CLI

### Unrestricted Model

In [96]:
# Unrestricted model: for every maturity, regress the return on the two
# scaled leading-indicator factors jointly (no constant column).
dfFactor_normal, _ = minmaxscaler_for_train(Factor)
name = ['CLI', 'CLI_allPca']

for i in (2, 3, 4, 5):
    Y = LogExcessReturn[f'yr{i}']
    model = sm.OLS(endog=Y, exog=dfFactor_normal[name]).fit()
    result = model.summary()
    print(result, end="\n" * 4)

                            OLS Regression Results                            
Dep. Variable:                    yr2   R-squared:                       0.285
Model:                            OLS   Adj. R-squared:                  0.281
Method:                 Least Squares   F-statistic:                     71.15
Date:                Sat, 08 Dec 2018   Prob (F-statistic):           9.86e-27
Time:                        19:13:03   Log-Likelihood:                -712.63
No. Observations:                 359   AIC:                             1429.
Df Residuals:                     357   BIC:                             1437.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
CLI           -0.2483      0.199     -1.246      0.2

### Restricted Model

In [97]:
dfFactor_normal, _ = minmaxscaler_for_train(Factor)
name = ['CLI', 'CLI_allPca']

# Stage 1: regress the cross-maturity mean return on the CLI factors.
Y = LogExcessReturn['mean']
model = sm.OLS(endog=Y, exog=dfFactor_normal[name]).fit()
result = model.summary()
print(result, end="\n" * 4)

# Collapse the group into one factor: the stage-1 coefficient-weighted sum.
GOODFactor['CLI_minmax'] = dfFactor_normal[name].mul(model.params).sum(axis=1)

# Stage 2: regress each maturity on that single combined factor.
for i in (2, 3, 4, 5):
    Y = LogExcessReturn[f'yr{i}']
    model = sm.OLS(endog=Y, exog=GOODFactor['CLI_minmax']).fit()
    result = model.summary()
    print(result, end="\n" * 4)

                            OLS Regression Results                            
Dep. Variable:                   mean   R-squared:                       0.234
Model:                            OLS   Adj. R-squared:                  0.230
Method:                 Least Squares   F-statistic:                     54.53
Date:                Sat, 08 Dec 2018   Prob (F-statistic):           2.17e-21
Time:                        19:13:04   Log-Likelihood:                -998.56
No. Observations:                 359   AIC:                             2001.
Df Residuals:                     357   BIC:                             2009.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
CLI           -0.6854      0.442     -1.550      0.1

In [98]:
# One coefficient row per maturity for the combined CLI factor.
# Rows are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copied the frame on every iteration.
coef_tables = []
for i in range(2, 6):
    Y = LogExcessReturn['yr' + str(i)]
    model = sm.OLS(Y, GOODFactor['CLI_minmax']).fit()
    df = get_coef_table(model, i)
    coef_tables.append(df)
result = pd.concat(coef_tables)
result

Unnamed: 0,coef Name,coef.,std err,pvalue,[ 0.025,0.975 ],R-sqaured
yr2,CLI_minmax,0.51,0.04,0.0,0.43,0.6,0.28
yr3,CLI_minmax,0.9,0.08,0.0,0.73,1.06,0.25
yr4,CLI_minmax,1.21,0.12,0.0,0.98,1.44,0.23
yr5,CLI_minmax,1.38,0.14,0.0,1.1,1.66,0.21


## Fama五因子

### Unrestricted Model

In [None]:
# Unrestricted model: for every maturity, regress the return on the scaled
# Fama-French columns jointly (no constant column).
# NOTE(review): the list has six regressors -- RF is included next to the
# five factors named in the section heading. Confirm this is intended.
dfFactor_normal, _ = minmaxscaler_for_train(Factor)
name = ['MktRF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']

for i in (2, 3, 4, 5):
    Y = LogExcessReturn[f'yr{i}']
    model = sm.OLS(endog=Y, exog=dfFactor_normal[name]).fit()
    result = model.summary()
    print(result, end="\n" * 4)

### Restricted Model

In [None]:
dfFactor_normal, _ = minmaxscaler_for_train(Factor)
# NOTE(review): six regressors -- RF is included next to the five factors
# named in the section heading. Confirm this is intended.
name = ['MktRF', 'SMB', 'HML', 'RMW', 'CMA', 'RF']

# Stage 1: regress the cross-maturity mean return on the Fama-French columns.
Y = LogExcessReturn['mean']
model = sm.OLS(endog=Y, exog=dfFactor_normal[name]).fit()
result = model.summary()
print(result, end="\n" * 4)

# Collapse the group into one factor: the stage-1 coefficient-weighted sum.
GOODFactor['FF5_total_minmax'] = dfFactor_normal[name].mul(model.params).sum(axis=1)

# Stage 2: regress each maturity on that single combined factor.
for i in (2, 3, 4, 5):
    Y = LogExcessReturn[f'yr{i}']
    model = sm.OLS(endog=Y, exog=GOODFactor['FF5_total_minmax']).fit()
    result = model.summary()
    print(result, end="\n" * 4)

In [None]:
# One coefficient row per maturity for the combined Fama-French factor.
# Rows are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copied the frame on every iteration.
coef_tables = []
for i in range(2, 6):
    Y = LogExcessReturn['yr' + str(i)]
    model = sm.OLS(Y, GOODFactor['FF5_total_minmax']).fit()
    df = get_coef_table(model, i)
    coef_tables.append(df)
result = pd.concat(coef_tables)
result

## 投资者情绪

### Unrestricted Model

In [99]:
# Unrestricted model: for every maturity, regress the return on the two
# scaled investor-sentiment series jointly (no constant column).
dfFactor_normal, _ = minmaxscaler_for_train(Factor)
name = ['BWSENTcoth', 'BWSENT']

for i in (2, 3, 4, 5):
    Y = LogExcessReturn[f'yr{i}']
    model = sm.OLS(endog=Y, exog=dfFactor_normal[name]).fit()
    result = model.summary()
    print(result, end="\n" * 4)

                            OLS Regression Results                            
Dep. Variable:                    yr2   R-squared:                       0.207
Model:                            OLS   Adj. R-squared:                  0.202
Method:                 Least Squares   F-statistic:                     46.54
Date:                Sat, 08 Dec 2018   Prob (F-statistic):           1.09e-18
Time:                        19:14:55   Log-Likelihood:                -731.26
No. Observations:                 359   AIC:                             1467.
Df Residuals:                     357   BIC:                             1474.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
BWSENTcoth    -7.2168      1.845     -3.911      0.0

### Restricted Model

In [100]:
dfFactor_normal, _ = minmaxscaler_for_train(Factor)
name = ['BWSENTcoth', 'BWSENT']

# Stage 1: regress the cross-maturity mean return on the sentiment series.
Y = LogExcessReturn['mean']
model = sm.OLS(endog=Y, exog=dfFactor_normal[name]).fit()
result = model.summary()
print(result, end="\n" * 4)

# Collapse the group into one factor: the stage-1 coefficient-weighted sum.
GOODFactor['BWaggSENT_minmax'] = dfFactor_normal[name].mul(model.params).sum(axis=1)

# Stage 2: regress each maturity on that single combined factor.
for i in (2, 3, 4, 5):
    Y = LogExcessReturn[f'yr{i}']
    model = sm.OLS(endog=Y, exog=GOODFactor['BWaggSENT_minmax']).fit()
    result = model.summary()
    print(result, end="\n" * 4)

                            OLS Regression Results                            
Dep. Variable:                   mean   R-squared:                       0.133
Model:                            OLS   Adj. R-squared:                  0.128
Method:                 Least Squares   F-statistic:                     27.32
Date:                Sat, 08 Dec 2018   Prob (F-statistic):           9.08e-12
Time:                        19:14:58   Log-Likelihood:                -1020.8
No. Observations:                 359   AIC:                             2046.
Df Residuals:                     357   BIC:                             2053.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
BWSENTcoth   -11.1817      4.134     -2.705      0.0

In [102]:
# One coefficient row per maturity for the combined sentiment factor.
# Rows are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copied the frame on every iteration.
coef_tables = []
for i in range(2, 6):
    Y = LogExcessReturn['yr' + str(i)]
    model = sm.OLS(Y, GOODFactor['BWaggSENT_minmax']).fit()
    df = get_coef_table(model, i)
    coef_tables.append(df)
result = pd.concat(coef_tables)
result

Unnamed: 0,coef Name,coef.,std err,pvalue,[ 0.025,0.975 ],R-sqaured
yr2,BWaggSENT_minmax,0.58,0.06,0.0,0.46,0.7,0.21
yr3,BWaggSENT_minmax,0.94,0.12,0.0,0.71,1.16,0.15
yr4,BWaggSENT_minmax,1.19,0.16,0.0,0.87,1.51,0.13
yr5,BWaggSENT_minmax,1.29,0.2,0.0,0.89,1.69,0.1


## 波动率跳跃

### Unrestricted Model

In [103]:
# Unrestricted model: for every maturity, regress the return on the scaled
# RV, BV and RJ columns jointly (no constant column).
dfFactor_normal, _ = minmaxscaler_for_train(Factor)
name = ['RV', 'BV', 'RJ']

for i in (2, 3, 4, 5):
    Y = LogExcessReturn[f'yr{i}']
    model = sm.OLS(endog=Y, exog=dfFactor_normal[name]).fit()
    result = model.summary()
    print(result, end="\n" * 4)

                            OLS Regression Results                            
Dep. Variable:                    yr2   R-squared:                       0.165
Model:                            OLS   Adj. R-squared:                  0.158
Method:                 Least Squares   F-statistic:                     23.52
Date:                Sat, 08 Dec 2018   Prob (F-statistic):           6.57e-14
Time:                        19:20:28   Log-Likelihood:                -740.39
No. Observations:                 359   AIC:                             1487.
Df Residuals:                     356   BIC:                             1498.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
RV            -4.6131      2.031     -2.271      0.0

### Restricted Model

In [104]:
dfFactor_normal, _ = minmaxscaler_for_train(Factor)
name = ['RV', 'BV', 'RJ']

# Stage 1: regress the cross-maturity mean return on RV, BV and RJ.
Y = LogExcessReturn['mean']
model = sm.OLS(endog=Y, exog=dfFactor_normal[name]).fit()
result = model.summary()
print(result, end="\n" * 4)

# Collapse the group into one factor: the stage-1 coefficient-weighted sum.
GOODFactor['RJ_total_minmax'] = dfFactor_normal[name].mul(model.params).sum(axis=1)

# Stage 2: regress each maturity on that single combined factor.
for i in (2, 3, 4, 5):
    Y = LogExcessReturn[f'yr{i}']
    model = sm.OLS(endog=Y, exog=GOODFactor['RJ_total_minmax']).fit()
    result = model.summary()
    print(result, end="\n" * 4)

                            OLS Regression Results                            
Dep. Variable:                   mean   R-squared:                       0.139
Model:                            OLS   Adj. R-squared:                  0.131
Method:                 Least Squares   F-statistic:                     19.09
Date:                Sat, 08 Dec 2018   Prob (F-statistic):           1.67e-11
Time:                        19:20:34   Log-Likelihood:                -1019.6
No. Observations:                 359   AIC:                             2045.
Df Residuals:                     356   BIC:                             2057.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
RV           -10.3227      4.421     -2.335      0.0

In [105]:
# One coefficient row per maturity for the combined RV/BV/RJ factor.
# Rows are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copied the frame on every iteration.
coef_tables = []
for i in range(2, 6):
    Y = LogExcessReturn['yr' + str(i)]
    model = sm.OLS(Y, GOODFactor['RJ_total_minmax']).fit()
    df = get_coef_table(model, i)
    coef_tables.append(df)
result = pd.concat(coef_tables)
result

Unnamed: 0,coef Name,coef.,std err,pvalue,[ 0.025,0.975 ],R-sqaured
yr2,RJ_total_minmax,0.5,0.06,0.0,0.39,0.62,0.16
yr3,RJ_total_minmax,0.89,0.11,0.0,0.66,1.11,0.14
yr4,RJ_total_minmax,1.22,0.16,0.0,0.9,1.53,0.14
yr5,RJ_total_minmax,1.39,0.2,0.0,1.0,1.77,0.12


## 技术指标

### Unrestricted Model

In [106]:
# Unrestricted model: for every maturity, regress the return on the ten
# scaled technical-indicator components jointly (no constant column).
dfFactor_normal, _ = minmaxscaler_for_train(Factor)
name = [f'tech_class{k}' for k in range(10)]  # 'tech_class0' .. 'tech_class9'

for i in (2, 3, 4, 5):
    Y = LogExcessReturn[f'yr{i}']
    model = sm.OLS(endog=Y, exog=dfFactor_normal[name]).fit()
    result = model.summary()
    print(result, end="\n" * 4)

                            OLS Regression Results                            
Dep. Variable:                    yr2   R-squared:                       0.208
Model:                            OLS   Adj. R-squared:                  0.185
Method:                 Least Squares   F-statistic:                     9.155
Date:                Sat, 08 Dec 2018   Prob (F-statistic):           1.85e-13
Time:                        19:23:30   Log-Likelihood:                -731.03
No. Observations:                 359   AIC:                             1482.
Df Residuals:                     349   BIC:                             1521.
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
tech_class0   -47.3179     39.301     -1.204      

### Restricted Model

In [107]:
dfFactor_normal, _ = minmaxscaler_for_train(Factor)
name = [f'tech_class{k}' for k in range(10)]  # 'tech_class0' .. 'tech_class9'

# Stage 1: regress the cross-maturity mean return on the technical components.
Y = LogExcessReturn['mean']
model = sm.OLS(endog=Y, exog=dfFactor_normal[name]).fit()
result = model.summary()
print(result, end="\n" * 4)

# Collapse the group into one factor: the stage-1 coefficient-weighted sum.
GOODFactor['tech_pca_total_minmax'] = dfFactor_normal[name].mul(model.params).sum(axis=1)

# Stage 2: regress each maturity on that single combined factor.
for i in (2, 3, 4, 5):
    Y = LogExcessReturn[f'yr{i}']
    model = sm.OLS(endog=Y, exog=GOODFactor['tech_pca_total_minmax']).fit()
    result = model.summary()
    print(result, end="\n" * 4)

                            OLS Regression Results                            
Dep. Variable:                   mean   R-squared:                       0.170
Model:                            OLS   Adj. R-squared:                  0.146
Method:                 Least Squares   F-statistic:                     7.129
Date:                Sat, 08 Dec 2018   Prob (F-statistic):           3.11e-10
Time:                        19:23:35   Log-Likelihood:                -1013.0
No. Observations:                 359   AIC:                             2046.
Df Residuals:                     349   BIC:                             2085.
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
tech_class0   -67.7508     86.212     -0.786      

In [108]:
# One coefficient row per maturity for the combined technical factor.
# Rows are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copied the frame on every iteration.
coef_tables = []
for i in range(2, 6):
    Y = LogExcessReturn['yr' + str(i)]
    model = sm.OLS(Y, GOODFactor['tech_pca_total_minmax']).fit()
    df = get_coef_table(model, i)
    coef_tables.append(df)
result = pd.concat(coef_tables)
result

Unnamed: 0,coef Name,coef.,std err,pvalue,[ 0.025,0.975 ],R-sqaured
yr2,tech_pca_total_minmax,0.51,0.05,0.0,0.4,0.61,0.2
yr3,tech_pca_total_minmax,0.9,0.1,0.0,0.7,1.1,0.18
yr4,tech_pca_total_minmax,1.21,0.14,0.0,0.93,1.49,0.17
yr5,tech_pca_total_minmax,1.38,0.17,0.0,1.04,1.73,0.15


## 宏观因子

### Unrestricted Model

In [109]:
# Unrestricted model: for every maturity, regress the return on the scaled
# macro components jointly (no constant column).
# NOTE(review): seven columns are listed; pcaFactor also has an
# 'output_and_income' column that is not used here. Confirm the omission is intended.
dfFactor_normal, _ = minmaxscaler_for_train(Factor)
name = ['labor_market', 'housing', 'consumption', 'money', 'bond_ex', 'price', 'stock']

for i in (2, 3, 4, 5):
    Y = LogExcessReturn[f'yr{i}']
    model = sm.OLS(endog=Y, exog=dfFactor_normal[name]).fit()
    result = model.summary()
    print(result, end="\n" * 4)

                            OLS Regression Results                            
Dep. Variable:                    yr2   R-squared:                       0.575
Model:                            OLS   Adj. R-squared:                  0.567
Method:                 Least Squares   F-statistic:                     68.04
Date:                Sat, 08 Dec 2018   Prob (F-statistic):           1.26e-61
Time:                        19:25:30   Log-Likelihood:                -619.25
No. Observations:                 359   AIC:                             1253.
Df Residuals:                     352   BIC:                             1280.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
labor_market     0.2893      0.527      0.549   

### Restricted Model

In [110]:
dfFactor_normal, _ = minmaxscaler_for_train(Factor)
# NOTE(review): seven columns are listed; pcaFactor also has an
# 'output_and_income' column that is not used here. Confirm the omission is intended.
name = ['labor_market', 'housing', 'consumption', 'money', 'bond_ex', 'price', 'stock']

# Stage 1: regress the cross-maturity mean return on the macro components.
Y = LogExcessReturn['mean']
model = sm.OLS(endog=Y, exog=dfFactor_normal[name]).fit()
result = model.summary()
print(result, end="\n" * 4)

# Collapse the group into one factor: the stage-1 coefficient-weighted sum.
GOODFactor['macro_pca_total_minmax'] = dfFactor_normal[name].mul(model.params).sum(axis=1)

# Stage 2: regress each maturity on that single combined factor.
for i in (2, 3, 4, 5):
    Y = LogExcessReturn[f'yr{i}']
    model = sm.OLS(endog=Y, exog=GOODFactor['macro_pca_total_minmax']).fit()
    result = model.summary()
    print(result, end="\n" * 4)

                            OLS Regression Results                            
Dep. Variable:                   mean   R-squared:                       0.514
Model:                            OLS   Adj. R-squared:                  0.505
Method:                 Least Squares   F-statistic:                     53.24
Date:                Sat, 08 Dec 2018   Prob (F-statistic):           1.56e-51
Time:                        19:25:33   Log-Likelihood:                -916.79
No. Observations:                 359   AIC:                             1848.
Df Residuals:                     352   BIC:                             1875.
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
labor_market     0.6234      1.207      0.516   

In [111]:
# One coefficient row per maturity for the combined macro factor.
# Rows are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copied the frame on every iteration.
coef_tables = []
for i in range(2, 6):
    Y = LogExcessReturn['yr' + str(i)]
    model = sm.OLS(Y, GOODFactor['macro_pca_total_minmax']).fit()
    df = get_coef_table(model, i)
    coef_tables.append(df)
result = pd.concat(coef_tables)
result

Unnamed: 0,coef Name,coef.,std err,pvalue,[ 0.025,0.975 ],R-sqaured
yr2,macro_pca_total_minmax,0.49,0.02,0.0,0.45,0.54,0.57
yr3,macro_pca_total_minmax,0.89,0.04,0.0,0.8,0.97,0.53
yr4,macro_pca_total_minmax,1.2,0.06,0.0,1.08,1.33,0.51
yr5,macro_pca_total_minmax,1.41,0.08,0.0,1.26,1.57,0.47


## 金融指标

### Unrestricted Model

In [112]:
# Unrestricted model: for every maturity, regress the return on the four
# scaled financial components jointly (no constant column).
dfFactor_normal, _ = minmaxscaler_for_train(Factor)
name = ['PYD', 'RiF', 'Industries', 'SizeBM']

for i in (2, 3, 4, 5):
    Y = LogExcessReturn[f'yr{i}']
    model = sm.OLS(endog=Y, exog=dfFactor_normal[name]).fit()
    result = model.summary()
    print(result, end="\n" * 4)

                            OLS Regression Results                            
Dep. Variable:                    yr2   R-squared:                       0.170
Model:                            OLS   Adj. R-squared:                  0.160
Method:                 Least Squares   F-statistic:                     18.15
Date:                Sat, 08 Dec 2018   Prob (F-statistic):           1.42e-13
Time:                        19:29:25   Log-Likelihood:                -739.45
No. Observations:                 359   AIC:                             1487.
Df Residuals:                     355   BIC:                             1502.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
PYD            0.1877      0.431      0.435      0.6

### Restricted Model

In [113]:
dfFactor_normal, _ = minmaxscaler_for_train(Factor)
name = ['PYD', 'RiF', 'Industries', 'SizeBM']

# Stage 1: regress the cross-maturity mean return on the financial components.
Y = LogExcessReturn['mean']
model = sm.OLS(endog=Y, exog=dfFactor_normal[name]).fit()
result = model.summary()
print(result, end="\n" * 4)

# Collapse the group into one factor: the stage-1 coefficient-weighted sum.
GOODFactor['financial_pca_total_minmax'] = dfFactor_normal[name].mul(model.params).sum(axis=1)

# Stage 2: regress each maturity on that single combined factor.
for i in (2, 3, 4, 5):
    Y = LogExcessReturn[f'yr{i}']
    model = sm.OLS(endog=Y, exog=GOODFactor['financial_pca_total_minmax']).fit()
    result = model.summary()
    print(result, end="\n" * 4)

                            OLS Regression Results                            
Dep. Variable:                   mean   R-squared:                       0.132
Model:                            OLS   Adj. R-squared:                  0.122
Method:                 Least Squares   F-statistic:                     13.48
Date:                Sat, 08 Dec 2018   Prob (F-statistic):           3.06e-10
Time:                        19:29:27   Log-Likelihood:                -1021.0
No. Observations:                 359   AIC:                             2050.
Df Residuals:                     355   BIC:                             2066.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
PYD           -0.7833      0.945     -0.829      0.4

In [116]:
# One coefficient row per maturity for the combined financial factor.
# Rows are collected in a list and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copied the frame on every iteration.
coef_tables = []
for i in range(2, 6):
    Y = LogExcessReturn['yr' + str(i)]
    model = sm.OLS(Y, GOODFactor['financial_pca_total_minmax']).fit()
    df = get_coef_table(model, i)
    coef_tables.append(df)
result = pd.concat(coef_tables)
result

Unnamed: 0,coef Name,coef.,std err,pvalue,[ 0.025,0.975 ],R-sqaured
yr2,financial_pca_total_minmax,0.52,0.06,0.0,0.4,0.64,0.16
yr3,financial_pca_total_minmax,0.9,0.12,0.0,0.67,1.13,0.14
yr4,financial_pca_total_minmax,1.22,0.16,0.0,0.89,1.54,0.13
yr5,financial_pca_total_minmax,1.36,0.2,0.0,0.96,1.76,0.11
