In [2]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from read_data import DataLoader, DataLoader2
from e_score_factor import run_longshort_strategy
from fama_french_analysis import perform_rolling_regression
from plot_trend import plot_trend_func
from reconstruct_portfolio import compute_rolling_betas, calculate_portfolio_returns, calculate_portfolio_returns_by_year, industry_analysis
from fama_macbeth import fama_macbeth_regression, analyze_significance
from counterfactual import simulate_counterfactual_performance, draw_counterfactual

import warnings

# Suppress UserWarnings raised from the openpyxl module.
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")

# Path to a font file that contains Chinese glyphs.
font_path = 'C:/Windows/Fonts/simhei.ttf'
font_prop = FontProperties(fname=font_path)

# Make that font the global matplotlib default; with 'axes.unicode_minus'
# disabled matplotlib draws minus signs as ASCII hyphens (the Unicode minus
# glyph may be missing from CJK fonts).
plt.rcParams['font.family'] = font_prop.get_name()
plt.rcParams['axes.unicode_minus'] = False

# Raw string: in a normal literal '\Q' and '\毕' are invalid escape sequences,
# which newer Python versions warn about and will eventually reject. The
# resulting path is unchanged. All relative paths below resolve against it.
os.chdir(r'D:\Quant_JunruWang\毕业论文代码')
loader = DataLoader()    # reads the raw input data
loader2 = DataLoader2()  # reads the processed / intermediate data

In [3]:
# Data preparation: fetch every table the later cells rely on.
ff5, ff3, umd5 = (loader.factors(model) for model in ('ff5', 'ff3', 'umd5'))
stk_ret = loader.stock_ret
bench = loader.index_close
textfactors = loader2.textfactors
e_score = loader.e_score
mv = loader.market_value
industry = loader.industry
ccpu = loader.ccpu
escore_longshort = loader2.escore_return

# Cumulative E-score spread: running product of (chg_short - chg_long + 1),
# stored in a single 'ret' column next to the matching dates.
escore_daily_spread = escore_longshort['chg_short'] - escore_longshort['chg_long']
escore_cumulative = (escore_daily_spread + 1).cumprod()
escore_ret = pd.DataFrame(escore_cumulative, columns=['ret'])
escore_ret['date'] = escore_longshort['date']

# Convert the 'date' column of each factor table to datetime (in place).
for factor_table in (ff3, ff5, umd5):
    factor_table['date'] = pd.to_datetime(factor_table['date'])

# Outer-join the text factors with each factor table on 'date', then
# forward-fill the missing values left by the join.
factors_ff3, factors_ff5, factors_umd5 = (
    pd.merge(textfactors, factor_table, how='outer', on='date').ffill()
    for factor_table in (ff3, ff5, umd5)
)

### 三、文本因子的构建
由于文本因子的构建使用了多线程函数，无法在本 notebook 中直接调用，请直接运行 `news_LDA.py`。\
以下代码用于绘制已构建文本因子的走势图，并输出因子之间的相关性矩阵。


In [12]:
# Draw the trend figure for the text factors against `bench` and save it.
plot_trend_func(textfactors, bench,output_path='输出/文本因子走势.png')

# Pairwise correlations between the factor columns.
# factors_umd5 still carries the 'date' merge key as an ordinary column, and
# since pandas 2.0 DataFrame.corr() no longer drops non-numeric columns by
# default. Selecting the numeric columns explicitly keeps the matrix limited
# to the factor series on every pandas version.
all_f_corr = factors_umd5.select_dtypes(include='number').corr()
all_f_corr.to_excel('输出/因子相关性.xlsx')

### 四、回归分析
#### 4.1 文本因子对Fama-French五因子回归

In [None]:
# Rolling regression of each text factor on the five Fama-French factors.
# The three calls differ only in the text-factor name, so the regressor list
# is defined once and the factor name drives both the dependent column and
# the output workbook path.
ff5_regressors = ['RiskPremium', 'SMB', 'HML', 'RMW', 'CMA']
for text_factor in ('碳排放', '环境监管', '绿色金融'):
    perform_rolling_regression(
        factors_ff5[[text_factor]],
        # A fresh selection per call, so calls never share a regressor frame.
        factors_ff5[ff5_regressors],
        output_path=f'输出/FF回归/{text_factor}_ff5.xlsx',
    )

#### 4.2 FM回归
在 Fama-French 三因子的基础上加入文本因子，使用 120 天滚动窗口进行 FM 回归，得到文本因子的风险暴露（beta）。

In [3]:
# Stored 'beta' results for the ff3 specification, handed to the regression
# through `betas=` (presumably so they are reused rather than re-estimated —
# confirm in fama_macbeth.py).
fm_ff3_betas = loader2.fm_results('ff3', 'beta')

# Fama-MacBeth regression of stock returns on text factors + FF3, window 120;
# results go under the ff3 output folder.
fm_betas = fama_macbeth_regression(
    ret=stk_ret,
    factors=factors_ff3,
    betas=fm_ff3_betas,
    window=120,
    output_path='输出/FM回归/ff3',
)

滚动回归计算beta: 100%|██████████| 2626/2626 [48:09<00:00,  1.10s/it]
滚动回归计算beta: 100%|██████████| 2626/2626 [1:06:07<00:00,  1.51s/it]  


FM回归的统计

In [5]:
# Load the stored FM regression results for each factor specification.
fm_ff3_result, fm_ff5_result, fm_umd5_result = (
    loader2.fm_results(model_name, 'result')
    for model_name in ('ff3', 'ff5', 'umd5')
)

# Significance summary per specification (result1=ff3, result2=ff5, result3=umd5).
result1, result2, result3 = (
    analyze_significance(fm_result)
    for fm_result in (fm_ff3_result, fm_ff5_result, fm_umd5_result)
)

# Write each summary next to the regression output of its specification.
for summary, summary_path in (
    (result1, '输出/FM回归/ff3/统计结果.xlsx'),
    (result2, '输出/FM回归/ff5/统计结果.xlsx'),
    (result3, '输出/FM回归/umd5/统计结果.xlsx'),
):
    summary.to_excel(summary_path)

#### 4.3 多空组合


4.3.1 E评分多空组合\
第一个代码格计算多空收益\
第二个代码格直接读取存储的收益，然后绘图

In [None]:
# Run the E-score long-short strategy on the E scores, stock returns and
# market values.
# NOTE: this rebinds `escore_longshort`, which the data-preparation cell
# loaded from loader2.escore_return.
escore_longshort = run_longshort_strategy(e_score, stk_ret, mv)

In [None]:
# Spread between the short-leg and long-leg cumulative returns, stored in a
# single 'ret' column next to the matching dates.
# NOTE(review): the markdown above says this cell reads the stored returns,
# but it uses whatever `escore_longshort` currently holds — confirm.
escore_cum_spread = escore_longshort['short_cum_ret'] - escore_longshort['long_cum_ret']
ret = pd.DataFrame(escore_cum_spread, columns=['ret'])
ret['date'] = escore_longshort['date']

bench = loader.index_close
# NOTE(review): this figure is written under '结果/' while the other cells
# write under '输出/' — confirm the folder is intended.
plot_trend_func(ret, bench, output_path='结果/E评分多空组合.png')

4.3.2 文本因子多空组合

多窗口测试，绘制收益图

In [None]:
# For every window length: load the betas for that window, build the
# portfolio returns, compound them, and save the trend figure against the
# E-score series.
for days in (120, 240, 360, 720, 1200):
    betas = loader2.betas(days)
    portfolio_returns = calculate_portfolio_returns(ret=stk_ret, beta_df=betas, freq=days)
    # Cumulative return: running product of (1 + period return).
    cum_ret = (1 + portfolio_returns).cumprod()
    plot_trend_func(
        cum_ret,
        escore_ret,
        rows=1,
        cols=3,
        output_path=f'输出/组合beta/组合收益_{days}天.png',
    )

行业分析：检验组合选取的公司及其 E 评分是否符合“高污染、绿色度低”的特征。


In [None]:
# Portfolio weights as provided by the processed-data loader.
weights = loader2.weights
# Run the industry analysis on those weights together with the industry table.
industry_analysis(weights, industry)

### 五、稳健性分析
5.1 在各个窗口下，测试大市值、小市值各自的多空表现

In [None]:
# Robustness check: for every window length, compute the low- and
# high-market-value portfolio returns and plot each compounded series
# against the E-score series.
for days in (120, 240, 360, 720, 1200):
    l_ret, h_ret = calculate_portfolio_returns_by_year(
        ret=stk_ret,
        beta_df=loader2.betas(days),
        market_value=mv,
    )

    # (period returns, figure path) for the low-cap and high-cap portfolios.
    size_buckets = (
        (l_ret, f'输出/组合beta/低市值组合收益_{days}.png'),
        (h_ret, f'输出/组合beta/高市值组合收益_{days}.png'),
    )
    for bucket_ret, figure_path in size_buckets:
        plot_trend_func(
            (bucket_ret + 1).cumprod(),
            escore_ret,
            rows=1,
            cols=3,
            output_path=figure_path,
        )


5.2 更换FM回归的模型

In [None]:
# Repeat the Fama-MacBeth regression with the ff5 and umd5 factor sets
# (window 120). `fm_betas` ends up holding the result of the last run (umd5).
for fm_factors, fm_output_dir in (
    (factors_ff5, '输出/FM回归/ff5'),
    (factors_umd5, '输出/FM回归/umd5'),
):
    fm_betas = fama_macbeth_regression(
        ret=stk_ret,
        factors=fm_factors,
        window=120,
        output_path=fm_output_dir,
    )

5.3 FM回归更换数据来源：CCPU
China Climate Policy Uncertainty
使用ccpu对文本因子回归，取残差，观察fm回归结果

In [12]:
import statsmodels.api as sm

# Keep only dates present in both the CCPU table and the umd5 factor table,
# then index the combined table by date.
ccpu_with_umd5 = pd.merge(ccpu, umd5, how='inner', on='date')
ccpu_umd5 = ccpu_with_umd5.set_index('date')

# Fama-MacBeth regression with CCPU + umd5 as factors, window 120.
fm_results = fama_macbeth_regression(
    ret=stk_ret,
    factors=ccpu_umd5,
    window=120,
    output_path='输出/FM回归/ccpu+umd5',
)

滚动回归计算beta: 100%|██████████| 1004/1004 [23:44<00:00,  1.42s/it]


In [4]:
# Read the saved regression results of the CCPU + umd5 specification.
fm_ccpuumd5_result = pd.read_excel('输出/FM回归/ccpu+umd5/回归结果.xlsx')
# Significance summary for that specification.
result4 = analyze_significance(fm_ccpuumd5_result)

# Store the summary in the same folder as the regression results.
result4.to_excel('输出/FM回归/ccpu+umd5/统计结果.xlsx')

5.4 FM回归更换模型：更换为包含UMD的五因子\
回归已在 5.2 的代码格中运行，统计结果已在 4.2 的“FM回归的统计”代码格中写入。

### 六、反事实分析

In [None]:
# Saved returns workbook of the 240-day portfolio.
ret = pd.read_excel('输出/组合beta/组合收益_240天.xlsx')

# Inner-join factors and portfolio returns on 'date'; columns that exist in
# both tables get the '_因子' (factor) / '_收益' (return) suffix.
data = pd.merge(
    factors_ff3,
    ret,
    how='inner',
    on='date',
    suffixes=('_因子', '_收益'),
)

# Counterfactual simulation using the suffixed return column, the suffixed
# factor column and the 'date' column.
results = simulate_counterfactual_performance(
    data,
    '碳排放_收益',
    ['碳排放_因子'],
    'date',
)

draw_counterfactual(results, output_path='输出/反事实表现.png')
