In [6]:
import pandas as pd
from functools import reduce

In [7]:
## Look-ahead-bias free data
# Keep only the two merge keys plus the five RF_* forecast columns
# (q1-q3 / y1-y2 are presumably quarterly and annual horizons -- confirm
# against the file that produced this parquet).
wolab_columns = [
    'permno', 'YearMonth',
    'RF_q1', 'RF_q2', 'RF_q3',
    'RF_y1', 'RF_y2',
]
forecast_woLAB = pd.read_parquet(
    '../data/Results/RF_wo_lookahead_raw_005.parquet'
).loc[:, wolab_columns]

In [8]:
## Alternative Machine Learning Models
# Each entry pairs a parquet file stem with the abbreviation used as the
# prefix of that model's forecast columns.
f_abbr_list = [
    ('OLS_pred', 'OLS'),
    ('PLS_pred', 'PLS'),
    ('LASSO_pred', 'LASSO'),
    ('ENet_pred', 'ENet'),
    ('RF_pred', 'RF'),
    ('LGBM_pred', 'LGBM'),
]
abbr_list = [model_abbr for _, model_abbr in f_abbr_list]

merge_keys = ['permno', 'YearMonth']
horizons = ['Q1', 'Q2', 'Q3', 'Y1', 'Y2']


def _outer_join(left, right):
    """Outer-join two per-model frames on the (permno, YearMonth) index levels."""
    return pd.merge(left, right, on=merge_keys, how='outer')


# Load one frame per model, keeping only the keys and that model's five
# EPS forecast columns, indexed by the merge keys.
model_frames = []
for file_stem, model_abbr in f_abbr_list:
    model_raw = pd.read_parquet(f'../data/Results/ML_variants/{file_stem}.parquet')
    eps_columns = [f'{model_abbr}_EPS_{horizon}' for horizon in horizons]
    model_frames.append(model_raw[merge_keys + eps_columns].set_index(merge_keys))

# Fold the per-model frames into one wide table (outer join keeps every key
# that appears in any model), then move the keys back into ordinary columns.
forecast_all = reduce(_outer_join, model_frames).reset_index()

## Composite
# Row-wise mean across the models for each horizon; DataFrame.mean skips NaN
# by default, so the composite averages whichever models have a forecast.
for horizon in horizons:
    member_columns = [f'{model_abbr}_EPS_{horizon}' for model_abbr in abbr_list]
    forecast_all[f'Composite_EPS_{horizon}'] = forecast_all[member_columns].mean(axis=1)

In [9]:
# Left join: every row of forecast_woLAB is kept, and the model/composite
# columns are NaN where no matching (permno, YearMonth) key exists.
# NOTE: this rebinds `forecast_all`, so the cell is not idempotent --
# re-running it without first re-running the cell that builds `forecast_all`
# merges an already-merged frame and yields `_x`/`_y` suffixed RF_* columns.
forecast_all = pd.merge(
    forecast_woLAB,
    forecast_all,
    on=['permno', 'YearMonth'],
    how='left',
)

In [10]:
# Preview the first five rows of the merged table as a quick sanity check.
forecast_all.head(n=5)

Unnamed: 0,permno,YearMonth,RF_q1,RF_q2,RF_q3,RF_y1,RF_y2,OLS_EPS_Q1,OLS_EPS_Q2,OLS_EPS_Q3,...,LGBM_EPS_Q1,LGBM_EPS_Q2,LGBM_EPS_Q3,LGBM_EPS_Y1,LGBM_EPS_Y2,Composite_EPS_Q1,Composite_EPS_Q2,Composite_EPS_Q3,Composite_EPS_Y1,Composite_EPS_Y2
0,10015.0,1986-01-31,,,,0.498439,,,,,...,,,,,,,,,,
1,10057.0,1986-01-31,,0.053902,0.094371,0.634142,,,,,...,,,,,,,,,,
2,10137.0,1986-01-31,,0.996859,0.717298,,3.803343,,,,...,,,,,,,,,,
3,10145.0,1986-01-31,0.462089,0.771296,0.869043,3.940302,3.807993,,,,...,,,,,,,,,,
4,10153.0,1986-01-31,,-0.854675,-0.716934,,-1.336952,,,,...,,,,,,,,,,


In [11]:
# Write the merged forecasts to CSV; index=False leaves the positional row
# index out of the file.
output_csv = '../data/Results/Look_Ahead_Bias_Free_Earnings_Forecast.csv'
forecast_all.to_csv(output_csv, index=False)