In [1]:
import pandas as pd
import numpy as np
from scipy.linalg import inv
from PortOpt_factor.optimizer import pyport
from ipca_utils import impute_w_median, normalize, IPCA_factor


#### Read Raw Data and Preprocess

In [None]:
# Load the raw panel from a local pickle file.
# NOTE: pd.read_pickle can execute arbitrary code while unpickling, so this
# path must only ever point at a trusted file.
fn = "data/kelly_data_without_nanocap.p"
df = pd.read_pickle(fn)

# Remove the isin / cusip / sedol / excntry columns, then discard rows that
# have no value in 'ret_local_lead1m'.
cols_to_drop = ["isin", "cusip", "sedol", "excntry"]
df = df.drop(cols_to_drop, axis=1)
df = df.dropna(subset=['ret_local_lead1m'])

# Every column from position 6 onward is treated as a characteristic.
# NOTE(review): this depends on the column order of the raw file after the
# drop above -- confirm that position 6 is the first characteristic.
characteristics = df.columns[6:] #list of characteristics
# impute_w_median and normalize are project helpers from ipca_utils;
# presumably median imputation followed by normalisation -- verify there.
df_ipca = impute_w_median(df, characteristics)
df_ipca = normalize(df_ipca, characteristics)

# Use the full option name: the abbreviated "display.max_column" only works
# through pandas' partial option-name matching.
pd.set_option("display.max_columns", None)
df.head(10)

#### Read Preprocessed Data

In [2]:
# Load the already-preprocessed panel from CSV instead of rebuilding it.
df_ipca = pd.read_csv('data/factor_data_qnormed.csv')
# Every column from position 6 onward is treated as a characteristic.
characteristics = df_ipca.columns[6:] #list of characteristics
# Use the full option name: the abbreviated "display.max_column" only works
# through pandas' partial option-name matching.
pd.set_option("display.max_columns", None)
df_ipca.head(10)

Unnamed: 0,eom,id,ret_exc_lead1m,ret_local_lead1m,ret_local,ret_exc,niq_su,ret_6_1,ret_12_1,saleq_su,tax_gr1a,ni_inc8q,prc_highprc_252d,resff3_6_1,resff3_12_1,be_me,debt_me,at_me,ret_60_12,ni_me,fcf_me,div12m_me,eqpo_me,eqnpo_me,sale_gr3,sale_gr1,ebitda_mev,sale_me,ocf_me,ival_me,bev_mev,netdebt_me,eq_dur,capex_abn,at_gr1,ppeinv_gr1a,noa_at,noa_gr1a,lnoa_gr1a,capx_gr1,capx_gr2,capx_gr3,chcsho_12m,eqnpo_12m,debt_gr3,inv_gr1,inv_gr1a,oaccruals_at,taccruals_at,cowc_gr1a,coa_gr1a,col_gr1a,nncoa_gr1a,ncoa_gr1a,ncol_gr1a,nfna_gr1a,sti_gr1a,lti_gr1a,fnl_gr1a,be_gr1a,oaccruals_ni,taccruals_ni,netis_at,eqnetis_at,dbnetis_at,niq_be,niq_be_chg1,niq_at,niq_at_chg1,ebit_bev,ebit_sale,sale_bev,at_turnover,gp_at,gp_atl1,ope_be,ope_bel1,op_at,op_atl1,cop_at,cop_atl1,f_score,o_score,z_score,pi_nix,at_be,saleq_gr1,rd_me,rd_sale,opex_at,emp_gr1,rd5_at,age,dsale_dinv,dsale_drec,dgp_dsale,dsale_dsga,sale_emp_gr1,tangibility,kz_index,ocfq_saleq_std,cash_at,ni_ar1,ni_ivol,earnings_variability,aliq_at,aliq_mat,seas_1_1an,seas_1_1na,seas_2_5an,seas_2_5na,seas_6_10an,seas_6_10na,seas_11_15an,seas_11_15na,seas_16_20an,seas_16_20na,market_equity,ivol_ff3_21d,ivol_capm_252d,ivol_capm_21d,ivol_hxz4_21d,rvol_21d,beta_60m,betabab_1260d,beta_dimson_21d,turnover_126d,turnover_var_126d,dolvol_126d,dolvol_var_126d,prc,ami_126d,zero_trades_21d,zero_trades_126d,zero_trades_252d,rmax1_21d,rskew_21d,iskew_capm_21d,iskew_ff3_21d,iskew_hxz4_21d,coskew_21d,ret_1_0,betadown_252d,bidaskhl_21d,ret_3_1,ret_9_1,ret_12_7,corr_1260d,rmax5_21d,rmax5_rvol_21d,ni_be,ocf_at,ocf_at_chg1,mispricing_perf,mispricing_mgmt,qmj,qmj_prof,qmj_growth,qmj_safety
0,1962-01-31,100104301,-0.039478,-0.037478,,,,,,,0.276667,,,,,0.076667,0.156667,0.05,,0.113333,0.653333,,,,0.623333,0.42,0.106667,0.08,0.333333,0.06,0.103333,0.343333,0.936667,0.336667,0.43,0.626667,0.366667,0.703333,0.27,0.17,0.376667,0.74,,,0.77,0.656667,0.72,0.703333,0.726667,0.773333,0.666667,0.233333,0.253333,0.293333,0.206667,0.333333,,0.423333,0.676667,0.73,0.353333,0.66,,,0.68,,,,,0.8,0.776667,0.563333,0.57,0.906667,0.886667,0.806667,0.783333,0.893333,0.883333,0.88,0.866667,0.738333,0.14,,0.863333,0.17,,0.036667,0.073333,0.56,0.773333,0.056667,1.0,0.223333,0.396667,0.7,0.643333,0.193333,0.65,,,0.566667,0.103333,0.16,0.21,0.686667,,,,,,,,,,,,0.84,,,,,,,,,,,,,0.906667,,,,,,,,,,,,,,,,,,,,0.783333,0.46,0.84,,,,,,
1,1962-01-31,100113301,0.051937,0.053937,,,,,,,0.053333,,,,,0.5,0.536667,0.966667,,0.02,0.18,,,,0.256667,0.13,0.016667,0.973333,0.36,0.5,0.98,0.1,0.5,0.31,0.05,0.08,0.5,0.498333,0.093333,0.216667,0.31,0.26,,,0.03,0.15,0.13,0.5,0.498333,0.113333,0.04,0.08,0.498333,0.182906,0.498333,0.583333,,0.783333,0.62,0.498333,0.498333,0.498333,,,0.613333,,,,,0.016667,0.023333,0.82,0.726667,0.333333,0.27,0.5,0.5,0.016667,0.016667,0.5,0.498333,0.161667,0.5,,0.5,0.5,,0.498333,0.498333,0.84,0.203333,0.5,1.0,0.776667,0.803333,0.143333,0.073333,0.22,0.13,,,0.403333,0.03,0.786667,0.906667,0.19,,,,,,,,,,,,0.133333,,,,,,,,,,,,,0.06,,,,,,,,,,,,,,,,,,,,0.5,0.31,0.246667,,,,,,
2,1962-01-31,100115701,0.062263,0.064263,,,,,,,0.796667,,,,,0.5,0.0,0.266667,,0.15,0.77,,,,0.963333,0.806667,0.226667,0.606667,0.66,0.5,0.133333,0.336667,0.5,0.103333,0.93,0.65,0.5,0.498333,0.716667,0.07,0.296667,0.801667,,,0.498333,0.143333,0.206667,0.5,0.498333,0.29,0.976667,1.0,0.498333,0.596667,0.498333,0.713333,,0.88,0.366667,0.498333,0.498333,0.498333,,,0.486667,,,,,0.856667,0.176667,0.96,0.95,0.583333,0.733333,0.5,0.5,0.596667,0.78,0.5,0.498333,0.448333,0.5,,0.786667,0.5,,0.498333,0.498333,0.94,0.796667,0.5,1.0,0.976667,0.05,0.81,0.153333,0.74,0.533333,,,0.046667,0.123333,0.133333,0.82,0.926667,,,,,,,,,,,,0.776667,,,,,,,,,,,,,0.893333,,,,,,,,,,,,,,,,,,,,0.5,0.833333,0.42,,,,,,
3,1962-01-31,100120901,0.06592,0.06792,,,,,,,0.84,,,,,0.26,0.823333,0.44,,0.243333,0.05,,,,0.886667,0.966667,0.27,0.306667,0.036667,0.18,0.403333,0.85,0.85,0.14,0.943333,0.956667,0.976667,0.966667,0.973333,0.06,0.66,0.796667,,,0.916667,0.953333,0.626667,0.97,0.913333,0.756667,0.863333,0.87,0.986667,0.98,0.016667,0.053333,,0.85,0.95,0.903333,0.97,0.953333,,,0.956667,,,,,0.26,0.636667,0.13,0.206667,0.353333,0.636667,0.76,0.873333,0.31,0.623333,0.013333,0.016667,0.448333,0.923333,,0.166667,0.93,,0.498333,0.498333,0.146667,0.96,0.5,1.0,0.623333,0.19,0.98,0.063333,0.77,0.176667,,,0.156667,0.756667,0.393333,0.373333,0.846667,,,,,,,,,,,,0.45,,,,,,,,,,,,,0.863333,,,,,,,,,,,,,,,,,,,,0.393333,0.016667,0.666667,,,,,,
4,1962-01-31,100121501,0.011359,0.013359,,,,,,,0.336667,,,,,0.643333,0.676667,0.523333,,0.6,0.406667,,,,0.206667,0.286667,0.616667,0.38,0.576667,0.686667,0.563333,0.726667,0.323333,0.08,0.253333,0.386667,0.803333,0.236667,0.286667,0.043333,0.076667,0.096667,,,0.716667,0.45,0.423333,0.243333,0.286667,0.34,0.266667,0.29,0.273333,0.403333,0.926667,0.62,,0.803333,0.383333,0.226667,0.25,0.296667,,,0.626667,,,,,0.446667,0.796667,0.153333,0.176667,0.383333,0.346667,0.633333,0.336667,0.586667,0.436667,0.763333,0.77,0.448333,0.643333,,0.77,0.403333,,0.498333,0.498333,0.18,0.263333,0.5,1.0,0.593333,0.836667,0.498333,0.5,0.593333,0.833333,,,0.24,0.746667,0.7,0.75,0.06,,,,,,,,,,,,0.703333,,,,,,,,,,,,,0.826667,,,,,,,,,,,,,,,,,,,,0.333333,0.466667,0.82,,,,,,
5,1962-01-31,100123901,0.05783,0.05983,,,,,,,1.0,,,,,0.0,0.153333,0.0,,0.043333,0.673333,,,,0.99,0.95,0.033333,0.04,0.363333,0.5,0.01,0.41,0.5,0.5,0.5,0.498333,0.5,0.498333,0.5,0.501667,0.5,0.5,,,0.498333,0.498333,0.5,0.5,0.498333,0.5,0.5,0.5,0.498333,0.5,0.498333,0.471667,,0.423333,0.503333,0.498333,0.498333,0.498333,,,0.486667,,,,,0.963333,0.326667,0.99,0.97,1.0,0.5,1.0,0.5,0.956667,0.5,0.5,0.498333,0.448333,0.76,,0.766667,0.986667,,0.498333,0.498333,0.976667,0.498333,0.5,0.123333,0.5,0.5,0.376667,0.806667,0.5,0.016667,,,0.226667,0.498333,0.5,0.5,0.5,,,,,,,,,,,,0.23,,,,,,,,,,,,,0.796667,,,,,,,,,,,,,,,,,,,,1.0,0.96,0.5,,,,,,
6,1962-01-31,100124301,-0.034111,-0.032111,,,,,,,0.69,,,,,0.843333,0.956667,0.86,,0.476667,0.683333,,,,0.293333,0.633333,0.406667,0.316667,0.806667,0.29,0.766667,0.966667,0.203333,0.053333,0.136667,0.12,0.666667,0.09,0.04,0.286667,0.056667,0.03,,,0.646667,0.66,0.573333,0.09,0.18,0.283333,0.413333,0.65,0.046667,0.05,0.693333,0.9,,0.823333,0.096667,0.06,0.073333,0.233333,,,0.086667,,,,,0.12,0.85,0.016667,0.016667,0.04,0.036667,0.136667,0.093333,0.103333,0.086667,0.656667,0.36,0.965,0.823333,,0.263333,0.846667,,0.498333,0.498333,0.016667,0.816667,0.5,1.0,0.74,0.41,0.836667,0.83,0.216667,0.663333,,,0.163333,0.793333,0.366667,0.626667,0.026667,,,,,,,,,,,,0.903333,,,,,,,,,,,,,0.22,,,,,,,,,,,,,,,,,,,,0.11,0.563333,0.793333,,,,,,
7,1962-01-31,100130201,-0.063995,-0.061995,,,,,,,0.88,,,,,0.706667,0.753333,0.716667,,0.936667,0.173333,,,,0.263333,0.65,0.92,0.99,0.27,0.926667,0.603333,0.6,0.226667,0.873333,0.41,0.786667,0.14,0.726667,0.696667,0.921667,0.37,0.776667,,,0.15,0.77,0.853333,0.726667,0.82,0.643333,0.816667,0.843333,0.743333,0.706667,0.863333,0.823333,,0.846667,0.186667,0.166667,0.723333,0.826667,,,0.183333,,,,,0.69,0.06,0.983333,0.99,0.953333,0.92,0.84,0.79,0.623333,0.603333,0.36,0.356667,0.738333,0.743333,,0.716667,0.8,,0.498333,0.498333,0.993333,0.9,0.5,0.123333,0.336667,0.11,0.498333,0.5,0.073333,0.626667,,,0.756667,0.316667,0.73,0.283333,0.446667,,,,,,,,,,,,0.27,,,,,,,,,,,,,0.056667,,,,,,,,,,,,,,,,,,,,0.746667,0.28,0.09,,,,,,
8,1962-01-31,100130802,-0.043647,-0.041647,,,,,,,0.13,,,,,0.99,0.91,0.96,,0.666667,0.976667,,,,0.116667,0.203333,0.613333,0.9,0.98,0.73,0.976667,0.87,0.006667,0.68,0.06,0.016667,0.653333,0.06,0.5,0.3,0.69,0.226667,,,0.17,0.03,0.01,0.05,0.073333,0.05,0.033333,0.16,0.256667,0.303333,0.733333,0.863333,,0.423333,0.116667,0.05,0.013333,0.043333,,,0.106667,,,,,0.063333,0.123333,0.25,0.253333,0.1,0.07,0.03,0.026667,0.05,0.04,0.333333,0.313333,0.448333,0.746667,,0.73,0.34,,0.498333,0.498333,0.366667,0.07,0.5,1.0,0.82,0.423333,0.053333,0.793333,0.866667,0.376667,,,0.3,0.183333,0.33,0.07,0.203333,,,,,,,,,,,,0.606667,,,,,,,,,,,,,0.103333,,,,,,,,,,,,,,,,,,,,0.04,0.64,0.956667,,,,,,
9,1962-01-31,100136201,0.051333,0.053333,,,,,,,0.21,,,,,0.83,0.27,0.733333,,0.976667,0.273333,,,,0.24,0.24,0.993333,0.753333,0.53,0.98,0.833333,0.033333,0.106667,0.913333,0.383333,0.443333,0.223333,0.656667,0.733333,0.833333,0.763333,0.846667,,,0.063333,0.291667,0.295,0.663333,0.72,0.383333,0.583333,0.653333,0.74,0.653333,0.273333,0.683333,,0.423333,0.273333,0.29,0.643333,0.723333,,,0.256667,,,,,0.74,0.693333,0.593333,0.5,0.156667,0.153333,0.706667,0.693333,0.696667,0.646667,0.743333,0.753333,0.161667,0.206667,,0.873333,0.326667,,0.498333,0.498333,0.453333,0.498333,0.5,1.0,0.39,0.156667,0.2,0.136667,0.5,0.783333,,,0.76,0.31,0.163333,0.233333,0.44,,,,,,,,,,,,0.13,,,,,,,,,,,,,0.08,,,,,,,,,,,,,,,,,,,,0.653333,0.406667,0.736667,,,,,,


In [3]:
# Show every column when a frame is displayed.
pd.set_option('display.max_columns', None)
# Names of the columns that contain at least one missing value.
df_ipca.columns[df_ipca.isnull().any()]

Index(['ret_local', 'ret_exc', 'niq_su', 'ret_6_1', 'ret_12_1', 'saleq_su',
       'ni_inc8q', 'prc_highprc_252d', 'resff3_6_1', 'resff3_12_1',
       'ret_60_12', 'div12m_me', 'eqpo_me', 'eqnpo_me', 'chcsho_12m',
       'eqnpo_12m', 'sti_gr1a', 'netis_at', 'eqnetis_at', 'niq_be',
       'niq_be_chg1', 'niq_at', 'niq_at_chg1', 'z_score', 'saleq_gr1',
       'kz_index', 'ocfq_saleq_std', 'aliq_mat', 'seas_1_1an', 'seas_1_1na',
       'seas_2_5an', 'seas_2_5na', 'seas_6_10an', 'seas_6_10na',
       'seas_11_15an', 'seas_11_15na', 'seas_16_20an', 'seas_16_20na',
       'ivol_ff3_21d', 'ivol_capm_252d', 'ivol_capm_21d', 'ivol_hxz4_21d',
       'rvol_21d', 'beta_60m', 'betabab_1260d', 'beta_dimson_21d',
       'turnover_126d', 'turnover_var_126d', 'dolvol_126d', 'dolvol_var_126d',
       'ami_126d', 'zero_trades_21d', 'zero_trades_126d', 'zero_trades_252d',
       'rmax1_21d', 'rskew_21d', 'iskew_capm_21d', 'iskew_ff3_21d',
       'iskew_hxz4_21d', 'coskew_21d', 'ret_1_0', 'betadown_252d',


## IPCA Parameters

In [3]:
from logger import ErrorLogger
from tqdm import tqdm
import os
from datetime import datetime

In [4]:
# Rolling-estimation settings
K = 6  # number of principal components
window_size = 240  # rolling window size

# Sorted distinct values of the 'eom' column, and how many there are
unique_dates = list(df_ipca['eom'].unique())
unique_dates.sort()
T = len(unique_dates)

# Today's date is stamped into the results file name
current_date = f"{datetime.now():%Y-%m-%d}"
# log_fp = "logs/"+f"{current_date}-w{window_size}-log-error.txt"
res_fp = f"results/{current_date}-w{window_size}-results.csv"
logger = ErrorLogger()  # each node/window has separate log file

print(f'There are {T} unique days starting with {unique_dates[0]}')
print(f'Total {len(characteristics)} charateristics starting with {characteristics[0]}')

There are 745 unique days starting with 1962-01-31
Total 153 charateristics starting with niq_su


In [5]:
# Print the position in unique_dates of the date under investigation,
# searching only positions that have a full window of history before them.
problem_date = '1992-11-30'
for t in range(window_size, T):
    if unique_dates[t] != problem_date:
        continue
    print(t)
    break

370


In [7]:
# Single-window run: fit IPCA on the window that ends just before position t,
# build the matrix V_t from the fit, optimise a portfolio on the factor
# moments, and write the implied per-ID weights to "test.csv".
np.random.seed(102)  # fix NumPy's global RNG state for this cell

t = 370  # position of '1992-11-30' in unique_dates, as printed by the lookup cell
K = 6

# The window is the window_size dates strictly before position t; the date at
# position t itself is the one being predicted.
window_dates = unique_dates[t-window_size:t]
window_data = df_ipca[df_ipca['eom'].isin(window_dates)]
date_to_predict = unique_dates[t]

print(f'======Progress: {t}: {date_to_predict}======')

# calculate ipca
# Any exception raised by IPCA_factor propagates unchanged (the former
# try/except only re-raised it, so it has been removed).
Gamma, Factors, r_t, excess_r_t, X_last = IPCA_factor(window_data, characteristics, K)

# V_t = (Gamma' X' X Gamma)^{-1} Gamma' X'  with X = X_last
V_t = inv(Gamma.T @ X_last.T @ X_last @ Gamma) @ Gamma.T @ X_last.T
# regularization: adds 1e-05 to the diagonal of the leading K x K corner of V_t.
# NOTE(review): the term is added to V_t after the inverse has been taken,
# not to the matrix being inverted -- confirm this is the intended form.
reg_mat = np.zeros_like(V_t)
reg_mat[:K, :K] = np.eye(K)*1e-05
V_t += reg_mat

# Covariance and mean of Factors, treating each row as one variable.
Sigma_t= np.cov(Factors, rowvar = True)
mu_t = np.array(np.mean(Factors, axis=1))

# Candidate values for the two regularization terms: 10 points, log-spaced
# between 1e-6 and 5. No grid search is run here; only the first point of
# each grid is passed to the optimizer below.
g1 = np.exp(np.linspace( np.log(1e-6),np.log(5),10))
g2 = np.exp(np.linspace( np.log(1e-6),np.log(5),10))

# pyport.portfolio_optimization is a project-local optimizer; the first
# element of its return value is reshaped into a column vector of weights.
OptimalPortfolioWeights_t = pyport.portfolio_optimization(
    meanVec=np.array(mu_t),
    sigMat=np.array(Sigma_t),
    retTarget=0,
    longShort=0.2,
    maxAlloc=0.08,
    lambda_l1=g1[0],
    lambda_l2=g2[0],
    riskfree=0,
    assetsOrder=None,
    maxShar=1,
    # The slice end K+1 is clamped if V_t.T has fewer than K+1 columns.
    # NOTE(review): confirm how many columns are meant to be passed.
    factor=np.array(V_t.T)[:,0:K+1],
    turnover=None,
    w_pre=None,
    individual=False,
    exposure_constrain=0,
    w_bench=None,
    factor_exposure_constrain=None,
    U_factor=None,
    general_linear_constrain=None,
    U_genlinear=0,
    w_general=None,
    TE_constrain=0,
    general_quad=0,
    Q_w=None,
    Q_b=None,
    Q_bench=None
)[0].reshape(-1, 1)

# Project r_t through V_t, then combine with the optimised weights.
ret_t = (V_t @ r_t).T @ OptimalPortfolioWeights_t

# Map the optimised weights back through V_t' to one weight per row of X_last.
w_individual = np.dot(np.array(V_t.T), OptimalPortfolioWeights_t).flatten()

# Save the weights keyed by X_last's index.
df_weights = pd.DataFrame({
        'ID': X_last.index,
        'Weights': w_individual})
df_weights.to_csv("test.csv", index=False)

print(f'======Max_return: {t}: {ret_t}======')
           
    



[=                                                                       ]   2%

The panel dimensions are:
n_samples: 6449 , L: 122 , T: 239




Step 1 - Aggregate Update: 1.4614621545791793
Step 2 - Aggregate Update: 0.42061623755485744
Step 3 - Aggregate Update: 0.5107371831299585
Step 4 - Aggregate Update: 0.027210795502617728
Step 5 - Aggregate Update: 0.02322222183731329
Step 6 - Aggregate Update: 0.02439563986542162
Step 7 - Aggregate Update: 0.020079973248044045
Step 8 - Aggregate Update: 0.015556375882053955
Step 9 - Aggregate Update: 0.012191570652635701
Step 10 - Aggregate Update: 0.009901453029899872
Step 11 - Aggregate Update: 0.008328379592647608
Step 12 - Aggregate Update: 0.0071797176145405005
Step 13 - Aggregate Update: 0.006270961072018383
Step 14 - Aggregate Update: 0.005499537690650055
Step 15 - Aggregate Update: 0.004814090901833589
Step 16 - Aggregate Update: 0.004192063375817002
Step 17 - Aggregate Update: 0.0036254445926941674
Step 18 - Aggregate Update: 0.003112404693915716
Step 19 - Aggregate Update: 0.0026527878063877194
Step 20 - Aggregate Update: 0.0022459924136628293
Step 21 - Aggregate Update: 0.00

KeyboardInterrupt: 

In [None]:
# Read a saved results file. The path is relative to the project directory,
# matching the "results/" location used for res_fp, so the notebook is not
# tied to one user's home directory.
res = pd.read_csv('results/2024-03-31-w240-results.csv')
# mean(P_Excess_Return) / std(P_Return) * sqrt(12); the sqrt(12) factor
# presumably annualises monthly figures -- confirm.
# NOTE(review): numerator and denominator use different columns
# ('P_Excess_Return' vs 'P_Return') -- confirm that mixing them is intended.
np.mean(res['P_Excess_Return'])/np.std(res['P_Return'])*np.sqrt(12)

In [None]:

# Previously recorded value (presumably the output of the ratio cell above): 2.0692294717674766