In [1]:
import pandas as pd
import os
import sys
sys.path.insert(0, "../")
from factor_cal.config_loader import basic_config as cfg
from factor_cal.feature import features as fe
from factor_cal.factor import factors as fa
from factor_cal.utils import tools as tl
from factor_cal.factor import factor_func as ff

In [10]:
# Load the project configuration from YAML.
config = cfg.BasicConfig('config/config.yml')

# Flatten the factor names of every factor type into a single list,
# preserving the order in which the types and names are iterated.
fac_names = [
    name
    for fac_type in config['factors']
    for name in config['factors'][fac_type].keys()
]

# Build the feature container first; the factor container is constructed from it.
features = fe.Features(config)
factors = fa.Factors(config, features)

In [11]:
# ic_results = pd.DataFrame({c: pd.Series(dtype=t) for c, t in {'IC Mean': 'float', 'IC Std.': 'float', 'Risk-Adjusted IC': 'float', 'IC_win rate': 'float'}.items()})

In [12]:
# Display the flattened list of factor names collected from the config.
fac_names

['close_ret',
 'td_pv_corr_1min',
 'td_pv_corr',
 'ret_v_prod_5min',
 'ret_v_prod_1min',
 'td_ret_v_prod_5min',
 'td_ret_v_prod_1min',
 'td_p_v_ratio_3s_1min',
 'td_p_v_ratio_3s_5min',
 'close_ret_15min',
 'close_ret_5min',
 'close_ret_1min',
 'ohlc_rat',
 'clh',
 'clh_delta',
 'td_buy_rank',
 'td_sell_rank',
 'close_adjusted',
 'ret_skew',
 'ret_kurt',
 'retail_dir_tsrank_10min',
 'retail_dir_rowrank',
 'OB_price_2derivative',
 'OB_price_2derivative_tsrank',
 'wb1_tsrank',
 'wb5_tsrank',
 'wb10_tsrank',
 'td_v_ratio',
 'en_b_p10_tsrank',
 'en_s_p10_tsrank',
 'en_b_ret10_tsrank',
 'en_s_ret10_tsrank',
 'en_b_p5_tsrank',
 'en_s_p5_tsrank',
 'en_b_ret5_tsrank',
 'en_s_ret5_tsrank',
 'bs_v1_tsrank',
 'bs_v5_tsrank',
 'bs_pv1_tsrank',
 'bs_pv5_tsrank',
 'en_bs_price_diff',
 'en_b_price_tsrank',
 'en_s_price_tsrank',
 'en_b_sumprice_tsrank',
 'en_s_sumprice_tsrank',
 'ct_b_med_tsrank',
 'ct_s_med_tsrank',
 'can_en_v_ratio',
 'can_en_v_ratio_fillna']

In [20]:
# Directory expected to contain one IC summary CSV per factor, named `<factor>.csv`.
# NOTE(review): absolute, machine-specific path - consider moving it to the config.
base_dir = r'/home/wangzirui/workspace/factor_ic_summary/factor_comb_top_n/OB_price'

# Row label selected from each per-factor CSV.
pred_type = '1m'
# Statistic columns copied from each CSV into the summary.
ic_columns = ['IC Mean', 'IC Std.', 'Risk-Adjusted IC', 'IC win rate']

# Collect one plain dict per factor and build the frame once at the end;
# this avoids re-copying the accumulated frame on every iteration.
ic_records = []
for fac_name in fac_names:
    try:
        cur_res = pd.read_csv(os.path.join(base_dir, f'{fac_name}.csv'), index_col=0)
    except FileNotFoundError as e:
        # A factor without a summary file is reported and skipped.
        print(e)
        continue

    record = cur_res.loc[pred_type, ic_columns].to_dict()

    # Look the factor up once; a name missing from `fac_dict` yields empty
    # formula/description cells instead of an AttributeError on `.desc`.
    fac = factors.fac_dict.get(fac_name)
    record['factor'] = fac_name
    record['formula'] = str(fac) if fac is not None else None
    record['description'] = getattr(fac, 'desc', None)
    ic_records.append(record)

# Passing `columns` keeps the layout stable even when no CSV was found, so
# downstream cells always receive a DataFrame (possibly empty), never None.
ic_results = pd.DataFrame(ic_records, columns=ic_columns + ['factor', 'formula', 'description'])

In [21]:
# Display the assembled per-factor IC summary table.
ic_results

Unnamed: 0,IC Mean,IC Std.,Risk-Adjusted IC,IC win rate,factor,formula,description
0,0.024624,0.023549,1.045641,0.86982,close_ret,"ret(close, shift=1)",收益率
1,0.02086,0.03264,0.639085,0.777084,td_pv_corr_1min,"ts_correlation(ffill_na(vwap), vol, window=20)",价量之间相关性
2,0.026429,0.029506,0.895731,0.848367,td_pv_corr,"ts_correlation(ffill_na(vwap), vol)",价量之间相关性
3,0.02628,0.051081,0.514469,0.703218,ret_v_prod_5min,"ts_rank(multiply(ret(close), vol), window=5*20)",收益率和成交量关系
4,0.02628,0.051081,0.514469,0.703218,ret_v_prod_1min,"ts_rank(multiply(ret(close), vol), window=1*20)",收益率和成交量关系
5,0.023899,0.039331,0.607647,0.73725,td_ret_v_prod_5min,"ts_rank(multiply(ret(ffill_na(vwap)), vol), wi...",加权成交价收益和成交量关系
6,0.014072,0.033134,0.424698,0.665188,td_ret_v_prod_1min,"ts_rank(multiply(ret(ffill_na(vwap)), vol), wi...",加权成交价收益和成交量关系
7,0.012682,0.032437,0.390967,0.649878,td_p_v_ratio_3s_1min,"ts_rank(fill_na(divide(delta(ffill_na(vwap), s...",加权成交价变动与成交量比值
8,0.022657,0.038842,0.583318,0.730327,td_p_v_ratio_3s_5min,"ts_rank(fill_na(divide(delta(ffill_na(vwap), s...",加权成交价变动与成交量比值
9,-0.018738,0.097505,-0.192176,0.373428,close_ret_15min,"ret(close, shift=15*20)",收益率


In [22]:
# Write the summary table next to the per-factor CSVs, one workbook per
# prediction type, without the DataFrame index column.
summary_filename = f'ic_summary_{pred_type}.xlsx'
xlsx_file = os.path.join(base_dir, summary_filename)
ic_results.to_excel(xlsx_file, index=False)

In [23]:
from openpyxl import load_workbook
from openpyxl.styles import Font

# Open the exported summary both as an openpyxl workbook/worksheet and as a
# DataFrame; both reads target the same sheet of the same file.
summary_sheet_name = 'Sheet1'
book = load_workbook(xlsx_file)
sheet = book[summary_sheet_name]
df = pd.read_excel(xlsx_file, sheet_name=summary_sheet_name)

In [24]:
class Condition:
    """Pair a name and a threshold with a predicate that tests a candidate value.

    Args:
        name: Label identifying the condition (read-only after construction).
        value: Threshold forwarded as the second argument to ``func``.
        func: Callable invoked as ``func(candidate, threshold)``; its result
            is what ``is_satisfied`` returns.
    """

    def __init__(self, name, value, func):
        self._name = name
        self._value = value
        self._func = func
        # Start with no position assigned so that reading `index` before it
        # has been set returns None rather than raising AttributeError.
        self._index = None

    @property
    def name(self):
        """Label given at construction time."""
        return self._name

    @property
    def value(self):
        """Current threshold passed to the predicate."""
        return self._value

    @value.setter
    def value(self, value):
        self._value = value

    @property
    def index(self):
        """Position associated with this condition, or None if not assigned yet."""
        return self._index

    @index.setter
    def index(self, index):
        self._index = index

    def is_satisfied(self, value):
        """Return the predicate's result for ``value`` against the current threshold."""
        return self._func(value, self._value)

def abs_cond(value, threshold):
    """Return True when the magnitude of ``value`` strictly exceeds ``threshold``."""
    magnitude = abs(value)
    return magnitude > threshold

def win_rate_cond(value, threshold):
    """Return True when ``value`` is strictly more than ``threshold`` away from 0.5."""
    distance_from_half = abs(value - 0.5)
    return distance_from_half > threshold

def determine_index(dic, conditions, name, index):
    """Attach ``index`` to every condition called ``name`` and register it in ``dic``.

    Args:
        dic: Mapping updated in place; ``dic[index]`` is set to each matching
            condition (if several match, the last one wins).
        conditions: Iterable of objects exposing a ``name`` attribute and a
            writable ``index`` attribute.
        name: Name to look for among ``conditions``.
        index: Position stored on each matching condition and used as the
            key in ``dic``.

    Nothing is modified when no condition has the given name.
    """
    for condition in conditions:
        # Compare through the public `name` attribute rather than the private field.
        if name == condition.name:
            condition.index = index
            dic[index] = condition

# Column position of the 'factor' header; stays None if the sheet has no such column.
fac_index = None
# Names of factors whose row passes at least `min_satisfied` conditions.
satisfied_factors = []
# One threshold test per statistic column, matched to columns by header text.
conditions = [Condition('IC Mean', 0.03, abs_cond), Condition('Risk-Adjusted IC', 1.5, abs_cond), Condition('IC win rate', 0.3, win_rate_cond)]
# Maps a column position to the condition that applies to that column.
cond_ind_dict = {}
# Minimum number of passed conditions for a factor to be selected.
min_satisfied = 2

rows = sheet.iter_rows()

# First row is the header: locate the factor column and bind each condition
# to the column whose header equals the condition's name.
for j, cell in enumerate(next(rows, ())):
    if cell.value == 'factor':
        fac_index = j
        continue
    determine_index(cond_ind_dict, conditions, cell.value, j)

# Remaining rows hold one factor each.
for row in rows:
    satisfied_cond_num = 0
    factor_name = None
    for j, cell in enumerate(row):
        if j == fac_index:
            factor_name = cell.value
            continue
        condition = cond_ind_dict.get(j)
        if condition is None:
            continue
        # Blank cells (e.g. a missing statistic) and text cannot be compared
        # against a numeric threshold; treat them as "not satisfied".
        if not isinstance(cell.value, (int, float)):
            continue
        if condition.is_satisfied(cell.value):
            satisfied_cond_num += 1
            # Highlight the passing statistic in red.
            cell.font = Font(color='FF0000')

    if satisfied_cond_num >= min_satisfied:
        satisfied_factors.append(factor_name)


# Save the highlighted workbook under a separate name so the plain export is kept.
# NOTE(review): `xlsx_file` is rebound to the modified path here; re-running the
# workbook-loading cell afterwards will open the modified file.
xlsx_file = os.path.join(base_dir, f'ic_summary_{pred_type}_modified.xlsx')
book.save(xlsx_file)

In [None]:
import yaml
# Write the selected factor names to YAML. The context manager ensures the
# file is flushed and closed as soon as the dump completes.
with open(os.path.join(base_dir, f'satisfied_factors_{pred_type}.yml'), 'w') as f:
    yaml.dump(satisfied_factors, f)

In [None]:
pred_type='1m'
# NOTE(review): this rebinds `base_dir` to a different, machine-specific
# location than the one used earlier in the notebook - confirm this is intended.
base_dir = r'C:\Users\12552\Downloads\ic_summary\factor_ic_summary'
factor_filepath = os.path.join(base_dir, f'satisfied_factors_{pred_type}.yml')
with open(factor_filepath, 'r') as f:
    # The file is expected to hold a plain list of names, so the safe loader
    # (which only builds standard YAML types) is sufficient and avoids
    # constructing arbitrary Python objects from file content.
    factor_names = yaml.safe_load(f)

In [None]:
# Display the factor names read back from the YAML file.
factor_names