In [2]:
import numpy as np
from jaqs.data import DataView
from jaqs.research import SignalDigger
import warnings

# Silence every warning category for the rest of the session.
warnings.filterwarnings("ignore")
# Relative path to the saved dataview. The recorded run of this cell ended with
# "OSError: ``JAQS_Data/hs300`` does not exist", so the folder must be present
# before anything below can run.
dataview_folder = 'JAQS_Data/hs300'
# `dv` is the shared DataView instance read by the helper functions and cells below.
dv = DataView()
dv.load_dataview(dataview_folder)

OSError: ``JAQS_Data/hs300`` does not exist

In [None]:
def mask_index_member():
    """Return the signal filter mask for non-index-members.

    Reads the 'index_member' field from the notebook-level ``dv`` and returns
    its element-wise negation of ``> 0``: True where the value is not positive
    (i.e. the entry is to be filtered out as a non-member), False otherwise.
    """
    index_member = dv.get_ts('index_member')
    is_member = index_member > 0
    return ~is_member

def limit_up_down():
    """Build the tradability conditions: not suspended and not at a price limit.

    Registers two formula fields on the notebook-level ``dv``:
    'up_limit' (close-to-close change above 0.095) and 'down_limit'
    (close-to-close change below -0.095).

    Returns:
        A ``(can_enter, can_exit)`` tuple. ``can_enter`` is True where
        'up_limit' is below 1 and the trade status is not suspended;
        ``can_exit`` is the same with 'down_limit'.
    """
    status = dv.get_ts('trade_status')
    suspended = status == u'停牌'

    # Limit-up flag.
    dv.add_formula('up_limit', '(close - Delay(close, 1)) / Delay(close, 1) > 0.095', is_quarterly=False)
    # Limit-down flag.
    dv.add_formula('down_limit', '(close - Delay(close, 1)) / Delay(close, 1) < -0.095', is_quarterly=False)

    not_limit_up = dv.get_ts('up_limit') < 1
    can_enter = np.logical_and(not_limit_up, ~suspended)  # not limit-up and not suspended
    not_limit_down = dv.get_ts('down_limit') < 1
    can_exit = np.logical_and(not_limit_down, ~suspended)  # not limit-down and not suspended
    return can_enter, can_exit

In [3]:
# Signal filter: True where the entry is not an index member.
mask = mask_index_member()
# Entry / exit feasibility: not suspended and not at the up / down price limit.
(can_enter, can_exit) = limit_up_down()

In [4]:
from jaqs.research import Optimizer

In [5]:
# Factor optimisation: set up the parameter search for the "divert" signal.
price = dv.get_ts('close_adj')
high = dv.get_ts('high_adj')
low = dv.get_ts('low_adj')
price_bench = dv.data_benchmark
optimizer = Optimizer(
    dataview=dv,
    formula='- Correlation(vwap_adj, volume, LEN)',
    params={"LEN": range(2, 4)},  # candidate values for LEN: 2 and 3
    name='divert',
    price=price,
    high=high,
    low=low,
    # None -> absolute returns; pass price_bench instead for returns relative to the benchmark.
    benchmark_price=None,
    period=5,
    n_quantiles=5,
    mask=mask,
    can_enter=can_enter,
    can_exit=can_exit,
    commission=0.0008,  # trading commission (library default is 0.0008)
    is_event=False,  # whether the signal is an event (0/1 factor)
    is_quarterly=False,  # whether the factor is quarterly (default False)
)

In [6]:
# Rank the candidates by the annualised IR of the long-only return, in-sample only.
#
# target_type options:
#   ic
#   long_ret / short_ret / long_short_ret / top_quantile_ret / bottom_quantile_ret / tmb_ret
#   long_space / short_space / long_short_space / top_quantile_space / bottom_quantile_space / tmb_space
#
# target options:
#   IC types:     "IC Mean", "IC Std.", "t-stat(IC)", "p-value(IC)", "IC Skew", "IC Kurtosis", "Ann. IR"
#   return types: 't-stat', "p-value", "skewness", "kurtosis", "Ann. Ret", "Ann. Vol", "Ann. IR", "occurance"
#   space types:  'upside_space_mean', 'upside_space_std', 'upside_space_mean/std', 'upside_space_max',
#                 'upside_space_min', 'upside_space_percentile25', 'upside_space_percentile50',
#                 'upside_space_percentile75', 'upside_space_occurance',
#                 'downside_space_mean', 'downside_space_std', 'downside_space_mean/std',
#                 'downside_space_max', 'downside_space_min', 'downside_space_percentile25',
#                 'downside_space_percentile50', 'downside_space_percentile75',
#                 'downside_space_occurance', 'up&down_space_mean_sum'
ret_best = optimizer.enumerate_optimizer(
    target_type="long_ret",  # optimisation target type
    target="Ann. IR",  # optimisation target
    in_sample_range=[20170101, 20170701],  # in-sample range; default None optimises over the full sample
    ascending=False,  # whether to sort ascending (small to large) by the target
)

Nan Data Count (should be zero) : 0;  Percentage of effective data: 67%
Nan Data Count (should be zero) : 0;  Percentage of effective data: 68%


In [7]:
# Report the first-ranked candidate: its name, then its return, IC and space tables.
print(*(ret_best[0][key] for key in ("signal_name", "ret", "ic", "space")), sep="\n")

divert{'LEN': 3}
             long_ret   short_ret  long_short_ret  top_quantile_ret  \
t-stat       1.651698    0.127259        0.551727          6.409743   
p-value      0.101250    0.898950        0.582180          0.000000   
skewness    -0.347678    0.347192        0.234277          0.684339   
kurtosis    -0.297840    0.438304        2.124405         14.505309   
Ann. Ret     0.114892    0.009147        0.016518          0.148677   
Ann. Vol     0.108612    0.112229        0.046747          0.272277   
Ann. IR      1.057822    0.081502        0.353350          0.546051   
occurance  119.000000  119.000000      119.000000       6670.000000   

           bottom_quantile_ret     tmb_ret  
t-stat               -0.075431    3.432355  
p-value               0.939870    0.000830  
skewness              1.577147   -0.159136  
kurtosis             26.451335    0.969194  
Ann. Ret             -0.001845    0.149774  
Ann. Vol              0.289122    0.068134  
Ann. IR              -0.0063

In [8]:
# Second search: rank the same candidates by mean IC.
ic_best = optimizer.enumerate_optimizer(
    target_type="ic",  # optimisation target type
    target="IC Mean",
    in_sample_range=None,
    ascending=False,
)
                                        

In [9]:
# Report the first-ranked candidate of the IC search: name, return table, IC table.
print(*(ic_best[0][key] for key in ("signal_name", "ret", "ic")), sep="\n")

divert{'LEN': 3}
             long_ret   short_ret  long_short_ret  top_quantile_ret  \
t-stat       0.897656    0.718618        1.374921          5.923684   
p-value      0.369810    0.472720        0.169790          0.000000   
skewness    -1.143842    0.914871        0.045719          0.308722   
kurtosis     4.949478    5.284813        9.441826          6.345131   
Ann. Ret     0.047686    0.041418        0.050968          0.074346   
Ann. Vol     0.168335    0.182634        0.117468          0.298537   
Ann. IR      0.283279    0.226779        0.433893          0.249034   
occurance  487.000000  487.000000      487.000000      27386.000000   

           bottom_quantile_ret     tmb_ret  
t-stat               -3.405724    5.063616  
p-value               0.000660    0.000000  
skewness              0.591349    0.124428  
kurtosis              7.735980    0.595633  
Ann. Ret             -0.046545    0.120775  
Ann. Vol              0.327406    0.075581  
Ann. IR              -0.1421

## 自定义Signal优化

In [10]:
# Keep only the columns of the adjusted close that hold at least one non-NaN value.
close = price.dropna(axis=1, how='all')

In [11]:
import pandas as pd
import talib as ta

# Build one custom signal per look-back window: the negated linear-regression
# slope of each `close` column, appended to the dataview and then processed by
# the optimizer.
signals = dict()
for param in [2, 3, 4, 5]:
    slope_field = 'slope_param=' + str(param)
    # DataFrame.items() replaces iteritems(), which was removed in pandas 2.0.
    slope_df = pd.DataFrame(
        {name: -ta.LINEARREG_SLOPE(value.values, param) for name, value in close.items()},
        index=close.index,
    )
    dv.append_df(slope_df, slope_field)
    signals[slope_field] = optimizer.cal_signal(dv.get_ts(slope_field))
# Replace the optimizer's candidate set so later enumerate_optimizer calls rank
# these slope signals.
optimizer.all_signals = signals

Nan Data Count (should be zero) : 0;  Percentage of effective data: 24%
Nan Data Count (should be zero) : 0;  Percentage of effective data: 24%
Nan Data Count (should be zero) : 0;  Percentage of effective data: 24%
Nan Data Count (should be zero) : 0;  Percentage of effective data: 24%


In [15]:
# Rank the custom slope signals by annualised long-only return.
# NOTE(review): the result is stored in `ic_best` although the target here is a
# return metric, not IC; the name is kept because the following cell reads it.
ic_best = optimizer.enumerate_optimizer(
    target_type="long_ret",  # optimisation target type
    target="Ann. Ret",
    in_sample_range=None,
    ascending=False,
)

In [17]:
# Display the first entry of the ranked results (a dict holding the 'ic' and 'ret' tables, among others).
ic_best[0]

{'ic':                    ic
 IC Mean      0.036087
 IC Std.      0.163851
 t-stat(IC)   4.815237
 p-value(IC)  0.000002
 IC Skew      0.167275
 IC Kurtosis  0.206846
 Ann. IR      0.220244,
 'ret':              long_ret   short_ret  long_short_ret  top_quantile_ret  \
 t-stat       1.474180    0.110363        1.827880          3.590692   
 p-value      0.141090    0.912170        0.068190          0.000330   
 skewness     0.104087   -0.270758        0.639742          0.604468   
 kurtosis     2.316515    1.724054        3.300459          5.824864   
 Ann. Ret     0.130162    0.009663        0.097608          0.099809   
 Ann. Vol     0.278634    0.276303        0.168515          0.387294   
 Ann. IR      0.467143    0.034972        0.579224          0.257709   
 occurance  483.000000  483.000000      483.000000       9397.000000   
 
            bottom_quantile_ret     tmb_ret  
 t-stat                0.549364    2.079622  
 p-value               0.582770    0.038090  
 skewness     