In [1]:
import numpy as np
from jaqs.data import DataView
from jaqs.research import SignalDigger
import warnings

warnings.filterwarnings("ignore")
dataview_folder = 'stockdata/hs300'
dv = DataView()
dv.load_dataview(dataview_folder)

  from pandas.core import datetools


Dataview loaded successfully.


In [2]:
def mask_index_member():
    df_index_member = dv.get_ts('index_member')
    mask_index_member = ~(df_index_member ==0) #定义信号过滤条件-非指数成分
    return mask_index_member

def limit_up_down():
    # 定义可买卖条件——未停牌、未涨跌停
    trade_status = dv.get_ts('trade_status')
    mask_sus = trade_status == u'停牌'
    # 涨停
    dv.add_formula('up_limit', '(close - Delay(close, 1)) / Delay(close, 1) > 0.095', is_quarterly=False)
    # 跌停
    dv.add_formula('down_limit', '(close - Delay(close, 1)) / Delay(close, 1) < -0.095', is_quarterly=False)
    can_enter = np.logical_and(dv.get_ts('up_limit') < 1, ~mask_sus) # 未涨停未停牌
    can_exit = np.logical_and(dv.get_ts('down_limit') < 1, ~mask_sus) # 未跌停未停牌
    return can_enter,can_exit

In [3]:
mask = mask_index_member()
can_enter,can_exit = limit_up_down()

In [4]:
from jaqs.research import Optimizer

In [5]:
# 优化因子
price = dv.get_ts('close_adj')
price_bench = dv.data_benchmark
optimizer = Optimizer(dataview=dv,
                      formula='- Correlation(vwap_adj, volume, LEN)',
                      params={"LEN":range(2,4,1)},
                      name='divert',
                      price=price,
                      benchmark_price=None,#=None求绝对收益 #=price_bench求相对收益
                      period=5,
                      n_quantiles=5,
                      mask=mask,
                      can_enter=can_enter,
                      can_exit=can_exit,
                      commission=0.0008,#手续费 默认0.0008
                      is_event=False,#是否是事件(0/1因子)
                      is_quarterly=False)#是否是季度因子 默认为False

In [6]:
ret_best = optimizer.enumerate_optimizer(target_type="long_ret",#优化目标类型 
    #ic/long_ret/short_ret/long_short_ret/top_quantile_ret/bottom_quantile_ret/tmb_ret
                                     target="Ann. IR",#优化目标 
#ic的 "IC Mean", "IC Std.", "t-stat(IC)", "p-value(IC)", "IC Skew", "IC Kurtosis", "Ann. IR",
#收益类的 't-stat', "p-value", "skewness", "kurtosis", "Ann. Ret", "Ann. Vol", "Ann. IR", "occurance"
                                     in_sample_range=[20170101,20170701],#样本内范围 默认为None,在全样本上优化
                                     ascending=False)#是否按优化目标升序排列(从小到大)

Nan Data Count (should be zero) : 0;  Percentage of effective data: 23%
Nan Data Count (should be zero) : 0;  Percentage of effective data: 24%


In [7]:
print(ret_best[0]["signal_name"])
print(ret_best[0]["ret"])
print(ret_best[0]["ic"])

divert{'LEN': 3}
             long_ret   short_ret  long_short_ret  top_quantile_ret  \
t-stat       2.169635   -0.154924        0.858689          4.371540   
p-value      0.032040    0.877150        0.392250          0.000010   
skewness    -0.369824    0.458860        0.400939          1.421650   
kurtosis    -0.172374   -0.073033        0.460765         14.360039   
Ann. Ret     0.201382   -0.015089        0.035861          0.199790   
Ann. Vol     0.144928    0.152079        0.065208          0.329971   
Ann. IR      1.389531   -0.099220        0.549943          0.605478   
occurance  119.000000  119.000000      119.000000       2524.000000   

           bottom_quantile_ret     tmb_ret  
t-stat               -0.117371    2.892489  
p-value               0.906580    0.004550  
skewness              1.950315   -0.582227  
kurtosis             21.141576    2.049555  
Ann. Ret             -0.005898    0.205738  
Ann. Vol              0.369856    0.111061  
Ann. IR              -0.0159

In [8]:
ic_best = optimizer.enumerate_optimizer(target_type="ic",#优化目标类型
                                        target = "IC Mean",
                                        in_sample_range=None,
                                        ascending=False)
                                        

In [9]:
print(ic_best[0]["signal_name"])
print(ic_best[0]["ret"])
print(ic_best[0]["ic"])

divert{'LEN': 3}
             long_ret   short_ret  long_short_ret  top_quantile_ret  \
t-stat       1.752927    0.457018        1.111232          5.066922   
p-value      0.080250    0.647860        0.267020          0.000000   
skewness    -1.125253    0.715259       -0.713462          0.599139   
kurtosis     7.251679    4.555108       12.617046          6.543380   
Ann. Ret     0.133120    0.031888        0.048437          0.135169   
Ann. Vol     0.240644    0.221100        0.138125          0.372362   
Ann. IR      0.553182    0.144224        0.350679          0.363004   
occurance  487.000000  487.000000      487.000000       9431.000000   

           bottom_quantile_ret     tmb_ret  
t-stat               -1.922711    4.503325  
p-value               0.054550    0.000010  
skewness              0.798446    0.341901  
kurtosis              7.705257    4.544866  
Ann. Ret             -0.052583    0.194061  
Ann. Vol              0.389748    0.136553  
Ann. IR              -0.1349

## 自定义Signal优化

In [10]:
close = price.dropna(how='all', axis=1)

In [11]:
import pandas as pd
import talib as ta

signals = dict()
for param in [2,3,4,5]:
    slope_df = pd.DataFrame({name: -ta.LINEARREG_SLOPE(value.values, param) for name, value in close.iteritems()}, index=close.index)
    dv.append_df(slope_df,'slope_param='+str(param))
    signals['slope_param='+str(param)] = optimizer.cal_signal(dv.get_ts('slope_param='+str(param)))
optimizer.all_signals = signals
optimizer.all_signals_perf = None

Nan Data Count (should be zero) : 0;  Percentage of effective data: 24%
Nan Data Count (should be zero) : 0;  Percentage of effective data: 24%
Nan Data Count (should be zero) : 0;  Percentage of effective data: 24%
Nan Data Count (should be zero) : 0;  Percentage of effective data: 24%


In [12]:
ic_best = optimizer.enumerate_optimizer(target_type="ic",#优化目标类型
                                        target = "IC Mean",
                                        in_sample_range=None,
                                        ascending=False)                                      

In [13]:
ic_best

[{'ic':                    ic
  IC Mean      0.036087
  IC Std.      0.163851
  t-stat(IC)   4.815237
  p-value(IC)  0.000002
  IC Skew      0.167275
  IC Kurtosis  0.206846
  Ann. IR      0.220244,
  'ret':              long_ret   short_ret  long_short_ret  top_quantile_ret  \
  t-stat       1.474180    0.110363        1.827880          3.590692   
  p-value      0.141090    0.912170        0.068190          0.000330   
  skewness     0.104087   -0.270758        0.639742          0.604468   
  kurtosis     2.316515    1.724054        3.300459          5.824864   
  Ann. Ret     0.130162    0.009663        0.097608          0.099809   
  Ann. Vol     0.278634    0.276303        0.168515          0.387294   
  Ann. IR      0.467143    0.034972        0.579224          0.257709   
  occurance  483.000000  483.000000      483.000000       9397.000000   
  
             bottom_quantile_ret     tmb_ret  
  t-stat                0.549364    2.079622  
  p-value               0.582770    0.03