## **Python for Quantitative Trading: Futures**
### **Integrated CTA Strategies Episode 5 - Cross-variety Factor Backtesting & Statistical Arbitrage**

In [1]:
import numpy as np
import pandas as pd
import math
import gzip
import _pickle as cPickle
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from datetime import datetime

import os
from helper import *
from stats import *
from product_info import *
import helper
import stats
import dask
from dask import compute, delayed


plt.rcParams['font.family'] = ['sans-serif']
plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签`
plt.rcParams['axes.unicode_minus'] = False

%config InlineBackend.figure_format = 'svg' #在notebook中可以更好的显示，svg输出是一种向量化格式，缩放网页并不会导致图片失真。这行代码似乎只用在ipynb文件中才能使用。

%matplotlib inline 

import warnings
warnings.filterwarnings('ignore')  # 忽略一些warnings

# This allows multiple outputs from a single jupyter notebook cell:
# from IPython.core.interactiveshell import InteractiveShell
# InteractiveShell.ast_node_interactivity = "all"

pd.set_option('expand_frame_repr', False)  
pd.set_option("display.max_rows", 500) 
pd.set_option('display.max_columns', None) #防止column太多中间变成省略号


  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,
  from pandas import (to_datetime, Int64Index, DatetimeIndex, Period,


In [5]:
from imp import reload  
reload(helper)
reload(stats)

<module 'stats' from 'e:\\My_Strategies_CTA\\stats.py'>

In [6]:
HEAD_PATH = r"E:\My_Strategies_CTA" 
DATA_PATH = HEAD_PATH + "\\pkl_tick" 
NIGHT_PATH = HEAD_PATH + "\\night_pkl_tick\\"
SAVE_PATH = r"E:\My_Strategies_CTA"  #保存计算的因子数据的地方
SIGNAL_PATH = r"E:\My_Strategies_CTA" 
CORE_NUM = int(os.environ['NUMBER_OF_PROCESSORS'])
CORE_NUM

16

In [7]:
product_list = ["rb", "hc", "i", "j", "jm"]
product = product_list[0] #随便取一个商品期货，可以更改
print(product)
all_dates = os.listdir(DATA_PATH + "\\" + product)
all_dates.sort()
n_days = len(all_dates)
print(len(all_dates), all_dates[:6])

rb
1067 ['20170103.pkl', '20170104.pkl', '20170105.pkl', '20170106.pkl', '20170109.pkl', '20170110.pkl']


In [8]:
period = 4096

# train and test
train_sample = np.array(all_dates) < "201901"
test_sample = np.array(all_dates) > "201901"

# 记录时间的字符串和时间格式
date_str = [n[0:8] for n in all_dates]
format_dates = np.array([pd.to_datetime(d) for d in date_str])


In [9]:
os.makedirs(HEAD_PATH+"\\all_signals", exist_ok=True)
dire_signal_list = ["trade.imb", "total.trade.imb", "nr", "dbook", "range.pos", "price.osci", "ma.dif.10", "kdj.k", "kdj.j"]
range_signal_list = ["", "range", "std", "volume.open.ratio", "trend.index"]
all_signal_list = np.array([])
for range_signal in range_signal_list:
    for dire_signal in dire_signal_list:
        if len(range_signal)==0:
            signal_name = dire_signal
        else:
            signal_name = dire_signal+"."+range_signal
        all_signal_list = np.append(all_signal_list,signal_name)
len(all_signal_list)
all_period_signal = [signal+".4096" for signal in all_signal_list]

In [10]:
product_x = "rb"
product_y = "hc"
data_x = load(HEAD_PATH+"\\pkl_tick\\"+product_x+"\\"+all_dates[100])
data_y = load(HEAD_PATH+"\\pkl_tick\\"+product_y+"\\"+all_dates[100])

In [11]:
data_x["date.time"][0:10]

0    2017-06-05 20:59:00.500
1    2017-06-05 21:00:00.500
2    2017-06-05 21:00:01.000
3    2017-06-05 21:00:01.500
4    2017-06-05 21:00:02.000
5    2017-06-05 21:00:02.500
6    2017-06-05 21:00:03.000
7    2017-06-05 21:00:03.500
8    2017-06-05 21:00:04.000
9    2017-06-05 21:00:04.500
Name: date.time, dtype: object

In [9]:
data_y["date.time"][0:10]

0    2017-06-05 20:59:00.500
1    2017-06-05 21:00:00.500
2    2017-06-05 21:00:01.000
3    2017-06-05 21:00:01.500
4    2017-06-05 21:00:02.000
5    2017-06-05 21:00:02.500
6    2017-06-05 21:00:03.000
7    2017-06-05 21:00:03.500
8    2017-06-05 21:00:04.000
9    2017-06-05 21:00:04.500
Name: date.time, dtype: object

In [11]:
data_y["date.time"]

0        2017-06-05 20:59:00.500
1        2017-06-05 21:00:00.500
2        2017-06-05 21:00:01.000
3        2017-06-05 21:00:01.500
4        2017-06-05 21:00:02.000
                  ...           
75483    2017-06-07 14:59:58.500
75484    2017-06-07 14:59:59.000
75485    2017-06-07 14:59:59.500
75486    2017-06-07 15:00:00.000
75487    2017-06-07 15:00:00.500
Name: date.time, Length: 75488, dtype: object

In [12]:
os.makedirs(HEAD_PATH+"\\comb_time", exist_ok=True)
os.makedirs(HEAD_PATH+"\\comb_time\\rb_hc", exist_ok=True)

In [10]:
# 使用计数器Counter
from collections import Counter
def get_shfe_comb_pos(date, product_x, product_y, HEAD_PATH):
    data_x = load(HEAD_PATH+"\\pkl_tick\\"+product_x+"\\"+date)
    data_y = load(HEAD_PATH+"\\pkl_tick\\"+product_y+"\\"+date)
    comb_time = np.intersect1d(data_x["date.time"], data_y["date.time"])
    comb_time_set = set(comb_time)
    time_x = [element in comb_time_set for element in data_x["date.time"]]
    time_y = [element in comb_time_set for element in data_y["date.time"]]
    if len(comb_time_set)!=len(time_x) or len(comb_time_set)!=len(time_y):
        count_x = Counter(data_x["date.time"])
        count_y = Counter(data_y["date.time"])
        # 不是很理解这里用个counter有什么意义，那些时间点都只出现过一次…… good_x也就是x中的时间点同时出现在y中的时间点，不知道为什么用个counter秀一波
        good_x = [count_x[x]==1 and count_y[x]==1 for x in data_x["date.time"]]
        good_y = [count_x[x]==1 and count_y[x]==1 for x in data_y["date.time"]]
        time_x = (time_x and good_x)
        time_y = (time_y and good_y)
    save([time_x, time_y], HEAD_PATH+"\\comb_time\\"+product_x+"_"+product_y+"\\"+date)

In [14]:
%%time
result = parLapply(CORE_NUM, all_dates, get_shfe_comb_pos, product_x=product_x, product_y=product_y, HEAD_PATH=HEAD_PATH)

Wall time: 1min 46s


In [11]:
reverse = 1
date = "20200804.pkl"
data_x = load(HEAD_PATH+"\\pkl_tick\\"+product_x+"\\"+date)
data_y = load(HEAD_PATH+"\\pkl_tick\\"+product_y+"\\"+date)
comb_time = np.intersect1d(data_x["date.time"], data_y["date.time"])
comb_time_set = set(comb_time)
time_x = [element in comb_time_set for element in data_x["date.time"]]
time_y = [element in comb_time_set for element in data_y["date.time"]]
file = "20200804.pkl"
good_x = load(SAVE_PATH+"\\good_pkl\\"+product_x+"\\"+file)
print(HEAD_PATH+"\\pkl_tick\\"+product_y+"\\"+file)
data = load(HEAD_PATH+"\\pkl_tick\\"+product_y+"\\"+file)
[time_x, time_y] = load(HEAD_PATH+"\\comb_time\\"+product_x+"_"+product_y+ "\\"+file)
S = load(SAVE_PATH+"\\factor_pkl\\"+product_x+"\\"+signal_name+".4096" +"\\"+file)
pred = S*reverse
pred = pred[time_x]
atr = load(SAVE_PATH+"\\factor_pkl\\"+product_x+"\\"+"atr.4096"+"\\"+file)
atr = atr[time_x].reset_index(drop=True)
data = data[time_y].reset_index(drop=True)
#atr = atr[data["good"]].reset_index(drop=True)
#pred = pred[data["good"]]
#data = data[data["good"]].reset_index(drop=True)    

D:\self_learning_python\self_learning\My_Strategies_CTA\pkl_tick\hc\20200804.pkl


In [12]:
data_y[data_y["good"]]

Unnamed: 0,contract,date.time,price,cum.open.int,open.int,turnover,qty,bid,ask,bid.qty,ask.qty,date,time,milli,good,mid.price,buy.trade,buy2.trade,sell.trade,sell2.trade,log.price,ret,wpr,wpr.ret,next.ask,next.bid,min.1024,max.1024,min.2048,max.2048,min.4096,max.4096
9620,hc2010,2020-08-04 09:00:00.500,3915.0,447481,4,548040.0,14,3914.0,3915.0,203,29,20200804,09:00:00,0,True,3914.5,8.0,0.0,6.0,0.0,8.272539,-0.000031,3914.875000,-0.120327,3916.0,3915.0,3910.527397,3919.29661,3906.402778,3919.29661,3902.181818,3919.29661
9621,hc2010,2020-08-04 09:00:01.000,3915.0,447500,19,1644350.0,42,3915.0,3916.0,55,31,20200804,09:00:01,0,True,3915.5,37.0,5.0,0.0,0.0,8.272734,0.000195,3915.639535,0.764535,3916.0,3914.0,3910.527397,3919.29661,3906.402778,3919.29661,3902.181818,3919.29661
9622,hc2010,2020-08-04 09:00:01.500,3916.0,447558,58,3523530.0,90,3914.0,3916.0,253,58,20200804,09:00:01,0,True,3915.0,3.0,0.0,87.0,0.0,8.272731,-0.000003,3915.627010,-0.012525,3916.0,3915.0,3910.527397,3919.29661,3906.402778,3919.29661,3902.181818,3919.29661
9623,hc2010,2020-08-04 09:00:02.000,3916.0,447571,13,1722820.0,44,3915.0,3916.0,1,26,20200804,09:00:02,0,True,3915.5,33.0,0.0,11.0,0.0,8.272580,-0.000151,3915.037037,-0.589973,3917.0,3916.0,3910.527397,3919.29661,3906.402778,3919.29661,3902.181818,3919.29661
9624,hc2010,2020-08-04 09:00:02.500,3917.0,447622,51,3014870.0,77,3916.0,3917.0,12,257,20200804,09:00:02,0,True,3916.5,32.0,0.0,45.0,0.0,8.272837,0.000257,3916.044610,1.007573,3916.0,3914.0,3910.527397,3919.29661,3906.402778,3919.29661,3902.181818,3919.29661
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37226,hc2010,2020-08-04 22:59:57.500,3917.0,443795,0,78340.0,2,3916.0,3917.0,19,22,20200804,22:59:57,0,True,3916.5,2.0,0.0,0.0,0.0,8.272944,-0.000012,3916.463415,-0.046789,3917.0,3916.0,3913.669811,3919.35255,3909.529412,3919.35255,3904.190476,3919.35255
37227,hc2010,2020-08-04 22:59:58.000,3917.0,443795,0,156680.0,4,3916.0,3917.0,13,18,20200804,22:59:58,0,True,3916.5,4.0,0.0,0.0,0.0,8.272933,-0.000011,3916.419355,-0.044060,3917.0,3916.0,3913.669811,3919.35255,3909.529412,3919.35255,3904.190476,3919.35255
37228,hc2010,2020-08-04 22:59:58.500,3917.0,443795,0,156680.0,4,3916.0,3917.0,13,14,20200804,22:59:58,0,True,3916.5,4.0,0.0,0.0,0.0,8.272949,0.000016,3916.481481,0.062127,3918.0,3915.0,3913.669811,3919.35255,3909.529412,3919.35255,3904.190476,3919.35255
37229,hc2010,2020-08-04 22:59:59.000,3918.0,443785,-10,665920.0,17,3915.0,3918.0,1,3,20200804,22:59:59,0,True,3916.5,14.0,3.0,0.0,0.0,8.272762,-0.000187,3915.750000,-0.731481,3918.0,3915.0,3913.669811,3919.35255,3909.529412,3919.35255,3904.190476,3919.35255


In [13]:
comb_time

array(['2020-08-03 20:59:00.500', '2020-08-03 21:00:00.500',
       '2020-08-03 21:00:01.000', ..., '2020-08-05 14:59:59.500',
       '2020-08-05 15:00:00.000', '2020-08-05 15:00:00.500'], dtype=object)

In [14]:
from collections import OrderedDict
def get_cross_signal_pnl(file, product_x, product_y, signal_name, thre_mat, reverse=1, tranct=1.1e-4, max_spread=0.61, 
                         tranct_ratio=True, HEAD_PATH=HEAD_PATH,SAVE_PATH=SAVE_PATH,
                   atr_filter=0, rebate=0):
    ## load data
    good_x = load(SAVE_PATH+"\\good_pkl\\"+product_x+"\\"+file)
    print(HEAD_PATH+"\\pkl_tick\\"+product_y+"\\"+file)
    data = load(HEAD_PATH+"\\pkl_tick\\"+product_y+"\\"+file)
    [time_x, time_y] = load(HEAD_PATH+"\\comb_time\\"+product_x+"_"+product_y+"\\"+file)
    S = load(SAVE_PATH+"\\factor_pkl\\"+product_x+"\\"+signal_name+"\\"+file)
    pred = S*reverse
    pred = pred[time_x]
    atr = load(SAVE_PATH+"\\factor_pkl\\"+product_x+"\\"+"atr.4096"+"\\"+file)
    atr = atr[time_x].reset_index(drop=True)
    data = data[time_y].reset_index(drop=True)
    atr = atr[data["good"]].reset_index(drop=True)
    pred = pred[data["good"]]
    data = data[data["good"]].reset_index(drop=True)    
    #atr = atr[time_x].reset_index(drop=True)
    #data = data[time_y].reset_index(drop=True)
    #n_bar = len(data)
    
    ## load signal
    
    ## we don't know the signal is positive correlated or negative correlated  
    #n_thre = len(thre_mat)
    result = pd.DataFrame(data=OrderedDict([("open", thre_mat["open"].values), ("close", thre_mat["close"].values),
                               ("num", 0), ("avg.pnl", 0), ("pnl", 0), ("avg.ret", 0), ("ret", 0)]), 
                          index=thre_mat.index)
    count = 0
    cur_spread = data["ask"]-data["bid"]
    for thre in thre_mat.iterrows():
        count = count+1
        buy = pred>thre[1]["open"]
        sell = pred<-thre[1]["open"]
        signal = pd.Series(data=0, index=data.index)
        position = signal.copy()
        signal[buy] = 1
        signal[sell] = -1
        signal[atr<atr_filter]=0
        scratch = -thre[1]["close"]
        position_pos = pd.Series(data=np.nan, index=data.index)
        position_pos.iloc[0] = 0
        position_pos[(signal==1) & (data["next.ask"]>0) & (data["next.bid"]>0) & (cur_spread<max_spread)] = 1
        position_pos[(pred< -scratch) & (data["next.bid"]>0) & (cur_spread<max_spread)] = 0
        position_pos.ffill(inplace=True)
        pre_pos = position_pos.shift(1)
        notional_position_pos = pd.Series(data=0, index=data.index)
        notional_position_pos[position_pos==1] = 1
        notional_position_pos[(position_pos==1) & (pre_pos==1)] = np.nan
        notional_position_pos[(notional_position_pos==1)] = 1/data["next.ask"][(notional_position_pos==1)]
        notional_position_pos.ffill(inplace=True)
        position_neg = pd.Series(data=np.nan, index=data.index)
        position_neg.iloc[0] = 0
        position_neg[(signal==-1) & (data["next.ask"]>0) & (data["next.bid"]>0) & (cur_spread<max_spread)] = -1
        position_neg[(pred> scratch) & (data["next.ask"]>0) & (cur_spread<max_spread)] = 0
        position_neg.ffill(inplace=True)
        pre_neg = position_neg.shift(1)
        notional_position_neg = pd.Series(data=0, index=data.index)
        notional_position_neg[position_neg==-1] = -1
        notional_position_neg[(position_neg==-1) & (pre_neg==-1)] = np.nan
        notional_position_neg[(notional_position_neg==-1)] = -1/data["next.bid"][(notional_position_neg==-1)]
        notional_position_neg.ffill(inplace=True)
        position = position_pos + position_neg
        notional_position = notional_position_pos+notional_position_neg
        #position[n_bar-1] = 0
        position.iloc[0] = 0
        position.iloc[-10:] = 0
        notional_position.iloc[0] = 0
        notional_position.iloc[-2:] = 0
        change_pos = position - position.shift(1)
        notional_change_pos = notional_position-notional_position.shift(1)
        change_pos.iloc[0] = 0
        notional_change_pos.iloc[0] = 0
        change_base = pd.Series(data=0, index=data.index)
        change_buy = change_pos>0
        change_sell = change_pos<0
        if (tranct_ratio):
            change_base[change_buy] = data["next.ask"][change_buy]*(1+tranct)
            change_base[change_sell] = data["next.bid"][change_sell]*(1-tranct)
        else:
            change_base[change_buy] = data["next.ask"][change_buy]+tranct
            change_base[change_sell] = data["next.bid"][change_sell]-tranct
        final_pnl = -sum(change_base*change_pos)
        ret = -sum(change_base*notional_change_pos)
        num = sum((position!=0) & (change_pos!=0))
        if num == 0:
            result.loc[thre[0], ("num", "avg.pnl", "pnl", "avg.ret", "ret")] = (0,0,0,0,0)
            return result
        else:
            avg_pnl = np.divide(final_pnl, num)
            avg_ret = np.divide(ret,num)
            result.loc[thre[0], ("num", "avg.pnl", "pnl", "avg.ret", "ret")] = (num, avg_pnl, final_pnl, avg_ret,ret)
    return result


In [28]:
split_str = "2019"
train_sample = np.array(all_dates) < split_str
test_sample = np.array(all_dates) > "20200804"
signal = "trade.imb"
signal_name = signal+"."+str(period)
all_signal = load(HEAD_PATH+"\\all_signals\\"+product_x+"."+signal_name+".pkl")
tranct = product_info[product_y]["tranct"]
tranct_ratio = product_info[product_y]["tranct.ratio"]
open_list = np.quantile(abs(all_signal), np.append(np.arange(0.991,0.999,0.001),np.arange(0.9991,0.9999,0.0001)))
thre_list = []
for cartesian in itertools.product(open_list, np.array([0.2, 0.4, 0.6, 0.8, 1.0])):
    thre_list.append((cartesian[0], -cartesian[0] * cartesian[1]))
thre_list = np.array(thre_list)
thre_mat = pd.DataFrame(data=OrderedDict([("open", thre_list[:, 0]), ("close", thre_list[:, 1])]))
reverse = 1
max_spread = 1.1
atr_filter = 0
#with dask.config.set(scheduler='processes', num_workers=CORE_NUM):
#    f_par = functools.partial(get_cross_signal_pnl, product_x=product_x, product_y=product_y, signal_name=signal_name, thre_mat=thre_mat,
#                              reverse=reverse, tranct=tranct, max_spread=max_spread, tranct_ratio=tranct_ratio, 
#                              HEAD_PATH=HEAD_PATH, SAVE_PATH=SAVE_PATH,
#                              atr_filter=atr_filter)
#    test_result = compute([delayed(f_par)(file) for file in all_dates[test_sample]])[0]
for date in np.array(all_dates)[test_sample]:
    get_cross_signal_pnl(date, product_x, product_y, signal_name, thre_mat, reverse, tranct, max_spread, tranct_ratio, 
                      HEAD_PATH,SAVE_PATH,atr_filter)

D:\self_learning_python\self_learning\My_Strategies_CTA\pkl_tick\hc\20200804.pkl
D:\self_learning_python\self_learning\My_Strategies_CTA\pkl_tick\hc\20200805.pkl
D:\self_learning_python\self_learning\My_Strategies_CTA\pkl_tick\hc\20200806.pkl
D:\self_learning_python\self_learning\My_Strategies_CTA\pkl_tick\hc\20200807.pkl
D:\self_learning_python\self_learning\My_Strategies_CTA\pkl_tick\hc\20200810.pkl
D:\self_learning_python\self_learning\My_Strategies_CTA\pkl_tick\hc\20200811.pkl
D:\self_learning_python\self_learning\My_Strategies_CTA\pkl_tick\hc\20200812.pkl
D:\self_learning_python\self_learning\My_Strategies_CTA\pkl_tick\hc\20200813.pkl
D:\self_learning_python\self_learning\My_Strategies_CTA\pkl_tick\hc\20200814.pkl
D:\self_learning_python\self_learning\My_Strategies_CTA\pkl_tick\hc\20200817.pkl
D:\self_learning_python\self_learning\My_Strategies_CTA\pkl_tick\hc\20200818.pkl
D:\self_learning_python\self_learning\My_Strategies_CTA\pkl_tick\hc\20200819.pkl
D:\self_learning_python\self

In [15]:
from collections import OrderedDict
def get_cross_signal_stat(signal_name, thre_mat, product_x, product_y, all_dates, CORE_NUM, split_str="2018", 
                          reverse=1, tranct=1.1e-4, 
                            max_spread=0.61, tranct_ratio=True, min_pnl=2, min_num=20, HEAD_PATH=HEAD_PATH,SAVE_PATH=SAVE_PATH,
                          atr_filter=0):
    train_sample = np.array(all_dates) < split_str
    test_sample = np.array(all_dates) > split_str
    with dask.config.set(scheduler='processes', num_workers=CORE_NUM):
        f_par = functools.partial(get_cross_signal_pnl, product_x=product_x, product_y=product_y, signal_name=signal_name, thre_mat=thre_mat,
                                 reverse=reverse, tranct=tranct, max_spread=max_spread, tranct_ratio=tranct_ratio, 
                                  HEAD_PATH=HEAD_PATH, SAVE_PATH=SAVE_PATH,
                                  atr_filter=atr_filter)
        train_result = compute([delayed(f_par)(file) for file in np.array(all_dates)[train_sample]])[0]
    train_stat = get_hft_summary(train_result, thre_mat, sum(train_sample))
    with dask.config.set(scheduler='processes', num_workers=CORE_NUM):
        f_par = functools.partial(get_cross_signal_pnl, product_x=product_x, product_y=product_y, signal_name=signal_name, thre_mat=thre_mat,
                                  reverse=reverse, tranct=tranct, max_spread=max_spread, tranct_ratio=tranct_ratio, 
                                  HEAD_PATH=HEAD_PATH, SAVE_PATH=SAVE_PATH,
                                  atr_filter=atr_filter)
        test_result = compute([delayed(f_par)(file) for file in np.array(all_dates)[test_sample]])[0]
    test_stat = get_hft_summary(test_result, thre_mat, sum(test_sample))
    return OrderedDict([("train.stat", train_stat), ("test.stat", test_stat)])


In [16]:
def evaluate_cross_signal(signal, all_dates, product_x, product_y, min_pnl, min_num, HEAD_PATH, SAVE_PATH,
                    CORE_NUM, period=4096, split_str="2018", tranct=1.1e-4, 
                    max_spread=0.61, tranct_ratio=True, atr_filter=0, save_path="signal_result_atr"):
    signal_name = signal+"."+str(period)
    all_signal = load(HEAD_PATH+"\\all_signals\\"+product_x+"."+signal_name+".pkl")
    tranct = product_info[product_y]["tranct"]
    tranct_ratio = product_info[product_y]["tranct.ratio"]
    open_list = np.quantile(abs(all_signal), np.append(np.arange(0.991,0.999,0.001),np.arange(0.9991,0.9999,0.0001)))
    thre_list = []
    for cartesian in itertools.product(open_list, np.array([0.2, 0.4, 0.6, 0.8, 1.0])):
        thre_list.append((cartesian[0], -cartesian[0] * cartesian[1]))
    thre_list = np.array(thre_list)
    thre_mat = pd.DataFrame(data=OrderedDict([("open", thre_list[:, 0]), ("close", thre_list[:, 1])]))
    print("reverse=1")
    trend_signal_stat = get_cross_signal_stat(signal_name, thre_mat, product_x, product_y, all_dates, CORE_NUM, split_str=split_str,
                                              reverse=1, tranct=tranct, max_spread=max_spread, tranct_ratio=tranct_ratio, 
                                              min_pnl=min_pnl, min_num=min_num, HEAD_PATH=HEAD_PATH, SAVE_PATH=SAVE_PATH,
                                              atr_filter=atr_filter)
    print("reverse=-1")
    reverse_signal_stat = get_cross_signal_stat(signal_name, thre_mat, product_x, product_y, all_dates, CORE_NUM, split_str=split_str, 
                                                reverse=-1, tranct=tranct, 
                    max_spread=max_spread, tranct_ratio=tranct_ratio, min_pnl=min_pnl, min_num=min_num, 
                                                HEAD_PATH=HEAD_PATH, SAVE_PATH=SAVE_PATH,
                                                atr_filter=atr_filter)
    stat_result = OrderedDict([("trend.signal.stat", trend_signal_stat), ("reverse.signal.stat", reverse_signal_stat)])    
    save(stat_result, HEAD_PATH+"\\"+save_path+"\\"+product_x+"_"+product_y+"."+signal_name+".pkl")


In [40]:
%%time
result = evaluate_cross_signal("trade.imb", all_dates, "rb","hc", 2, 20, HEAD_PATH, SAVE_PATH,
                CORE_NUM, period=4096, split_str="2019", tranct=1.1e-4, 
                max_spread=1+0.1, tranct_ratio=True, atr_filter=0, 
                        save_path="signal_result_atr")

reverse=1
reverse=-1
Wall time: 3min 26s


In [17]:
result = load(HEAD_PATH+"\\signal_result_atr\\rb_hc.trade.imb.4096.pkl")

In [18]:
result["reverse.signal.stat"]["train.stat"]["final.result"]

Unnamed: 0,open,close,num,avg.pnl,total.pnl,sharpe,drawdown,max.drawdown,avg.ret,total.ret,sharpe.ret,drawdown.ret,max.drawdown.ret,mar,mar.ret
0,0.097419,-0.019484,113,1.743086,196.968695,0.389376,143.49484,269.03894,-0.319903,-36.149076,-3.251147,36.149076,36.149163,0.73212,-0.999998
1,0.097419,-0.038968,112,2.998026,335.778875,0.55344,185.15724,289.704385,-0.447524,-50.122712,-3.989345,50.122712,50.122712,1.15904,-1.0
2,0.097419,-0.058452,112,3.31041,370.76596,0.605628,202.164065,330.70701,-0.491726,-55.073346,-4.04867,55.073346,55.073346,1.121131,-1.0
3,0.097419,-0.077935,112,2.872814,321.755145,0.508155,274.46044,401.003595,-0.527347,-59.062823,-4.272493,59.062823,59.062823,0.802375,-1.0
4,0.097419,-0.097419,112,2.596023,290.75462,0.457725,286.46296,419.005485,-0.545022,-61.042498,-4.338531,61.042498,61.042498,0.693916,-1.0
5,0.098251,-0.01965,108,1.526713,164.884985,0.333971,159.239945,264.781945,-0.316055,-34.133888,-3.112215,34.133888,34.133888,0.62272,-1.0
6,0.098251,-0.039301,108,3.082084,332.865035,0.555285,167.349265,284.890845,-0.463974,-50.109232,-4.038482,50.109232,50.109232,1.168395,-1.0
7,0.098251,-0.058951,108,2.915349,314.857685,0.519462,218.35252,324.893575,-0.491409,-53.072139,-3.976882,53.072139,53.072139,0.96911,-1.0
8,0.098251,-0.078601,108,2.470805,266.846975,0.425615,281.64858,386.189845,-0.528344,-57.061098,-4.204054,57.061098,57.061098,0.690974,-1.0
9,0.098251,-0.098251,108,2.155985,232.846345,0.370054,296.651205,407.19184,-0.546683,-59.041802,-4.270842,59.041802,59.041802,0.571835,-1.0


In [43]:
result["reverse.signal.stat"]["test.stat"]["final.result"]

Unnamed: 0,open,close,num,avg.pnl,total.pnl,sharpe,drawdown,max.drawdown,avg.ret,total.ret,sharpe.ret,drawdown.ret,max.drawdown.ret,mar,mar.ret
0,0.097419,-0.019484,145,2.668736,386.966785,0.71528,0.0,225.31037,-0.23554,-34.153356,-2.39621,34.153356,35.165686,1.717483,-0.971213
1,0.097419,-0.038968,144,2.226809,320.66052,0.578894,0.0,176.044705,-0.265005,-38.160742,-2.469008,38.160742,39.171124,1.821472,-0.974206
2,0.097419,-0.058452,143,1.961136,280.442455,0.495194,0.0,213.300935,-0.301914,-43.173659,-2.664077,43.173659,44.173554,1.314774,-0.977364
3,0.097419,-0.077935,142,3.015381,428.18407,0.762984,0.0,151.66949,-0.261004,-37.062517,-2.151047,37.062517,38.062412,2.823139,-0.97373
4,0.097419,-0.097419,142,3.444955,489.183545,0.880319,0.0,138.9837,-0.281878,-40.026692,-2.249036,40.026692,41.026587,3.519719,-0.975628
5,0.098251,-0.01965,139,3.492161,485.410385,0.905454,0.0,185.51228,-0.231128,-32.126849,-2.277014,32.126849,33.139373,2.616594,-0.969446
6,0.098251,-0.039301,138,2.928307,404.106325,0.737673,0.0,187.50934,-0.261996,-36.155512,-2.382723,36.155512,37.166088,2.155126,-0.972809
7,0.098251,-0.058951,137,2.845877,389.88511,0.697577,0.0,214.511965,-0.30045,-41.161669,-2.582336,41.161669,42.161564,1.817545,-0.976284
8,0.098251,-0.078601,136,3.931081,534.62704,0.965743,0.0,137.014625,-0.265098,-36.053292,-2.133261,36.053292,37.053187,3.901971,-0.973015
9,0.098251,-0.098251,136,4.203158,571.629455,1.03928,0.0,122.254265,-0.279601,-38.025787,-2.182659,38.025787,39.025682,4.675742,-0.974379


In [19]:
%%time
for signal in all_signal_list:
    print(signal)
    result = evaluate_cross_signal(signal, all_dates, "rb","hc", 2, 20, HEAD_PATH, SAVE_PATH,
                CORE_NUM, period=4096, split_str="201901", tranct=1.1e-4, 
                max_spread=1+0.1, tranct_ratio=False, atr_filter=0.01, save_path="signal_result_atr")

trade.imb
reverse=1
reverse=-1
total.trade.imb
reverse=1
reverse=-1
nr
reverse=1
reverse=-1
dbook
reverse=1
reverse=-1
range.pos
reverse=1
reverse=-1
price.osci
reverse=1
reverse=-1
ma.dif.10
reverse=1
reverse=-1
kdj.k
reverse=1
reverse=-1
kdj.j
reverse=1
reverse=-1
trade.imb.range
reverse=1
reverse=-1
total.trade.imb.range
reverse=1
reverse=-1
nr.range
reverse=1
reverse=-1
dbook.range
reverse=1
reverse=-1
range.pos.range
reverse=1
reverse=-1
price.osci.range
reverse=1
reverse=-1
ma.dif.10.range
reverse=1
reverse=-1
kdj.k.range
reverse=1
reverse=-1
kdj.j.range
reverse=1
reverse=-1
trade.imb.std
reverse=1
reverse=-1
total.trade.imb.std
reverse=1
reverse=-1
nr.std
reverse=1
reverse=-1
dbook.std
reverse=1
reverse=-1
range.pos.std
reverse=1
reverse=-1
price.osci.std
reverse=1
reverse=-1
ma.dif.10.std
reverse=1
reverse=-1
kdj.k.std
reverse=1
reverse=-1
kdj.j.std
reverse=1
reverse=-1
trade.imb.volume.open.ratio
reverse=1
reverse=-1
total.trade.imb.volume.open.ratio
reverse=1
reverse=-1
nr.vo

In [20]:
for signal in all_signal_list:
    result = load(HEAD_PATH+"\\signal_result_atr\\rb_hc."+signal+".4096.pkl")
    train_stat =result["trend.signal.stat"]["train.stat"]
    good_strat = (train_stat["final.result"]["avg.pnl"]>1) & (train_stat["final.result"]["num"]>0)
    if sum(good_strat)>1:
        train_pnl = train_stat["daily.pnl"].loc[:, good_strat].sum(axis=1)/sum(good_strat)
        test_stat =result["trend.signal.stat"]["test.stat"]
        test_pnl = test_stat["daily.pnl"].loc[:, good_strat].sum(axis=1)/sum(good_strat)
        print(signal, sharpe(train_pnl), sharpe(test_pnl))

trade.imb 0.7164823951691123 -0.7620355865737675
total.trade.imb 0.3223521525000639 0.0063376079432512725
ma.dif.10 0.4660200871546677 -0.14545040847331953
kdj.k 0.9166196812988318 -0.007513485491348998
kdj.j 1.199085421807297 -0.2139819480779584
total.trade.imb.std 0.44523330792068466 0.476651268337135
nr.std 0.5205771861997849 0.1850335834366045
range.pos.std 0.2691576566760325 0.30994580196461446
ma.dif.10.std 0.30762352631690854 0.03402093020547046
kdj.k.std 0.7179877357588137 0.5018662707054219
kdj.j.std 0.7086218886606209 0.5615174020439552
trade.imb.volume.open.ratio 0.7164823951691123 -0.9266594535193153
total.trade.imb.volume.open.ratio 0.8227127866971728 -0.5958869097307294
nr.volume.open.ratio 0.38450909307794595 -0.2947951953442086
range.pos.volume.open.ratio 0.6120782793784493 -0.6056343376947994
ma.dif.10.volume.open.ratio 0.2536195059709148 -0.20135553645420534
kdj.k.volume.open.ratio 0.4552681952893692 0.48915623748489295
kdj.j.volume.open.ratio 0.4045438489585599 -0.09

In [21]:
def par_get_arb_all_signal(signal_name, file_list, product_x, product_y, period, HEAD_PATH=HEAD_PATH, SAVE_PATH=SAVE_PATH):
    n_files = len(file_list)
    all_signal = np.array([])
    for file in file_list:
        S_x = load(SAVE_PATH+"\\factor_pkl\\"+product_x+"\\"+signal_name+"\\"+file)
        S_y = load(SAVE_PATH+"\\factor_pkl\\"+product_y+"\\"+signal_name+"\\"+file)
        [time_x, time_y] = load(HEAD_PATH+"\\comb_time\\"+product_x+"_"+product_y+"\\"+file)
        signal = S_x[time_x] - S_y[time_y]
        chosen = (np.arange(len(signal))+1) % period == 0
        all_signal = np.concatenate((all_signal, signal[chosen]), axis=0)
    save(all_signal, HEAD_PATH+"\\all_signals\\"+product_x+"_"+product_y+"."+signal_name+".pkl")


In [22]:
%%time
result = par_get_arb_all_signal("trade.imb.4096",  file_list=all_dates, product_x="rb", 
                   product_y = "hc", period=4096, HEAD_PATH=HEAD_PATH, SAVE_PATH=SAVE_PATH)

Wall time: 17.8 s


In [None]:
#%%time
#result = parLapply(CORE_NUM, "total.imb.4096", par_get_arb_all_signal, file_list=all_dates, product_x="rb", 
#                   product_y = "hc", period=4096, HEAD_PATH=HEAD_PATH)

In [23]:
from collections import OrderedDict
def get_arb_signal_pnl(file, product_x, product_y, signal_name, thre_mat, reverse=1, tranct=1.1e-4, max_spread_x=1.1,
                       max_spread_y=1.1,
                       tranct_ratio=True, HEAD_PATH=HEAD_PATH, SAVE_PATH = SAVE_PATH, rebate=0):
    ## load data
    data_x = load(HEAD_PATH+"\\pkl_tick\\"+product_x+"\\"+file)
    data_y = load(HEAD_PATH+"\\pkl_tick\\"+product_y+"\\"+file)
    [time_x, time_y] = load(HEAD_PATH+"\\comb_time\\"+product_x+"_"+product_y+"\\"+file)
    S_x = load(SAVE_PATH+"\\factor_pkl\\"+product_x+"\\"+signal_name+"\\"+file)
    S_y = load(SAVE_PATH+"\\factor_pkl\\"+product_y+"\\"+signal_name+"\\"+file)
    S = S_x[time_x] - S_y[time_y]                  
    pred = S*reverse
    data_y = data_y[time_y].reset_index(drop=True)
    data_x = data_x[time_x].reset_index(drop=True)
    good_x = data_x["good"]
    good_y = data_y["good"]
    pred = pred[good_x & good_y]
    data_x = data_x[good_x & good_y].reset_index(drop=True)    
    data_y = data_y[good_x & good_y].reset_index(drop=True)
    result = pd.DataFrame(data=OrderedDict([("open", thre_mat["open"].values), ("close", thre_mat["close"].values),
                               ("num", 0), ("avg.pnl", 0), ("pnl", 0), ("avg.ret", 0), ("ret", 0)]), 
                          index=thre_mat.index)
    count = 0
    cur_spread_x = data_x["ask"]-data_x["bid"]
    cur_spread_y = data_y["ask"]-data_y["bid"]
    for thre in thre_mat.iterrows():
        count = count+1
        buy = pred>thre[1]["open"]
        sell = pred<-thre[1]["open"]
        signal = pd.Series(data=0, index=data_x.index)
        position = signal.copy()
        signal[buy] = 1
        signal[sell] = -1
        scratch = -thre[1]["close"]
        position_pos = pd.Series(data=np.nan, index=data_x.index)
        position_pos.iloc[0] = 0
        position_pos[(signal==1) & (data_x["next.ask"]>0) & (data_x["next.bid"]>0) & (cur_spread_x<max_spread_x) &
                    (data_y["next.ask"]>0) & (data_y["next.bid"]>0) & (cur_spread_y<max_spread_y)] = 1
        position_pos[(pred< -scratch) & (data_x["next.bid"]>0) & (data_y["next.ask"]>0) & (cur_spread_x<max_spread_x) &
                    (cur_spread_y<max_spread_y)] = 0
        position_pos.ffill(inplace=True)
        pre_pos = position_pos.shift(1)
        notional_position_pos_x = pd.Series(data=0, index=data_x.index)
        notional_position_pos_y = pd.Series(data=0, index=data_y.index)
        notional_position_pos_x[position_pos==1] = 1
        notional_position_pos_x[(position_pos==1) & (pre_pos==1)] = np.nan
        notional_position_pos_y[position_pos==1] = 1
        notional_position_pos_y[(position_pos==1) & (pre_pos==1)] = np.nan
        notional_position_pos_x[(notional_position_pos_x==1)] = 1/data_x["next.ask"][(notional_position_pos_x==1)]
        notional_position_pos_y[(notional_position_pos_y==1)] = -1/data_y["next.bid"][(notional_position_pos_y==1)]
        notional_position_pos_x.ffill(inplace=True)
        notional_position_pos_y.ffill(inplace=True)
        position_neg = pd.Series(data=np.nan, index=data_x.index)
        position_neg.iloc[0] = 0
        position_neg[(signal==-1) & (data_x["next.ask"]>0) & (data_x["next.bid"]>0) & (cur_spread_x<max_spread_x) &
                    (data_y["next.ask"]>0) & (data_y["next.bid"]>0) & (cur_spread_y<max_spread_y)] = -1
        position_neg[(pred> scratch) & (data_x["next.ask"]>0) & (data_y["next.bid"]>0) & (cur_spread_y<max_spread_y)] = 0
        position_neg.ffill(inplace=True)
        pre_neg = position_neg.shift(1)
        notional_position_neg_x = pd.Series(data=0, index=data_x.index)
        notional_position_neg_y = pd.Series(data=0, index=data_y.index)
        notional_position_neg_x[position_neg==-1] = -1
        notional_position_neg_x[(position_neg==-1) & (pre_neg==-1)] = np.nan
        notional_position_neg_y[position_neg==-1] = -1
        notional_position_neg_y[(position_neg==-1) & (pre_neg==-1)] = np.nan
        notional_position_neg_x[(notional_position_neg_x==-1)] = -1/data_x["next.bid"][(notional_position_neg_x==-1)]
        notional_position_neg_y[(notional_position_neg_y==-1)] = 1/data_y["next.ask"][(notional_position_neg_y==-1)]
        notional_position_neg_x.ffill(inplace=True)
        notional_position_neg_y.ffill(inplace=True)
        position = position_pos + position_neg
        notional_position_x = notional_position_pos_x+notional_position_neg_x
        notional_position_y = notional_position_pos_y+notional_position_neg_y
        #position[n_bar-1] = 0
        position.iloc[0] = 0
        position.iloc[-10:] = 0
        notional_position_x.iloc[0] = 0
        notional_position_y.iloc[0] = 0
        notional_position_x.iloc[-10:] = 0
        notional_position_y.iloc[-10:] = 0
        notional_change_pos_x = notional_position_x-notional_position_x.shift(1)
        notional_change_pos_y = notional_position_y-notional_position_y.shift(1)
        notional_change_pos_x.iloc[0] = 0
        notional_change_pos_y.iloc[0] = 0
        change_pos = position - position.shift(1)
        change_pos.iloc[0] = 0
        change_base_x = pd.Series(data=0, index=data_x.index)
        change_base_y = pd.Series(data=0, index=data_y.index)

        change_buy = change_pos>0
        change_sell = change_pos<0        
        if (tranct_ratio):
            change_base_x[change_buy] = data_x["next.ask"][change_buy]*(1+tranct)
            change_base_x[change_sell] = data_x["next.bid"][change_sell]*(1-tranct)
            change_base_y[change_buy] = data_y["next.bid"][change_buy]*(1-tranct)
            change_base_y[change_sell] = data_y["next.ask"][change_sell]*(1+tranct)
        else:
            change_base_x[change_buy] = data_x["next.ask"][change_buy]+tranct
            change_base_x[change_sell] = data_x["next.bid"][change_sell]-tranct
            change_base_y[change_buy] = data_y["next.bid"][change_buy]-tranct
            change_base_y[change_sell] = data_y["next.ask"][change_sell]+tranct

        final_pnl = -sum(change_base_x*change_pos)+sum(change_base_y*change_pos)
        ret = -sum(change_base_x*notional_change_pos_x)-sum(change_base_y*notional_change_pos_y)
        num = sum((position!=0) & (change_pos!=0))
        if num == 0:
            result.loc[thre[0], ("num", "avg.pnl", "pnl", "avg.ret", "ret")] = (0,0,0,0,0)
            return result
        else:
            avg_pnl = np.divide(final_pnl, num)
            avg_ret = np.divide(ret,num)
            result.loc[thre[0], ("num", "avg.pnl", "pnl", "avg.ret", "ret")] = (num, avg_pnl, final_pnl, avg_ret,ret)
    return result


In [24]:
signal_name = "trade.imb.4096" 
all_signal = load(HEAD_PATH+"\\all_signals\\"+product_x+"_"+product_y+"."+signal_name+".pkl")
thre_list = []
open_list = np.quantile(abs(all_signal), np.append(np.arange(0.991,0.999,0.001),np.arange(0.9991,0.9999,0.0001)))
for cartesian in itertools.product(open_list, np.array([0.2, 0.4, 0.6, 0.8, 1.0])):
        thre_list.append((cartesian[0], -cartesian[0] * cartesian[1]))
thre_list = np.array(thre_list)
thre_mat = pd.DataFrame(data=OrderedDict([("open", thre_list[:, 0]), ("close", thre_list[:, 1])]))

In [27]:
%%time
train_result = parLapply(CORE_NUM, np.array(all_dates)[train_sample], get_arb_signal_pnl, product_x="rb", product_y="hc", signal_name="trade.imb.4096",
                  thre_mat=thre_mat, reverse=-1)
test_result = parLapply(CORE_NUM, np.array(all_dates)[test_sample], get_arb_signal_pnl, product_x="rb", product_y="hc", signal_name="trade.imb.4096",
                  thre_mat=thre_mat, reverse=-1)

Wall time: 1min 14s


In [28]:
train_stat = get_hft_summary(train_result, thre_mat,sum(train_sample))
good_strat = (train_stat["final.result"]["avg.ret"]>0.000) & (train_stat["final.result"]["num"]>0)
sum(good_strat)

79

In [29]:
test_stat = get_hft_summary(test_result, thre_mat,sum(train_sample))
good_strat = (test_stat["final.result"]["avg.ret"]>0.000) & (test_stat["final.result"]["num"]>0)
sum(good_strat)

5

In [30]:
%%time
result = parLapply(CORE_NUM, all_period_signal, par_get_arb_all_signal, file_list=all_dates, product_x="rb", product_y="hc",
          period=4096, HEAD_PATH=HEAD_PATH, SAVE_PATH=SAVE_PATH)

Wall time: 2min 19s


In [31]:
from collections import OrderedDict
def get_arb_signal_stat(signal_name, thre_mat, product_x, product_y, all_dates, CORE_NUM, split_str="2018", reverse=1, tranct=1.1e-4, 
                    max_spread_x=1.1, max_spread_y=1.1, tranct_ratio=True, HEAD_PATH=HEAD_PATH, SAVE_PATH = SAVE_PATH,min_pnl=2, min_num=20):
    train_sample = all_dates<split_str
    test_sample = all_dates>split_str
    with dask.config.set(scheduler='processes', num_workers=CORE_NUM):
        f_par = functools.partial(get_arb_signal_pnl, product_x=product_x, product_y=product_y, signal_name=signal_name, thre_mat=thre_mat,
                                 reverse=reverse, tranct=tranct, max_spread_x=max_spread_x, max_spread_y=max_spread_y,
                                  tranct_ratio=tranct_ratio,HEAD_PATH=HEAD_PATH, SAVE_PATH = SAVE_PATH)
        train_result = compute([delayed(f_par)(file) for file in all_dates[train_sample]])[0]
    train_stat = get_hft_summary(train_result, thre_mat, sum(train_sample))
    with dask.config.set(scheduler='processes', num_workers=CORE_NUM):
        f_par = functools.partial(get_arb_signal_pnl, product_x=product_x, product_y=product_y, signal_name=signal_name, thre_mat=thre_mat,
                                 reverse=reverse, tranct=tranct, max_spread_x=max_spread_x, max_spread_y=max_spread_y,
                                  tranct_ratio=tranct_ratio,HEAD_PATH=HEAD_PATH, SAVE_PATH = SAVE_PATH)
        test_result = compute([delayed(f_par)(file) for file in all_dates[test_sample]])[0]
    test_stat = get_hft_summary(test_result, thre_mat, sum(test_sample))
    return OrderedDict([("train.stat", train_stat), ("test.stat", test_stat)])


In [34]:
def evaluate_arb_signal(signal, all_dates, product_x, product_y, min_pnl, min_num, HEAD_PATH, SAVE_PATH,
                    CORE_NUM, period=4096, split_str="2018", tranct=1.1e-4, 
                    max_spread_x=1.1, max_spread_y=1.1, tranct_ratio=True, save_path="signal_result_atr"):
    signal_name = signal+"."+str(period)
    tranct_x = product_info[product_x]["tranct"]
    tranct_y = product_info[product_y]["tranct"]
    tranct_ratio = product_info[product_y]["tranct.ratio"]
    all_signal = load(HEAD_PATH+"\\all_signals\\"+product_x+"_"+product_y+"."+signal_name+".pkl")
    thre_list = []
    open_list = np.quantile(abs(all_signal), np.append(np.arange(0.991,0.999,0.001),np.arange(0.9991,0.9999,0.0001)))
    for cartesian in itertools.product(open_list, np.array([0.2, 0.4, 0.6, 0.8, 1.0])):
            thre_list.append((cartesian[0], -cartesian[0] * cartesian[1]))
    thre_list = np.array(thre_list)
    thre_mat = pd.DataFrame(data=OrderedDict([("open", thre_list[:, 0]), ("close", thre_list[:, 1])]))
    print("reverse=1")
    trend_signal_stat = get_arb_signal_stat(signal_name, thre_mat, product_x, product_y, all_dates, CORE_NUM, split_str=split_str, 
                                            reverse=1, tranct=tranct, max_spread_x=max_spread_x, max_spread_y=max_spread_y, 
                                            tranct_ratio=tranct_ratio, HEAD_PATH=HEAD_PATH, SAVE_PATH=SAVE_PATH,
                                            min_pnl=min_pnl, min_num=min_num)
    print("reverse=-1")
    reverse_signal_stat = get_arb_signal_stat(signal_name, thre_mat, product_x, product_y, all_dates, CORE_NUM, split_str=split_str, 
                                            reverse=-1, tranct=tranct, max_spread_x=max_spread_x, max_spread_y=max_spread_y, 
                                            tranct_ratio=tranct_ratio, HEAD_PATH=HEAD_PATH, SAVE_PATH=SAVE_PATH,
                                               min_pnl=min_pnl, min_num=min_num)
    stat_result = OrderedDict([("trend.signal.stat", trend_signal_stat), ("reverse.signal.stat", reverse_signal_stat)])    
    save(stat_result, HEAD_PATH+"\\"+save_path+"\\"+product_x+"_"+product_y+"."+signal_name+".arb.pkl")


In [47]:
all_signal_list

array(['trade.imb', 'total.trade.imb', 'nr', 'dbook', 'range.pos',
       'price.osci', 'ma.dif.10', 'kdj.k', 'kdj.j', 'trade.imb.range',
       'total.trade.imb.range', 'nr.range', 'dbook.range',
       'range.pos.range', 'price.osci.range', 'ma.dif.10.range',
       'kdj.k.range', 'kdj.j.range', 'trade.imb.std',
       'total.trade.imb.std', 'nr.std', 'dbook.std', 'range.pos.std',
       'price.osci.std', 'ma.dif.10.std', 'kdj.k.std', 'kdj.j.std',
       'trade.imb.volume.open.ratio', 'total.trade.imb.volume.open.ratio',
       'nr.volume.open.ratio', 'dbook.volume.open.ratio',
       'range.pos.volume.open.ratio', 'price.osci.volume.open.ratio',
       'ma.dif.10.volume.open.ratio', 'kdj.k.volume.open.ratio',
       'kdj.j.volume.open.ratio', 'trade.imb.trend.index',
       'total.trade.imb.trend.index', 'nr.trend.index',
       'dbook.trend.index', 'range.pos.trend.index',
       'price.osci.trend.index', 'ma.dif.10.trend.index',
       'kdj.k.trend.index', 'kdj.j.trend.index'], dt

In [46]:
%%time
for signal in all_signal_list[:18]:
    print(signal)
    evaluate_arb_signal(signal, np.array(all_dates), product_x="rb", product_y="hc", min_pnl=2, min_num=20, HEAD_PATH=HEAD_PATH,SAVE_PATH=SAVE_PATH, 
                    CORE_NUM=CORE_NUM, period=4096, split_str="2019", tranct=1.1e-4, 
                    max_spread_x=1.1, max_spread_y=1.1, tranct_ratio=True, save_path="signal_result_atr")

trade.imb
reverse=1
reverse=-1
total.trade.imb
reverse=1
reverse=-1
nr
reverse=1
reverse=-1
dbook
reverse=1
reverse=-1
range.pos
reverse=1
reverse=-1
price.osci
reverse=1
reverse=-1
ma.dif.10
reverse=1
reverse=-1
kdj.k
reverse=1
reverse=-1
kdj.j
reverse=1
reverse=-1
trade.imb.range
reverse=1
reverse=-1
total.trade.imb.range
reverse=1
reverse=-1
nr.range
reverse=1
reverse=-1
dbook.range
reverse=1
reverse=-1
range.pos.range
reverse=1
reverse=-1
price.osci.range
reverse=1
reverse=-1
ma.dif.10.range
reverse=1
reverse=-1
kdj.k.range
reverse=1
reverse=-1
kdj.j.range
reverse=1
reverse=-1
Wall time: 45min


In [37]:
%%time
for signal in all_signal_list:
    print(signal)
    evaluate_arb_signal(signal, np.array(all_dates), product_x="rb", product_y="hc", min_pnl=2, min_num=20, HEAD_PATH=HEAD_PATH,SAVE_PATH=SAVE_PATH, 
                    CORE_NUM=CORE_NUM, period=4096, split_str="2019", tranct=1.1e-4, 
                    max_spread_x=1.1, max_spread_y=1.1, tranct_ratio=True, save_path="signal_result_atr")

trade.imb.std
reverse=1
reverse=-1
total.trade.imb.std
reverse=1
reverse=-1
nr.std
reverse=1
reverse=-1
dbook.std
reverse=1
reverse=-1
range.pos.std
reverse=1
reverse=-1
price.osci.std
reverse=1
reverse=-1
ma.dif.10.std
reverse=1
reverse=-1
kdj.k.std
reverse=1
reverse=-1
kdj.j.std
reverse=1
reverse=-1
trade.imb.volume.open.ratio
reverse=1
reverse=-1
total.trade.imb.volume.open.ratio
reverse=1
reverse=-1
nr.volume.open.ratio
reverse=1
reverse=-1
dbook.volume.open.ratio
reverse=1
reverse=-1
range.pos.volume.open.ratio
reverse=1
reverse=-1
price.osci.volume.open.ratio
reverse=1
reverse=-1
ma.dif.10.volume.open.ratio
reverse=1
reverse=-1
kdj.k.volume.open.ratio
reverse=1
reverse=-1
kdj.j.volume.open.ratio
reverse=1
reverse=-1
trade.imb.trend.index
reverse=1
reverse=-1
total.trade.imb.trend.index
reverse=1
reverse=-1
nr.trend.index
reverse=1
reverse=-1
dbook.trend.index
reverse=1
reverse=-1
range.pos.trend.index
reverse=1
reverse=-1
price.osci.trend.index
reverse=1
reverse=-1
ma.dif.10.tren

In [38]:
all_arb_signal = [x+".arb" for x in all_period_signal]
all_arb_signal

['trade.imb.4096.arb',
 'total.trade.imb.4096.arb',
 'nr.4096.arb',
 'dbook.4096.arb',
 'range.pos.4096.arb',
 'price.osci.4096.arb',
 'ma.dif.10.4096.arb',
 'kdj.k.4096.arb',
 'kdj.j.4096.arb',
 'trade.imb.range.4096.arb',
 'total.trade.imb.range.4096.arb',
 'nr.range.4096.arb',
 'dbook.range.4096.arb',
 'range.pos.range.4096.arb',
 'price.osci.range.4096.arb',
 'ma.dif.10.range.4096.arb',
 'kdj.k.range.4096.arb',
 'kdj.j.range.4096.arb',
 'trade.imb.std.4096.arb',
 'total.trade.imb.std.4096.arb',
 'nr.std.4096.arb',
 'dbook.std.4096.arb',
 'range.pos.std.4096.arb',
 'price.osci.std.4096.arb',
 'ma.dif.10.std.4096.arb',
 'kdj.k.std.4096.arb',
 'kdj.j.std.4096.arb',
 'trade.imb.volume.open.ratio.4096.arb',
 'total.trade.imb.volume.open.ratio.4096.arb',
 'nr.volume.open.ratio.4096.arb',
 'dbook.volume.open.ratio.4096.arb',
 'range.pos.volume.open.ratio.4096.arb',
 'price.osci.volume.open.ratio.4096.arb',
 'ma.dif.10.volume.open.ratio.4096.arb',
 'kdj.k.volume.open.ratio.4096.arb',
 'kdj

In [48]:
def get_arb_signal_performance_result(all_arb_signal, signal_dire, period, product_x, product_y, train_sample, test_sample, min_pnl=2,
                                     min_num=10):
    trend_signal_result = pd.DataFrame(data=OrderedDict([("signal", all_arb_signal), ("reverse",1),
                               ("trainSharpe", 0), ("testSharpe", 0)]))
    reverse_signal_result = pd.DataFrame(data=OrderedDict([("signal", all_arb_signal), ("reverse",-1),
                               ("trainSharpe", 0), ("testSharpe", 0)]))
    n_signal = len(all_arb_signal)
    for k in range(n_signal):
        test_all_pnl = np.zeros(sum(test_sample))
        train_all_pnl = np.zeros(sum(train_sample))
        signal_name = all_arb_signal[k]
        stat_result = load(HEAD_PATH+"\\" + signal_dire +"\\"+product_x+"_"+product_y+"."+signal_name+".pkl")
        trend_signal_stat = stat_result['trend.signal.stat']
        spread_x = product_info[product_x]["spread"]
        spread_y = product_info[product_y]["spread"]
        train_stat = trend_signal_stat["train.stat"]
        test_stat = trend_signal_stat["test.stat"]
        good_strat = (train_stat["final.result"]["avg.pnl"]>min_pnl*(spread_x+spread_y)) & (train_stat["final.result"]["num"]>min_num)
        if sum(good_strat)>2:
            train_pnl = train_stat["daily.pnl"].loc[:, good_strat].sum(axis=1)/sum(good_strat)
            test_pnl = test_stat["daily.pnl"].loc[:, good_strat].sum(axis=1)/sum(good_strat)
            all_portfolio = np.append(train_pnl, test_pnl)
            trend_signal_result.loc[k, ("signal", "trainSharpe", "testSharpe")] = (signal_name, sharpe(train_pnl),sharpe(test_pnl))
        test_all_pnl = np.zeros([sum(test_sample), len(product_list)])
        train_all_pnl = np.zeros([sum(train_sample), len(product_list)])
        stat_result = load(HEAD_PATH + "\\" + signal_dire +"\\"+product_x+"_"+product_y+"."+signal_name+".pkl")
        reverse_signal_stat = stat_result['reverse.signal.stat']
        train_stat = reverse_signal_stat["train.stat"]
        test_stat = reverse_signal_stat["test.stat"]
        good_strat = (train_stat["final.result"]["avg.pnl"]>min_pnl*(spread_x+spread_y)) & (train_stat["final.result"]["num"]>min_num)
        if sum(good_strat)>2:
            train_pnl = train_stat["daily.pnl"].loc[:, good_strat].sum(axis=1)/sum(good_strat)
            test_pnl = test_stat["daily.pnl"].loc[:, good_strat].sum(axis=1)/sum(good_strat)
            all_portfolio = np.append(train_pnl, test_pnl)
            reverse_signal_result.loc[k, ("signal", "trainSharpe", "testSharpe")] = (signal_name, sharpe(train_pnl),sharpe(test_pnl))
    return OrderedDict([("trend.signal.stat", trend_signal_result), 
                        ("reverse.signal.stat", reverse_signal_result)])


In [49]:
result_arb = get_arb_signal_performance_result(all_arb_signal, "signal_result_atr", 4906, "rb","hc", train_sample, test_sample, 0, 10)

In [50]:
reverse_arb = result_arb["reverse.signal.stat"]
good= (reverse_arb["trainSharpe"]>0) & (reverse_arb["testSharpe"]>0)
reverse_arb[good]

Unnamed: 0,signal,reverse,trainSharpe,testSharpe
3,dbook.4096.arb,-1,0.518194,0.929844
6,ma.dif.10.4096.arb,-1,0.352109,0.845634


In [51]:
trend_arb = result_arb["trend.signal.stat"]
good= (trend_arb["trainSharpe"]>0) & (trend_arb["testSharpe"]>0)
trend_arb[good]

Unnamed: 0,signal,reverse,trainSharpe,testSharpe
21,dbook.std.4096.arb,1,0.368667,0.123619
