In [121]:
import os
import glob
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

pd.set_option('max_rows', 100)
pd.set_option('max_columns', 100)

perc = [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]

In [122]:
startDate = '20200918'
endDate = '20201013'


readPath = r'\\192.168.10.30\Kevin_zhenyu\orderLog\equityTradeLogs'
dataPathLs = np.array(glob.glob(os.path.join(readPath, 'speedCompare***.csv')))
dateLs = np.array([os.path.basename(i).split('_')[1].split('.')[0] for i in dataPathLs])
dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
rawOrderLog = []
for thisDate, thisPath in zip(dateLs, dataPathLs):
    data = pd.read_csv(thisPath)
    data = data.rename(columns={'mdClockAtArrival': 'caamd'})
    rawOrderLog += [data]
rawOrderLog = pd.concat(rawOrderLog, sort=False)

for col in ['clockAtArrival', 'caamd', 'secid', 'updateType', 'vai', 'absFilledThisUpdate', 'orderDirection', 'absOrderSize',
            'absOrderSizeCumFilled', 'date', 'accCode', 'mse']:
    rawOrderLog[col] = rawOrderLog[col].astype('int64')   
rawOrderLog = rawOrderLog.sort_values(by=['date', 'secid', 'vai', 'accCode', 'clockAtArrival']).reset_index(drop=True)

rawOrderLog = rawOrderLog[rawOrderLog["secid"] >= 1000000]

rawOrderLog['clock'] = rawOrderLog['clockAtArrival'].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
rawOrderLog['broker'] = rawOrderLog['accCode'] // 100
rawOrderLog["broker"] = np.where(rawOrderLog["accCode"].astype(str).apply(lambda x: len(x) == 6), rawOrderLog['accCode'] // 10000, rawOrderLog["broker"])
rawOrderLog['colo_broker'] = rawOrderLog['colo'].str[:2] + '_' + rawOrderLog['broker'].astype('str')
rawOrderLog['order'] = rawOrderLog.groupby(['date', 'accCode', 'secid', 'vai']).grouper.group_info[0]
rawOrderLog['group'] = rawOrderLog.groupby(['date', 'secid', 'vai']).grouper.group_info[0]
rawOrderLog['startClock'] = rawOrderLog.groupby(['order'])['clockAtArrival'].transform('first')
rawOrderLog['duration'] = rawOrderLog['clockAtArrival'] - rawOrderLog['startClock']
rawOrderLog['orderPrice'] = rawOrderLog['orderPrice'].apply(lambda x: round(x, 2))
rawOrderLog['tradePrice'] = rawOrderLog['tradePrice'].apply(lambda x: round(x, 2))
rawOrderLog['orderDirection1'] = np.where(rawOrderLog["orderDirection"] == -2, -1, np.where(
    rawOrderLog["orderDirection"] == 2, 1, rawOrderLog["orderDirection"]))
orderLog = rawOrderLog.copy()

### Assertion 1:  make sure same direction in same date, secid, vai
print('=======================================================================================')
print('1. same date, secid, vai: same direction')
orderLog['directNum'] = orderLog.groupby(['date', 'secid', 'vai'])['orderDirection1'].transform('nunique')
if len(orderLog[orderLog['directNum'] != 1]) > 0:
    print('opposite direction for same date, same secid, same vai')
    display(orderLog[(orderLog['directNum'] != 1) & (orderLog['updateType'] == 0)][['date', 'accCode', 'secid', 'vai', 'orderDirection', 'order']])
    orderLog = orderLog[orderLog['directNum'] == 1]

assert((orderLog.groupby(['date', 'secid', 'vai'])['orderDirection1'].nunique() == 1).all() == True)

## Assertion 2:  make sure each account, secid, vai only has one insertion
print('=======================================================================================')
print('2. same date, secid, vai, accCode: one insertion')
a = orderLog[orderLog['updateType'] == 0].groupby(['date', 'accCode', 'secid', 'vai', 'order'])['clockAtArrival'].count()
if len(a[a > 1]) > 0:
    print('more than one insertion at same time')
    a = a[a>1].reset_index()
    display(a)
    orderLog = orderLog[~(orderLog['order'].isin(a['order'].unique()))]

orderLog['isMsg'] = np.where(orderLog['updateType'] == 0, 
                             np.where(orderLog['mse'] == 100, 1, 0), np.nan)
orderLog['isMsg'] = orderLog.groupby(['order'])['isMsg'].ffill()

placeSZE = orderLog[(orderLog['secid'] >= 2000000) & (orderLog['updateType'] == 0)]
print('%.2f%% SZE orders triggered by msg data'%(placeSZE[placeSZE['isMsg'] == 1].shape[0]/placeSZE.shape[0]*100))


### Assertion 3:  check IPO stocks selling status
print('=======================================================================================')
print('3. IPO stocks selling (ars = 301, 302)')
if orderLog[orderLog['ars'].isin([301, 302])].shape[0] != 0:
    kk = orderLog[orderLog['ars'].isin([301, 302])]
    print(kk)
    try:
        assert(kk[kk['orderDirection1'] == 1].shape[0] == 0)
        print('we only sell, never buy')
    except:
        print('There are IPO buy side orders!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        print(kk[kk['orderDirection1'] == 1])
    kk1 = kk[kk['updateType'] == 0]
    kk1 = kk1.sort_values(by=['accCode', 'secid','clockAtArrival'])
    kk1['diff'] = kk1.groupby(['accCode', 'secid'])['clockAtArrival'].apply(lambda x: x-x.shift(1))
    kk1['diff'] = kk1['diff'].fillna(0)
    try:
        assert(kk1[kk1['diff'] < 10e6].shape[0] == 0)
        print('for each stock in the same account, there is no insertion within 10 seconds of the previous insertion')
    except:
        print('There are insertion within 10 seconds for orders under same account same stock!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        print(kk1[kk1['diff'] < 10e6])
    kk2 = kk[(kk['updateType'] == 1)]
    try:
        assert(kk2[kk2['duration'] < 3e6].shape[0] == 0)
        print('for each stock in the same account, the cancellation of an order happens more than 3 seconds after the insertion')
    except:
        print('There are cancellation within 3 seconds for orders under same account same stock!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        print(kk2[kk2['duration'] < 3e6])


### Assertion 4: check updateType == 7 orders, make sure updateType == 7 orders < 20 per account, < 100 in total
print('=======================================================================================')
print('4. updateType 7 orders')
if orderLog[orderLog['updateType'] == 7].shape[0] != 0:
    assert(orderLog[orderLog['updateType'] == 7].groupby('accCode')['order'].nunique().max() < 20)
    assert(orderLog[orderLog['updateType'] == 7].groupby('accCode')['order'].nunique().sum() < 100)

### Assertion 5: check updateType == 6 orders, make sure updateType == 6 orders < 5% per account
print('=======================================================================================')
print('5. updateType 6 orders')
k1 = orderLog[orderLog['updateType'] == 6].groupby('accCode')['order'].nunique().reset_index()
k2 = orderLog.groupby('accCode')['order'].nunique().reset_index()
k = pd.merge(k1, k2, on='accCode', how='left')
k['prob'] = k['order_x']/k['order_y']
try:
    assert(sum(k['prob'] >= 0.05) == 0)
except:
    print('There are accounts with more than 5% updateType 6 orders!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
    print(k[k['prob'] >= 0.05])

### Assertion 6: check CYB orders, make sure CYB stocks total absOrderSize < 30w
print('=======================================================================================')
print('6. CYB stocks total order size < 30w')
try:
    assert(orderLog[(orderLog['secid'] >= 2300000) & (orderLog['updateType'] == 0)]['absOrderSize'].max() <= 300000)
except:
    print('CYB stocks total absOrderSize >= 30w!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
    
     
### Assertion 7:  make sure there is no unexpected updateType 
print('=======================================================================================')
print('7. unexpected updateType')
def getTuple(x):
    return tuple(i for i in x)

checkLog = orderLog[~((orderLog['updateType'] == 4) & (orderLog.groupby(['order'])['updateType'].shift(-1) == 4))]
checkLog = checkLog.groupby(['order'])['updateType'].apply(lambda x: getTuple(x)).reset_index()
checkLog['status'] = np.where(checkLog['updateType'].isin([(0, 2, 4), (0, 2, 1, 4), (0, 2, 1, 2, 4), (0, 2, 4, 1, 4), (0, 4), (0, 1, 4), (0, 4, 1, 4), (0, 2, 2, 4), (0, 4, 2, 4), (0, 2, 2, 1, 4), (0, 2, 2, 4, 1, 4)]),0,
                     np.where(checkLog['updateType'].isin([(0, 2, 4, 1, 3), (0, 2, 4, 1, 4, 3), (0, 2, 1, 4, 3), (0, 4, 1, 3), (0, 1, 4, 3),
                                                               (0, 2, 2, 4, 1, 3), (0, 2, 2, 4, 1, 4, 3), (0, 2, 2, 1, 4, 3), (0, 4, 2, 4, 1, 3),
                                                               (0, 4, 2, 1, 3), (0, 4, 1, 4, 3), (0, 4, 1)]), 1,
                     np.where(checkLog['updateType'].isin([(0, 2, 1, 3), (0, 2, 2, 1, 3), (0, 2, 3), (0, 3), (0, 1, 3), (0, ), (0, 2), (0, 2, 1), (0, 2, 2)]), 2, 3)))

orderLog = pd.merge(orderLog, checkLog[['order', 'status']], how='left', on=['order'], validate='many_to_one')
orderLog = orderLog[orderLog['status'].isin([0, 1, 2])].reset_index(drop=True)

### Assertion 8:  make sure status==0 got all traded
print('=======================================================================================')
print('8. status == 0: all traded')
a = orderLog[orderLog['status'] == 0]
a = a.groupby(['order'])[['absOrderSizeCumFilled', 'absOrderSize']].max().reset_index()
a.columns = ['order', 'filled', 'total']
print('in total trade, any fill != total cases')
display(a[a['filled'] != a['total']])
if a[a['filled'] != a['total']].shape[0] > 0:
    removeOrderLs = a[a['filled'] != a['total']]['order'].unique()
    orderLog = orderLog[~(orderLog['order'].isin(removeOrderLs))]
    
### Assertion 9:  make sure status==1 got partial traded
print('=======================================================================================')
print('9. status == 1: partial traded')
a = orderLog[orderLog['status'] == 1]
a = a.groupby(['order'])[['absOrderSizeCumFilled', 'absOrderSize']].max().reset_index()
a.columns = ['order', 'filled', 'total']
print('in partial trade, any fill >= total or fill is 0 cases for updateType 4')
display(a[(a['filled'] >= a['total']) | (a['filled'] == 0)])
if a[(a['filled'] >= a['total']) | (a['filled'] == 0)].shape[0] > 0:
    removeOrderLs = a[(a['filled'] >= a['total']) | (a['filled'] == 0)]['order'].unique()
    orderLog = orderLog[~(orderLog['order'].isin(removeOrderLs))]
    
### Assertion 10: make sure no cancellation within 1 sec
print('=======================================================================================')
print('10. no cancellation within 1 sec')
a = orderLog[(orderLog['updateType'] == 1) & (orderLog['duration'] < 1e6)]
print('any cancellation within 1 sec')
display(a)
if a.shape[0] > 0:
    removeOrderLs = a['order'].unique()
    orderLog = orderLog[~(orderLog['order'].isin(removeOrderLs))]


### Assertion 11: make sure no order has shares > 80w or notional > 800w
print('=======================================================================================')
print('11. Orders with size > 80w or notional > 800w')
orderLog['orderNtl'] = orderLog['absOrderSize'] * orderLog['orderPrice']
if orderLog[orderLog['absOrderSize'] > 800000].shape[0] > 0:
    print('some order quantity are > 80w')
    print(orderLog[orderLog['absOrderSize'] > 800000].groupby(['colo', 'accCode'])['order'].nunique())
    display(orderLog[orderLog['absOrderSize'] > 800000][['date', 'accCode', 'secid', 'vai', 'absOrderSize', 'orderPrice',
                                                         'orderNtl', 'orderDirection', 'clock', 'order']])
            
if orderLog[orderLog['orderNtl'] > 8000000].shape[0] > 0:
    print('some order ntl are > 800w')
    print(orderLog[orderLog['orderNtl'] > 8000000].groupby(['colo', 'accCode'])['order'].nunique())
    display(orderLog[orderLog['orderNtl'] > 8000000][['date', 'accCode', 'secid', 'vai', 'absOrderSize', 'orderPrice',
                                                      'orderNtl', 'orderDirection', 'clock', 'order', "updateType", 
                                                      "tradePrice", "absOrderSizeCumFilled", "absFilledThisUpdate"]])

removeOrderLs = list(set(orderLog[orderLog['absOrderSize'] > 800000]['order'].unique()) | set(orderLog[orderLog['orderNtl'] > 8000000]['order'].unique()))
orderLog = orderLog[~(orderLog['order'].isin(removeOrderLs))]


orderLog = orderLog.sort_values(by=['date', 'secid', 'vai', 'accCode', 'clockAtArrival']).reset_index(drop=True)

orderLog['exchange'] = np.where(orderLog['secid'] >= 2000000, 'SZE', 'SSE')
orderLog['orderNtl'] = orderLog['orderPrice'] * orderLog['absOrderSize']
orderLog['tradeNtl'] = np.where(orderLog['updateType'] == 4, orderLog['tradePrice']*orderLog['absFilledThisUpdate'], 0)
orderLog["ars"] = orderLog.groupby(['order'])['ars'].transform('first')
orderLog['sta'] = np.where(orderLog['ars'].isin([121, 221, 321, 131, 231, 331]), 'statwo', 'staone')

  interactivity=interactivity, compiler=compiler, result=result)


1. same date, secid, vai: same direction
opposite direction for same date, same secid, same vai


Unnamed: 0,date,accCode,secid,vai,orderDirection,order
71063,20200918,8854,1603222,1126105,-1,27872
71067,20200918,8943,1603222,1126105,1,30254
230613,20200918,5474,2300470,1431580,-1,19891
230616,20200918,6480,2300470,1431580,-1,22826
230621,20200918,9756,2300470,1431580,1,45906
309797,20200921,8854,1601001,18229865,-1,92584
309800,20200921,528401,1601001,18229865,1,119917
369786,20200921,8854,2000582,5091740,1,92704
369790,20200921,9243,2000582,5091740,-1,98393
472744,20200921,5474,2300277,5728600,-1,84034


2. same date, secid, vai, accCode: one insertion
more than one insertion at same time


Unnamed: 0,date,accCode,secid,vai,order,clockAtArrival
0,20200918,5226,2002968,331550,3721,2
1,20200918,6683,2300659,783140,26748,2
2,20200918,8854,2002309,13759576,28246,2
3,20200918,8854,2002309,13865676,28247,2
4,20200918,8854,2300533,798800,28514,2
...,...,...,...,...,...,...
440,20200930,9741,2300492,151700,614474,2
441,20200930,9741,2300522,1074820,614560,2
442,20200930,9741,2300681,468700,614985,2
443,20201009,5273,2300622,31500,645925,2


99.59% SZE orders triggered by msg data
3. IPO stocks selling (ars = 301, 302)
4. updateType 7 orders
5. updateType 6 orders
There are accounts with more than 5% updateType 6 orders!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   accCode  order_x  order_y     prob
4     9551      578    11369  0.05084
6. CYB stocks total order size < 30w
7. unexpected updateType
8. status == 0: all traded
in total trade, any fill != total cases


Unnamed: 0,order,filled,total
2399,3328,600,3200
3608,5062,500,1600
4327,6076,900,2200
7544,10371,77200,81500
10639,14172,20400,35300
...,...,...,...
580029,808485,2600,6500
580303,808876,100,2300
581872,811023,3200,4900
582239,811557,10045,11100


9. status == 1: partial traded
in partial trade, any fill >= total or fill is 0 cases for updateType 4


Unnamed: 0,order,filled,total


10. no cancellation within 1 sec
any cancellation within 1 sec


Unnamed: 0.1,Unnamed: 0,clockAtArrival,caamd,secid,updateType,vai,ars,absFilledThisUpdate,orderDirection,absOrderSize,absOrderSizeCumFilled,orderPrice,tradePrice,date,accCode,mse,colo,orderSysId,internalId,tradeId,sdd,aaa,ApplSeqNum,mrm,mta,mrsb,mrss,mrv,mrb100,mra100,l4tr,cancellationPending,cfe,clock,cumSharesBought,cumSharesBuyInserted,cumSharesSellInserted,cumSharesSold,finalState,gfe,hee,insertedShortOrder,insertionPending,inv_L,inv_L0,inv_S,inv_S0,locateShares,locateSharesTotal,mfe,ms,mt,mv,orderId,orderOutstanding,sequenceNo,session,threadId,totalActions,totalCanceled,underlyingIndex,zipFile,broker,colo_broker,order,group,startClock,duration,orderDirection1,directNum,isMsg,status


11. Orders with size > 80w or notional > 800w
some order ntl are > 800w
colo      accCode
zt_88_03  8971       1
Name: order, dtype: int64


Unnamed: 0,date,accCode,secid,vai,absOrderSize,orderPrice,orderNtl,orderDirection,clock,order,updateType,tradePrice,absOrderSizeCumFilled,absFilledThisUpdate
2901692,20201012,8971,1601318,27740763,99100,80.73,8000343.0,1,2020-10-12 10:01:10.787934,734595,0,-1.0,0,0
2901693,20201012,8971,1601318,27740763,99100,80.73,8000343.0,1,2020-10-12 10:01:11.009690,734595,2,-1.0,0,0
2901694,20201012,8971,1601318,27740763,99100,80.73,8000343.0,1,2020-10-12 10:01:11.221120,734595,4,80.73,500,500
2901695,20201012,8971,1601318,27740763,99100,80.73,8000343.0,1,2020-10-12 10:01:11.221207,734595,4,80.73,600,100
2901696,20201012,8971,1601318,27740763,99100,80.73,8000343.0,1,2020-10-12 10:01:11.221644,734595,4,80.73,1100,500
2901697,20201012,8971,1601318,27740763,99100,80.73,8000343.0,1,2020-10-12 10:01:11.224045,734595,4,80.73,1200,100
2901698,20201012,8971,1601318,27740763,99100,80.73,8000343.0,1,2020-10-12 10:01:11.224693,734595,4,80.73,2200,1000
2901699,20201012,8971,1601318,27740763,99100,80.73,8000343.0,1,2020-10-12 10:01:11.225246,734595,4,80.73,2700,500
2901700,20201012,8971,1601318,27740763,99100,80.73,8000343.0,1,2020-10-12 10:01:11.225330,734595,4,80.73,2700,0
2901701,20201012,8971,1601318,27740763,99100,80.73,8000343.0,1,2020-10-12 10:01:11.226904,734595,4,80.73,2900,200


In [123]:
checkLog = orderLog[(orderLog["updateType"] == 0)]
checkLog = checkLog[checkLog['caamd'] != 0]
checkLog["strategy"] = np.where(checkLog["ars"].isin([121, 221, 321, 131, 231, 331]), "statwo", "staone")
checkLog['colo_account'] = checkLog['colo'].str[:2] + '_' + checkLog['accCode'].astype(str)
checkLog['internal_latency'] = checkLog["clockAtArrival"] - checkLog["caamd"]
checkLog = checkLog[checkLog['colo'] == 'zs_66_01']
checkLog['accountNum'] = checkLog.groupby(['date', 'sta'])['accCode'].transform('nunique')
re = checkLog.groupby(['date', 'sta'])['internal_latency'].describe().reset_index()
re = pd.merge(re, checkLog.groupby(['date', 'sta'])['accountNum'].first().reset_index(), on=['date', 'sta'])
re = re[['date', 'sta', 'count', 'accountNum', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']]
for i in re.columns[re.columns != 'sta']:
    re[i] = re[i].astype('int64')
    
from IPython.display import display, HTML
HTML(re.groupby(['date', 'sta'])[['count', 'accountNum', 'mean', 'std', 'min', '25%', '50%', '75%', 'max']].first().to_html())

Unnamed: 0_level_0,Unnamed: 1_level_0,count,accountNum,mean,std,min,25%,50%,75%,max
date,sta,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
20200918,staone,843,4,49,18,15,34,53,59,181
20200918,statwo,3034,4,65,363,42,52,56,60,20027
20200921,staone,954,4,85,259,17,47,59,72,7158
20200921,statwo,2871,4,71,94,40,58,63,73,4957
20200922,staone,1012,4,44,18,14,34,46,50,329
20200922,statwo,3241,4,50,12,37,45,47,51,252
20200923,staone,1262,4,40,16,13,26,44,50,131
20200923,statwo,3240,4,51,13,37,45,48,52,241
20200924,staone,1494,4,46,74,13,40,45,49,2768
20200924,statwo,4184,4,51,13,36,45,48,52,197


In [268]:
checkLog = orderLog[(orderLog["updateType"] == 0) & (orderLog['accCode'].isin([6282, 9685]))]
checkLog = checkLog[checkLog['caamd'] != 0]
checkLog['internal_latency'] = checkLog["clockAtArrival"] - checkLog["caamd"]
checkLog["strategy"] = np.where(checkLog["ars"].isin([121, 221, 321, 131, 231, 331]), "statwo", "staone")
checkLog = checkLog[checkLog['strategy'] == 'statwo']
SZE = checkLog[checkLog['secid'] >= 2000000]
SSE = checkLog[checkLog['secid'] < 2000000]
SZE["exchange"] = "SZ"
SSE["exchange"] = "SH"

c1 = SSE.groupby(['colo', 'exchange', "strategy", "date"])["internal_latency"].quantile(.95).reset_index().groupby(['colo', 'exchange', "strategy"])["internal_latency"].mean().reset_index()
c2 = SSE.groupby(['colo', 'exchange', "strategy", "date"])["internal_latency"].median().reset_index().groupby(['colo', 'exchange', "strategy"])["internal_latency"].mean().reset_index()
c3 = SSE.groupby(['colo', 'exchange', "strategy"])["internal_latency"].count().reset_index()
c4 = SSE.groupby(['colo', 'exchange', "strategy"])["date"].unique().str.len().reset_index()
c5 = SSE.groupby(['colo', 'exchange', "strategy",  "date"])["internal_latency"].quantile(.95).reset_index().groupby(['colo', 'exchange', "strategy"])["internal_latency"].std().reset_index()

re2 = pd.merge(c3, c1, on=['colo', 'exchange', "strategy"])
re2 = re2.rename(columns = {'internal_latency_x': 'count', 'internal_latency_y': '95 percentile'})
re2 = pd.merge(re2, c2, on=['colo', 'exchange', "strategy"])
re2 = re2.rename(columns = {'internal_latency': 'median'})
re2 = pd.merge(re2, c4, on=['colo', 'exchange', "strategy"])
re2 = pd.merge(re2, c5, on=['colo', 'exchange', "strategy"])
re2 = re2.rename(columns = {'internal_latency': 'std'})
re2

c1 = SZE.groupby(['colo', 'exchange', "strategy", "date"])["internal_latency"].quantile(.95).reset_index().groupby(['colo', 'exchange', "strategy"])["internal_latency"].mean().reset_index()
c2 = SZE.groupby(['colo', 'exchange', "strategy", "date"])["internal_latency"].median().reset_index().groupby(['colo', 'exchange', "strategy"])["internal_latency"].mean().reset_index()
c3 = SZE.groupby(['colo', 'exchange', "strategy"])["internal_latency"].count().reset_index()
c4 = SZE.groupby(['colo', 'exchange', "strategy"])["date"].unique().str.len().reset_index()
c5 = SZE.groupby(['colo', 'exchange', "strategy",  "date"])["internal_latency"].quantile(.95).reset_index().groupby(['colo', 'exchange', "strategy"])["internal_latency"].std().reset_index()

re1 = pd.merge(c3, c1, on=['colo', 'exchange', "strategy"])
re1 = re1.rename(columns = {'internal_latency_x': 'count', 'internal_latency_y': '95 percentile'})
re1 = pd.merge(re1, c2, on=['colo', 'exchange', "strategy"])
re1 = re1.rename(columns = {'internal_latency': 'median'})
re1 = pd.merge(re1, c4, on=['colo', 'exchange', "strategy"])
re1 = pd.merge(re1, c5, on=['colo', 'exchange', "strategy"])
re1 = re1.rename(columns = {'internal_latency': 'std'})
re1


# re1 = pd.merge(re1[re1["isMsg"] == 1], re1[re1["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_x").reset_index(drop=True)
# re2 = pd.merge(re2[re2["isMsg"] == 1], re2[re2["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_y").reset_index(drop=True)
re = pd.concat([re1, re2]).reset_index(drop=True)


for col in ['median', '95 percentile']:
    re[col] = re[col].astype(int)
for col in ['std']:
    re[col] = re[col].apply(lambda x: '%.2f'%(x))
    
    
from IPython.display import display, HTML

display(HTML(re[(re['strategy'] == 'statwo')].groupby(['colo', 'exchange', "strategy"])["count", "median", "95 percentile", "std"].first().to_html()))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,median,95 percentile,std
colo,exchange,strategy,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
zs_96_08,SH,statwo,1342,133,293,55.1
zs_96_08,SZ,statwo,7593,95,206,18.64


In [259]:
result = pd.concat([result, result1])
result

Unnamed: 0,colo,exchange,previous_count,current_count,previous_med,current_med,previous_95p,current_95p
0,zs_96_08,SZ,5035,7901,112,93,195,193
1,zs_96_08,SH,2713,4125,138,137,345,357
0,zt_52_07,SH,2144,4122,122,110,254,239
0,zt_96_09,SH,920,1147,133,113,336,602
0,zs_52_08,SZ,4376,4932,109,90,180,139
0,zs_96_06,SZ,923,808,106,76,168,146
1,zs_96_06,SH,2582,2563,136,123,269,328
0,zt_52_04,SH,4457,4238,124,101,232,208
0,zt_52_05,SH,4901,4915,132,129,276,453
0,zs_52_06,SZ,15499,5993,103,88,231,228


In [270]:
from IPython.display import display, HTML
HTML(result1.groupby(["colo", "exchange"]).first().to_html())

Unnamed: 0_level_0,Unnamed: 1_level_0,previous_count,current_count,previous_med,current_med,previous_95p,current_95p
colo,exchange,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
zs_96_08,SH,744,1342,109,133,197,293
zs_96_08,SZ,3931,7593,89,95,125,206


In [269]:
re = re[['colo', 'exchange', 'count', '95 percentile', 'median']]
re = re.rename(columns={"95 percentile":"current_95p", "median":"current_med", "count":"current_count"})
result1 = pd.merge(re, result1, on=['colo', 'exchange'])
result1 = result1[['colo', 'exchange', 'previous_count', 'current_count', 'previous_med', 'current_med', 'previous_95p', 'current_95p']]
result1

Unnamed: 0,colo,exchange,previous_count,current_count,previous_med,current_med,previous_95p,current_95p
0,zs_96_08,SZ,3931,7593,89,95,125,206
1,zs_96_08,SH,744,1342,109,133,197,293


In [265]:
re = re[['colo', 'exchange', 'count', '95 percentile', 'median']]
re = re.rename(columns={"95 percentile":"previous_95p", "median":"previous_med", "count":"previous_count"})
result1 = re
result1

Unnamed: 0,colo,exchange,previous_count,previous_95p,previous_med
0,zs_96_08,SZ,3931,125,89
1,zs_96_08,SH,744,197,109


In [257]:
checkLog = orderLog[(orderLog["updateType"] == 0) & (orderLog['accCode'].isin([8865, 8967]))]
checkLog = checkLog[checkLog['caamd'] != 0]
checkLog['internal_latency'] = checkLog["clockAtArrival"] - checkLog["caamd"]
checkLog["strategy"] = np.where(checkLog["ars"].isin([121, 221, 321, 131, 231, 331]), "statwo", "staone")
checkLog = checkLog[checkLog['strategy'] == 'statwo']
SZE = checkLog[checkLog['secid'] >= 2000000]
SSE = checkLog[checkLog['secid'] < 2000000]
SZE["exchange"] = "SZ"
SSE["exchange"] = "SH"

c1 = SZE.groupby(['colo', "exchange", "strategy", "date"])["internal_latency"].quantile(.95).reset_index().groupby(['colo', "exchange", "strategy"])["internal_latency"].mean().reset_index()
c2 = SZE.groupby(['colo', "exchange", "strategy", "date"])["internal_latency"].median().reset_index().groupby(['colo', "exchange", "strategy"])["internal_latency"].mean().reset_index()
c3 = SZE.groupby(['colo', "exchange", "strategy"])["internal_latency"].count().reset_index()
c4 = SZE.groupby(['colo', "exchange", "strategy"])["date"].unique().str.len().reset_index()
c5 = SZE.groupby(['colo', "exchange", "strategy",  "date"])["internal_latency"].quantile(.95).reset_index().groupby(['colo', "exchange", "strategy"])["internal_latency"].std().reset_index()

re2 = pd.merge(c3, c1, on=['colo', "exchange", "strategy"])
re2 = re2.rename(columns = {'internal_latency_x': 'count', 'internal_latency_y': '95 percentile'})
re2 = pd.merge(re2, c2, on=['colo', "exchange", "strategy"])
re2 = re2.rename(columns = {'internal_latency': 'median'})
re2 = pd.merge(re2, c4, on=['colo', "exchange", "strategy"])
re2 = pd.merge(re2, c5, on=['colo', "exchange", "strategy"])
re2 = re2.rename(columns = {'internal_latency': 'std'})
re2


# re1 = pd.merge(re1[re1["isMsg"] == 1], re1[re1["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_x").reset_index(drop=True)
# re2 = pd.merge(re2[re2["isMsg"] == 1], re2[re2["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_y").reset_index(drop=True)
re = re2


for col in ['median', '95 percentile']:
    re[col] = re[col].astype(int)
for col in ['std']:
    re[col] = re[col].apply(lambda x: '%.2f'%(x))
    
    
from IPython.display import display, HTML

display(HTML(re[(re['strategy'] == 'statwo')].groupby(['colo', "exchange", "strategy"])["count", "median", "95 percentile", "std"].first().to_html()))


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,median,95 percentile,std
colo,exchange,strategy,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
zs_88_04,SZ,statwo,868,50,114,


### 95 percentile and median

In [3]:
checkLog = orderLog[(orderLog["updateType"] == 0)]
checkLog = checkLog[checkLog['caamd'] != 0]
checkLog['colo_account'] = checkLog['colo'].str[:2] + '_' + checkLog['accCode'].astype(str)
checkLog['internal_latency'] = checkLog["clockAtArrival"] - checkLog["caamd"]
checkLog["strategy"] = np.where(checkLog["ars"].isin([121, 221, 321, 131, 231, 331]), "statwo", "staone")
SZE = checkLog[checkLog['secid'] >= 2000000]
SSE = checkLog[checkLog['secid'] < 2000000]
SZE["exchange"] = "SZ"
SSE["exchange"] = "SH"
c1 = SZE.groupby(["exchange", "colo_account", "strategy", "isMsg", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "strategy", "isMsg"])["internal_latency"].mean().reset_index()
c2 = SZE.groupby(["exchange", "colo_account", "strategy", "isMsg", "date"])["internal_latency"].median().reset_index().groupby(["exchange", "colo_account", "strategy", "isMsg"])["internal_latency"].mean().reset_index()
c3 = SZE.groupby(["exchange", "colo_account", "strategy", "isMsg"])["internal_latency"].count().reset_index()
c4 = SZE.groupby(["exchange", "colo_account", "strategy", "isMsg"])["date"].unique().str.len().reset_index()
c5 = SZE.groupby(["exchange", "colo_account", "strategy", "isMsg", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "strategy", "isMsg"])["internal_latency"].std().reset_index()

re1 = pd.merge(c3, c1, on=["exchange", "colo_account", "strategy", "isMsg"])
re1 = re1.rename(columns = {'internal_latency_x': 'count', 'internal_latency_y': '95 percentile'})
re1 = pd.merge(re1, c2, on=["exchange", "colo_account", "strategy", "isMsg"])
re1 = re1.rename(columns = {'internal_latency': 'median'})
re1 = pd.merge(re1, c4, on=["exchange", "colo_account", "strategy", "isMsg"])
re1 = pd.merge(re1, c5, on=["exchange", "colo_account", "strategy", "isMsg"])
re1 = re1.rename(columns = {'internal_latency': 'std'})
re1 = pd.merge(re1, re1[re1["isMsg"] == 1].loc[:, ["exchange", "colo_account", "strategy", "count"]], on=["exchange", "colo_account","strategy"], how="outer")
re1 = pd.merge(re1, re1[re1["isMsg"] == 0].loc[:, ["exchange", "colo_account", "strategy", "count_x"]], on=["exchange", "colo_account","strategy"], how="outer")
re1 = re1.rename(columns = {'count_x_x': 'count', 'count_y': 'TradeByMsg','count_x_y': 'TradeBySs'})
re1["TradeBySsPerc(%)"] = re1["TradeBySs"]/re1["count"]
# re1 = re1[re1["count"] >= 150]
re1 = re1[re1["isMsg"] == 1]
re1

c1 = SSE.groupby(["exchange", "colo_account", "strategy", "isMsg", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "strategy", "isMsg"])["internal_latency"].mean().reset_index()
c2 = SSE.groupby(["exchange", "colo_account", "strategy", "isMsg", "date"])["internal_latency"].median().reset_index().groupby(["exchange", "colo_account", "strategy", "isMsg"])["internal_latency"].mean().reset_index()
c3 = SSE.groupby(["exchange", "colo_account", "strategy", "isMsg"])["internal_latency"].count().reset_index()
c4 = SSE.groupby(["exchange", "colo_account", "strategy", "isMsg"])["date"].unique().str.len().reset_index()
c5 = SSE.groupby(["exchange", "colo_account", "strategy", "isMsg", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "strategy", "isMsg"])["internal_latency"].std().reset_index()

re2 = pd.merge(c3, c1, on=["exchange", "colo_account", "strategy", "isMsg"])
re2 = re2.rename(columns = {'internal_latency_x': 'count', 'internal_latency_y': '95 percentile'})
re2 = pd.merge(re2, c2, on=["exchange", "colo_account", "strategy", "isMsg"])
re2 = re2.rename(columns = {'internal_latency': 'median'})
re2 = pd.merge(re2, c4, on=["exchange", "colo_account", "strategy", "isMsg"])
re2 = pd.merge(re2, c5, on=["exchange", "colo_account", "strategy", "isMsg"])
re2 = re2.rename(columns = {'internal_latency': 'std'})
re2 = pd.merge(re2, re2[re2["isMsg"] == 1].loc[:, ["exchange", "colo_account", "strategy", "count"]], on=["exchange", "colo_account", "strategy"], how="outer")
re2 = pd.merge(re2, re2[re2["isMsg"] == 0].loc[:, ["exchange", "colo_account", "strategy", "count_x"]], on=["exchange", "colo_account", "strategy"], how="outer")
re2 = re2.rename(columns = {'count_x_x': 'count', 'count_y': 'TradeByMsg','count_x_y': 'TradeBySs'})
re2["TradeBySsPerc(%)"] = re2["TradeBySs"]/re2["count"]
re2




# re1 = pd.merge(re1[re1["isMsg"] == 1], re1[re1["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_x").reset_index(drop=True)
# re2 = pd.merge(re2[re2["isMsg"] == 1], re2[re2["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_y").reset_index(drop=True)
re = pd.concat([re1, re2]).reset_index(drop=True)

savePath = r'L:\orderLog\result\internal latency'
re.to_csv(os.path.join(savePath, 'internal_latency_%s_%s.csv'%(startDate, endDate)), index=False)

for col in ['isMsg','median', '95 percentile']:
    re[col] = re[col].astype(int)
for col in ['std']:
    re[col] = re[col].apply(lambda x: '%.2f'%(x))
    
    
from IPython.display import display, HTML
HTML(re.groupby(["exchange", "colo_account", "isMsg"]).first().to_html())

display(HTML(re[re["exchange"] == "SH"].groupby(["exchange", "colo_account", "strategy", "isMsg"])["count", "median", "95 percentile", "std"].first().to_html()))
display(HTML(re[re["exchange"] == "SZ"].groupby(["exchange", "colo_account", "strategy", "isMsg"])["count", "median", "95 percentile", "std"].first().to_html()))
display(HTML(re[re["strategy"] == "statwo"].groupby(["exchange", "colo_account", "strategy", "isMsg"])["count", "median", "95 percentile", "std"].first().to_html()))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,median,95 percentile,std
exchange,colo_account,strategy,isMsg,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
SH,zs_5386,staone,0,21,44,77,14.39
SH,zs_5386,staone,1,8,23,43,10.96
SH,zs_5386,statwo,0,238,77,144,7.85
SH,zs_5386,statwo,1,119,56,129,11.0
SH,zs_6237,staone,0,3788,29,41,2.69
SH,zs_6237,staone,1,1151,14,25,3.82
SH,zs_6282,staone,0,224,53,118,43.44
SH,zs_6282,staone,1,54,28,69,105.97
SH,zs_6282,statwo,0,3352,113,185,8.69
SH,zs_6282,statwo,1,1083,99,213,37.75


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,median,95 percentile,std
exchange,colo_account,strategy,isMsg,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
SZ,zs_5222,staone,1,1978,27,52,14.14
SZ,zs_5222,statwo,1,18276,51,90,6.78
SZ,zs_5225,staone,1,5105,28,116,34.96
SZ,zs_5225,statwo,1,18607,49,92,8.87
SZ,zs_5226,staone,1,1630,28,75,40.6
SZ,zs_5226,statwo,1,14527,49,92,7.04
SZ,zs_5229,staone,1,173,33,103,55.29
SZ,zs_5229,statwo,1,1863,59,107,14.39
SZ,zs_5230,staone,1,558,29,85,28.1
SZ,zs_5230,statwo,1,5752,50,104,4.83


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,median,95 percentile,std
exchange,colo_account,strategy,isMsg,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
SH,zs_5386,statwo,0,238,77,144,7.85
SH,zs_5386,statwo,1,119,56,129,11.0
SH,zs_6282,statwo,0,3352,113,185,8.69
SH,zs_6282,statwo,1,1083,99,213,37.75
SH,zs_8967,statwo,0,1329,64,126,7.32
SH,zs_8967,statwo,1,696,55,122,8.98
SH,zs_9741,statwo,0,3874,104,179,7.2
SH,zs_9741,statwo,1,2174,99,202,19.64
SH,zs_9754,statwo,0,7702,115,194,8.17
SH,zs_9754,statwo,1,2535,106,280,72.14


In [273]:
checkLog = orderLog[(orderLog["updateType"] == 0) & (orderLog['colo'].isin(['zs_96_08']))]
checkLog = checkLog[checkLog['caamd'] != 0]
# checkLog['colo_account'] = checkLog['colo'] + '_' + checkLog['accCode'].astype(str)
checkLog['internal_latency'] = checkLog["clockAtArrival"] - checkLog["caamd"]
checkLog["strategy"] = np.where(checkLog["ars"].isin([121, 221, 321, 131, 231, 331]), "statwo", "staone")
SZE = checkLog[checkLog['secid'] >= 2000000]
SSE = checkLog[checkLog['secid'] < 2000000]
SZE["exchange"] = "SZ"
SSE["exchange"] = "SH"
c1 = SZE.groupby(["exchange", "colo_account", "strategy", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "strategy"])["internal_latency"].mean().reset_index()
c2 = SZE.groupby(["exchange", "colo_account", "strategy", "date"])["internal_latency"].median().reset_index().groupby(["exchange", "colo_account", "strategy"])["internal_latency"].mean().reset_index()
c3 = SZE.groupby(["exchange", "colo_account", "strategy"])["internal_latency"].count().reset_index()
c4 = SZE.groupby(["exchange", "colo_account", "strategy"])["date"].unique().str.len().reset_index()
c5 = SZE.groupby(["exchange", "colo_account", "strategy",  "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "strategy"])["internal_latency"].std().reset_index()

re1 = pd.merge(c3, c1, on=["exchange", "colo_account", "strategy"])
re1 = re1.rename(columns = {'internal_latency_x': 'count', 'internal_latency_y': '95 percentile'})
re1 = pd.merge(re1, c2, on=["exchange", "colo_account", "strategy"])
re1 = re1.rename(columns = {'internal_latency': 'median'})
re1 = pd.merge(re1, c4, on=["exchange", "colo_account", "strategy"])
re1 = pd.merge(re1, c5, on=["exchange", "colo_account", "strategy"])
re1 = re1.rename(columns = {'internal_latency': 'std'})


c1 = SSE.groupby(["exchange", "colo_account", "strategy", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "strategy"])["internal_latency"].mean().reset_index()
c2 = SSE.groupby(["exchange", "colo_account", "strategy", "date"])["internal_latency"].median().reset_index().groupby(["exchange", "colo_account", "strategy"])["internal_latency"].mean().reset_index()
c3 = SSE.groupby(["exchange", "colo_account", "strategy"])["internal_latency"].count().reset_index()
c4 = SSE.groupby(["exchange", "colo_account", "strategy"])["date"].unique().str.len().reset_index()
c5 = SSE.groupby(["exchange", "colo_account", "strategy",  "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "strategy"])["internal_latency"].std().reset_index()

re2 = pd.merge(c3, c1, on=["exchange", "colo_account", "strategy"])
re2 = re2.rename(columns = {'internal_latency_x': 'count', 'internal_latency_y': '95 percentile'})
re2 = pd.merge(re2, c2, on=["exchange", "colo_account", "strategy"])
re2 = re2.rename(columns = {'internal_latency': 'median'})
re2 = pd.merge(re2, c4, on=["exchange", "colo_account", "strategy"])
re2 = pd.merge(re2, c5, on=["exchange", "colo_account", "strategy"])
re2 = re2.rename(columns = {'internal_latency': 'std'})




# re1 = pd.merge(re1[re1["isMsg"] == 1], re1[re1["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_x").reset_index(drop=True)
# re2 = pd.merge(re2[re2["isMsg"] == 1], re2[re2["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_y").reset_index(drop=True)
re = pd.concat([re1, re2]).reset_index(drop=True)

savePath = r'L:\orderLog\result\internal latency'
re.to_csv(os.path.join(savePath, 'internal_latency_%s_%s.csv'%(startDate, endDate)), index=False)

for col in ['median', '95 percentile']:
    re[col] = re[col].astype(int)
for col in ['std']:
    re[col] = re[col].apply(lambda x: '%.2f'%(x))
    
    
from IPython.display import display, HTML

display(HTML(re[re["exchange"] == "SH"].groupby(["exchange", "colo_account", "strategy"])["count", "median", "95 percentile", "std"].first().to_html()))
display(HTML(re[re["exchange"] == "SZ"].groupby(["exchange", "colo_account", "strategy"])["count", "median", "95 percentile", "std"].first().to_html()))
display(HTML(re[re["strategy"] == "statwo"].groupby(["exchange", "colo_account", "strategy"])["count", "median", "95 percentile", "std"].first().to_html()))
re1 = pd.concat([re[(re['colo_account'] == 'zs_9756') & (re['strategy'] == 'staone')],
               re[(re['colo_account'] == 'zs_9758') & (re['strategy'] == 'staone')],
               re[(re['colo_account'] == 'zs_9765') & (re['strategy'] == 'statwo')],
               re[(re['colo_account'] == 'zs_975602') & (re['strategy'] == 'statwo')]])
display(HTML(re1.groupby(["exchange", "colo_account", "strategy"])["count", "median", "95 percentile", "std"].first().to_html()))
re2 = pd.concat([re[(re['colo_account'] == 'zs_9655') & (re['strategy'] == 'staone')],
               re[(re['colo_account'] == 'zs_9741') & (re['strategy'] == 'staone')]])
display(HTML(re2.groupby(["exchange", "colo_account", "strategy"])["count", "median", "95 percentile", "std"].first().to_html()))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,median,95 percentile,std
exchange,colo_account,strategy,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
SH,zs_6282,staone,35,84,179,
SH,zs_6282,statwo,298,139,354,
SH,zs_9741,staone,365,107,279,
SH,zs_9741,statwo,470,137,406,
SH,zs_9754,staone,74,64,173,
SH,zs_9754,statwo,714,143,474,


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,median,95 percentile,std
exchange,colo_account,strategy,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
SZ,zs_6282,staone,56,49,99,
SZ,zs_6282,statwo,695,85,183,
SZ,zs_9685,staone,40,52,142,
SZ,zs_9685,statwo,877,96,220,
SZ,zs_9741,staone,714,95,255,
SZ,zs_9741,statwo,995,95,244,
SZ,zs_9754,staone,115,47,140,
SZ,zs_9754,statwo,1274,87,177,


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,median,95 percentile,std
exchange,colo_account,strategy,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
SH,zs_6282,statwo,298,139,354,
SH,zs_9741,statwo,470,137,406,
SH,zs_9754,statwo,714,143,474,
SZ,zs_6282,statwo,695,85,183,
SZ,zs_9685,statwo,877,96,220,
SZ,zs_9741,statwo,995,95,244,
SZ,zs_9754,statwo,1274,87,177,


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,median,95 percentile,std
exchange,colo_account,strategy,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,median,95 percentile,std
exchange,colo_account,strategy,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
SH,zs_9741,staone,365,107,279,
SZ,zs_9741,staone,714,95,255,


In [8]:
re1 = pd.concat([re[(re['colo_account'] == 'zs_9655') & (re['strategy'] == 'staone')],
               re[(re['colo_account'] == 'zs_9741') & (re['strategy'] == 'staone')],
               re[(re['colo_account'] == 'zs_9756') & (re['strategy'] == 'staone')],
               re[(re['colo_account'] == 'zs_9758') & (re['strategy'] == 'staone')],
               re[(re['colo_account'] == 'zs_9765') & (re['strategy'] == 'statwo')],
               re[(re['colo_account'] == 'zs_975602') & (re['strategy'] == 'statwo')]])

Unnamed: 0,exchange,colo_account,strategy,count,95 percentile,median,date,std
0,SZ,zs_9655,staone,81,41,30,1,
1,SZ,zs_9741,staone,633,281,104,1,
2,SZ,zs_9741,statwo,774,275,116,1,
3,SZ,zs_9756,staone,614,238,92,1,
4,SZ,zs_9756,statwo,860,234,106,1,
5,SZ,zs_9758,staone,18,82,51,1,
6,SZ,zs_9758,statwo,123,185,98,1,
7,SZ,zs_9765,staone,73,150,47,1,
8,SZ,zs_9765,statwo,137,137,81,1,
9,SH,zs_9741,staone,296,237,118,1,


In [34]:
orderLog[(orderLog['updateType'] == 0) & (orderLog['accCode'].isin([527103, 527701, 528101, 537401]))]['ars'].unique()

array([ 11., 131., 231., 121., 331., 221., 321.])

In [274]:
checkLog = orderLog[(orderLog["updateType"] == 0) & (orderLog['colo'].isin(['zt_52_07']))]
checkLog = checkLog[checkLog['caamd'] != 0]
# checkLog['colo_account'] = checkLog['colo'] + '_' + checkLog['accCode'].astype(str)
checkLog['internal_latency'] = checkLog["clockAtArrival"] - checkLog["caamd"]
checkLog["strategy"] = np.where(checkLog["ars"].isin([121, 221, 321, 131, 231, 331]), "statwo", "staone")
SZE = checkLog[checkLog['secid'] >= 2000000]
SSE = checkLog[checkLog['secid'] < 2000000]
SZE["exchange"] = "SZ"
SSE["exchange"] = "SH"

c1 = SSE.groupby(["exchange", "colo_account", "strategy", "date", "ars"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "strategy", "ars"])["internal_latency"].mean().reset_index()
c2 = SSE.groupby(["exchange", "colo_account", "strategy", "date", "ars"])["internal_latency"].median().reset_index().groupby(["exchange", "colo_account", "strategy", "ars"])["internal_latency"].mean().reset_index()
c3 = SSE.groupby(["exchange", "colo_account", "strategy", "ars"])["internal_latency"].count().reset_index()
c4 = SSE.groupby(["exchange", "colo_account", "strategy", "ars"])["date"].unique().str.len().reset_index()
c5 = SSE.groupby(["exchange", "colo_account", "strategy",  "date", "ars"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "strategy", "ars"])["internal_latency"].std().reset_index()

re2 = pd.merge(c3, c1, on=["exchange", "colo_account", "strategy", "ars"])
re2 = re2.rename(columns = {'internal_latency_x': 'count', 'internal_latency_y': '95 percentile'})
re2 = pd.merge(re2, c2, on=["exchange", "colo_account", "strategy", "ars"])
re2 = re2.rename(columns = {'internal_latency': 'median'})
re2 = pd.merge(re2, c4, on=["exchange", "colo_account", "strategy", "ars"])
re2 = pd.merge(re2, c5, on=["exchange", "colo_account", "strategy", "ars"])
re2 = re2.rename(columns = {'internal_latency': 'std'})
re2




# re1 = pd.merge(re1[re1["isMsg"] == 1], re1[re1["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_x").reset_index(drop=True)
# re2 = pd.merge(re2[re2["isMsg"] == 1], re2[re2["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_y").reset_index(drop=True)
re = re2

for col in ['median', '95 percentile']:
    re[col] = re[col].astype(int)
for col in ['std']:
    re[col] = re[col].apply(lambda x: '%.2f'%(x))
    
    
from IPython.display import display, HTML

display(HTML(re[(re["strategy"] == "statwo") & (re['ars'].isin([131, 231, 331]))].groupby(["exchange", "colo_account", "strategy","ars"])["count", "median", "95 percentile", "std"].first().to_html()))


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,median,95 percentile,std
exchange,colo_account,strategy,ars,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
SH,zt_527103,statwo,131.0,185,100,187,
SH,zt_527103,statwo,231.0,28,139,225,
SH,zt_527103,statwo,331.0,9,190,395,
SH,zt_527701,statwo,131.0,706,101,231,
SH,zt_527701,statwo,231.0,158,133,338,
SH,zt_527701,statwo,331.0,49,152,329,
SH,zt_528101,statwo,131.0,500,103,251,
SH,zt_528101,statwo,231.0,107,135,354,
SH,zt_528101,statwo,331.0,39,172,462,
SH,zt_537401,statwo,131.0,83,101,222,


In [17]:
orderLog[(orderLog['updateType'] == 0) & (orderLog['accCode'].isin([965801, 966501, 968501]))].groupby(['accCode'])['ars'].unique()

accCode
965801                                        [11.0]
966501                                        [11.0]
968501    [131.0, 231.0, 221.0, 331.0, 121.0, 321.0]
Name: ars, dtype: object

In [52]:
checkLog = orderLog[(orderLog["updateType"] == 0) & (orderLog['accCode'].isin([6282]))]
checkLog = checkLog[checkLog['caamd'] != 0]
# checkLog['colo_account'] = checkLog['colo'] + '_' + checkLog['accCode'].astype(str)
checkLog['internal_latency'] = checkLog["clockAtArrival"] - checkLog["caamd"]
checkLog["strategy"] = np.where(checkLog["ars"].isin([121, 221, 321, 131, 231, 331]), "statwo", "staone")

In [57]:
checkLog[(checkLog['date'] == 20200817) & (checkLog['strategy'] == 'statwo') & (checkLog['secid'] >= 2000000)]['internal_latency'].describe([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]).astype('int64')

count    612
mean      81
std       46
min       54
10%       65
20%       69
30%       71
40%       73
50%       75
60%       78
70%       81
80%       85
90%       91
95%      101
99%      164
max      963
Name: internal_latency, dtype: int64

In [55]:
checkLog[(checkLog['date'] == 20200818) & (checkLog['strategy'] == 'statwo') & (checkLog['secid'] >= 2000000)]['internal_latency'].describe([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]).astype('int64')

count    381
mean      78
std       13
min       55
10%       67
20%       70
30%       72
40%       74
50%       76
60%       79
70%       81
80%       84
90%       92
95%      101
99%      133
max      163
Name: internal_latency, dtype: int64

In [56]:
checkLog[(checkLog['date'] == 20200819)  & (checkLog['strategy'] == 'statwo') & (checkLog['secid'] >= 2000000)]['internal_latency'].describe([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]).astype('int64')

count    444
mean      87
std       24
min       59
10%       70
20%       74
30%       77
40%       79
50%       81
60%       84
70%       87
80%       91
90%      108
95%      133
99%      194
max      260
Name: internal_latency, dtype: int64

In [282]:
checkLog = orderLog[(orderLog["updateType"] == 0) & (orderLog['colo'].isin(['zt_88_03']))]
checkLog = checkLog[checkLog['caamd'] != 0]
checkLog['internal_latency'] = checkLog["clockAtArrival"] - checkLog["caamd"]
checkLog["strategy"] = np.where(checkLog["ars"].isin([121, 221, 321, 131, 231, 331]), "statwo", "staone")
checkLog = checkLog[checkLog['strategy'] == 'statwo']
SZE = checkLog[checkLog['secid'] >= 2000000]
SSE = checkLog[checkLog['secid'] < 2000000]
SZE["exchange"] = "SZ"
SSE["exchange"] = "SH"

c1 = SSE.groupby(['colo', "exchange", "colo_account", "strategy", "date"])["internal_latency"].quantile(.95).reset_index().groupby(['colo', "exchange", "colo_account", "strategy"])["internal_latency"].mean().reset_index()
c2 = SSE.groupby(['colo', "exchange", "colo_account", "strategy", "date"])["internal_latency"].median().reset_index().groupby(['colo', "exchange", "colo_account", "strategy"])["internal_latency"].mean().reset_index()
c3 = SSE.groupby(['colo', "exchange", "colo_account", "strategy"])["internal_latency"].count().reset_index()
c4 = SSE.groupby(['colo', "exchange", "colo_account", "strategy"])["date"].unique().str.len().reset_index()
c5 = SSE.groupby(['colo', "exchange", "colo_account", "strategy",  "date"])["internal_latency"].quantile(.95).reset_index().groupby(['colo', "exchange", "colo_account", "strategy"])["internal_latency"].std().reset_index()

re2 = pd.merge(c3, c1, on=['colo', "exchange", "colo_account", "strategy"])
re2 = re2.rename(columns = {'internal_latency_x': 'count', 'internal_latency_y': '95 percentile'})
re2 = pd.merge(re2, c2, on=['colo', "exchange", "colo_account", "strategy"])
re2 = re2.rename(columns = {'internal_latency': 'median'})
re2 = pd.merge(re2, c4, on=['colo', "exchange", "colo_account", "strategy"])
re2 = pd.merge(re2, c5, on=['colo', "exchange", "colo_account", "strategy"])
re2 = re2.rename(columns = {'internal_latency': 'std'})
re2

c1 = SZE.groupby(['colo', "exchange", "colo_account", "strategy", "date"])["internal_latency"].quantile(.95).reset_index().groupby(['colo', "exchange", "colo_account", "strategy"])["internal_latency"].mean().reset_index()
c2 = SZE.groupby(['colo', "exchange", "colo_account", "strategy", "date"])["internal_latency"].median().reset_index().groupby(['colo', "exchange", "colo_account", "strategy"])["internal_latency"].mean().reset_index()
c3 = SZE.groupby(['colo', "exchange", "colo_account", "strategy"])["internal_latency"].count().reset_index()
c4 = SZE.groupby(['colo', "exchange", "colo_account", "strategy"])["date"].unique().str.len().reset_index()
c5 = SZE.groupby(['colo', "exchange", "colo_account", "strategy",  "date"])["internal_latency"].quantile(.95).reset_index().groupby(['colo', "exchange", "colo_account", "strategy"])["internal_latency"].std().reset_index()

re1 = pd.merge(c3, c1, on=['colo', "exchange", "colo_account", "strategy"])
re1 = re1.rename(columns = {'internal_latency_x': 'count', 'internal_latency_y': '95 percentile'})
re1 = pd.merge(re1, c2, on=['colo', "exchange", "colo_account", "strategy"])
re1 = re1.rename(columns = {'internal_latency': 'median'})
re1 = pd.merge(re1, c4, on=['colo', "exchange", "colo_account", "strategy"])
re1 = pd.merge(re1, c5, on=['colo', "exchange", "colo_account", "strategy"])
re1 = re1.rename(columns = {'internal_latency': 'std'})
re1


# re1 = pd.merge(re1[re1["isMsg"] == 1], re1[re1["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_x").reset_index(drop=True)
# re2 = pd.merge(re2[re2["isMsg"] == 1], re2[re2["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_y").reset_index(drop=True)
re = pd.concat([re1, re2]).reset_index(drop=True)


for col in ['median', '95 percentile']:
    re[col] = re[col].astype(int)
for col in ['std']:
    re[col] = re[col].apply(lambda x: '%.2f'%(x))
    
    
from IPython.display import display, HTML

display(HTML(re[(re['strategy'] == 'statwo')].groupby(['colo', "exchange", "colo_account", "strategy"])["count", "median", "95 percentile", "std"].first().to_html()))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,median,95 percentile,std
colo,exchange,colo_account,strategy,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
zt_88_03,SH,zt_8943,statwo,618,86,170,
zt_88_03,SH,zt_8970,statwo,576,94,226,
zt_88_03,SH,zt_897002,statwo,368,97,213,
zt_88_03,SH,zt_8971,statwo,790,98,326,
zt_88_03,SZ,zt_8970,statwo,577,59,125,
zt_88_03,SZ,zt_8971,statwo,409,58,133,


In [284]:
checkLog = orderLog[(orderLog["updateType"] == 0) & (orderLog['colo'].isin(["zs_88_04"]))]
checkLog = checkLog[checkLog['caamd'] != 0]
checkLog['internal_latency'] = checkLog["clockAtArrival"] - checkLog["caamd"]
checkLog["strategy"] = np.where(checkLog["ars"].isin([121, 221, 321, 131, 231, 331]), "statwo", "staone")
# checkLog = checkLog[checkLog['strategy'] == 'statwo']
SZE = checkLog[checkLog['secid'] >= 2000000]
SSE = checkLog[checkLog['secid'] < 2000000]
SZE["exchange"] = "SZ"
SSE["exchange"] = "SH"

c1 = SZE.groupby(['colo', "exchange", "colo_account", "strategy", "date"])["internal_latency"].quantile(.95).reset_index().groupby(['colo', "exchange", "colo_account", "strategy"])["internal_latency"].mean().reset_index()
c2 = SZE.groupby(['colo', "exchange", "colo_account", "strategy", "date"])["internal_latency"].median().reset_index().groupby(['colo', "exchange", "colo_account", "strategy"])["internal_latency"].mean().reset_index()
c3 = SZE.groupby(['colo', "exchange", "colo_account", "strategy"])["internal_latency"].count().reset_index()
c4 = SZE.groupby(['colo', "exchange", "colo_account", "strategy"])["date"].unique().str.len().reset_index()
c5 = SZE.groupby(['colo', "exchange", "colo_account", "strategy",  "date"])["internal_latency"].quantile(.95).reset_index().groupby(['colo', "exchange", "colo_account", "strategy"])["internal_latency"].std().reset_index()

re2 = pd.merge(c3, c1, on=['colo', "exchange", "colo_account", "strategy"])
re2 = re2.rename(columns = {'internal_latency_x': 'count', 'internal_latency_y': '95 percentile'})
re2 = pd.merge(re2, c2, on=['colo', "exchange", "colo_account", "strategy"])
re2 = re2.rename(columns = {'internal_latency': 'median'})
re2 = pd.merge(re2, c4, on=['colo', "exchange", "colo_account", "strategy"])
re2 = pd.merge(re2, c5, on=['colo', "exchange", "colo_account", "strategy"])
re2 = re2.rename(columns = {'internal_latency': 'std'})
re2


# re1 = pd.merge(re1[re1["isMsg"] == 1], re1[re1["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_x").reset_index(drop=True)
# re2 = pd.merge(re2[re2["isMsg"] == 1], re2[re2["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_y").reset_index(drop=True)
re = re2


for col in ['median', '95 percentile']:
    re[col] = re[col].astype(int)
for col in ['std']:
    re[col] = re[col].apply(lambda x: '%.2f'%(x))
    
    
from IPython.display import display, HTML

display(HTML(re[(re['strategy'] == 'statwo')].groupby(['colo', "exchange", "colo_account", "strategy"])["count", "median", "95 percentile", "std"].first().to_html()))


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,median,95 percentile,std
colo,exchange,colo_account,strategy,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
zs_88_04,SZ,zs_8865,statwo,481,51,114,
zs_88_04,SZ,zs_8967,statwo,387,49,114,
zs_88_04,SZ,zs_896702,statwo,436,49,113,


In [71]:
re

Unnamed: 0,colo,exchange,colo_account,strategy,count,95 percentile,median,date,std
0,zt_52_05,SH,zt_527501,staone,27,185,57,3,152.69
1,zt_52_05,SH,zt_527501,statwo,725,257,133,3,30.05
2,zt_52_05,SH,zt_527601,staone,244,370,45,3,271.41
3,zt_52_05,SH,zt_527601,statwo,2061,281,131,3,15.3
4,zt_52_05,SH,zt_528701,staone,219,121,45,1,
5,zt_52_05,SH,zt_537403,staone,943,99,41,3,8.98


In [49]:
orderLog[(orderLog["updateType"] == 0) & (orderLog['colo'].isin(['zs_96_08', 'zs_96_02']))]['colo'].unique()

array(['zs_96_02', 'zs_96_08'], dtype=object)

In [51]:
SSE.groupby(["exchange", "colo_account", "strategy", "date", "ars"])["internal_latency"].median()

exchange  colo_account  strategy  date      ars  
SH        zt_965801     staone    20200818  11.0     225.0
                        statwo    20200818  131.0    144.0
                                            231.0    500.0
                                            331.0    949.0
          zt_966501     staone    20200818  11.0      77.0
                        statwo    20200818  131.0    129.0
                                            231.0    335.5
                                            331.0    800.0
          zt_968501     staone    20200818  11.0      97.0
                        statwo    20200818  131.0    132.0
                                            231.0    318.0
                                            331.0    816.0
Name: internal_latency, dtype: float64

In [48]:
pp = pd.read_csv(r'\\mentos\dailyRawData\logs_20200818_zt_88_03_day_pcap\mdL2Pcap_SH_20200818_0900.csv')

In [8]:
orderLog[(orderLog['caamd'] == 0) & (orderLog['updateType'] == 0)].groupby(['exchange', 'colo', 'accCode'])['secid'].count()

exchange  colo      accCode
SSE       zs_64_01  6479       1
SZE       zs_52_02  5276       1
          zs_64_01  6479       1
          zs_66_01  6678       1
          zt_96_01  966301     1
Name: secid, dtype: int64

In [75]:
SSE[(SSE['colo_account'] == 'zt_8971') & (SSE['strategy'] == 'staone') \
        & (SSE['isMsg'] == 0) & (SSE['date'] == 20200707) & (SSE['internal_latency'] > 200 )]

Unnamed: 0.1,Unnamed: 0,clockAtArrival,caamd,secid,updateType,vai,ars,absFilledThisUpdate,orderDirection,absOrderSize,absOrderSizeCumFilled,orderPrice,tradePrice,date,accCode,mse,colo,orderSysId,tradeId,sdd,aaa,ApplSeqNum,clock,broker,colo_broker,colo_account,order,group,startClock,duration,orderDirection1,orderNtl,directNum,isMsg,status,exchange,tradeNtl,internal_latency,strategy,high,time
98583,94749,1594085493339745,1594085493339205,1600061,0,3205000,11.0,0,-1,900,0,15.03,-1.0,20200707,8971,23,zt_88_03,,,93059000.0,0.000429,0.0,2020-07-07 09:31:33.339745,89,zt_89,zt_8971,29031,14385,1594085493339745,0,-1,13527.0,1,0.0,0,SH,0.0,540,staone,1,93133
104031,94572,1594085433777540,1594085433776715,1600325,0,411192,11.0,0,2,3400,0,8.21,-1.0,20200707,8971,23,zt_88_03,,,93000000.0,0.000504,0.0,2020-07-07 09:30:33.777540,89,zt_89,zt_8971,29074,15054,1594085433777540,0,1,27914.0,1,0.0,2,SH,0.0,825,staone,1,93033
104876,94576,1594085434005793,1594085434005048,1600350,0,57783,11.0,0,2,900,0,6.62,-1.0,20200707,8971,23,zt_88_03,,,93000000.0,0.001623,0.0,2020-07-07 09:30:34.005793,89,zt_89,zt_8971,29091,15181,1594085434005793,0,1,5958.0,1,0.0,2,SH,0.0,745,staone,1,93034
122169,94604,1594085439633380,1594085439633114,1601066,0,4321945,11.0,0,-1,15900,0,52.28,-1.0,20200707,8971,23,zt_88_03,,,93006000.0,0.0017,0.0,2020-07-07 09:30:39.633380,89,zt_89,zt_8971,29255,17301,1594085439633380,0,-1,831252.0,1,0.0,0,SH,0.0,266,staone,1,93039


In [42]:
SSE[SSE['colo_account'] == 'zt_965801'].groupby(["exchange", "colo_account", "strategy", "isMsg", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "strategy", "isMsg"])["internal_latency"].std().reset_index()

Unnamed: 0,exchange,colo_account,strategy,isMsg,internal_latency
0,SH,zt_965801,staone,0.0,144.176411
1,SH,zt_965801,staone,1.0,23.905099


In [67]:
SSE[SSE['colo_account'] == 'zt_8971'].groupby(["exchange", "colo_account", "strategy", "isMsg", "date"])["internal_latency"].quantile(.95)

exchange  colo_account  strategy  isMsg  date    
SH        zt_8971       staone    0.0    20200706    199.80
                                         20200707    652.75
                                         20200708    322.10
                                         20200709    209.15
                                         20200713    261.80
                                         20200714    368.00
                                         20200715    456.60
                                         20200716    332.15
                                         20200717    363.10
                                  1.0    20200707     58.60
                                         20200709     63.10
                                         20200713     88.10
                                         20200716    423.00
                        statwo    0.0    20200706    299.45
                                         20200707    351.00
                                         20200708 

In [20]:
orderLog[orderLog['colo_account'] == 'zs_6479']

Unnamed: 0.1,Unnamed: 0,clockAtArrival,caamd,secid,updateType,vai,ars,absFilledThisUpdate,orderDirection,absOrderSize,absOrderSizeCumFilled,orderPrice,tradePrice,date,accCode,mse,colo,orderSysId,tradeId,sdd,aaa,ApplSeqNum,clock,broker,colo_broker,colo_account,order,group,startClock,duration,orderDirection1,orderNtl,directNum,isMsg,status,exchange,tradeNtl
41597,104908,1594003811910933,1594003811910914,2000009,0,65384355,11.0,0,1,1600,0,9.01,-1.00,20200706,6479,100,zs_64_01,,,105012660.0,0.007790,14192598.0,2020-07-06 10:50:11.910933,64,zs_64,zs_6479,5251,5935,1594003811910933,0,1,14416.0,1,1.0,0,SZE,0.0
41598,104909,1594003811915527,1594003811910914,2000009,2,65384355,-1.0,0,1,1600,0,9.01,-1.00,20200706,6479,0,zs_64_01,B2B2DQ7Y,,-1.0,-1.000000,0.0,2020-07-06 10:50:11.915527,64,zs_64,zs_6479,5251,5935,1594003811910933,4594,1,14416.0,1,1.0,0,SZE,0.0
41599,104910,1594003811915791,1594003811915704,2000009,4,65384355,-1.0,1600,1,1600,1600,9.01,9.01,20200706,6479,0,zs_64_01,,1.04e+14,-1.0,-1.000000,0.0,2020-07-06 10:50:11.915791,64,zs_64,zs_6479,5251,5935,1594003811910933,4858,1,14416.0,1,1.0,0,SZE,14416.0
41617,104925,1594004620347128,1594004620347111,2000009,0,73073579,11.0,0,1,1100,0,8.80,-1.00,20200706,6479,100,zs_64_01,,,110341100.0,0.011676,15366786.0,2020-07-06 11:03:40.347128,64,zs_64,zs_6479,5252,5937,1594004620347128,0,1,9680.0,1,1.0,2,SZE,0.0
41618,104926,1594004620350882,1594004620350576,2000009,2,73073579,-1.0,0,1,1100,0,8.80,-1.00,20200706,6479,0,zs_64_01,B2B2DQ84,,-1.0,-1.000000,0.0,2020-07-06 11:03:40.350882,64,zs_64,zs_6479,5252,5937,1594004620347128,3754,1,9680.0,1,1.0,2,SZE,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
889261,86278,1594949401560712,1594949401560695,2300482,1,10700,2.0,0,-1,100,0,98.77,-1.00,20200717,6479,100,zs_64_01,B2B2DPU1,,-1.0,-1.000000,0.0,2020-07-17 09:30:01.560712,64,zs_64,zs_6479,178484,134844,1594949400146156,1414556,-1,9877.0,1,1.0,2,SZE,0.0
889262,86279,1594949401565727,1594949401560695,2300482,3,10700,-1.0,0,-1,100,0,98.77,-1.00,20200717,6479,0,zs_64_01,,,-1.0,-1.000000,0.0,2020-07-17 09:30:01.565727,64,zs_64,zs_6479,178484,134844,1594949400146156,1419571,-1,9877.0,1,1.0,2,SZE,0.0
889297,86439,1594949422382370,1594949422382304,2300482,0,34800,11.0,0,-1,100,0,98.41,-1.00,20200717,6479,100,zs_64_01,,,93023030.0,-0.000088,839283.0,2020-07-17 09:30:22.382370,64,zs_64,zs_6479,178485,134847,1594949422382370,0,-1,9841.0,1,1.0,0,SZE,0.0
889298,86440,1594949422388805,1594949422388423,2300482,2,34800,-1.0,0,-1,100,0,98.41,-1.00,20200717,6479,0,zs_64_01,B2B2DPW1,,-1.0,-1.000000,0.0,2020-07-17 09:30:22.388805,64,zs_64,zs_6479,178485,134847,1594949422382370,6435,-1,9841.0,1,1.0,0,SZE,0.0


In [78]:
SSE['high'] = np.where((SSE["strategy"] == 'statwo') & (SSE['isMsg'] == 0) & (SSE['internal_latency'] > 400), 1, np.where(
    (SSE['isMsg'] == 1) & (SSE['internal_latency'] > 400), 1, 0))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [79]:
SSE["time"] = SSE["clock"].apply(lambda x: x.strftime("%H%M%S")).astype(int)
SSE[SSE['colo_account'] == 'zt_8971'].groupby(['strategy', 'isMsg', 'date', 'high'])['time'].describe()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,mean,std,min,25%,50%,75%,max
strategy,isMsg,date,high,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
staone,0.0,20200706,0,89.0,93092.157303,38.235484,93033.0,93047.0,93111.0,93124.0,93133.0
staone,0.0,20200707,0,30.0,93083.133333,42.024405,93033.0,93039.75,93082.5,93126.25,93133.0
staone,0.0,20200708,0,82.0,93091.585366,35.031891,93034.0,93055.0,93105.0,93119.75,93135.0
staone,0.0,20200709,0,48.0,93096.4375,38.703953,93035.0,93047.75,93115.5,93128.0,93135.0
staone,0.0,20200713,0,109.0,93097.119266,35.807025,93038.0,93052.0,93112.0,93126.0,93138.0
staone,0.0,20200714,0,49.0,93097.795918,37.733705,93039.0,93048.0,93116.0,93126.0,93140.0
staone,0.0,20200715,0,43.0,93086.697674,39.790245,93040.0,93046.0,93103.0,93126.5,93140.0
staone,0.0,20200716,0,50.0,93095.02,37.821002,93042.0,93052.0,93108.0,93128.0,93141.0
staone,0.0,20200717,0,92.0,93032.032609,24.410163,93000.0,93014.75,93029.0,93045.0,93100.0
staone,1.0,20200707,0,3.0,93034.333333,0.57735,93034.0,93034.0,93034.0,93034.5,93035.0


In [29]:
SSE[(SSE['colo_account'] == 'zt_8971') & (SSE['date']==20200701) & (SSE['strategy'] == 'statwo') & (SSE['isMsg'] == 1) & (SSE['high']==1)].shape[0]

26

In [9]:
rawOrderLog[(rawOrderLog["updateType"] == 0) & (rawOrderLog["caamd"] == 0)]

Unnamed: 0.1,Unnamed: 0,clockAtArrival,caamd,secid,updateType,vai,ars,absFilledThisUpdate,orderDirection,absOrderSize,absOrderSizeCumFilled,orderPrice,tradePrice,date,accCode,mse,colo,orderSysId,tradeId,sdd,aaa,ApplSeqNum,clock,broker,colo_broker,colo_account,order,group,startClock,duration,orderDirection1
844444,88205,1592547846990715,0,1600390,0,-1,-1,0,-1,100,0,7.0,-1.0,20200619,537401,0,zt_52_03,,,-1,-1.0,0,2020-06-19 14:24:06.990715,5374,zt_5374,zt_537401,201211,129577,1592547846990715,0,-1
856813,88208,1592547867126198,0,1601155,0,-1,-1,0,-1,100,0,32.4,-1.0,20200619,537401,0,zt_52_03,,,-1,-1.0,0,2020-06-19 14:24:27.126198,5374,zt_5374,zt_537401,201252,131299,1592547867126198,0,-1


In [10]:
rawOrderLog[rawOrderLog["order"] == 201211]

Unnamed: 0.1,Unnamed: 0,clockAtArrival,caamd,secid,updateType,vai,ars,absFilledThisUpdate,orderDirection,absOrderSize,absOrderSizeCumFilled,orderPrice,tradePrice,date,accCode,mse,colo,orderSysId,tradeId,sdd,aaa,ApplSeqNum,clock,broker,colo_broker,colo_account,order,group,startClock,duration,orderDirection1
844444,88205,1592547846990715,0,1600390,0,-1,-1,0,-1,100,0,7.0,-1.0,20200619,537401,0,zt_52_03,,,-1,-1.0,0,2020-06-19 14:24:06.990715,5374,zt_5374,zt_537401,201211,129577,1592547846990715,0,-1
844445,88206,1592547847211509,0,1600390,2,-1,-1,0,-1,100,0,7.0,-1.0,20200619,537401,0,zt_52_03,148203.0,,-1,-1.0,0,2020-06-19 14:24:07.211509,5374,zt_5374,zt_537401,201211,129577,1592547846990715,220794,-1
844446,88207,1592547847258990,0,1600390,4,-1,-1,100,-1,100,100,7.0,7.09,20200619,537401,0,zt_52_03,,15289200.0,-1,-1.0,0,2020-06-19 14:24:07.258990,5374,zt_5374,zt_537401,201211,129577,1592547846990715,268275,-1


In [26]:
orderLog["internal_latency"] = orderLog["clockAtArrival"] - orderLog["caamd"]
orderLog[(orderLog["updateType"] == 0) & (orderLog["accCode"] == 8854) & (orderLog["colo"] == "zt_88_02") & (\
~orderLog["ars"].isin([121, 221, 321, 131, 231, 331])) & (orderLog["isMsg"] == 0) & (orderLog["exchange"] == 'SSE')
       & (orderLog["internal_latency"] > 10000) & (orderLog["date"] == 20200616)]

Unnamed: 0.1,Unnamed: 0,clockAtArrival,caamd,secid,updateType,vai,ars,absFilledThisUpdate,orderDirection,absOrderSize,absOrderSizeCumFilled,orderPrice,tradePrice,date,accCode,mse,colo,orderSysId,tradeId,sdd,aaa,ApplSeqNum,clock,broker,colo_broker,colo_account,order,group,startClock,duration,orderDirection1,orderNtl,directNum,isMsg,status,exchange,tradeNtl,internal_latency
557654,25440,1592271011692115,1592271011479200,1600131,0,2475200,11,0,-1,100,0,21.9,-1.0,20200616,8854,22,zt_88_02,,,93015000,0.001885,0,2020-06-16 09:30:11.692115,88,zt_88,zt_8854,127688,86284,1592271011692115,0,-1,2190.0,1,0.0,0,SSE,0.0,212915
572615,25158,1592270996339456,1592270996304895,1600850,0,12300,11,0,-1,300,0,23.15,-1.0,20200616,8854,23,zt_88_02,,,93000000,0.0027,0,2020-06-16 09:29:56.339456,88,zt_88,zt_8854,127740,88102,1592270996339456,0,-1,6945.0,1,0.0,2,SSE,0.0,34561


In [19]:
orderLog[(orderLog["vai"]==-1) & (orderLog["updateType"] == 0)]

Unnamed: 0.1,Unnamed: 0,clockAtArrival,caamd,secid,updateType,vai,ars,absFilledThisUpdate,orderDirection,absOrderSize,absOrderSizeCumFilled,orderPrice,tradePrice,date,accCode,mse,colo,orderSysId,tradeId,sdd,aaa,ApplSeqNum,clock,broker,colo_broker,colo_account,order,group,startClock,duration,orderDirection1,orderNtl,directNum,isMsg,status,exchange,tradeNtl,internal_latency
231672,34077,1591856970875129,1591856970865859,1600201,0,-1,-1,0,-1,1700,0,24.63,-1.0,20200611,6237,0,zs_94_04,,,-1,-1.0,0,2020-06-11 14:29:30.875129,62,zs_62,zs_6237,54330,35596,1591856970875129,0,-1,41871.0,1,0.0,0,SSE,0.0,9270
837307,88149,1592547355845014,1592547354651770,1600132,0,-1,-1,0,-1,300,0,60.9,-1.0,20200619,537401,0,zt_52_03,,,-1,-1.0,0,2020-06-19 14:15:55.845014,5374,zt_5374,zt_537401,201200,128878,1592547355845014,0,-1,18270.0,1,0.0,0,SSE,0.0,1193244
839819,88152,1592547401881241,1592547398935806,1600259,0,-1,-1,0,-1,100,0,30.5,-1.0,20200619,537401,0,zt_52_03,,,-1,-1.0,0,2020-06-19 14:16:41.881241,5374,zt_5374,zt_537401,201205,129202,1592547401881241,0,-1,3050.0,1,0.0,0,SSE,0.0,2945435
842288,88205,1592547846990715,0,1600390,0,-1,-1,0,-1,100,0,7.0,-1.0,20200619,537401,0,zt_52_03,,,-1,-1.0,0,2020-06-19 14:24:06.990715,5374,zt_5374,zt_537401,201211,129577,1592547846990715,0,-1,700.0,1,0.0,0,SSE,0.0,1592547846990715
843698,88155,1592547433220101,1592547432363779,1600466,0,-1,-1,0,-1,1200,0,5.35,-1.0,20200619,537401,0,zt_52_03,,,-1,-1.0,0,2020-06-19 14:17:13.220101,5374,zt_5374,zt_537401,201216,129775,1592547433220101,0,-1,6420.0,1,0.0,0,SSE,0.0,856322
845913,88158,1592547457523575,1592547453650804,1600545,0,-1,-1,0,-1,100,0,4.75,-1.0,20200619,537401,0,zt_52_03,,,-1,-1.0,0,2020-06-19 14:17:37.523575,5374,zt_5374,zt_537401,201220,130024,1592547457523575,0,-1,475.0,1,0.0,0,SSE,0.0,3872771
846333,88161,1592547488859846,1592547486654554,1600575,0,-1,-1,0,-1,1000,0,2.14,-1.0,20200619,537401,0,zt_52_03,,,-1,-1.0,0,2020-06-19 14:18:08.859846,5374,zt_5374,zt_537401,201221,130090,1592547488859846,0,-1,2140.0,1,0.0,0,SSE,0.0,2205292
847402,88164,1592547517266971,1592547515260856,1600639,0,-1,-1,0,-1,400,0,15.35,-1.0,20200619,537401,0,zt_52_03,,,-1,-1.0,0,2020-06-19 14:18:37.266971,5374,zt_5374,zt_537401,201222,130232,1592547517266971,0,-1,6140.0,1,0.0,0,SSE,0.0,2006115
849903,88169,1592547544907324,1592547542747565,1600779,0,-1,-1,0,-1,400,0,56.0,-1.0,20200619,537401,0,zt_52_03,,,-1,-1.0,0,2020-06-19 14:19:04.907324,5374,zt_5374,zt_537401,201225,130597,1592547544907324,0,-1,22400.0,1,0.0,0,SSE,0.0,2159759
850238,88172,1592547577178722,1592547576673082,1600811,0,-1,-1,0,-1,2960,0,4.3,-1.0,20200619,537401,0,zt_52_03,,,-1,-1.0,0,2020-06-19 14:19:37.178722,5374,zt_5374,zt_537401,201230,130652,1592547577178722,0,-1,12728.0,1,0.0,0,SSE,0.0,505640


In [32]:
orderLog[orderLog["order"] == 201252]

Unnamed: 0.1,Unnamed: 0,clockAtArrival,caamd,secid,updateType,vai,ars,absFilledThisUpdate,orderDirection,absOrderSize,absOrderSizeCumFilled,orderPrice,tradePrice,date,accCode,mse,colo,orderSysId,tradeId,sdd,aaa,ApplSeqNum,clock,broker,colo_broker,colo_account,order,group,startClock,duration,orderDirection1,orderNtl,directNum,isMsg,status,exchange,tradeNtl,internal_latency
854637,88208,1592547867126198,0,1601155,0,-1,-1,0,-1,100,0,32.4,-1.0,20200619,537401,0,zt_52_03,,,-1,-1.0,0,2020-06-19 14:24:27.126198,5374,zt_5374,zt_537401,201252,131299,1592547867126198,0,-1,3240.0,1,0.0,0,SSE,0.0,1592547867126198
854638,88209,1592547867340731,0,1601155,2,-1,-1,0,-1,100,0,32.4,-1.0,20200619,537401,0,zt_52_03,148339.0,,-1,-1.0,0,2020-06-19 14:24:27.340731,5374,zt_5374,zt_537401,201252,131299,1592547867126198,214533,-1,3240.0,1,0.0,0,SSE,0.0,1592547867340731
854639,88210,1592547867450979,0,1601155,4,-1,-1,100,-1,100,100,32.4,32.44,20200619,537401,0,zt_52_03,,15312300.0,-1,-1.0,0,2020-06-19 14:24:27.450979,5374,zt_5374,zt_537401,201252,131299,1592547867126198,324781,-1,3240.0,1,0.0,0,SSE,3244.0,1592547867450979


In [36]:
checkLog = orderLog[orderLog["updateType"] == 0]
checkLog = checkLog[checkLog['caamd'] != 0]
checkLog['internal_latency'] = checkLog["clockAtArrival"] - checkLog["caamd"]
checkLog["strategy"] = np.where(checkLog["ars"].isin([121, 221, 321, 131, 231, 331]), "statwo", "staone")
SZE = checkLog[checkLog['secid'] >= 2000000]
SSE = checkLog[checkLog['secid'] < 2000000]
SZE["exchange"] = "SZ"
SSE["exchange"] = "SH"
c1 = SZE.groupby(["exchange", "colo", "accCode", "strategy", "isMsg", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo", "accCode", "strategy", "isMsg"])["internal_latency"].mean().reset_index()
c2 = SZE.groupby(["exchange", "colo", "accCode", "strategy", "isMsg", "date"])["internal_latency"].median().reset_index().groupby(["exchange", "colo", "accCode", "strategy", "isMsg"])["internal_latency"].mean().reset_index()
c3 = SZE.groupby(["exchange", "colo", "accCode", "strategy", "isMsg"])["internal_latency"].count().reset_index()
c4 = SZE.groupby(["exchange", "colo", "accCode", "strategy", "isMsg"])["date"].unique().str.len().reset_index()
c5 = SZE.groupby(["exchange", "colo", "accCode", "strategy", "isMsg", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo", "accCode", "strategy", "isMsg"])["internal_latency"].std().reset_index()

re1 = pd.merge(c3, c1, on=["exchange", "colo", "accCode", "strategy", "isMsg"])
re1 = re1.rename(columns = {'internal_latency_x': 'count', 'internal_latency_y': '95 percentile'})
re1 = pd.merge(re1, c2, on=["exchange", "colo", "accCode", "strategy", "isMsg"])
re1 = re1.rename(columns = {'internal_latency': 'median'})
re1 = pd.merge(re1, c4, on=["exchange", "colo", "accCode", "strategy", "isMsg"])
re1 = pd.merge(re1, c5, on=["exchange", "colo", "accCode", "strategy", "isMsg"])
re1 = re1.rename(columns = {'internal_latency': 'std'})
re1 = re1[re1["isMsg"] == 1]
re1

c1 = SSE.groupby(["exchange", "colo", "accCode", "strategy", "isMsg", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo", "accCode", "strategy", "isMsg"])["internal_latency"].mean().reset_index()
c2 = SSE.groupby(["exchange", "colo", "accCode", "strategy", "isMsg", "date"])["internal_latency"].median().reset_index().groupby(["exchange", "colo", "accCode", "strategy", "isMsg"])["internal_latency"].mean().reset_index()
c3 = SSE.groupby(["exchange", "colo", "accCode", "strategy", "isMsg"])["internal_latency"].count().reset_index()
c4 = SSE.groupby(["exchange", "colo", "accCode", "strategy", "isMsg"])["date"].unique().str.len().reset_index()
c5 = SSE.groupby(["exchange", "colo", "accCode", "strategy", "isMsg", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo", "accCode", "strategy", "isMsg"])["internal_latency"].std().reset_index()

re2 = pd.merge(c3, c1, on=["exchange", "colo", "accCode", "strategy", "isMsg"])
re2 = re2.rename(columns = {'internal_latency_x': 'count', 'internal_latency_y': '95 percentile'})
re2 = pd.merge(re2, c2, on=["exchange", "colo", "accCode", "strategy", "isMsg"])
re2 = re2.rename(columns = {'internal_latency': 'median'})
re2 = pd.merge(re2, c4, on=["exchange", "colo", "accCode", "strategy", "isMsg"])
re2 = pd.merge(re2, c5, on=["exchange", "colo", "accCode", "strategy", "isMsg"])
re2 = re2.rename(columns = {'internal_latency': 'std'})
re2




# re1 = pd.merge(re1[re1["isMsg"] == 1], re1[re1["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_x").reset_index(drop=True)
# re2 = pd.merge(re2[re2["isMsg"] == 1], re2[re2["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_y").reset_index(drop=True)
re = pd.concat([re1, re2]).reset_index(drop=True)

for col in ['isMsg','median', '95 percentile']:
    re[col] = re[col].astype(int)
for col in ['std']:
    re[col] = re[col].apply(lambda x: '%.2f'%(x))
    
re = re.rename(columns={"colo": "server", "accCode": "account"}) 

from IPython.display import display, HTML
# HTML(re.groupby(["exchange", "colo_account", "isMsg"]).first().to_html())

display(HTML(re[re["exchange"] == "SH"].groupby(["exchange", "server", "account", "strategy", "isMsg"])["count", "median", "95 percentile", "std"].first().to_html()))
display(HTML(re[re["exchange"] == "SZ"].groupby(["exchange", "server", "account", "strategy", "isMsg"])["count", "median", "95 percentile", "std"].first().to_html()))
# display(HTML(re.groupby(["exchange", "server", "account", "strategy", "isMsg"])["count", "median", "95 percentile", "std"].first().to_html()))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count,median,95 percentile,std
exchange,server,account,strategy,isMsg,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SH,zs_52_08,5386,staone,0,21,44,77,14.39
SH,zs_52_08,5386,staone,1,8,23,43,10.96
SH,zs_52_08,5386,statwo,0,238,77,144,7.85
SH,zs_52_08,5386,statwo,1,119,56,129,11.0
SH,zs_88_04,8967,staone,0,151,38,627,1805.07
SH,zs_88_04,8967,staone,1,62,23,33,12.51
SH,zs_88_04,8967,statwo,0,1329,64,126,7.32
SH,zs_88_04,8967,statwo,1,696,55,122,8.98
SH,zs_94_04,6237,staone,0,3788,29,41,2.69
SH,zs_94_04,6237,staone,1,1151,14,25,3.82


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,count,median,95 percentile,std
exchange,server,account,strategy,isMsg,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
SZ,zs_52_06,5222,staone,1,1978,27,52,14.14
SZ,zs_52_06,5222,statwo,1,18276,51,90,6.78
SZ,zs_52_06,5269,staone,1,756,27,53,11.34
SZ,zs_52_06,5269,statwo,1,7157,51,90,5.86
SZ,zs_52_06,5273,staone,1,7057,27,68,15.4
SZ,zs_52_06,5273,statwo,1,39224,50,85,2.9
SZ,zs_52_06,5275,staone,1,4187,28,66,11.96
SZ,zs_52_06,5275,statwo,1,25032,50,82,3.05
SZ,zs_52_06,5287,staone,1,1381,28,46,3.92
SZ,zs_52_06,5287,statwo,1,23711,50,81,2.78


In [51]:
checkLog.head()

Unnamed: 0.1,Unnamed: 0,clockAtArrival,caamd,secid,updateType,vai,ars,absFilledThisUpdate,orderDirection,absOrderSize,absOrderSizeCumFilled,orderPrice,tradePrice,date,accCode,mse,colo,orderSysId,internalId,tradeId,sdd,aaa,ApplSeqNum,mrm,mta,mrsb,mrss,mrv,mrb100,mra100,l4tr,clock,broker,colo_broker,order,group,startClock,duration,orderDirection1,directNum,isMsg,status,orderNtl,exchange,tradeNtl,sta,internal_latency,strategy
0,0,1600066546254426,1600066546253763,1600004,0,28490694,131.0,0,-1,1400,0,14.29,-1.0,20200914,966701,13,zt_96_09,,609.0,,145546000.0,0.000569,0.0,-0.003703,-0.003703,-0.001268,0.000569,28490694.0,1429.0,1430.0,0.0,2020-09-14 14:55:46.254426,96,zt_96,67222,0,1600066546254426,0,-1,1,0.0,0,20006.0,SSE,0.0,statwo,663,statwo
3,3,1600047111746364,1600047111746279,1600006,0,1357300,11.0,0,2,21200,0,4.84,-1.0,20200914,8971,100,zt_88_03,,81.0,,93059000.0,0.001223,99404.0,0.004545,0.004545,0.001223,-0.003151,1357300.0,483.0,484.0,0.0,2020-09-14 09:31:51.746364,89,zt_89,35482,1,1600047111746364,0,1,1,1.0,2,102608.0,SSE,0.0,staone,85,staone
7,7,1600047110338240,1600047110338219,1600006,0,1357300,11.0,0,1,84500,0,4.84,-1.0,20200914,527301,100,zt_52_04,,108.0,,93059000.0,0.001349,99404.0,0.004545,0.004545,0.001349,-0.003284,1357300.0,483.0,484.0,0.0,2020-09-14 09:31:50.338240,52,zt_52,56258,1,1600047110338240,0,1,1,1.0,1,408980.0,SSE,0.0,staone,21,staone
21,21,1600047110354085,1600047110354044,1600006,0,1373300,11.0,0,1,30200,0,4.84,-1.0,20200914,522201,100,zt_52_04,,71.0,,93059000.0,0.001441,99426.0,0.004545,0.004545,0.001441,-0.003367,1373300.0,483.0,484.0,0.0,2020-09-14 09:31:50.354085,52,zt_52,52757,2,1600047110354085,0,1,1,1.0,2,146168.0,SSE,0.0,staone,41,staone
25,25,1600051849494369,1600051849487140,1600006,0,21217536,11.0,0,1,800,0,4.91,-1.0,20200914,9441,4,zt_94_02,,664.0,,105213000.0,0.000202,0.0,0.003101,0.003101,0.000202,-0.002226,21217536.0,490.0,491.0,0.0,2020-09-14 10:50:49.494369,94,zt_94,37965,3,1600051849494369,0,1,1,0.0,0,3928.0,SSE,0.0,staone,7229,staone


In [57]:
checkLog['colo_account'] = checkLog['colo'].str[:2] + '_' + checkLog['accCode'].astype(str)
checkLog[(checkLog['colo_account'] == 'zt_8943') & (checkLog['sta'] == 'statwo')
        & (checkLog['isMsg'] == 1)].groupby('date')['internal_latency'].quantile(.95)

date
20200914     291.50
20200915     241.65
20200916     221.80
20200917     330.00
20200918    1101.60
20200921    1182.45
20200922     285.00
20200923     361.60
20200924     279.60
20200925     429.80
20200928     419.00
20200929     268.70
20200930     352.40
Name: internal_latency, dtype: float64

In [67]:
checkLog[(checkLog['colo_account'] == 'zt_8943') & (checkLog['sta'] == 'statwo')
        & (checkLog['isMsg'] == 1) & (checkLog['date'].isin([20200918, 20200921])) & (checkLog['internal_latency'] < 300)]['sdd'].describe([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]).astype(np.int64)

count          182
mean     117702170
std       19490942
min       95804000
10%       95914700
20%       95949600
30%      100241300
40%      105754400
50%      110046000
60%      130388400
70%      138781899
80%      140497000
90%      143133600
max      145623000
Name: sdd, dtype: int64

### Basic check

In [31]:
checkLog = orderLog[(orderLog["updateType"] == 0) & (~orderLog["ars"].isin([121, 221, 321, 131, 231, 331]))]
checkLog = checkLog[checkLog['caamd'] != 0]
checkLog['internal_latency'] = checkLog["clockAtArrival"] - checkLog["caamd"]
SZE = checkLog[checkLog['secid'] >= 2000000]
SSE = checkLog[checkLog['secid'] < 2000000]
SZE["exchange"] = "SZ"
SSE["exchange"] = "SH"
SZE = SZE[SZE["isMsg"] == 1]
df = pd.DataFrame()
df["exchange"] = ["SZE", "SSE", "SSE"]
df["isMsg"] = [1, 1, 0]
df["median"] = [SZE.groupby("date")["internal_latency"].median().mean(), SSE[SSE["isMsg"] == 1].groupby("date")["internal_latency"].median().mean(),
               SSE[SSE["isMsg"] == 0].groupby("date")["internal_latency"].median().mean()]
df["95p"] = [SZE.groupby("date")["internal_latency"].quantile(.95).mean(), SSE[SSE["isMsg"] == 1].groupby("date")["internal_latency"].quantile(.95).mean(), 
            SSE[SSE["isMsg"] == 0].groupby("date")["internal_latency"].quantile(.95).mean()]
# display(SZE.groupby("date")["internal_latency"].quantile(.95).mean())
# display(SZE.groupby("date")["internal_latency"].median().mean())

# display(SSE[SSE["isMsg"] == 1].groupby("date")["internal_latency"].quantile(.95).mean())
# display(SSE[SSE["isMsg"] == 1].groupby("date")["internal_latency"].median().mean())
# display(SSE[SSE["isMsg"] == 0].groupby("date")["internal_latency"].quantile(.95).mean())
# display(SSE[SSE["isMsg"] == 0].groupby("date")["internal_latency"].median().mean())
df["median"] = df["median"].astype("int")
df["95p"] = df["95p"].astype("int")
display(df)
savePath = r'L:\orderLog\result\internal latency'
df.to_csv(os.path.join(savePath, 'internal_latency_basic2_%s_%s.csv'%(startDate, endDate)), index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


Unnamed: 0,exchange,isMsg,median,95p
0,SZE,1,28,71
1,SSE,1,28,9114
2,SSE,0,46,6493


In [24]:
SSE[(SSE['accCode'] == 9441) & (SSE['date'] == 20200922) & (SSE['internal_latency'] > 10000)]['sdd'].describe([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]).astype('int64')

count          306
mean     132341702
std       14780588
min       95002000
10%      105232000
20%      110309000
30%      130404000
40%      135539000
50%      140041000
60%      140512000
70%      140954000
80%      143211000
90%      145219000
max      145650000
Name: sdd, dtype: int64

In [16]:
SSE[SSE['accCode'] == 9441].groupby('date')['internal_latency'].quantile(.95)

date
20200914    13875.00
20200915    36489.85
20200916    34129.75
20200917       47.00
20200918      178.45
20200921      169.00
20200922    42909.25
20200923    37536.20
20200924    37441.40
20200925    43204.30
20200928    35348.70
20200929    43615.25
20200930    37031.40
Name: internal_latency, dtype: float64

In [47]:
thisStartDate = '20200914'
thisEndDate = '20200930'

prevStartDate = '20200831'
prevEndDate = '20200911'

readPath = r'L:\orderLog\result\internal latency'
thisResult = pd.read_csv(os.path.join(readPath, 'internal_latency_basic1_%s_%s.csv'%(thisStartDate, thisEndDate)))
prevResult = pd.read_csv(os.path.join(readPath, 'internal_latency_basic1_%s_%s.csv'%(prevStartDate, prevEndDate)))

df = pd.merge(thisResult, prevResult, on=['exchange', 'isMsg'])
df = df.rename(columns={"median_x": "curMedian", "median_y": "prevMedian", "95p_x": "cur95p", "95p_y": "prev95p"})
df["medianDif"] = df["curMedian"] - df["prevMedian"]
df["95pDif"] = df["cur95p"] - df["prev95p"]
df = df[["exchange", "isMsg", "prevMedian", "curMedian", "medianDif", "prev95p", "cur95p", "95pDif"]]
df

Unnamed: 0,exchange,isMsg,prevMedian,curMedian,medianDif,prev95p,cur95p,95pDif
0,SZE,1,52,52,0,88,92,4
1,SSE,1,99,99,0,334,469,135
2,SSE,0,89,89,0,169,181,12


In [84]:
# check abnormal
test1 = checkLog[(checkLog["colo_account"] == "zs_9765") & (checkLog["isMsg"] == 0)]
test2 = checkLog[(checkLog["colo_account"] == "zs_9765") & (checkLog["isMsg"] == 1)]
display(test1["internal_latency"].describe([0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]).astype('int64'))
display(test2["internal_latency"].describe([0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]).astype('int64'))
t = test2[test2["internal_latency"] >= 30]
t1 = t.groupby(['hour','minute'])["internal_latency"].count().reset_index()
t1 = t1.rename(columns = {'internal_latency': 'count'})
t2 = t.groupby(['hour','minute'])["internal_latency"].min().astype('int64').reset_index()
t2 = t2.rename(columns = {'internal_latency': 'min'})
t3 = t.groupby(['hour','minute'])["internal_latency"].mean().astype('int64').reset_index()
t3 = t3.rename(columns = {'internal_latency': 'mean'})
t4 = t.groupby(['hour','minute'])["internal_latency"].median().astype('int64').reset_index()
t4 = t4.rename(columns = {'internal_latency': 'median'})
t5 = t.groupby(['hour','minute'])["internal_latency"].max().astype('int64').reset_index()
t5 = t5.rename(columns = {'internal_latency': 'max'})
tt = pd.merge(pd.merge(pd.merge(pd.merge(t1, t2, on=['hour','minute']), t3, on=['hour','minute']), t4, on=['hour','minute']), t5, on=['hour','minute'])

HTML(tt.groupby(["hour", "minute"]).first().to_html())

count    56
mean     22
std       5
min      12
50%      21
55%      21
60%      23
65%      23
70%      23
75%      24
80%      25
85%      25
90%      27
95%      29
max      50
Name: internal_latency, dtype: int64

count    5097
mean       20
std        12
min         6
50%        20
55%        21
60%        21
65%        22
70%        23
75%        23
80%        24
85%        25
90%        27
95%        30
max       689
Name: internal_latency, dtype: int64

Unnamed: 0_level_0,Unnamed: 1_level_0,count,min,mean,median,max
hour,minute,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
9,29,3,75,318,192,689
9,30,39,31,55,47,180
9,31,10,30,39,36,59
9,38,62,30,38,32,85
9,39,37,30,33,32,64
9,40,6,31,36,34,44
9,57,24,30,33,33,41
9,58,18,30,39,34,113
9,59,2,37,61,61,86
10,51,3,30,30,30,31


### Compare with last result

In [43]:
thisStartDate = '20200914'
thisEndDate = '20200930'

prevStartDate = '20200831'
prevEndDate = '20200911'

readPath = r'L:\orderLog\result\internal latency'
thisResult = pd.read_csv(os.path.join(readPath, 'internal_latency_%s_%s.csv'%(thisStartDate, thisEndDate)))
thisResult = thisResult.rename(columns={'95 percentile': 'cur95p',
                                        'median': 'curMedian',
                                        'std': 'cur95pSTD'})


prevResult = pd.read_csv(os.path.join(readPath, 'internal_latency_%s_%s.csv'%(prevStartDate, prevEndDate)))
prevResult = prevResult.rename(columns={'95 percentile': 'prev95p',
                                        'median': 'prevMedian',
                                        'std': 'prev95pSTD'})

checkResult = pd.merge(thisResult, prevResult, how='left', on=['exchange','colo_account','strategy','isMsg'], validate='one_to_one')

checkResult['95pDif'] = checkResult['cur95p'] - checkResult['prev95p']
checkResult['medianDif'] = checkResult['curMedian'] - checkResult['prevMedian']
checkResult['95pSTDDif'] = checkResult['cur95pSTD'] - checkResult['prev95pSTD']

for col in ['prevMedian', 'curMedian', 'medianDif', 'prev95p', 'cur95p', '95pDif']:
    checkResult[col] = checkResult[col].fillna(0)
    checkResult[col] = checkResult[col].astype(int)
for col in ['prev95pSTD', 'cur95pSTD', '95pSTDDif']:
    checkResult[col] = checkResult[col].fillna(0)
    checkResult[col] = checkResult[col].astype(float)
    checkResult[col] = checkResult[col].apply(lambda x: '%.2f'%(x))
    
display(HTML(checkResult[(abs(checkResult['medianDif']) > 10) & (checkResult['cur95p'] < 500) & (checkResult['prev95p'] < 500) & (checkResult["count_x"] > 100) & (checkResult["count_y"] > 100)].loc[:,["exchange", "colo_account", "strategy", "isMsg", "count_y", "count_x", "prevMedian", "curMedian", "medianDif", 
                                                           "prev95p", "cur95p", "95pDif", "prev95pSTD", "cur95pSTD", "95pSTDDif"]].groupby(['exchange', 'colo_account', 'strategy', 'isMsg'])["prevMedian", "curMedian", "medianDif", 
                                                           "prev95p", "cur95p", "95pDif", "prev95pSTD", "cur95pSTD", "95pSTDDif"].first().to_html()))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,prevMedian,curMedian,medianDif,prev95p,cur95p,95pDif,prev95pSTD,cur95pSTD,95pSTDDif
exchange,colo_account,strategy,isMsg,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
SH,zs_9765,statwo,0.0,100,114,14,175,350,175,13.47,360.61,347.14
SH,zt_529001,statwo,0.0,89,78,-11,157,147,-9,15.91,6.78,-9.13
SH,zt_529001,statwo,1.0,94,78,-16,181,151,-29,34.78,13.16,-21.62
SH,zt_7079,staone,0.0,39,67,28,74,163,88,22.47,79.32,56.85
SH,zt_7079,staone,1.0,27,57,29,58,161,102,56.98,172.9,115.92
SH,zt_8943,statwo,1.0,113,157,44,235,443,208,23.91,316.67,292.76
SH,zt_8971,staone,1.0,59,136,77,233,378,144,98.99,279.68,180.69
SH,zt_9551,staone,0.0,55,38,-17,183,63,-119,259.38,28.09,-231.29
SH,zt_9551,staone,1.0,56,30,-25,195,84,-111,258.5,120.46,-138.04
SH,zt_965501,staone,1.0,36,58,21,166,237,70,67.21,122.61,55.41


In [51]:
orderLog['internal_latency'] = orderLog["clockAtArrival"] - orderLog["caamd"]
orderLog[(orderLog['colo_account'] == 'zt_8971') & (orderLog['updateType'] == 0) & (orderLog['sta'] == 'statwo') & (orderLog['isMsg'] == 1)].groupby(['date'])["internal_latency"].quantile(.95).reset_index()

Unnamed: 0,date,internal_latency
0,20200720,282.0
1,20200721,240.55
2,20200722,369.55
3,20200723,363.35
4,20200724,401.0
5,20200727,309.0
6,20200728,299.2
7,20200729,382.0
8,20200730,423.7
9,20200731,719.1


In [75]:
HTML(checkResult[checkResult['colo_account'].isin(['zt_527501', 'zt_527601', 'zt_537403', 'zt_527701', 'zt_537401', 'zt_527103', 'zt_528101'])].groupby(["exchange", "strategy", "isMsg", "colo_account"])[['count_x', 'count_y', "prevMedian", "curMedian", "medianDif", 
                                                           "prev95p", "cur95p", "95pDif", "prev95pSTD", "cur95pSTD", "95pSTDDif"]].first().to_html())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count_x,count_y,prevMedian,curMedian,medianDif,prev95p,cur95p,95pDif,prev95pSTD,cur95pSTD,95pSTDDif
exchange,strategy,isMsg,colo_account,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
SH,staone,0.0,zt_527103,495,3572.0,43,54,10,130,109,-20,32.84,18.25,-14.6
SH,staone,0.0,zt_527501,174,297.0,51,58,7,826,161,-665,2245.13,95.82,-2149.31
SH,staone,0.0,zt_527601,1135,3059.0,49,54,5,207,262,55,65.02,159.5,94.48
SH,staone,0.0,zt_527701,460,2785.0,46,58,11,215,127,-87,90.24,23.64,-66.6
SH,staone,0.0,zt_528101,1131,3080.0,41,51,10,166,113,-53,60.68,16.38,-44.3
SH,staone,0.0,zt_537401,277,168.0,47,55,8,351,97,-254,575.56,32.6,-542.96
SH,staone,0.0,zt_537403,1656,3183.0,46,51,4,119,98,-21,30.87,21.87,-9.0
SH,staone,1.0,zt_527103,161,824.0,15,49,34,77,108,31,40.17,28.1,-12.07
SH,staone,1.0,zt_527501,53,30.0,19,45,26,140,119,-20,401.21,128.63,-272.58
SH,staone,1.0,zt_527601,166,138.0,19,48,29,49,131,81,28.39,115.15,86.75


In [1]:
import csv
data = []
with open('F:\\data\\mdOrderLog_20200915_0843.csv') as f:
    reader = csv.reader(f)
    for row in reader:
        data.append(row)

In [3]:
import pandas as pd
data = pd.DataFrame(data[1:], columns=data[0]) 

In [30]:
import pandas as pd
data = pd.read_csv(r'F:\data\mdOrderLog_20200915_0844.csv')

In [31]:
pd.concat([data[(data['ApplSeqNum'] == 6105809) & (data['SecurityID'] == 300882)],
           data[(data['ApplSeqNum'] == 8505225) & (data['SecurityID'] == 300883)],
           data[(data['ApplSeqNum'] == 8603913) & (data['SecurityID'] == 300882)],
           data[(data['ApplSeqNum'] == 9492008) & (data['SecurityID'] == 300882)],
           data[(data['ApplSeqNum'] == 8568511) & (data['SecurityID'] == 300885)],
           data[(data['ApplSeqNum'] == 10194961) & (data['SecurityID'] == 300883)],
           data[(data['ApplSeqNum'] == 12879098) & (data['SecurityID'] == 300883)],
           data[(data['ApplSeqNum'] == 12921318) & (data['SecurityID'] == 300883)],
           data[(data['ApplSeqNum'] == 13724717) & (data['SecurityID'] == 300883)],
           data[(data['ApplSeqNum'] == 16132262) & (data['SecurityID'] == 300883)]])
           

Unnamed: 0,clockAtArrival,sequenceNo,exchId,securityType,__isRepeated,TransactTime,ChannelNo,ApplSeqNum,SecurityID,secid,mdSource,Side,OrderType,__origTickSeq,Price,OrderQty
13986492,1600134718108236,40348889,2,1,0,95105520,2011,6105809,300882,2300882,12,2,2,-1,705032704,2000
19718251,1600135791750913,58871598,2,1,0,100859150,2011,8505225,300883,2300883,12,2,2,-1,1410055408,200
19962199,1600135855265830,59734511,2,1,0,101002660,2011,8603913,300882,2300882,12,2,2,-1,-727379968,4400
22077528,1600136367112573,67093790,2,1,0,101834500,2011,9492008,300882,2300882,12,2,2,-1,1410055408,2500
21453034,1600136211946306,64943150,2,1,0,101559340,2013,8568511,300885,2300885,12,2,2,-1,305032704,100
23711590,1600136772113807,72796916,2,1,0,102519500,2011,10194961,300883,2300883,12,2,2,-1,298945408,100
29711877,1600138684959035,95029925,2,1,0,105712330,2011,12879098,300883,2300883,12,2,2,-1,298945408,700
29804510,1600138721728899,95402817,2,1,0,105749100,2011,12921318,300883,2300883,12,2,2,-1,298945408,700
31627248,1600139490563076,102719528,2,1,0,111037920,2011,13724717,300883,2300883,12,2,2,-1,1410055408,100
37020683,1600146905865074,128400825,2,1,0,131413150,2011,16132262,300883,2300883,12,2,2,-1,410065408,4900


In [20]:
data = data[((data['SecurityID'] > 300000) & (data['SecurityID'] < 310000)) | (data['SecurityID'] < 4000)]

In [21]:
data[((data['Price'] > 1000000000)) | ((data['Price'] == 298945408)) |
    ((data['Price'] == 705032704)) | ((data['Price'] == 410065408)) |
    ((data['Price'] < 0))]

Unnamed: 0,clockAtArrival,sequenceNo,exchId,securityType,__isRepeated,TransactTime,ChannelNo,ApplSeqNum,SecurityID,secid,mdSource,Side,OrderType,__origTickSeq,Price,OrderQty
13986492,1600134718108236,40348889,2,1,0,95105520,2011,6105809,300882,2300882,12,2,2,-1,705032704,2000
19718251,1600135791750913,58871598,2,1,0,100859150,2011,8505225,300883,2300883,12,2,2,-1,1410055408,200
19962199,1600135855265830,59734511,2,1,0,101002660,2011,8603913,300882,2300882,12,2,2,-1,-727379968,4400
22077528,1600136367112573,67093790,2,1,0,101834500,2011,9492008,300882,2300882,12,2,2,-1,1410055408,2500
23711590,1600136772113807,72796916,2,1,0,102519500,2011,10194961,300883,2300883,12,2,2,-1,298945408,100
29711877,1600138684959035,95029925,2,1,0,105712330,2011,12879098,300883,2300883,12,2,2,-1,298945408,700
29804510,1600138721728899,95402817,2,1,0,105749100,2011,12921318,300883,2300883,12,2,2,-1,298945408,700
31627248,1600139490563076,102719528,2,1,0,111037920,2011,13724717,300883,2300883,12,2,2,-1,1410055408,100
37020683,1600146905865074,128400825,2,1,0,131413150,2011,16132262,300883,2300883,12,2,2,-1,410065408,4900


In [6]:
data[(data['OfferApplSeqNum'] == 6105809) & (data['SecurityID'] == 300882)]

Unnamed: 0,clockAtArrival,sequenceNo,exchId,securityType,__isRepeated,TransactTime,ChannelNo,ApplSeqNum,SecurityID,secid,mdSource,Side,OrderType,__origTickSeq,Price,OrderQty
13986492,1600134718108236,40348889,2,1,0,95105520,2011,6105809,300882,2300882,12,2,2,-1,705032704,2000


In [7]:
data[data['Price'] < 0]

Unnamed: 0,clockAtArrival,sequenceNo,exchId,securityType,__isRepeated,TransactTime,ChannelNo,ApplSeqNum,SecurityID,secid,mdSource,Side,OrderType,__origTickSeq,Price,OrderQty
13114545,1600134665533629,32007191,2,1,0,95105520,2011,6105809,300882,2300882,13,2,2,-1,-2147483648,2000
18152464,1600135739157714,46848643,2,1,0,100859150,2011,8505225,300883,2300883,13,2,2,-1,-2147483648,200
18367859,1600135802671419,47523389,2,1,0,101002660,2011,8603913,300882,2300882,13,2,2,-1,-2147483648,4400
20270147,1600136314511260,53307284,2,1,0,101834500,2011,9492008,300882,2300882,13,2,2,-1,-2147483648,2500
21731582,1600136719503706,57785078,2,1,0,102519500,2011,10194961,300883,2300883,13,2,2,-1,-2147483648,100
27126331,1600138632315444,74828107,2,1,0,105712330,2011,12879098,300883,2300883,13,2,2,-1,-2147483648,700
27209418,1600138669088594,75106461,2,1,0,105749100,2011,12921318,300883,2300883,13,2,2,-1,-2147483648,700
28853067,1600139437907025,80519588,2,1,0,111037920,2011,13724717,300883,2300883,13,2,2,-1,-2147483648,100
33673905,1600146853173991,97029334,2,1,0,131413150,2011,16132262,300883,2300883,13,2,2,-1,-2147483648,4900


In [82]:
HTML(pd.concat([checkResult[(abs(checkResult['medianDif']) >= 10) & (checkResult["count_x"] > 100) & (checkResult["count_y"] > 100)
                &  (checkResult['strategy'] == 'staone') & (checkResult['exchange'] == 'SH')], checkResult[(abs(checkResult['medianDif']) >= 20) & (checkResult["count_x"] > 100) & (checkResult["count_y"] > 100)
                & (checkResult['strategy'] == 'staone') & (checkResult['exchange'] == 'SZ') & (checkResult['isMsg'] == 1)]]).loc[:,["exchange", "colo_account", "strategy", "isMsg", "count_y", "count_x", "prevMedian", "curMedian", "medianDif", 
                                                           "prev95p", "cur95p", "95pDif", "prev95pSTD", "cur95pSTD", "95pSTDDif"]].reset_index(drop=True).groupby(["exchange", "strategy", "isMsg", "colo_account"])[['count_x', 'count_y', "prevMedian", "curMedian", "medianDif", 
                                                           "prev95p", "cur95p", "95pDif", "prev95pSTD", "cur95pSTD", "95pSTDDif"]].first().to_html())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count_x,count_y,prevMedian,curMedian,medianDif,prev95p,cur95p,95pDif,prev95pSTD,cur95pSTD,95pSTDDif
exchange,strategy,isMsg,colo_account,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
SH,staone,0.0,zs_975602,6765,6077.0,77,93,15,159,1206,1046,5.84,2783.69,2777.85
SH,staone,0.0,zs_9765,236,207.0,51,76,24,157,1590,1432,53.72,3557.85,3504.13
SH,staone,0.0,zt_7079,753,465.0,39,67,28,74,163,88,22.47,79.32,56.85
SH,staone,0.0,zt_9441,3567,1444.0,836,11360,10523,11503,27782,16279,4115.66,17440.52,13324.86
SH,staone,0.0,zt_9551,9904,6771.0,55,38,-17,183,63,-119,259.38,28.09,-231.29
SH,staone,0.0,zt_9561,2678,2439.0,59,38,-21,1775,1267,-507,4761.39,4272.98,-488.41
SH,staone,0.0,zt_965801,244,234.0,44,62,18,297,1393,1096,378.56,2745.86,2367.3
SH,staone,0.0,zt_966501,314,151.0,46,62,16,333,562,228,373.99,413.28,39.3
SH,staone,0.0,zt_966701,219,153.0,48,60,12,350,2893,2543,300.5,8641.93,8341.43
SH,staone,1.0,zs_975602,1042,1007.0,82,103,20,185,775,590,23.49,1418.89,1395.4


In [92]:
checkLog[(checkLog['colo'].isin(['zs_96_06', 'zt_96_09','zt_88_03'])) & (checkLog['sta'] == 'statwo')].groupby('date')['internal_latency'].quantile(.95)

date
20200914     247.0
20200915     237.0
20200916     338.2
20200917     454.3
20200918    1053.0
20200921    1063.0
20200922     263.0
20200923     286.1
20200924     300.0
20200925     376.0
20200928     337.0
20200929     314.9
20200930     388.9
Name: internal_latency, dtype: float64

In [97]:
checkLog[(checkLog['colo_account'] == 'zt_9441')].groupby('date')['internal_latency'].quantile(.95)

date
20200914    13875.00
20200915    36489.85
20200916    34129.75
20200917       47.00
20200918      178.45
20200921      169.00
20200922    42909.25
20200923    37536.20
20200924    37441.40
20200925    43204.30
20200928    35348.70
20200929    43615.25
20200930    37031.40
Name: internal_latency, dtype: float64

In [71]:
checkResult[checkResult['colo_account'] == 'zt_9441']

Unnamed: 0,exchange,colo_account,strategy,isMsg,count_x,cur95p,curMedian,date_x,cur95pSTD,TradeByMsg_x,TradeBySs_x,TradeBySsPerc(%)_x,count_y,prev95p,prevMedian,date_y,prev95pSTD,TradeByMsg_y,TradeBySs_y,TradeBySsPerc(%)_y,95pDif,medianDif,95pSTDDif
228,SH,zt_9441,staone,0.0,3567,27782,11360,13,17440.52,903.0,3567.0,1.0,1444.0,11503,836,10.0,4115.66,428.0,1444.0,1.0,16279,10523,13324.86
229,SH,zt_9441,staone,1.0,903,31481,12561,13,27886.34,903.0,3567.0,3.950166,428.0,12442,1773,10.0,4502.73,428.0,1444.0,3.373832,19039,10787,23383.61


In [58]:
orderLog[(orderLog["exchange"] == "SSE") & (orderLog["colo_broker"].isin(['zt_5275', 'zt_5271', 'zt_5273', 'zt_5276', 'zt_5269',
                                                                         'zt_5277', 'zt_5242'])) & (orderLog["updateType"] == 0)].groupby('colo_account')["order"].size()

colo_account
zt_524201      51
zt_526901    7615
zt_527101    2704
zt_527301    5213
zt_527501    3900
zt_527601    6227
zt_527701    6043
Name: order, dtype: int64

In [55]:
orderLog[(orderLog["exchange"] == "SSE")]["colo_broker"].unique()

array(['zt_89', 'zt_88', 'zt_5275', 'zt_95', 'zt_5274', 'zt_9667',
       'zt_94', 'zt_5271', 'zt_9663', 'zs_62', 'zt_5273', 'zt_5276',
       'zt_58', 'zt_5269', 'zt_9658', 'zt_9665', 'zt_92', 'zt_5277',
       'zs_97', 'zt_5242'], dtype=object)

In [31]:
checkResult[checkResult["colo_account"].str[:5] == "zt_96"].loc[:,["exchange", "colo_account", "strategy", "isMsg", "count_y", "count_x", "prevMedian", "curMedian", "medianDif", 
                                                           "prev95p", "cur95p", "95pDif", "prev95pSTD", "cur95pSTD", "95pSTDDif"]].reset_index(drop=True)

Unnamed: 0,exchange,colo_account,strategy,isMsg,count_y,count_x,prevMedian,curMedian,medianDif,prev95p,cur95p,95pDif,prev95pSTD,cur95pSTD,95pSTDDif
0,SH,zt_9658,staone,0.0,4875.0,2365,40,42,2,202,188,-14,74.53,79.77,5.24
1,SH,zt_9658,staone,1.0,1723.0,640,17,24,7,85,104,19,33.29,27.46,-5.83
2,SH,zt_9663,staone,0.0,1142.0,571,39,44,4,272,309,36,107.28,251.59,144.3
3,SH,zt_9663,staone,1.0,65.0,76,18,25,7,122,108,-14,58.48,56.83,-1.65
4,SH,zt_9665,staone,0.0,9370.0,4075,37,41,3,123,202,79,13.61,58.4,44.79
5,SH,zt_9665,staone,1.0,544.0,866,19,19,0,117,150,33,8.76,24.12,15.36
6,SH,zt_9666,staone,0.0,3528.0,2,35,72,37,110,106,-3,27.01,0.0,0.0
7,SH,zt_9667,staone,0.0,3726.0,2586,33,31,-2,82,71,-11,16.35,16.86,0.51
8,SH,zt_9667,staone,1.0,1081.0,742,15,13,-1,67,72,4,19.1,9.26,-9.84


### Compare two strategies

In [34]:
checkLog = orderLog[orderLog["updateType"] == 0]
checkLog['colo_account'] = checkLog['colo'].str[:2] + '_' + checkLog['accCode'].astype(str)
checkLog['internal_latency'] = checkLog["clockAtArrival"] - checkLog["caamd"]
checkLog1 = checkLog[checkLog["ars"].isin([121, 221, 321, 131, 231, 331])]

display(checkLog1.shape[0])

SZE = checkLog1[checkLog1['secid'] >= 2000000]
SSE = checkLog1[checkLog1['secid'] < 2000000]
SZE["exchange"] = "SZ"
SSE["exchange"] = "SH"

c1 = SZE.groupby(["exchange", "colo_account", "isMsg", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "isMsg"])["internal_latency"].mean().reset_index()
c2 = SZE.groupby(["exchange", "colo_account", "isMsg", "date"])["internal_latency"].median().reset_index().groupby(["exchange", "colo_account", "isMsg"])["internal_latency"].mean().reset_index()
c3 = SZE.groupby(["exchange", "colo_account", "isMsg"])["internal_latency"].count().reset_index()
c4 = SZE.groupby(["exchange", "colo_account", "isMsg"])["date"].unique().str.len().reset_index()
c5 = SZE.groupby(["exchange", "colo_account", "isMsg", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "isMsg"])["internal_latency"].std().reset_index()

re1 = pd.merge(c3, c1, on=["exchange", "colo_account", "isMsg"])
re1 = re1.rename(columns = {'internal_latency_x': 'count', 'internal_latency_y': '95 percentile'})
re1 = pd.merge(re1, c2, on=["exchange", "colo_account", "isMsg"])
re1 = re1.rename(columns = {'internal_latency': 'median'})
re1 = pd.merge(re1, c4, on=["exchange", "colo_account", "isMsg"])
re1 = pd.merge(re1, c5, on=["exchange", "colo_account", "isMsg"])
re1 = re1.rename(columns = {'internal_latency': 'std'})
re1 = pd.merge(re1, re1[re1["isMsg"] == 1].loc[:, ["exchange", "colo_account", "count"]], on=["exchange", "colo_account"], how="outer")
re1 = pd.merge(re1, re1[re1["isMsg"] == 0].loc[:, ["exchange", "colo_account", "count_x"]], on=["exchange", "colo_account"], how="outer")
re1 = re1.rename(columns = {'count_x_x': 'count', 'count_y': 'TradeByMsg','count_x_y': 'TradeBySs'})
re1["TradeBySsPerc(%)"] = re1["TradeBySs"]/re1["count"]
# re1 = re1[re1["count"] >= 150]
re1 = re1[re1["isMsg"] == 1]
re1

c1 = SSE.groupby(["exchange", "colo_account", "isMsg", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "isMsg"])["internal_latency"].mean().reset_index()
c2 = SSE.groupby(["exchange", "colo_account", "isMsg", "date"])["internal_latency"].median().reset_index().groupby(["exchange", "colo_account", "isMsg"])["internal_latency"].mean().reset_index()
c3 = SSE.groupby(["exchange", "colo_account", "isMsg"])["internal_latency"].count().reset_index()
c4 = SSE.groupby(["exchange", "colo_account", "isMsg"])["date"].unique().str.len().reset_index()
c5 = SSE.groupby(["exchange", "colo_account", "isMsg", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "isMsg"])["internal_latency"].std().reset_index()

re2 = pd.merge(c3, c1, on=["exchange", "colo_account", "isMsg"])
re2 = re2.rename(columns = {'internal_latency_x': 'count', 'internal_latency_y': '95 percentile'})
re2 = pd.merge(re2, c2, on=["exchange", "colo_account", "isMsg"])
re2 = re2.rename(columns = {'internal_latency': 'median'})
re2 = pd.merge(re2, c4, on=["exchange", "colo_account", "isMsg"])
re2 = pd.merge(re2, c5, on=["exchange", "colo_account", "isMsg"])
re2 = re2.rename(columns = {'internal_latency': 'std'})
re2 = pd.merge(re2, re2[re2["isMsg"] == 1].loc[:, ["exchange", "colo_account", "count"]], on=["exchange", "colo_account"], how="outer")
re2 = pd.merge(re2, re2[re2["isMsg"] == 0].loc[:, ["exchange", "colo_account", "count_x"]], on=["exchange", "colo_account"], how="outer")
re2 = re2.rename(columns = {'count_x_x': 'count', 'count_y': 'TradeByMsg','count_x_y': 'TradeBySs'})
re2["TradeBySsPerc(%)"] = re2["TradeBySs"]/re2["count"]
re2


re = pd.concat([re1, re2]).reset_index(drop=True).loc[:, ["exchange", "colo_account", "isMsg", "95 percentile", "median", "std"]]


checkLog2 = checkLog[~checkLog["ars"].isin([121, 221, 321, 131, 231, 331])]

SZE = checkLog2[checkLog2['secid'] >= 2000000]
SSE = checkLog2[checkLog2['secid'] < 2000000]
SZE["exchange"] = "SZ"
SSE["exchange"] = "SH"

display(checkLog2.shape[0])

c1 = SZE.groupby(["exchange", "colo_account", "isMsg", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "isMsg"])["internal_latency"].mean().reset_index()
c2 = SZE.groupby(["exchange", "colo_account", "isMsg", "date"])["internal_latency"].median().reset_index().groupby(["exchange", "colo_account", "isMsg"])["internal_latency"].mean().reset_index()
c3 = SZE.groupby(["exchange", "colo_account", "isMsg"])["internal_latency"].count().reset_index()
c4 = SZE.groupby(["exchange", "colo_account", "isMsg"])["date"].unique().str.len().reset_index()
c5 = SZE.groupby(["exchange", "colo_account", "isMsg", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "isMsg"])["internal_latency"].std().reset_index()

re1 = pd.merge(c3, c1, on=["exchange", "colo_account", "isMsg"])
re1 = re1.rename(columns = {'internal_latency_x': 'count', 'internal_latency_y': '95 percentile'})
re1 = pd.merge(re1, c2, on=["exchange", "colo_account", "isMsg"])
re1 = re1.rename(columns = {'internal_latency': 'median'})
re1 = pd.merge(re1, c4, on=["exchange", "colo_account", "isMsg"])
re1 = pd.merge(re1, c5, on=["exchange", "colo_account", "isMsg"])
re1 = re1.rename(columns = {'internal_latency': 'std'})
re1 = pd.merge(re1, re1[re1["isMsg"] == 1].loc[:, ["exchange", "colo_account", "count"]], on=["exchange", "colo_account"], how="outer")
re1 = pd.merge(re1, re1[re1["isMsg"] == 0].loc[:, ["exchange", "colo_account", "count_x"]], on=["exchange", "colo_account"], how="outer")
re1 = re1.rename(columns = {'count_x_x': 'count', 'count_y': 'TradeByMsg','count_x_y': 'TradeBySs'})
re1["TradeBySsPerc(%)"] = re1["TradeBySs"]/re1["count"]
# re1 = re1[re1["count"] >= 150]
re1 = re1[re1["isMsg"] == 1]
re1

c1 = SSE.groupby(["exchange", "colo_account", "isMsg", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "isMsg"])["internal_latency"].mean().reset_index()
c2 = SSE.groupby(["exchange", "colo_account", "isMsg", "date"])["internal_latency"].median().reset_index().groupby(["exchange", "colo_account", "isMsg"])["internal_latency"].mean().reset_index()
c3 = SSE.groupby(["exchange", "colo_account", "isMsg"])["internal_latency"].count().reset_index()
c4 = SSE.groupby(["exchange", "colo_account", "isMsg"])["date"].unique().str.len().reset_index()
c5 = SSE.groupby(["exchange", "colo_account", "isMsg", "date"])["internal_latency"].quantile(.95).reset_index().groupby(["exchange", "colo_account", "isMsg"])["internal_latency"].std().reset_index()

re2 = pd.merge(c3, c1, on=["exchange", "colo_account", "isMsg"])
re2 = re2.rename(columns = {'internal_latency_x': 'count', 'internal_latency_y': '95 percentile'})
re2 = pd.merge(re2, c2, on=["exchange", "colo_account", "isMsg"])
re2 = re2.rename(columns = {'internal_latency': 'median'})
re2 = pd.merge(re2, c4, on=["exchange", "colo_account", "isMsg"])
re2 = pd.merge(re2, c5, on=["exchange", "colo_account", "isMsg"])
re2 = re2.rename(columns = {'internal_latency': 'std'})
re2 = pd.merge(re2, re2[re2["isMsg"] == 1].loc[:, ["exchange", "colo_account", "count"]], on=["exchange", "colo_account"], how="outer")
re2 = pd.merge(re2, re2[re2["isMsg"] == 0].loc[:, ["exchange", "colo_account", "count_x"]], on=["exchange", "colo_account"], how="outer")
re2 = re2.rename(columns = {'count_x_x': 'count', 'count_y': 'TradeByMsg','count_x_y': 'TradeBySs'})
re2["TradeBySsPerc(%)"] = re2["TradeBySs"]/re2["count"]
re2

ree = pd.concat([re1, re2]).reset_index(drop=True).loc[:, ["exchange", "colo_account", "isMsg", "95 percentile", "median", "std"]]

re = pd.merge(re, ree, on=["exchange", "colo_account", "isMsg"], how="inner")
re = re.rename(columns = {'95 percentile_x': '95 percentile_s2', 'median_x': 'median_s2','std_x': 'std_s2',
                         '95 percentile_y': '95 percentile_s1', 'median_y': 'median_s1','std_y': 'std_s1'})
for col in ['95 percentile_s2', 'median_s2', 'std_s2', '95 percentile_s1', 'median_s1', 'std_s1']:
    re[col] = re[col].apply(lambda x: '%.2f'%(x))
re.loc[:, ["exchange", "colo_account", "isMsg", "95 percentile_s2", "95 percentile_s1", "median_s2", "median_s1"]]

savePath = r'F:\orderLog\result\internal latency'
re.loc[:, ["exchange", "colo_account", "isMsg", "95 percentile_s2", "95 percentile_s1", "median_s2", "median_s1"]].to_csv(os.path.join(savePath, 'internal_latency1_%s_%s.csv'%(startDate, endDate)), index=False)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


675979

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # Remove the CWD from sys.path while we load stuff.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  # This is added back by InteractiveShellApp.init_path()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_inde

235700

FileNotFoundError: [Errno 2] No such file or directory: 'F:\\orderLog\\result\\internal latency\\internal_latency1_20200914_20200930.csv'

In [20]:
thisStartDate = '20200309'
thisEndDate = '20200320'

prevStartDate = '20200224'
prevEndDate = '20200306'

readPath = r'F:\orderLog\result\internal latency'
thisResult = pd.read_csv(os.path.join(readPath, 'internal_latency1_%s_%s.csv'%(thisStartDate, thisEndDate)))
thisResult = thisResult[["exchange", "colo_account", "isMsg", "95 percentile_s2", "median_s2"]]
thisResult = thisResult.rename(columns={"95 percentile_s2": "cur95p", 'median_s2': 'curMedian'})
prevResult = pd.read_csv(os.path.join(readPath, 'internal_latency1_%s_%s.csv'%(prevStartDate, prevEndDate)))
prevResult = prevResult[["exchange", "colo_account", "isMsg", "95 percentile_s2", "median_s2"]]
prevResult = prevResult.rename(columns={"95 percentile_s2": "prev95p", 'median_s2': 'prevMedian'})

re = pd.merge(thisResult, prevResult, on=["exchange", "colo_account", "isMsg"])
re['95pDif'] = re['cur95p'] - re['prev95p']
re['medianDif'] = re['curMedian'] - re['prevMedian']


for col in ['prevMedian', 'curMedian', 'medianDif', 'prev95p', 'cur95p', '95pDif']:
    re[col] = re[col].fillna(0)
    re[col] = re[col].astype(int)
    
display(re[abs(re['95pDif']) > 20].loc[:,["exchange", "colo_account", "isMsg", "prevMedian", "curMedian", "medianDif", 
                                                           "prev95p", "cur95p", "95pDif"]].reset_index(drop=True))
display(re[abs(re['medianDif']) > 20].loc[:,["exchange", "colo_account", "isMsg", "prevMedian", "curMedian", "medianDif", 
                                                           "prev95p", "cur95p", "95pDif"]])

Unnamed: 0,exchange,colo_account,isMsg,prevMedian,curMedian,medianDif,prev95p,cur95p,95pDif
0,SH,zt_8854,0.0,121,122,1,360,275,-85
1,SH,zt_8854,1.0,188,179,-9,544,446,-98


Unnamed: 0,exchange,colo_account,isMsg,prevMedian,curMedian,medianDif,prev95p,cur95p,95pDif
