In [111]:
import os
import glob
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Show up to 100 rows / 100 columns when a DataFrame is displayed.
# The fully-qualified "display.*" keys are used on purpose: the bare patterns
# 'max_rows' / 'max_columns' are ambiguous on pandas versions that also define
# "styler.render.max_rows" / "styler.render.max_columns", and set_option then
# raises OptionError ("Pattern matched multiple keys").
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)

# Percentile levels (1% ... 99%); not referenced by the cells visible here.
perc = [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]

In [112]:
# Inclusive date window of the logs to load, as YYYYMMDD strings.
startDate = '20200907'
endDate = '20200911'


# Directory that holds the speedCompare*.csv trade logs.
readPath = r'\\192.168.10.30\Kevin_zhenyu\orderLog\equityTradeLogs'
# dtype=str keeps the arrays string-typed even when glob finds nothing, so the
# comparisons below stay well-defined.
dataPathLs = np.array(glob.glob(os.path.join(readPath, 'speedCompare***.csv')), dtype=str)
# The date is the token between the first '_' and the first '.' of the file name
# (assumes names shaped like speedCompare_<YYYYMMDD>.csv -- TODO confirm).
dateLs = np.array([os.path.basename(i).split('_')[1].split('.')[0] for i in dataPathLs], dtype=str)
# Same-length digit strings compare chronologically, so a plain string
# comparison selects the window.
dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
if len(dataPathLs) == 0:
    # Fail with an explicit message instead of pd.concat's "No objects to concatenate".
    raise ValueError('no speedCompare logs found under %s between %s and %s' % (readPath, startDate, endDate))

# Iterate over the filtered paths only. The previous loop zipped the
# *unfiltered* dateLs against the filtered dataPathLs, so each date was paired
# with the wrong file (the date was never used, but the pairing was misleading).
rawOrderLog = pd.concat(
    [pd.read_csv(thisPath).rename(columns={'mdClockAtArrival': 'caamd'}) for thisPath in dataPathLs],
    sort=False)

# Columns cast to int64 before they are used as sort / group keys or compared
# against integer literals below.
intCols = ['clockAtArrival', 'caamd', 'secid', 'updateType', 'vai', 'absFilledThisUpdate', 'orderDirection', 'absOrderSize',
           'absOrderSizeCumFilled', 'date', 'accCode', 'mse']
rawOrderLog = rawOrderLog.astype({col: 'int64' for col in intCols})
rawOrderLog = rawOrderLog.sort_values(by=['date', 'secid', 'vai', 'accCode', 'clockAtArrival']).reset_index(drop=True)

# Keep only secid >= 1000000 (presumably the stock universe -- TODO confirm).
# .copy() makes the result an independent frame, so the column assignments
# below do not write onto a slice (avoids SettingWithCopyWarning).
rawOrderLog = rawOrderLog[rawOrderLog["secid"] >= 1000000].copy()

# clockAtArrival is divided by 1e6 to get epoch seconds; fromtimestamp converts
# to the *local* timezone of the machine running the notebook.
rawOrderLog['clock'] = rawOrderLog['clockAtArrival'].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))

# Broker id = leading digits of accCode: accCode // 10000 for 6-digit codes,
# accCode // 100 otherwise.
accCode = rawOrderLog['accCode']
rawOrderLog['broker'] = np.where(accCode.astype(str).str.len() == 6, accCode // 10000, accCode // 100)
rawOrderLog['colo_broker'] = rawOrderLog['colo'].str[:2] + '_' + rawOrderLog['broker'].astype('str')

# Dense integer ids per group, numbered in sorted key order. ngroup() is the
# public API for this; the previous `.grouper.group_info[0]` reached into
# pandas internals that newer versions deprecate.
rawOrderLog['order'] = rawOrderLog.groupby(['date', 'accCode', 'secid', 'vai']).ngroup()
rawOrderLog['group'] = rawOrderLog.groupby(['date', 'secid', 'vai']).ngroup()

# Time elapsed since the first update of the same order (rows are already
# sorted by clockAtArrival within each order).
rawOrderLog['startClock'] = rawOrderLog.groupby(['order'])['clockAtArrival'].transform('first')
rawOrderLog['duration'] = rawOrderLog['clockAtArrival'] - rawOrderLog['startClock']

# Python's round() is kept deliberately (rather than Series.round) so the
# rounding of prices to 2 decimals is unchanged.
rawOrderLog['orderPrice'] = rawOrderLog['orderPrice'].apply(lambda x: round(x, 2))
rawOrderLog['tradePrice'] = rawOrderLog['tradePrice'].apply(lambda x: round(x, 2))

# Collapse direction codes -2 / 2 onto -1 / 1; every other value is kept as is.
rawOrderLog['orderDirection1'] = rawOrderLog['orderDirection'].replace({-2: -1, 2: 1})

# Work on a copy so rawOrderLog stays available unfiltered.
orderLog = rawOrderLog.copy()

### Assertion 1:  every (date, secid, vai) group must contain a single order direction
print('=======================================================================================')
print('1. same date, secid, vai: same direction')
groupKeys = ['date', 'secid', 'vai']
orderLog['directNum'] = orderLog.groupby(groupKeys)['orderDirection1'].transform('nunique')
hasMixedDirection = orderLog['directNum'] != 1
if hasMixedDirection.any():
    print('opposite direction for same date, same secid, same vai')
    # Report only the updateType == 0 rows of the offending groups.
    reportCols = ['date', 'accCode', 'secid', 'vai', 'orderDirection', 'order']
    display(orderLog.loc[hasMixedDirection & (orderLog['updateType'] == 0), reportCols])
    # Drop every row that belongs to a mixed-direction group.
    orderLog = orderLog[orderLog['directNum'] == 1]

# After the clean-up every remaining group must be single-direction.
directionsPerGroup = orderLog.groupby(groupKeys)['orderDirection1'].nunique()
assert (directionsPerGroup == 1).all()

## Assertion 2:  make sure each account, secid, vai only has one insertion
print('=======================================================================================')
print('2. same date, secid, vai, accCode: one insertion')
# Count the updateType == 0 rows of every order.
a = orderLog[orderLog['updateType'] == 0].groupby(['date', 'accCode', 'secid', 'vai', 'order'])['clockAtArrival'].count()
if (a > 1).any():
    print('more than one insertion at same time')
    a = a[a>1].reset_index()
    display(a)
    # Remove the offending orders entirely. .copy() detaches the result so the
    # isMsg assignments below do not write onto a slice.
    orderLog = orderLog[~(orderLog['order'].isin(a['order'].unique()))].copy()

# isMsg is decided on the updateType == 0 row (1 when mse == 100, else 0) and
# then forward-filled onto the later updates of the same order.
orderLog['isMsg'] = np.where(orderLog['updateType'] == 0,
                             np.where(orderLog['mse'] == 100, 1, 0), np.nan)
orderLog['isMsg'] = orderLog.groupby(['order'])['isMsg'].ffill()

# Share of SZE (secid >= 2000000) insertions flagged isMsg == 1.
placeSZE = orderLog[(orderLog['secid'] >= 2000000) & (orderLog['updateType'] == 0)]
if placeSZE.shape[0] > 0:
    print('%.2f%% SZE orders triggered by msg data'%(placeSZE[placeSZE['isMsg'] == 1].shape[0]/placeSZE.shape[0]*100))
else:
    # Previously this case raised ZeroDivisionError and aborted the whole cell.
    print('no SZE insertions in the selected logs; msg-triggered ratio not computed')


### Assertion 3:  check IPO stocks selling status
# Three checks on the rows with ars in (301, 302):
#   a) there are no buy-side rows (orderDirection1 == 1),
#   b) insertions of the same (accCode, secid) are at least 10 seconds apart,
#   c) cancellations (updateType == 1) happen at least 3 seconds after insertion.
# Findings are printed; nothing is removed from orderLog here.
# Explicit if/else replaces the former `try: assert ... except:` blocks, whose
# bare except also swallowed unrelated errors and which stop working when
# asserts are disabled.
print('=======================================================================================')
print('3. IPO stocks selling (ars = 301, 302)')
kk = orderLog[orderLog['ars'].isin([301, 302])]
if kk.shape[0] != 0:
    print(kk)

    # a) buy-side rows
    ipoBuys = kk[kk['orderDirection1'] == 1]
    if ipoBuys.shape[0] == 0:
        print('we only sell, never buy')
    else:
        print('There are IPO buy side orders!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        print(ipoBuys)

    # b) gap between consecutive insertions of the same account and stock
    kk1 = kk[kk['updateType'] == 0]
    kk1 = kk1.sort_values(by=['accCode', 'secid','clockAtArrival'])
    # The first insertion of each (accCode, secid) has no predecessor, so its
    # diff stays NaN and is never flagged. The earlier fillna(0) turned it into
    # a 0 gap, which reported every first insertion as "within 10 seconds".
    kk1['diff'] = kk1.groupby(['accCode', 'secid'])['clockAtArrival'].diff()
    quickReinserts = kk1[kk1['diff'] < 10e6]
    if quickReinserts.shape[0] == 0:
        print('for each stock in the same account, there is no insertion within 10 seconds of the previous insertion')
    else:
        print('There are insertion within 10 seconds for orders under same account same stock!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        print(quickReinserts)

    # c) cancellations sooner than 3 seconds after the order's first update
    kk2 = kk[(kk['updateType'] == 1)]
    quickCancels = kk2[kk2['duration'] < 3e6]
    if quickCancels.shape[0] == 0:
        print('for each stock in the same account, the cancellation of an order happens more than 3 seconds after the insertion')
    else:
        print('There are cancellation within 3 seconds for orders under same account same stock!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        print(quickCancels)


### Assertion 4: updateType == 7 orders must stay below 20 per account and below 100 overall
print('=======================================================================================')
print('4. updateType 7 orders')
type7Log = orderLog[orderLog['updateType'] == 7]
if type7Log.shape[0] != 0:
    # Distinct orders with an updateType 7 row, counted per account.
    type7PerAccount = type7Log.groupby('accCode')['order'].nunique()
    assert type7PerAccount.max() < 20
    assert type7PerAccount.sum() < 100

### Assertion 5: check updateType == 6 orders, make sure updateType == 6 orders < 5% per account
print('=======================================================================================')
print('5. updateType 6 orders')
# k1: distinct orders with an updateType 6 row, per account.
# k2: all distinct orders, per account.
k1 = orderLog[orderLog['updateType'] == 6].groupby('accCode')['order'].nunique().reset_index()
k2 = orderLog.groupby('accCode')['order'].nunique().reset_index()
# The left join keeps only accounts that have updateType 6 orders;
# order_x / order_y are the k1 / k2 counts.
k = pd.merge(k1, k2, on='accCode', how='left')
k['prob'] = k['order_x']/k['order_y']
# Plain condition instead of `try: assert ... except:` -- the bare except hid
# unrelated errors and the check vanished when asserts are disabled.
overLimit = k[k['prob'] >= 0.05]
if overLimit.shape[0] > 0:
    print('There are accounts with more than 5% updateType 6 orders!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
    print(overLimit)

### Assertion 6: check CYB orders, make sure CYB stocks total absOrderSize < 30w
# NOTE(review): the heading and message say "total", but the check looks at
# each insertion's own absOrderSize (secid >= 2300000, updateType == 0) against
# 300000 -- confirm which of the two is intended.
print('=======================================================================================')
print('6. CYB stocks total order size < 30w')
cybInsertSize = orderLog[(orderLog['secid'] >= 2300000) & (orderLog['updateType'] == 0)]['absOrderSize']
# Explicit condition instead of `try: assert max() <= 300000 except:`. With no
# matching rows max() is NaN, the old comparison failed and a false alarm was
# printed; .any() on an empty selection is simply False.
if (cybInsertSize > 300000).any():
    print('CYB stocks total absOrderSize >= 30w!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
    
     
### Assertion 7:  make sure there is no unexpected updateType 
print('=======================================================================================')
print('7. unexpected updateType')
def getTuple(x):
    """Return the items of iterable ``x`` as a tuple, in iteration order."""
    return tuple(x)

# Drop an updateType 4 row when the next row of the same order is also a 4, so
# a run of consecutive 4s collapses to its last row before the sequences are built.
nextUpdateType = orderLog.groupby(['order'])['updateType'].shift(-1)
isFollowedBySame4 = (orderLog['updateType'] == 4) & (nextUpdateType == 4)
checkLog = orderLog[~isFollowedBySame4]

# One row per order holding its updateType sequence as a tuple.
checkLog = checkLog.groupby(['order'])['updateType'].apply(getTuple).reset_index()

# Known updateType sequences per status code; any other sequence gets status 3.
status0Seqs = [(0, 2, 4), (0, 2, 1, 4), (0, 2, 1, 2, 4), (0, 2, 4, 1, 4), (0, 4), (0, 1, 4), (0, 4, 1, 4),
               (0, 2, 2, 4), (0, 4, 2, 4), (0, 2, 2, 1, 4), (0, 2, 2, 4, 1, 4)]
status1Seqs = [(0, 2, 4, 1, 3), (0, 2, 4, 1, 4, 3), (0, 2, 1, 4, 3), (0, 4, 1, 3), (0, 1, 4, 3),
               (0, 2, 2, 4, 1, 3), (0, 2, 2, 4, 1, 4, 3), (0, 2, 2, 1, 4, 3), (0, 4, 2, 4, 1, 3),
               (0, 4, 2, 1, 3), (0, 4, 1, 4, 3), (0, 4, 1)]
status2Seqs = [(0, 2, 1, 3), (0, 2, 2, 1, 3), (0, 2, 3), (0, 3), (0, 1, 3), (0, ), (0, 2), (0, 2, 1), (0, 2, 2)]

updateSeq = checkLog['updateType']
checkLog['status'] = np.where(updateSeq.isin(status0Seqs), 0,
                     np.where(updateSeq.isin(status1Seqs), 1,
                     np.where(updateSeq.isin(status2Seqs), 2, 3)))

# Attach the status to every row of the order and keep only the recognised ones (0, 1, 2).
orderLog = pd.merge(orderLog, checkLog[['order', 'status']], how='left', on=['order'], validate='many_to_one')
isKnownStatus = orderLog['status'].isin([0, 1, 2])
orderLog = orderLog[isKnownStatus].reset_index(drop=True)

### Assertion 8:  status == 0 orders must be completely filled
print('=======================================================================================')
print('8. status == 0: all traded')
# Per order: largest cumulative fill and largest order size seen in its rows.
a = (orderLog[orderLog['status'] == 0]
     .groupby(['order'])[['absOrderSizeCumFilled', 'absOrderSize']]
     .max()
     .reset_index())
a.columns = ['order', 'filled', 'total']
print('in total trade, any fill != total cases')
notFullyFilled = a[a['filled'] != a['total']]
display(notFullyFilled)
if notFullyFilled.shape[0] > 0:
    # Orders whose fill does not match their size are removed entirely.
    removeOrderLs = notFullyFilled['order'].unique()
    orderLog = orderLog[~(orderLog['order'].isin(removeOrderLs))]
    
### Assertion 9:  status == 1 orders must be filled strictly between 0 and their size
print('=======================================================================================')
print('9. status == 1: partial traded')
# Per order: largest cumulative fill and largest order size seen in its rows.
a = (orderLog[orderLog['status'] == 1]
     .groupby(['order'])[['absOrderSizeCumFilled', 'absOrderSize']]
     .max()
     .reset_index())
a.columns = ['order', 'filled', 'total']
print('in partial trade, any fill >= total or fill is 0 cases for updateType 4')
notPartial = a[(a['filled'] >= a['total']) | (a['filled'] == 0)]
display(notPartial)
if notPartial.shape[0] > 0:
    # Orders that are fully filled or not filled at all are removed entirely.
    removeOrderLs = notPartial['order'].unique()
    orderLog = orderLog[~(orderLog['order'].isin(removeOrderLs))]
    
### Assertion 10: no updateType == 1 row with duration below 1e6
print('=======================================================================================')
print('10. no cancellation within 1 sec')
isFastCancel = (orderLog['updateType'] == 1) & (orderLog['duration'] < 1e6)
a = orderLog[isFastCancel]
print('any cancellation within 1 sec')
display(a)
if a.shape[0] > 0:
    # Remove every order that has at least one such row.
    removeOrderLs = a['order'].unique()
    orderLog = orderLog[~(orderLog['order'].isin(removeOrderLs))]


### Assertion 11: flag and remove orders with absOrderSize > 800000 or notional > 8000000
print('=======================================================================================')
print('11. Orders with size > 80w or notional > 800w')
# Notional = order size * order price.
orderLog['orderNtl'] = orderLog['absOrderSize'] * orderLog['orderPrice']

oversized = orderLog[orderLog['absOrderSize'] > 800000]
if oversized.shape[0] > 0:
    print('some order quantity are > 80w')
    print(oversized.groupby(['colo', 'accCode'])['order'].nunique())
    sizeReportCols = ['date', 'accCode', 'secid', 'vai', 'absOrderSize', 'orderPrice',
                      'orderNtl', 'orderDirection', 'clock', 'order']
    display(oversized[sizeReportCols])

overNotional = orderLog[orderLog['orderNtl'] > 8000000]
if overNotional.shape[0] > 0:
    print('some order ntl are > 800w')
    print(overNotional.groupby(['colo', 'accCode'])['order'].nunique())
    ntlReportCols = ['date', 'accCode', 'secid', 'vai', 'absOrderSize', 'orderPrice',
                     'orderNtl', 'orderDirection', 'clock', 'order', "updateType",
                     "tradePrice", "absOrderSizeCumFilled", "absFilledThisUpdate"]
    display(overNotional[ntlReportCols])

# Remove every order that breaks either limit.
removeOrderLs = list(set(oversized['order'].unique()) | set(overNotional['order'].unique()))
orderLog = orderLog[~(orderLog['order'].isin(removeOrderLs))]


# Restore the canonical row order after all the filtering above.
sortKeys = ['date', 'secid', 'vai', 'accCode', 'clockAtArrival']
orderLog = orderLog.sort_values(by=sortKeys).reset_index(drop=True)

# secid >= 2000000 is labelled SZE, everything else SSE.
isSZE = orderLog['secid'] >= 2000000
orderLog['exchange'] = np.where(isSZE, 'SZE', 'SSE')

# Order notional on every row; traded notional only on updateType == 4 rows (0 elsewhere).
orderLog['orderNtl'] = orderLog['orderPrice'] * orderLog['absOrderSize']
isType4 = orderLog['updateType'] == 4
orderLog['tradeNtl'] = np.where(isType4, orderLog['tradePrice']*orderLog['absFilledThisUpdate'], 0)

# Give every row of an order the ars value of the order's first row, then
# derive the two-valued `sta` label from it.
orderLog["ars"] = orderLog.groupby(['order'])['ars'].transform('first')
staTwoArs = [121, 221, 321, 131, 231, 331]
orderLog['sta'] = np.where(orderLog['ars'].isin(staTwoArs), 'statwo', 'staone')

1. same date, secid, vai: same direction
opposite direction for same date, same secid, same vai


Unnamed: 0,date,accCode,secid,vai,orderDirection,order
298962,20200907,8854,2300458,12845023,1,32247
298965,20200907,9685,2300458,12845023,-1,47954
463762,20200908,5474,2002189,3765492,-1,100388
463765,20200908,6480,2002189,3765492,-1,104253
463768,20200908,9741,2002189,3765492,1,120571
463772,20200908,9756,2002189,3765492,1,124084
898714,20200910,522201,1600316,13234588,-1,265619
898717,20200910,527301,1600316,13234588,-1,269010
898720,20200910,966301,1600316,13234588,1,282833
960825,20200910,9248,1603088,2758028,1,249773


2. same date, secid, vai, accCode: one insertion
more than one insertion at same time


Unnamed: 0,date,accCode,secid,vai,order,clockAtArrival
0,20200907,6683,2000710,424956,29051,2
1,20200907,6683,2300572,871098,30976,2
2,20200907,9741,1603286,859700,48897,2
3,20200907,9741,1603507,593112,49043,2
4,20200907,9741,2000635,3010600,49632,2
5,20200907,9741,2002293,2383650,49926,2
6,20200907,9741,2002820,1118660,50370,2
7,20200907,9741,2300112,14904758,50796,2
8,20200907,9741,2300391,8035250,51057,2
9,20200907,9741,2300757,748970,52150,2


98.11% SZE orders triggered by msg data
3. IPO stocks selling (ars = 301, 302)
4. updateType 7 orders
5. updateType 6 orders
There are accounts with more than 5% updateType 6 orders!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   accCode  order_x  order_y      prob
4     9551      392     5013  0.078197
6. CYB stocks total order size < 30w
7. unexpected updateType
8. status == 0: all traded
in total trade, any fill != total cases


Unnamed: 0,order,filled,total
150,184,2100,6600
922,1186,900,1000
21414,26170,600,700
21731,27139,100,117
22032,27880,500,2400
...,...,...,...
263210,347425,4200,4900
265259,350348,12000,13600
266234,351770,600,2800
267558,353655,200,700


9. status == 1: partial traded
in partial trade, any fill >= total or fill is 0 cases for updateType 4


Unnamed: 0,order,filled,total


10. no cancellation within 1 sec
any cancellation within 1 sec


Unnamed: 0.1,Unnamed: 0,clockAtArrival,caamd,secid,updateType,vai,ars,absFilledThisUpdate,orderDirection,absOrderSize,absOrderSizeCumFilled,orderPrice,tradePrice,date,accCode,mse,colo,orderSysId,internalId,tradeId,sdd,aaa,ApplSeqNum,mrm,mta,mrsb,mrss,mrv,mrb100,mra100,l4tr,clock,broker,colo_broker,order,group,startClock,duration,orderDirection1,directNum,isMsg,status


11. Orders with size > 80w or notional > 800w


In [115]:
# checkLog = orderLog[(orderLog["updateType"] == 0) & (((orderLog['date'] >= 20200910) & (orderLog['colo'].isin(['zt_52_04'])) & (orderLog['accCode'].isin([522201, 527301, 526901, 528401]))) | 
#                                                     ((orderLog['date'] >= 20200909) & (orderLog['colo'].isin(['zt_52_10'])) & (orderLog['accCode'].isin([528901, 529001, 529101]))) |
#                                                     ((orderLog['date'] >= 20200910) & (orderLog['colo'].isin(['zs_52_08'])) & (orderLog['accCode'].isin([5281, 5284, 5377, 5276]))) |
#                                                     ((orderLog['date'] >= 20200910) & (orderLog['colo'].isin(['zs_52_08'])) & (orderLog['accCode'].isin([5281, 5284, 5377, 5276]))) |
#                                                     ((orderLog['date'] >= 20200910) & (orderLog['colo'].isin(['zs_52_09'])) & (orderLog['accCode'].isin([5290, 5289, 5291]))) |
#                                                     ((orderLog['date'] >= 20200909) & (orderLog['colo'].isin(['zs_66_01'])) & (orderLog['accCode'].isin([6683, 6678]))) |
#                                                     ((orderLog['date'] >= 20200910) & (orderLog['colo'].isin(['zs_88_04'])) & (orderLog['accCode'].isin([8865, 896702, 8967]))) )]
# checkLog = orderLog[(orderLog["updateType"] == 0) & (((orderLog['date'] < 20200910) & (orderLog['colo'].isin(['zt_52_04']))) | 
#                                                     ((orderLog['date'] < 20200909) & (orderLog['colo'].isin(['zt_52_10']))) |
#                                                     ((orderLog['date'] < 20200910) & (orderLog['colo'].isin(['zs_52_08']))) |
#                                                     ((orderLog['date'] < 20200910) & (orderLog['colo'].isin(['zs_52_08']))) |
#                                                     ((orderLog['date'] < 20200910) & (orderLog['colo'].isin(['zs_52_09']))) |
#                                                     ((orderLog['date'] < 20200909) & (orderLog['colo'].isin(['zs_66_01']))) |
#                                                     ((orderLog['date'] < 20200910) & (orderLog['colo'].isin(['zs_88_04']))) )]
def summarizeLatency(log):
    """Summarise ``internal_latency`` per (colo, colo_broker, exchange).

    ``log`` must provide the columns colo, colo_broker, exchange, date and
    internal_latency. The returned frame has one row per key combination with:

    * ``count``         -- number of rows,
    * ``95 percentile`` -- mean over dates of the daily 95th percentile,
    * ``median``        -- mean over dates of the daily median,
    * ``date``          -- number of distinct dates,
    * ``std``           -- standard deviation over dates of the daily
                           95th percentile (NaN when there is a single date).
    """
    keys = ['colo', 'colo_broker', 'exchange']
    dailyLatency = log.groupby(keys + ['date'])['internal_latency']
    dailyP95 = dailyLatency.quantile(.95).groupby(level=keys)
    dailyMedian = dailyLatency.median().groupby(level=keys)
    perKey = log.groupby(keys)
    # All five Series share the same (colo, colo_broker, exchange) index, so
    # building the frame from a dict lines them up without chained merges.
    return pd.DataFrame({
        'count': perKey['internal_latency'].count(),
        '95 percentile': dailyP95.mean(),
        'median': dailyMedian.mean(),
        'date': perKey['date'].nunique(),
        'std': dailyP95.std(),
    }).reset_index()


# Insertion rows (updateType == 0) that carry a non-zero caamd.
checkLog = orderLog[(orderLog["updateType"] == 0)]
# .copy() detaches the selection so the new column below is not written onto a
# slice of orderLog (this is what produced the SettingWithCopyWarning).
checkLog = checkLog[checkLog['caamd'] != 0].copy()
# Internal latency = clockAtArrival - caamd
# (presumably both are microsecond timestamps -- TODO confirm).
checkLog['internal_latency'] = checkLog["clockAtArrival"] - checkLog["caamd"]
checkLog = checkLog[checkLog['sta'] == 'statwo']

# Split by exchange and relabel with the short codes used in the report;
# assign() returns new frames instead of mutating slices of checkLog.
SZE = checkLog[checkLog['secid'] >= 2000000].assign(exchange="SZ")
SSE = checkLog[checkLog['secid'] < 2000000].assign(exchange="SH")

# One shared helper replaces the two copy-pasted aggregation pipelines.
re2 = summarizeLatency(SSE)
re1 = summarizeLatency(SZE)


# re1 = pd.merge(re1[re1["isMsg"] == 1], re1[re1["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_x").reset_index(drop=True)
# re2 = pd.merge(re2[re2["isMsg"] == 1], re2[re2["isMsg"] == 0], on=["exchange", "colo_account"], how="outer").sort_values(by="median_y").reset_index(drop=True)
# Imported before first use in this cell (it used to sit in the middle of it).
from IPython.display import display, HTML

# Stack the SZ (re1) and SH (re2) summaries.
# NOTE(review): the name `re` shadows the stdlib `re` module; it is kept
# because later cells read this variable.
re = pd.concat([re1, re2]).reset_index(drop=True)

# Report formatting: integer latencies, std as a 2-decimal string.
for col in ['median', '95 percentile']:
    re[col] = re[col].astype(int)
for col in ['std']:
    re[col] = re[col].apply(lambda x: '%.2f'%(x))

# Number of distinct stocks per (exchange, colo_broker, colo). checkLog still
# carries the 'SSE' / 'SZE' labels, so map them to 'SH' / 'SZ' before joining.
add = checkLog.groupby(['exchange', 'colo_broker', 'colo'])['secid'].nunique().reset_index()
add = add.rename(columns={'secid':'# of stocks'})
add['exchange'] = np.where(add['exchange'] == 'SSE', 'SH', 'SZ')
re = pd.merge(re, add, on=['exchange', 'colo_broker', 'colo'])

# Select the columns with a *list*: selecting several columns from a GroupBy
# with a bare tuple of names is deprecated and rejected by pandas 2.x.
reportCols = ["# of stocks", "count", "median", "95 percentile", "std"]
display(HTML(re.groupby(['exchange', 'colo_broker', "colo"])[reportCols].first().to_html()))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,# of stocks,count,median,95 percentile,std
exchange,colo_broker,colo,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
SH,zs_62,zs_96_08,144,2161,106,191,10.86
SH,zs_97,zs_96_06,637,4740,98,181,11.22
SH,zs_97,zs_96_08,666,6452,109,204,10.72
SH,zt_52,zt_52_04,531,16824,79,157,10.1
SH,zt_52,zt_52_05,980,15284,96,164,4.71
SH,zt_52,zt_52_07,791,10427,83,150,3.9
SH,zt_52,zt_52_10,733,15650,78,153,19.89
SH,zt_53,zt_52_05,183,1942,97,166,5.16
SH,zt_53,zt_52_07,117,842,79,146,8.88
SH,zt_70,zt_70_01,158,827,65,149,11.88


In [27]:
# Display the `add` frame (stock counts per exchange / colo_broker / colo) built in another cell.
add

Unnamed: 0,exchange,colo_broker,colo,# of stocks
0,SH,zs_62,zs_94_04,143
1,SH,zs_62,zs_96_08,144
2,SH,zs_97,zs_96_06,660
3,SH,zs_97,zs_96_08,704
4,SH,zt_52,zt_52_01,121
5,SH,zt_52,zt_52_04,540
6,SH,zt_52,zt_52_05,991
7,SH,zt_52,zt_52_07,804
8,SH,zt_52,zt_52_10,744
9,SH,zt_53,zt_52_05,184


In [46]:
# Distinct stocks per (exchange, colo_broker, colo), counted over all of orderLog.
stocksPerColo = orderLog.groupby(['exchange', 'colo_broker', 'colo'])['secid'].nunique()
add = stocksPerColo.reset_index(name='# of stocks')
add

Unnamed: 0,exchange,colo_broker,colo,# of stocks
0,SSE,zs_62,zs_94_04,143
1,SSE,zs_62,zs_96_08,144
2,SSE,zs_97,zs_96_06,660
3,SSE,zs_97,zs_96_08,704
4,SSE,zt_52,zt_52_01,121
5,SSE,zt_52,zt_52_04,540
6,SSE,zt_52,zt_52_05,991
7,SSE,zt_52,zt_52_07,804
8,SSE,zt_52,zt_52_10,744
9,SSE,zt_53,zt_52_05,184


In [12]:
# Accounts seen on colo zs_66_01, listed per date and `sta` label.
(orderLog
 .loc[orderLog['colo'].eq('zs_66_01')]
 .groupby(['date', 'sta'])['accCode']
 .unique())

date      sta   
20200907  staone                [6683, 6678]
          statwo                [6683, 6678]
20200908  staone                [6683, 6678]
          statwo                [6683, 6678]
20200909  staone          [6683, 6678, 6623]
          statwo          [6683, 6623, 6678]
20200910  staone          [6683, 6678, 6623]
          statwo          [6683, 6623, 6678]
20200911  staone    [6683, 6678, 6627, 6623]
          statwo    [6623, 6683, 6627, 6678]
Name: accCode, dtype: object

In [67]:
# Snapshot the current latency summary as the "previous" baseline.
# The snapshot is derived from `re` without overwriting it: the old in-place
# reassignment renamed the columns of `re`, so this cell could not be re-run
# and the next cell's selection of '# of stocks' / '95 percentile' / 'median'
# from `re` failed on a straight top-to-bottom run.
result1 = re[['colo', 'exchange', '# of stocks', '95 percentile', 'median']].rename(
    columns={"95 percentile":"prev_95p", "median":"prev_med", "# of stocks":"prev_numOfStocks"})
result1

Unnamed: 0,colo,exchange,prev_numOfStocks,prev_95p,prev_med
0,zs_52_08,SZ,322,88,52
1,zs_52_09,SZ,999,72,48
2,zs_66_01,SZ,602,69,48
3,zs_88_04,SZ,482,66,49
4,zt_52_04,SH,479,149,76
5,zt_52_10,SH,572,160,81


In [69]:
# Put the current latency summary (`re`) next to the previously saved baseline (`result1`).
# `re` is left untouched, and only the prev_* columns of `result1` take part in
# the merge, so the cell can be re-run: before, a second run merged an
# already-merged frame, produced _x/_y suffixed columns and raised KeyError.
curStats = re[['colo', 'exchange', '# of stocks', '95 percentile', 'median']].rename(
    columns={"95 percentile":"cur_95p", "median":"cur_med", "# of stocks":"cur_numOfStocks"})
prevCols = ['colo', 'exchange', 'prev_numOfStocks', 'prev_95p', 'prev_med']
result1 = pd.merge(curStats, result1[prevCols], on=['colo', 'exchange'])
result1 = result1[['colo', 'exchange', 'prev_numOfStocks', 'cur_numOfStocks', 'prev_med', 'cur_med', 'prev_95p', 'cur_95p']]
result1

Unnamed: 0,colo,exchange,prev_numOfStocks,cur_numOfStocks,prev_med,cur_med,prev_95p,cur_95p
0,zs_52_08,SZ,322,302,52,56,88,94
1,zs_52_09,SZ,999,974,48,50,72,81
2,zs_66_01,SZ,602,661,48,65,69,103
3,zs_88_04,SZ,482,448,49,49,66,71
4,zt_52_04,SH,479,483,76,84,149,168
5,zt_52_10,SH,572,693,81,77,160,149


In [70]:
# Render the prev/cur comparison as an HTML table indexed by (colo, exchange).
# The columns are selected with a *list*: selecting several columns from a
# GroupBy with a bare tuple of names is deprecated and rejected by pandas 2.x.
comparisonCols = ['prev_numOfStocks', 'cur_numOfStocks', 'prev_med', 'cur_med', 'prev_95p', 'cur_95p']
HTML(result1.groupby(['colo', 'exchange'])[comparisonCols].first().to_html())

Unnamed: 0_level_0,Unnamed: 1_level_0,prev_numOfStocks,cur_numOfStocks,prev_med,cur_med,prev_95p,cur_95p
colo,exchange,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
zs_52_08,SZ,322,302,52,56,88,94
zs_52_09,SZ,999,974,48,50,72,81
zs_66_01,SZ,602,661,48,65,69,103
zs_88_04,SZ,482,448,49,49,66,71
zt_52_04,SH,479,483,76,84,149,168
zt_52_10,SH,572,693,81,77,160,149


In [79]:
# Fully-qualified key: the bare 'max_rows' pattern is ambiguous on pandas
# versions that also define "styler.render.max_rows" and raises OptionError there.
pd.set_option('display.max_rows', 400)
# Distinct stocks with a 'statwo' insertion (updateType == 0) on the listed colos, per exchange and colo.
orderLog[(orderLog['updateType'] == 0) & (orderLog['sta'] == 'statwo') & (orderLog['colo'].isin(['zs_52_08', 'zs_52_09', 'zs_66_01', 'zs_88_04', 'zt_52_04', 'zt_52_10']))].groupby(['exchange','colo'])['secid'].nunique()

exchange  colo    
SSE       zt_52_04     531
          zt_52_10     733
SZE       zs_52_08     345
          zs_52_09    1138
          zs_66_01     760
          zs_88_04     563
Name: secid, dtype: int64

In [108]:
# Accounts with an insertion (updateType == 0) on colo zt_52_10, listed per date.
(orderLog
 .loc[orderLog['updateType'].eq(0) & orderLog['colo'].isin(['zt_52_10'])]
 .groupby(['date'])['accCode']
 .unique())

date
20200907                    [529001, 529101, 528901]
20200908                    [528901, 529001, 529101]
20200909            [528901, 522401, 529101, 529001]
20200910    [522401, 528901, 529101, 529001, 522501]
20200911    [528901, 529101, 529001, 522401, 522501]
Name: accCode, dtype: object

In [92]:
# Distinct stocks per date for 'statwo' insertions on colo zt_52_10, from 20200909 onwards.
(orderLog
 .loc[orderLog['updateType'].eq(0)
      & orderLog['sta'].eq('statwo')
      & orderLog['colo'].isin(['zt_52_10'])
      & orderLog['date'].ge(20200909)]
 .groupby(['date'])['secid']
 .nunique())

date
20200909    487
20200910    566
20200911    498
Name: secid, dtype: int64

In [103]:
# Mean over dates of the daily median internal latency on colo zt_52_10.
(checkLog
 .loc[checkLog['colo'].eq('zt_52_10')]
 .groupby(['date'])['internal_latency']
 .median()
 .mean())

81.0