In [13]:
import os
import glob
import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Display limits for DataFrame reprs. The fully-qualified option names are used
# because a short pattern such as 'max_rows' can match more than one registered
# pandas option, in which case set_option raises OptionError.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)

# Percentile grid; not used in the cells shown here
# (presumably intended for describe(percentiles=perc) - confirm before removing).
perc = [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]

In [14]:
# Inclusive date window of the daily log files to load (YYYYMMDD strings, so
# lexicographic comparison is also chronological comparison).
startDate = '20201102'
endDate = '20201110'


readPath = r'\\192.168.10.34\random_backup\Kevin_zhenyu\orderLog\equityTradeLogs'
# glob returns files in arbitrary order; sort so the load order is reproducible.
dataPathLs = np.array(sorted(glob.glob(os.path.join(readPath, 'speedCompare***.csv'))), dtype=str)
# The date is the token between the first '_' and the extension of the file name.
dateLs = np.array([os.path.basename(i).split('_')[1].split('.')[0] for i in dataPathLs], dtype=str)

# Apply the window to BOTH arrays so dateLs[k] keeps describing dataPathLs[k].
inWindow = (dateLs >= startDate) & (dateLs <= endDate)
dataPathLs = dataPathLs[inWindow]
dateLs = dateLs[inWindow]
if len(dataPathLs) == 0:
    # Fail with an actionable message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError('no speedCompare files dated %s..%s under %s' % (startDate, endDate, readPath))

# Files are read one per day; 'mdClockAtArrival' is renamed so every frame uses
# the 'caamd' column name that the rest of the notebook expects.
rawOrderLog = pd.concat(
    [pd.read_csv(thisPath).rename(columns={'mdClockAtArrival': 'caamd'}) for thisPath in dataPathLs],
    sort=False)

# Columns used as integer keys / quantities below; missing values become 0 so
# the int64 cast cannot fail.
intCols = ['clockAtArrival', 'caamd', 'secid', 'updateType', 'vai', 'absFilledThisUpdate', 'orderDirection', 'absOrderSize',
           'absOrderSizeCumFilled', 'date', 'accCode', 'mse']
for col in intCols:
    rawOrderLog[col] = rawOrderLog[col].fillna(0).astype('int64')

# Keep secid >= 1000000 only, then order rows so that the updates of one order
# (date, secid, vai, accCode) are contiguous and chronological. Filtering before
# sorting yields the same row order and leaves a clean 0..n-1 index.
rawOrderLog = (rawOrderLog[rawOrderLog['secid'] >= 1000000]
               .sort_values(by=['date', 'secid', 'vai', 'accCode', 'clockAtArrival'])
               .reset_index(drop=True))

# clockAtArrival is divided by 1e6 before conversion, i.e. it is treated as
# microseconds since the epoch. fromtimestamp converts to the machine's LOCAL time zone.
rawOrderLog['clock'] = rawOrderLog['clockAtArrival'].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))

# Broker id = leading two digits of accCode: six-digit codes drop four trailing
# digits, every other code drops two.
accCodeDigits = rawOrderLog['accCode'].astype(str).str.len()
rawOrderLog['broker'] = np.where(accCodeDigits == 6, rawOrderLog['accCode'] // 10000, rawOrderLog['accCode'] // 100)
rawOrderLog['colo_broker'] = rawOrderLog['colo'].str[:2] + '_' + rawOrderLog['broker'].astype('str')

# Dense integer ids: 'order' identifies one account's order for a signal,
# 'group' identifies the signal itself across accounts. ngroup() is the public
# replacement for the private grouper.group_info[0] codes.
rawOrderLog['order'] = rawOrderLog.groupby(['date', 'accCode', 'secid', 'vai']).ngroup()
rawOrderLog['group'] = rawOrderLog.groupby(['date', 'secid', 'vai']).ngroup()

# Elapsed time of every update relative to the first update seen for its order.
rawOrderLog['startClock'] = rawOrderLog.groupby(['order'])['clockAtArrival'].transform('first')
rawOrderLog['duration'] = rawOrderLog['clockAtArrival'] - rawOrderLog['startClock']

# Prices are rounded to two decimals to remove float noise.
rawOrderLog['orderPrice'] = rawOrderLog['orderPrice'].round(2)
rawOrderLog['tradePrice'] = rawOrderLog['tradePrice'].round(2)

# Collapse the +/-2 direction codes onto +/-1; every other value is kept as is.
rawOrderLog['orderDirection1'] = rawOrderLog['orderDirection'].replace({-2: -1, 2: 1})

# All checks below operate on (and shrink) orderLog; rawOrderLog stays intact.
orderLog = rawOrderLog.copy()

### Assertion 1: every (date, secid, vai) signal must trade in one direction only
print('=======================================================================================')
print('1. same date, secid, vai: same direction')
signalKeys = ['date', 'secid', 'vai']
# Number of distinct normalised directions observed for the signal of each row.
orderLog['directNum'] = orderLog.groupby(signalKeys)['orderDirection1'].transform('nunique')
mixedDirection = orderLog['directNum'] != 1
if mixedDirection.any():
    print('opposite direction for same date, same secid, same vai')
    # Show only the insertion rows (updateType == 0) of the offending signals.
    display(orderLog.loc[mixedDirection & (orderLog['updateType'] == 0),
                         ['date', 'accCode', 'secid', 'vai', 'orderDirection', 'order']])
    # Drop every row of a signal that mixes directions.
    orderLog = orderLog[~mixedDirection]

assert orderLog.groupby(signalKeys)['orderDirection1'].nunique().eq(1).all()

## Assertion 2: each (date, accCode, secid, vai) order has exactly one insertion row
print('=======================================================================================')
print('2. same date, secid, vai, accCode: one insertion')
# Count insertion rows (updateType == 0) per order.
insertCounts = orderLog[orderLog['updateType'] == 0].groupby(['date', 'accCode', 'secid', 'vai', 'order'])['clockAtArrival'].count()
a = insertCounts[insertCounts > 1].reset_index()
if len(a) > 0:
    print('more than one insertion at same time')
    display(a)
    # Orders with duplicated insertions are excluded from all later checks.
    orderLog = orderLog[~(orderLog['order'].isin(a['order'].unique()))]

# isMsg is defined on the insertion row (1 when mse == 100, else 0) and then
# carried forward to the later updates of the same order.
orderLog['isMsg'] = np.where(orderLog['updateType'] == 0,
                             np.where(orderLog['mse'] == 100, 1, 0), np.nan)
orderLog['isMsg'] = orderLog.groupby(['order'])['isMsg'].ffill()

# Share of SZE insertions (secid >= 2000000) flagged as msg-triggered.
placeSZE = orderLog[(orderLog['secid'] >= 2000000) & (orderLog['updateType'] == 0)]
if placeSZE.shape[0] > 0:
    print('%.2f%% SZE orders triggered by msg data'%(placeSZE[placeSZE['isMsg'] == 1].shape[0]/placeSZE.shape[0]*100))
else:
    # Without this guard an empty selection raises ZeroDivisionError and stops the cell.
    print('no SZE insertions in the selected window')


### Assertion 3: check IPO stocks selling status (rows with ars 301 / 302)
print('=======================================================================================')
print('3. IPO stocks selling (ars = 301, 302)')
kk = orderLog[orderLog['ars'].isin([301, 302])]
if kk.shape[0] != 0:
    print(kk)

    # (a) There must be no buy-side rows (orderDirection1 == 1).
    ipoBuys = kk[kk['orderDirection1'] == 1]
    if ipoBuys.shape[0] == 0:
        print('we only sell, never buy')
    else:
        print('There are IPO buy side orders!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        print(ipoBuys)

    # (b) Consecutive insertions of the same account and stock must be at least
    # 10 seconds (10e6 microseconds) apart. diff() leaves NaN on the first
    # insertion of each group; NaN < 10e6 is False, so a first insertion is never
    # reported. (Filling the gap with 0 would flag every first insertion.)
    kk1 = kk[kk['updateType'] == 0].sort_values(by=['accCode', 'secid', 'clockAtArrival']).copy()
    kk1['diff'] = kk1.groupby(['accCode', 'secid'])['clockAtArrival'].diff()
    tooClose = kk1[kk1['diff'] < 10e6]
    if tooClose.shape[0] == 0:
        print('for each stock in the same account, there is no insertion within 10 seconds of the previous insertion')
    else:
        print('There are insertion within 10 seconds for orders under same account same stock!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        print(tooClose)

    # (c) Cancellation rows (updateType == 1) must come at least 3 seconds after
    # the first update of their order.
    kk2 = kk[(kk['updateType'] == 1)]
    tooFast = kk2[kk2['duration'] < 3e6]
    if tooFast.shape[0] == 0:
        print('for each stock in the same account, the cancellation of an order happens more than 3 seconds after the insertion')
    else:
        print('There are cancellation within 3 seconds for orders under same account same stock!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
        print(tooFast)


### Assertion 4: orders with an updateType == 7 row: fewer than 20 per account, fewer than 100 overall
print('=======================================================================================')
print('4. updateType 7 orders')
type7Rows = orderLog[orderLog['updateType'] == 7]
if type7Rows.shape[0] != 0:
    # Distinct affected orders per account.
    type7PerAccount = type7Rows.groupby('accCode')['order'].nunique()
    assert type7PerAccount.max() < 20
    assert type7PerAccount.sum() < 100

### Assertion 5: orders with an updateType == 6 row must stay below 5% of each account's orders
print('=======================================================================================')
print('5. updateType 6 orders')
# k1: distinct orders with an updateType 6 row per account; k2: all distinct orders per account.
k1 = orderLog[orderLog['updateType'] == 6].groupby('accCode')['order'].nunique().reset_index()
k2 = orderLog.groupby('accCode')['order'].nunique().reset_index()
k = pd.merge(k1, k2, on='accCode', how='left')
k['prob'] = k['order_x']/k['order_y']
# Explicit check instead of assert inside a bare except, which would also have
# reported unrelated errors (e.g. a missing column) as a threshold breach.
overLimit = k[k['prob'] >= 0.05]
if len(overLimit) > 0:
    print('There are accounts with more than 5% updateType 6 orders!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
    print(overLimit)

### Assertion 6: check CYB orders (secid >= 2300000), absOrderSize of every insertion <= 30w
print('=======================================================================================')
print('6. CYB stocks total order size < 30w')
# NOTE: despite the word "total" in the messages, the check is on the size of
# each individual insertion row, exactly as the original max()-based assert was.
cybInsertSize = orderLog[(orderLog['secid'] >= 2300000) & (orderLog['updateType'] == 0)]['absOrderSize']
# any() is False for an empty selection, so a window without CYB insertions no
# longer raises a false alarm (max() of an empty Series is NaN, and NaN <= 300000 is False).
if (cybInsertSize > 300000).any():
    print('CYB stocks total absOrderSize >= 30w!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!')
    
     
### Assertion 7: classify each order by its sequence of updateType values and
### keep only orders whose sequence is one of the expected patterns
print('=======================================================================================')
print('7. unexpected updateType')
def getTuple(x):
    """Return the items of iterable ``x``, in iteration order, as a tuple."""
    return tuple(x)

# Collapse runs of consecutive updateType 4 rows inside an order: a 4 that is
# immediately followed by another 4 is dropped, so only the last 4 of a run remains.
nextUpdateType = orderLog.groupby(['order'])['updateType'].shift(-1)
repeatedFill = (orderLog['updateType'] == 4) & (nextUpdateType == 4)
checkLog = orderLog[~repeatedFill]
# One row per order holding the tuple of its remaining updateType values.
checkLog = checkLog.groupby(['order'])['updateType'].apply(getTuple).reset_index()

# Accepted sequences, keyed by the status code they map to (0, 1, 2).
status0Seqs = [(0, 2, 4), (0, 2, 1, 4), (0, 2, 1, 2, 4), (0, 2, 4, 1, 4), (0, 4), (0, 1, 4), (0, 4, 1, 4),
               (0, 2, 2, 4), (0, 4, 2, 4), (0, 2, 2, 1, 4), (0, 2, 2, 4, 1, 4)]
status1Seqs = [(0, 2, 4, 1, 3), (0, 2, 4, 1, 4, 3), (0, 2, 1, 4, 3), (0, 4, 1, 3), (0, 1, 4, 3),
               (0, 2, 2, 4, 1, 3), (0, 2, 2, 4, 1, 4, 3), (0, 2, 2, 1, 4, 3), (0, 4, 2, 4, 1, 3),
               (0, 4, 2, 1, 3), (0, 4, 1, 4, 3), (0, 4, 1)]
status2Seqs = [(0, 2, 1, 3), (0, 2, 2, 1, 3), (0, 2, 3), (0, 3), (0, 1, 3), (0, ), (0, 2), (0, 2, 1), (0, 2, 2)]

# First matching list wins; any other sequence gets status 3.
checkLog['status'] = np.select(
    [checkLog['updateType'].isin(status0Seqs),
     checkLog['updateType'].isin(status1Seqs),
     checkLog['updateType'].isin(status2Seqs)],
    [0, 1, 2],
    default=3)

# Attach the status to every row of the order and drop orders with status 3.
orderLog = pd.merge(orderLog, checkLog[['order', 'status']], how='left', on=['order'], validate='many_to_one')
orderLog = orderLog[orderLog['status'].isin([0, 1, 2])].reset_index(drop=True)

### Assertion 8: status == 0 orders must end with cumulative fill equal to order size
print('=======================================================================================')
print('8. status == 0: all traded')
# Per order: largest cumulative fill ('filled') and order size ('total').
a = (orderLog[orderLog['status'] == 0]
     .groupby(['order'])[['absOrderSizeCumFilled', 'absOrderSize']].max()
     .reset_index()
     .rename(columns={'absOrderSizeCumFilled': 'filled', 'absOrderSize': 'total'}))
print('in total trade, any fill != total cases')
notFullyFilled = a['filled'] != a['total']
display(a[notFullyFilled])
if notFullyFilled.any():
    # Exclude the inconsistent orders from everything that follows.
    removeOrderLs = a.loc[notFullyFilled, 'order'].unique()
    orderLog = orderLog[~orderLog['order'].isin(removeOrderLs)]
    
### Assertion 9: status == 1 orders must be partially filled (0 < filled < total)
print('=======================================================================================')
print('9. status == 1: partial traded')
# Per order: largest cumulative fill ('filled') and order size ('total').
a = (orderLog[orderLog['status'] == 1]
     .groupby(['order'])[['absOrderSizeCumFilled', 'absOrderSize']].max()
     .reset_index()
     .rename(columns={'absOrderSizeCumFilled': 'filled', 'absOrderSize': 'total'}))
print('in partial trade, any fill >= total or fill is 0 cases for updateType 4')
notPartial = (a['filled'] >= a['total']) | (a['filled'] == 0)
display(a[notPartial])
if notPartial.any():
    # Exclude the inconsistent orders from everything that follows.
    removeOrderLs = a.loc[notPartial, 'order'].unique()
    orderLog = orderLog[~orderLog['order'].isin(removeOrderLs)]
    
### Assertion 10: no cancellation row (updateType == 1) within 1 second (1e6) of the order's first update
print('=======================================================================================')
print('10. no cancellation within 1 sec')
fastCancel = (orderLog['updateType'] == 1) & (orderLog['duration'] < 1e6)
a = orderLog[fastCancel]
print('any cancellation within 1 sec')
display(a)
if len(a) > 0:
    # Remove every row of the orders that were cancelled too quickly.
    removeOrderLs = a['order'].unique()
    orderLog = orderLog[~orderLog['order'].isin(removeOrderLs)]


### Assertion 11: no order with absOrderSize > 800000 (80w) or notional > 8000000 (800w)
print('=======================================================================================')
print('11. Orders with size > 80w or notional > 800w')
orderLog['orderNtl'] = orderLog['absOrderSize'] * orderLog['orderPrice']
oversizedQty = orderLog['absOrderSize'] > 800000
oversizedNtl = orderLog['orderNtl'] > 8000000

if oversizedQty.any():
    print('some order quantity are > 80w')
    # Number of offending orders per (colo, account), then the offending rows.
    print(orderLog[oversizedQty].groupby(['colo', 'accCode'])['order'].nunique())
    display(orderLog.loc[oversizedQty, ['date', 'accCode', 'secid', 'vai', 'absOrderSize', 'orderPrice',
                                        'orderNtl', 'orderDirection', 'clock', 'order']])

if oversizedNtl.any():
    print('some order ntl are > 800w')
    print(orderLog[oversizedNtl].groupby(['colo', 'accCode'])['order'].nunique())
    display(orderLog.loc[oversizedNtl, ['date', 'accCode', 'secid', 'vai', 'absOrderSize', 'orderPrice',
                                        'orderNtl', 'orderDirection', 'clock', 'order', "updateType",
                                        "tradePrice", "absOrderSizeCumFilled", "absFilledThisUpdate"]])

# Drop every row of any order that breaches either limit.
removeOrderLs = list(set(orderLog.loc[oversizedQty, 'order'].unique())
                     | set(orderLog.loc[oversizedNtl, 'order'].unique()))
orderLog = orderLog[~orderLog['order'].isin(removeOrderLs)]


# Restore the canonical row order after the filters above.
sortKeys = ['date', 'secid', 'vai', 'accCode', 'clockAtArrival']
orderLog = orderLog.sort_values(by=sortKeys).reset_index(drop=True)

# Exchange label derived from secid: >= 2000000 is 'SZE', everything else 'SSE'.
isSZE = orderLog['secid'] >= 2000000
orderLog['exchange'] = np.where(isSZE, 'SZE', 'SSE')
orderLog['orderNtl'] = orderLog['orderPrice'] * orderLog['absOrderSize']
# Traded notional is only set on updateType 4 rows; every other row gets 0.
isFillRow = orderLog['updateType'] == 4
orderLog['tradeNtl'] = np.where(isFillRow, orderLog['tradePrice']*orderLog['absFilledThisUpdate'], 0)

# Propagate the first value of each field found within an order to all of its rows.
for col in ['mrstaat', 'ars', 'mrstauc']:
    orderLog[col] = orderLog.groupby(['order'])[col].transform('first')

# Label from mrstaat: 1000 -> 'staone', 3000 -> 'statwo', anything else -> 'sta300'.
orderLog['sta'] = np.select(
    [orderLog['mrstaat'] == 1000, orderLog['mrstaat'] == 3000],
    ['staone', 'statwo'],
    default='sta300')
orderLog

  interactivity=interactivity, compiler=compiler, result=result)


1. same date, secid, vai: same direction
opposite direction for same date, same secid, same vai


Unnamed: 0,date,accCode,secid,vai,orderDirection,order
5971,20201102,5328,1600050,-1,2,80861
5975,20201102,5328,1600050,-1,2,80861
5978,20201102,5328,1600050,-1,1,80861
5981,20201102,5328,1600050,-1,-2,80861
6765,20201102,6237,1600053,874196,-1,96348
...,...,...,...,...,...,...
6880074,20201110,8856,2300496,82518060,-1,1598346
6880077,20201110,8856,2300496,82518060,1,1598346
6880081,20201110,8856,2300496,82518060,1,1598346
6933189,20201110,8856,2300618,139745707,-1,1598349


2. same date, secid, vai, accCode: one insertion
more than one insertion at same time


Unnamed: 0,date,accCode,secid,vai,order,clockAtArrival
0,20201102,5222,2000526,511315,232,2
1,20201102,5226,2000526,511315,15832,2
2,20201102,5275,2002095,1840594,50778,3
3,20201102,5275,2002291,490300,51260,2
4,20201102,5275,2002832,6179485,53115,2
...,...,...,...,...,...,...
431,20201110,9758,2300653,173582,1655877,2
432,20201110,9758,2300653,655296,1655899,2
433,20201110,9758,2300755,2081346,1656201,2
434,20201110,966301,2300357,1687600,1705670,2


99.74% SZE orders triggered by msg data
3. IPO stocks selling (ars = 301, 302)
4. updateType 7 orders
5. updateType 6 orders
6. CYB stocks total order size < 30w
7. unexpected updateType
8. status == 0: all traded
in total trade, any fill != total cases


Unnamed: 0,order,filled,total
19854,28065,1500,7000
28501,40470,9300,11600
37067,52579,4000,5700
38601,54970,1300,3400
59418,82636,1200,2400
...,...,...,...
1223991,1691682,2000,10600
1227002,1695979,600,2800
1229614,1699730,3000,17700
1234916,1707050,300,1800


9. status == 1: partial traded
in partial trade, any fill >= total or fill is 0 cases for updateType 4


Unnamed: 0,order,filled,total


10. no cancellation within 1 sec
any cancellation within 1 sec


Unnamed: 0.1,Unnamed: 0,date,clockAtArrival,caamd,colo,accCode,secid,vai,sdd,orderDirection,absOrderSize,updateType,tradePrice,absFilledThisUpdate,absOrderSizeCumFilled,tradeId,orderSysId,aaa,internalId,ars,mse,mta,sequenceNo,mrstauc,mrss300,cfe,finalState,ApplSeqNum,mra100,mrstaat,hee,mt,session,ms,mrb100,mrsb90,orderPrice,threadId,mrsb300,clock,mrss90,underlyingIndex,mrm,l4tr,orderId,mfe,cancellationPending,mrstaum,mrrlma,broker,colo_broker,order,group,startClock,duration,orderDirection1,directNum,isMsg,status


11. Orders with size > 80w or notional > 800w


Unnamed: 0.1,Unnamed: 0,date,clockAtArrival,caamd,colo,accCode,secid,vai,sdd,orderDirection,absOrderSize,updateType,tradePrice,absFilledThisUpdate,absOrderSizeCumFilled,tradeId,orderSysId,aaa,internalId,ars,mse,mta,sequenceNo,mrstauc,mrss300,cfe,finalState,ApplSeqNum,mra100,mrstaat,hee,mt,session,ms,mrb100,mrsb90,orderPrice,threadId,mrsb300,clock,mrss90,underlyingIndex,mrm,l4tr,orderId,mfe,cancellationPending,mrstaum,mrrlma,broker,colo_broker,order,group,startClock,duration,orderDirection1,directNum,isMsg,status,orderNtl,exchange,tradeNtl,sta
0,528907,20201102,1604281097219002,1604281097218806,zt_88_03,8970,1600004,1957600,93814000.0,-2,300,0,-1.00,0,0,,,0.000527,542.0,1.0,100,-0.003100,23456207.0,3000.0,-1.0,8.510437e+06,0.0,299743.0,1227.0,3000.0,0.000163,2.0,0,09:38:17.218419,1225.0,-0.002213,12.25,8671.0,-1,2020-11-02 09:38:17.219002,0.000527,300,-0.003100,0.0,-1.0,29286125.48,0.0,0.0,,89,zt_89,149997,0,1604281097219002,0,-1,1,1.0,0,3675.0,SSE,0.0,statwo
1,528908,20201102,1604281097450895,1604281097218806,zt_88_03,8970,1600004,1957600,-1.0,-2,300,2,-1.00,0,0,,0051001923,-1,542.0,1.0,0,-1.000000,23456207.0,3000.0,-1.0,8.510437e+06,0.0,0.0,1227.0,3000.0,-1.000000,-1.0,0,09:38:17.449419,1225.0,-0.002213,12.25,8910.0,-1,2020-11-02 09:38:17.450895,0.000527,300,-0.003100,0.0,542.0,29286125.48,0.0,0.0,,89,zt_89,149997,0,1604281097219002,231893,-1,1,1.0,0,3675.0,SSE,0.0,statwo
2,528909,20201102,1604281097545067,1604281097218806,zt_88_03,8970,1600004,1957600,-1.0,-2,300,4,12.25,300,300,2.09713e+06,,-1,542.0,1.0,0,-1.000000,23456207.0,3000.0,-1.0,8.510437e+06,1.0,0.0,1227.0,3000.0,-1.000000,-1.0,0,09:38:17.544419,1225.0,-0.002213,12.25,8910.0,-1,2020-11-02 09:38:17.545067,0.000527,300,-0.003100,0.0,542.0,29286125.48,0.0,0.0,,89,zt_89,149997,0,1604281097219002,326065,-1,1,1.0,0,3675.0,SSE,3675.0,statwo
3,87547,20201102,1604281097218965,1604281097218806,zt_88_03,8971,1600004,1957600,93814000.0,-2,200,0,-1.00,0,0,,,0.000527,466.0,1.0,100,-0.003100,23456207.0,3000.0,-1.0,1.005016e+07,0.0,299743.0,1227.0,3000.0,0.000199,2.0,0,09:38:17.218419,1225.0,-0.002213,12.25,8741.0,-1,2020-11-02 09:38:17.218965,0.000527,300,-0.003100,0.0,-1.0,38022223.81,0.0,0.0,,89,zt_89,154811,0,1604281097218965,0,-1,1,1.0,0,2450.0,SSE,0.0,statwo
4,87548,20201102,1604281097451378,1604281097218806,zt_88_03,8971,1600004,1957600,-1.0,-2,200,2,-1.00,0,0,,0051001922,-1,466.0,1.0,0,-1.000000,23456207.0,3000.0,-1.0,1.005016e+07,0.0,0.0,1227.0,3000.0,-1.000000,-1.0,0,09:38:17.450419,1225.0,-0.002213,12.25,8996.0,-1,2020-11-02 09:38:17.451378,0.000527,300,-0.003100,0.0,466.0,38022223.81,0.0,0.0,,89,zt_89,154811,0,1604281097218965,232413,-1,1,1.0,0,2450.0,SSE,0.0,statwo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7011389,831394,20201110,1604989390374564,1604989390374540,zs_88_04,8865,2300815,1409591,51784.0,1,200,1,-1.00,0,0,,AE22008752,-1,1649.0,1.0,100,-1.000000,249324036.0,1000.0,-1.0,2.272040e+06,0.0,0.0,10299.0,3000.0,-1.000000,-1.0,0,14:23:10.374072,10288.0,-1.000000,102.87,120380.0,-1,2020-11-10 14:23:10.374564,-1.000000,852,0.000000,0.0,22008752.0,-1.00,1.0,0.0,0.0,88,zs_88,1600226,912724,1604989379646323,10728241,1,1,1.0,2,20574.0,SZE,0.0,statwo
7011390,831395,20201110,1604989390379489,1604989390374540,zs_88_04,8865,2300815,1409591,-1.0,1,200,3,-1.00,0,0,,,-1,1649.0,1.0,0,-1.000000,249324036.0,1000.0,-1.0,2.292614e+06,1.0,0.0,10299.0,3000.0,-1.000000,-1.0,0,14:23:10.379072,10288.0,-1.000000,102.87,120474.0,-1,2020-11-10 14:23:10.379489,-1.000000,852,0.000000,0.0,22008752.0,-1.00,0.0,0.0,0.0,88,zs_88,1600226,912724,1604989379646323,10733166,1,1,1.0,2,20574.0,SZE,0.0,statwo
7011391,597254,20201110,1604989377614079,1604989377614023,zs_94_05,9471,2300815,1409591,142254200.0,1,100,0,-1.00,0,0,,,0.001137,3621.0,1.0,100,0.003238,228202257.0,1000.0,-1.0,6.474766e+05,0.0,23648700.0,10287.0,3000.0,0.000615,0.0,0,14:22:57.613167,10282.0,0.001137,102.87,27052.0,-1,2020-11-10 14:22:57.614079,-0.001657,852,0.003238,0.0,-1.0,-1.00,0.0,0.0,0.0,94,zs_94,1627894,912724,1604989377614079,0,1,1,1.0,0,10287.0,SZE,0.0,statwo
7011392,597255,20201110,1604989377616661,1604989377614023,zs_94_05,9471,2300815,1409591,-1.0,1,100,2,-1.00,0,0,,0020QJJXV100QXPG,-1,3621.0,1.0,0,-1.000000,228202257.0,1000.0,-1.0,6.474766e+05,0.0,0.0,10287.0,3000.0,-1.000000,-1.0,0,14:22:57.616167,10282.0,-1.000000,102.87,27079.0,-1,2020-11-10 14:22:57.616661,-1.000000,852,0.003238,0.0,2295025.0,-1.00,0.0,0.0,0.0,94,zs_94,1627894,912724,1604989377614079,2582,1,1,1.0,0,10287.0,SZE,0.0,statwo


In [100]:
# Accounts seen per (date, exchange, colo) on the colos whose name starts with 'zt_88'.
onZt88 = orderLog['colo'].str[:5] == 'zt_88'
orderLog[onZt88].groupby(['date', 'exchange', 'colo'])['accCode'].unique()

date      exchange  colo    
20201102  SSE       zt_88_02                              [8854, 8886]
                    zt_88_03          [8970, 8971, 897002, 8833, 8943]
          SZE       zt_88_02                                    [8854]
                    zt_88_03                        [8970, 8833, 8971]
20201103  SSE       zt_88_02                              [8854, 8886]
                    zt_88_03          [8970, 8971, 897002, 8833, 8943]
          SZE       zt_88_02                                    [8854]
                    zt_88_03                        [8970, 8833, 8971]
20201104  SSE       zt_88_02                              [8854, 8886]
                    zt_88_03          [8970, 8971, 897002, 8833, 8943]
          SZE       zt_88_02                                    [8854]
                    zt_88_03                        [8970, 8833, 8971]
20201105  SSE       zt_88_02                              [8886, 8854]
                    zt_88_03          [8971, 897

In [139]:
# Keep rows that have mrstauc and a non-zero ars. The explicit copy makes
# orderLog an independent frame, so the column assignments below do not trigger
# SettingWithCopyWarning.
orderLog = orderLog[orderLog['mrstauc'].notnull() & (orderLog['ars'] != 0)].copy()
orderLog['sta_uc'] = orderLog['sta'] + '_' + orderLog['mrstauc'].astype(int).astype(str)
# 'zt_88_03' -> 'zt8803': first three '_'-separated tokens of colo, concatenated.
orderLog["server"] = orderLog["colo"].apply(lambda x: ''.join(x.split("_")[:3]))
# Server-level comparison: competitors are identified by the first five
# characters of colo (e.g. 'zt_88'), not by individual account.
orderLog["server_account"] = orderLog["colo"].str[:5]

# Only updateType 0 / 3 / 4 rows that arrive within 1 second (1e6) of the
# order's first update take part in the comparison.
checkLog = orderLog[orderLog['updateType'].isin([0, 3, 4]) & (orderLog['duration'] <= 1e6)].reset_index(drop=True)
# HHMMSS of the arrival clock as an integer (not used further in this cell).
checkLog["time"] = checkLog["clock"].apply(lambda x: x.strftime("%H%M%S")).astype(int)

sta_list = orderLog['sta_uc'].unique()

result = {col: [] for col in ['date', 'broker1', 'broker2', 'isFast']}

for sta in sta_list:
    checkLog1 = checkLog[checkLog['sta_uc'] == sta].copy()
    # Keep signals on which at least two different competitors placed orders.
    checkLog1['accountNum'] = checkLog1.groupby(['date', 'secid', 'vai'])['server_account'].transform('nunique')
    checkLog1 = checkLog1[checkLog1['accountNum'] >= 2].copy()
    # Fill outcome per order: 0 = fully filled, 1 = partially filled, 2 = nothing filled.
    checkLog1['cumFillSize'] = checkLog1.groupby(['order'])['absOrderSizeCumFilled'].transform('max')
    checkLog1['status'] = np.where(checkLog1['cumFillSize'] == 0, 2,
                                   np.where(checkLog1['cumFillSize'] < checkLog1['absOrderSize'], 1, 0))
    # One row per (group, order).
    checkLog1 = checkLog1.groupby(['group', 'order'])[['date', 'server_account', 'secid', 'vai', 'status']].first().reset_index()

    for exchg in ['SSE']:

        print(exchg)

        if exchg == 'SSE':
            exchangeLog = checkLog1[checkLog1['secid'] < 2000000]
        else:
            exchangeLog = checkLog1[checkLog1['secid'] >= 2000000]

        # A scalar key keeps the iteration key a plain value on every pandas version.
        for _, groupData in exchangeLog.groupby('group'):
            date = groupData['date'].values[0]
            brokerLs = groupData['server_account'].values
            statusLs = groupData['status'].values
            # Compare every unordered pair of orders on the same signal; each
            # decided pair is recorded from both points of view.
            for k in range(len(brokerLs) - 1):
                for j in range(k + 1, len(brokerLs)):
                    broker1, broker2 = brokerLs[k], brokerLs[j]
                    status1, status2 = statusLs[k], statusLs[j]
                    # Same competitor, or identical outcome (a tie), carries no
                    # speed information and is not recorded.
                    if broker1 == broker2 or status1 == status2:
                        continue
                    result['date'] += [date, date]
                    result['broker1'] += [broker1, broker2]
                    result['broker2'] += [broker2, broker1]
                    # The lower status (better fill) is counted as the faster side.
                    result['isFast'] += [1, 0] if status1 < status2 else [0, 1]

result = pd.DataFrame(result)
# Per ordered pair: number of decided comparisons and share won by broker1.
result['count'] = result.groupby(['broker1', 'broker2'])['isFast'].transform('count')
result['fasterSum'] = result.groupby(['broker1', 'broker2'])['isFast'].transform('sum')
result['faster'] = result['fasterSum']/result['count']


# Wide summary: one row per (broker1, broker2) with a count / faster column pair
# for every date, followed by the totals over the whole window.
summary = pd.DataFrame()
summaryCols = []
countCols = []
fasterCols = []
pairKeys = ['broker1', 'broker2']
for date in result['date'].unique():
    countName = '%s count'%date
    fasterName = '%s faster'%date
    daily = result[result['date'] == date].reset_index(drop=True)
    # Recompute the pair statistics on this date's rows only.
    daily['count'] = daily.groupby(pairKeys)['isFast'].transform('count')
    daily['fasterSum'] = daily.groupby(pairKeys)['isFast'].transform('sum')
    daily['faster'] = daily['fasterSum']/daily['count']
    daily = daily.groupby(pairKeys)[['count', 'faster']].mean().reset_index()
    daily.columns = ['broker1', 'broker2', countName, fasterName]
    summaryCols.extend([countName, fasterName])
    countCols.append(countName)
    fasterCols.append(fasterName)

    if summary.empty:
        summary = daily.copy()
    else:
        summary = pd.merge(summary, daily, how='outer', on=pairKeys, validate='one_to_one')

# Totals reuse the count / faster columns already present on result.
a = result.groupby(pairKeys)[['count', 'faster']].mean().reset_index()
a.columns = ['broker1', 'broker2', 'total count', 'total faster']
summary = pd.merge(summary, a, how='outer', on=pairKeys, validate='one_to_one')
summaryCols = ['total count', 'total faster'] + summaryCols


# Export the total columns, one row per (broker1, broker2).
# NOTE: exchg is the loop variable left over from the pairwise-comparison loop.
savePath = r'L:\orderLog\result\relativeSpeed'
saveName = 'relativeSpeedAccount_%s_%s_%s.csv'%(startDate, endDate, exchg)
a = summary.groupby(['broker1', 'broker2'])[['total count', 'total faster']].first().reset_index()
a.to_csv(os.path.join(savePath, saveName), index=False)




countCols.append('total count')
fasterCols.append('total faster')

# Count columns: a pair missing from a merge becomes 0, stored as integers.
for col in countCols:
    summary[col] = summary[col].fillna(0).astype('int64')

# Faster columns: rendered as whole percentages. Missing values are filled with
# -1 first so that they format as '-100%', which is then blanked out.
for col in fasterCols:
    asPercent = summary[col].fillna(-1).apply(lambda x: '%.0f%%'%(x*100))
    summary[col] = np.where(asPercent == '-100%', ' ', asPercent)

s1 = summary

KeyboardInterrupt: 

In [142]:
# Keep rows that have mrstauc and a non-zero ars. The explicit copy makes
# orderLog an independent frame, so the column assignments below do not trigger
# SettingWithCopyWarning.
orderLog = orderLog[orderLog['mrstauc'].notnull() & (orderLog['ars'] != 0)].copy()
orderLog['sta_uc'] = orderLog['sta'] + '_' + orderLog['mrstauc'].astype(int).astype(str)
# 'zt_88_03' -> 'zt8803': first three '_'-separated tokens of colo, concatenated.
orderLog["server"] = orderLog["colo"].apply(lambda x: ''.join(x.split("_")[:3]))
# Account-level comparison: competitors are identified as '<server>_<accCode>',
# e.g. 'zt8803_8943'.
orderLog["server_account"] = orderLog["server"] + '_' + orderLog['accCode'].astype('str')

# Only updateType 0 / 3 / 4 rows that arrive within 1 second (1e6) of the
# order's first update take part in the comparison.
checkLog = orderLog[orderLog['updateType'].isin([0, 3, 4]) & (orderLog['duration'] <= 1e6)].reset_index(drop=True)
# HHMMSS of the arrival clock as an integer (not used further in this cell).
checkLog["time"] = checkLog["clock"].apply(lambda x: x.strftime("%H%M%S")).astype(int)

sta_list = orderLog['sta_uc'].unique()

result = {col: [] for col in ['date', 'broker1', 'broker2', 'isFast']}

for sta in sta_list:
    checkLog1 = checkLog[checkLog['sta_uc'] == sta].copy()
    # Keep signals on which at least two different competitors placed orders.
    checkLog1['accountNum'] = checkLog1.groupby(['date', 'secid', 'vai'])['server_account'].transform('nunique')
    checkLog1 = checkLog1[checkLog1['accountNum'] >= 2].copy()
    # Fill outcome per order: 0 = fully filled, 1 = partially filled, 2 = nothing filled.
    checkLog1['cumFillSize'] = checkLog1.groupby(['order'])['absOrderSizeCumFilled'].transform('max')
    checkLog1['status'] = np.where(checkLog1['cumFillSize'] == 0, 2,
                                   np.where(checkLog1['cumFillSize'] < checkLog1['absOrderSize'], 1, 0))
    # One row per (group, order).
    checkLog1 = checkLog1.groupby(['group', 'order'])[['date', 'server_account', 'secid', 'vai', 'status']].first().reset_index()

    for exchg in ['SSE']:

        print(exchg)

        if exchg == 'SSE':
            exchangeLog = checkLog1[checkLog1['secid'] < 2000000]
        else:
            exchangeLog = checkLog1[checkLog1['secid'] >= 2000000]

        # A scalar key keeps the iteration key a plain value on every pandas version.
        for _, groupData in exchangeLog.groupby('group'):
            date = groupData['date'].values[0]
            brokerLs = groupData['server_account'].values
            statusLs = groupData['status'].values
            # Compare every unordered pair of orders on the same signal; each
            # decided pair is recorded from both points of view.
            for k in range(len(brokerLs) - 1):
                for j in range(k + 1, len(brokerLs)):
                    broker1, broker2 = brokerLs[k], brokerLs[j]
                    status1, status2 = statusLs[k], statusLs[j]
                    # Same competitor, or identical outcome (a tie), carries no
                    # speed information and is not recorded.
                    if broker1 == broker2 or status1 == status2:
                        continue
                    result['date'] += [date, date]
                    result['broker1'] += [broker1, broker2]
                    result['broker2'] += [broker2, broker1]
                    # The lower status (better fill) is counted as the faster side.
                    result['isFast'] += [1, 0] if status1 < status2 else [0, 1]

result = pd.DataFrame(result)
# Per ordered pair: number of decided comparisons and share won by broker1.
result['count'] = result.groupby(['broker1', 'broker2'])['isFast'].transform('count')
result['fasterSum'] = result.groupby(['broker1', 'broker2'])['isFast'].transform('sum')
result['faster'] = result['fasterSum']/result['count']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


SSE
SSE
SSE
SSE
SSE
SSE
SSE
SSE
SSE
SSE
SSE
SSE
SSE
SSE
SSE
SSE


In [143]:
# Wide summary: one row per (broker1, broker2) with a count / faster column pair
# for the period before and after 20201109, followed by the totals.
summary = pd.DataFrame()
summaryCols = []
countCols = []
fasterCols = []
# Rows dated before 20201109 are tagged 'before moving', the rest 'after moving'.
result['tag'] = np.where(result['date'] < 20201109, 'before moving', 'after moving')
# Iterate the tags in a fixed order so the column layout of summary does not
# depend on which tag happens to appear first in result (later cells select
# columns of s1 by position).
for tag in ['before moving', 'after moving']:
    a = result[result['tag'] == tag].reset_index(drop=True)
    if a.empty:
        # No comparisons in this period: add no columns, as iterating the
        # observed tags would have done.
        continue
    # Recompute the pair statistics on this period's rows only.
    a['count'] = a.groupby(['broker1', 'broker2'])['isFast'].transform('count')
    a['fasterSum'] = a.groupby(['broker1', 'broker2'])['isFast'].transform('sum')
    a['faster'] = a['fasterSum']/a['count']
    a = a.groupby(['broker1', 'broker2'])[['count', 'faster']].mean().reset_index()
    a.columns = ['broker1', 'broker2', tag + '_' + 'count', tag + '_' + 'faster']
    summaryCols += [tag + '_' + 'count', tag + '_' + 'faster']
    countCols += [tag + '_' + 'count']
    fasterCols += [tag + '_' + 'faster']

    if summary.empty:
        summary = a.copy()
    else:
        summary = pd.merge(summary, a, how='outer', on=['broker1', 'broker2'], validate='one_to_one')

# Totals reuse the count / faster columns already present on result.
a = result.groupby(['broker1', 'broker2'])[['count', 'faster']].mean().reset_index()
a.columns = ['broker1', 'broker2', 'total count', 'total faster']
summary = pd.merge(summary, a, how='outer', on=['broker1', 'broker2'], validate='one_to_one')
summaryCols = ['total count', 'total faster'] + summaryCols

countCols.append('total count')
fasterCols.append('total faster')

# Count columns: a pair missing from a merge becomes 0, stored as integers.
for col in countCols:
    summary[col] = summary[col].fillna(0).astype('int64')

# Faster columns: rendered as whole percentages. Missing values are filled with
# -1 first so that they format as '-100%', which is then blanked out.
for col in fasterCols:
    asPercent = summary[col].fillna(-1).apply(lambda x: '%.0f%%'%(x*100))
    summary[col] = np.where(asPercent == '-100%', ' ', asPercent)

s1 = summary

In [125]:
# Rows of the summary where broker1 is 'zt_88' and broker2 is one of the
# listed counterpart brokers.
(
    s1
    .loc[lambda df: (df['broker1'] == 'zt_88')
                    & df['broker2'].isin(['zs_88', 'zs_96', 'zt_52', 'zt_94', 'zt_96'])]
)

Unnamed: 0,broker1,broker2,before moving_count,before moving_faster,after moving_count,after moving_faster,total count,total faster
36,zt_88,zs_88,982,88%,601,92%,1583,89%
37,zt_88,zs_96,640,77%,498,89%,1138,82%
38,zt_88,zt_52,10447,32%,5216,43%,15663,35%
39,zt_88,zt_94,1595,59%,850,83%,2445,67%
40,zt_88,zt_96,1504,13%,386,24%,1890,15%


In [148]:
# Pair the 'before moving' figures of zt8803_8943 with the 'after moving'
# figures of zt8806_8943 for the same broker2, then keep only pairs with more
# than 50 observations after the move.
# Columns are selected by name rather than by position: the column order of
# `s1` depends on the order in which the period tags first appear in the data,
# so positional slices could silently pick the wrong period.
beforeCols = ['broker1', 'broker2', 'before moving_count', 'before moving_faster']
afterCols = ['broker2', 'after moving_count', 'after moving_faster']
r = pd.merge(
    s1.loc[s1['broker1'] == 'zt8803_8943', beforeCols],
    s1.loc[s1['broker1'] == 'zt8806_8943', afterCols],
    on=['broker2'],
)
r[r['after moving_count'] > 50]

Unnamed: 0,broker1,broker2,before moving_count,before moving_faster,after moving_count,after moving_faster
0,zt8803_8943,zt5204_526901,2074,29%,746,39%
1,zt8803_8943,zt5204_528401,1045,32%,341,45%
2,zt8803_8943,zt5205_528701,841,33%,314,48%
3,zt8803_8943,zt5207_527101,270,38%,132,49%
5,zt8803_8943,zt9406_9551,807,57%,403,91%


In [138]:
# Number of rows per (date, colo) among SSE rows with updateType == 0 whose
# colo name starts with 'zt_88'.
(
    orderLog
    .loc[lambda df: (df['exchange'] == 'SSE')
                    & (df['updateType'] == 0)
                    & (df['colo'].str[:5] == 'zt_88')]
    .groupby(['date', 'colo'])['accCode']
    .size()
)

date      colo    
20201102  zt_88_02    2666
          zt_88_03    9137
20201103  zt_88_02    2400
          zt_88_03    6736
20201104  zt_88_02    1845
          zt_88_03    8161
20201105  zt_88_02    2906
          zt_88_03    7635
20201106  zt_88_02    2508
          zt_88_03    7646
20201109  zt_88_03    1726
          zt_88_06    8810
20201110  zt_88_03    1385
          zt_88_06    8353
Name: accCode, dtype: int64

In [154]:
# Summary rows for broker1 'zt8803_8833'; the 1st-4th and 6th matching rows
# are picked by position.
s1.loc[s1['broker1'] == 'zt8803_8833'].iloc[[0, 1, 2, 3, 5]]

Unnamed: 0,broker1,broker2,before moving_count,before moving_faster,after moving_count,after moving_faster,total count,total faster
411,zt8803_8833,zt5204_526901,655,29%,228,46%,883,33%
412,zt8803_8833,zt5204_528401,70,26%,34,41%,104,31%
413,zt8803_8833,zt5205_528701,236,30%,79,35%,315,31%
414,zt8803_8833,zt5207_527101,59,31%,21,52%,80,36%
416,zt8803_8833,zt9406_9551,157,57%,54,87%,211,65%


In [8]:
# Distinct account codes seen per (date, colo, exchange) on the three zt_88
# colos, restricted to rows with updateType == 0.
(
    orderLog
    .loc[lambda df: (df['updateType'] == 0)
                    & df['colo'].isin(['zt_88_03', 'zt_88_02', 'zt_88_06'])]
    .groupby(['date', 'colo', 'exchange'])['accCode']
    .unique()
)

date      colo      exchange
20201102  zt_88_02  SSE                             [8854, 8886]
                    SZE                                   [8854]
          zt_88_03  SSE         [8970, 8971, 897002, 8833, 8943]
                    SZE                       [8970, 8833, 8971]
20201103  zt_88_02  SSE                             [8854, 8886]
                    SZE                                   [8854]
          zt_88_03  SSE         [8970, 8971, 897002, 8833, 8943]
                    SZE                       [8970, 8833, 8971]
20201104  zt_88_02  SSE                             [8854, 8886]
                    SZE                                   [8854]
          zt_88_03  SSE         [8970, 8971, 897002, 8833, 8943]
                    SZE                       [8970, 8833, 8971]
20201105  zt_88_02  SSE                             [8886, 8854]
                    SZE                                   [8854]
          zt_88_03  SSE         [8971, 897002, 8970, 8943, 88

In [69]:
# Row counts per (exchange, colo, mrstaat, mrstauc) for updateType == 0 rows
# on colos whose name starts with 'zt_88'.
(
    orderLog
    .loc[lambda df: (df['updateType'] == 0) & (df['colo'].str[:5] == 'zt_88')]
    .groupby(['exchange', 'colo', 'mrstaat', 'mrstauc'])['date']
    .size()
)

exchange  colo      mrstaat  mrstauc
SSE       zt_88_02  1000.0   0.0        12325
          zt_88_03  1000.0   0.0         3314
                    3000.0   1000.0     29888
                             2000.0      5793
                             3000.0      3431
          zt_88_06  1000.0   0.0         1269
                    3000.0   1000.0     12007
                             2000.0      2582
                             3000.0      1305
SZE       zt_88_02  1000.0   0.0        10486
          zt_88_03  1000.0   0.0         3942
                    3000.0   1000.0     20954
                             2000.0       562
                             3000.0       113
          zt_88_06  1000.0   0.0         1677
                    3000.0   1000.0      8977
                             2000.0       228
                             3000.0        64
Name: date, dtype: int64

In [99]:
from IPython.display import display, HTML

# Internal latency report for the zt_88 colos before 20201109.
# internal_latency = clockAtArrival - caamd, where caamd is the column the
# loading cell renamed from mdClockAtArrival.
# NOTE(review): clockAtArrival is treated as microseconds elsewhere in the
# notebook; presumably caamd uses the same unit - confirm.
latencyKeys = ['colo', 'exchange', 'accCode']

checkLog = orderLog[(orderLog["updateType"] == 0) & (orderLog['colo'].str[:5] == 'zt_88') & (orderLog['date'] < 20201109)]
# .copy() makes each filtered frame independent, so the column assignments
# below no longer raise SettingWithCopyWarning.
checkLog = checkLog[checkLog['caamd'] != 0].copy()
checkLog['internal_latency'] = checkLog["clockAtArrival"] - checkLog["caamd"]
SZE = checkLog[checkLog['secid'] >= 2000000].copy()
SZE["exchange"] = "SZ"
SSE = checkLog[checkLog['secid'] < 2000000].copy()
SSE["exchange"] = "SH"

# Daily statistics first, then averaged (or spread-measured) across days so
# every trading day carries the same weight regardless of its order count.
perDay = SZE.groupby(latencyKeys + ["date"])["internal_latency"]
dailyP95 = perDay.quantile(.95).reset_index()
dailyMedian = perDay.median().reset_index()

c1 = dailyP95.groupby(latencyKeys)["internal_latency"].mean().reset_index()
c2 = dailyMedian.groupby(latencyKeys)["internal_latency"].mean().reset_index()
c3 = SZE.groupby(latencyKeys)["internal_latency"].count().reset_index()
c4 = SZE.groupby(latencyKeys)["date"].nunique().reset_index()
c5 = dailyP95.groupby(latencyKeys)["internal_latency"].std().reset_index()

# Rename each statistic before merging so the result does not depend on the
# automatic _x/_y suffixes. Final column order: keys, count, 95 percentile,
# median, date (number of distinct days), std.
re1 = c3.rename(columns={'internal_latency': 'count'})
re1 = pd.merge(re1, c1.rename(columns={'internal_latency': '95 percentile'}), on=latencyKeys)
re1 = pd.merge(re1, c2.rename(columns={'internal_latency': 'median'}), on=latencyKeys)
re1 = pd.merge(re1, c4, on=latencyKeys)
re1 = pd.merge(re1, c5.rename(columns={'internal_latency': 'std'}), on=latencyKeys)

# NOTE(review): the name `re` is kept in case later cells reference it; it
# would shadow the standard-library `re` module if that were ever imported.
re = re1.reset_index(drop=True)

for col in ['median', '95 percentile']:
    re[col] = re[col].astype(int)
for col in ['std']:
    re[col] = re[col].apply(lambda x: '%.2f' % (x))

# Columns are selected with a list: indexing a groupby with a bare tuple of
# names is deprecated.
display(HTML(re.groupby(latencyKeys)[["count", "median", "95 percentile", "std"]].first().to_html()))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,median,95 percentile,std
colo,exchange,accCode,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
zt_88_02,SZ,8854,10486,31,62,3.16
zt_88_03,SZ,8833,7733,61,147,19.15
zt_88_03,SZ,8970,10872,62,143,6.36
zt_88_03,SZ,8971,3902,61,155,18.21


In [84]:
# Load the SZspeed pickle files whose date token lies inside
# [startDate, endDate] (inclusive, compared as strings) and stack them into
# `checkLog`.
startDate = '20201102'
endDate = '20201106'
readPath = r'L:\orderLog\result\marketPos'
dataPathLs = np.array(glob.glob(os.path.join(readPath, 'SZspeed_***.pkl')))
# The date token is the second '_'-separated piece of the file name taken up
# to its first '.'.
dateLs = np.array([os.path.basename(i).split('.')[0].split('_')[1] for i in dataPathLs])
inWindow = (dateLs >= startDate) & (dateLs <= endDate)
dataPathLs = dataPathLs[inWindow]
checkData = []
for path in dataPathLs:
    # NOTE(review): read_pickle can execute arbitrary code; only use it on
    # trusted files.
    data = pd.read_pickle(path)
    checkData.append(data)
checkLog = pd.concat(checkData).reset_index(drop=True)
# Show which dates were actually loaded.
checkLog['date'].unique()

array([20201106, 20201102, 20201103, 20201104, 20201105], dtype=int64)

In [85]:
# Drop every row that shares the listed key columns with another row
# (keep=False removes all members of a duplicate set), then keep only rows
# that carry an account code.
checkLog = (
    checkLog
    .drop_duplicates(
        subset=['date', 'secid', 'Price', 'OrderQty', 'Side', 'statusLs',
                'TradePriceLs', 'TradeQtyLs', 'ApplSeqNum'],
        keep=False,
    )
    .loc[lambda df: df['accCode'].notnull()]
)

In [88]:
from IPython.display import display, HTML


def _timeDiffPercentiles(log):
    """Return count and percentile statistics of `time_diff` per (colo_broker, colo).

    `log` must contain the columns 'colo_broker', 'colo' and 'time_diff'.
    The returned frame has one row per group with the describe() columns
    (count, mean, std, min, 10%, 25%, 50%, 75%, 90%, max); missing statistics
    are filled with 0 and all values are cast to int.
    """
    return (log.groupby(['colo_broker', 'colo'])['time_diff']
               .describe(percentiles=[0.1, 0.25, 0.5, 0.75, 0.9])
               .fillna(0)
               .astype(int)
               .reset_index())


cc1 = checkLog[checkLog['colo'].str[:5] == 'zt_88']
cc1 = cc1.drop_duplicates(['date', 'secid', 'Price', 'OrderQty', 'Side', 'statusLs', 'TradePriceLs', 'TradeQtyLs', 'ApplSeqNum'], keep=False)
cc1 = cc1.reset_index(drop=True)
cc1['ordering'] = cc1.index
cc1['time_diff'] = cc1['clockAtArrival'] - cc1['start_time']
# Compact colo label: characters 0-1, 3-4 and 6-7 of `colo` joined together
# (e.g. 'zt_88_03' -> 'zt8803').
cc1['colo1'] = cc1['colo'].str[:2] + cc1['colo'].str[3:5] + cc1['colo'].str[6:8]
# Fill and cast accCode before it is used to build the label; the previous
# order cast to int first, which fails on missing values and made the later
# fillna(0) ineffective.
cc1['accCode'] = cc1['accCode'].fillna(0).astype(int)
cc1['colo_broker'] = cc1['colo1'] + '_' + cc1['accCode'].astype(str)
# Map mrstaat to a label; any value other than 1000 / 3000 / 11000 falls into
# 'stathree23'.
cc1['sta'] = np.select(
    [cc1['mrstaat'] == 1000, cc1['mrstaat'] == 3000, cc1['mrstaat'] == 11000],
    ['staone', 'statwo', 'stathree13'],
    default='stathree23')

checkLog1 = cc1[cc1['sta'] == 'statwo']
# `ol` holds the `ordering` ids of 'staone' rows whose (colo, accCode) also
# has 'statwo' rows. It is computed for reference only: checkLog2 below keeps
# all 'staone' rows and does not exclude these.
ol = pd.merge(cc1[cc1['sta'] == 'staone'], checkLog1[['colo', 'accCode']].drop_duplicates(),
         on=['colo', 'accCode'], how='inner')['ordering'].unique()
checkLog2 = cc1[(cc1['sta'] == 'staone')]

# Same table for both subsets: 'statwo' rows first, then 'staone' rows.
displayCols = ['colo_broker', 'count', '10%', '25%', '50%', '75%', '90%']
for log in (checkLog1, checkLog2):
    re1 = _timeDiffPercentiles(log)
    display(HTML(re1[displayCols].groupby(['colo_broker']).first().to_html()))

Unnamed: 0_level_0,count,10%,25%,50%,75%,90%
colo_broker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
zt8803_8833,4675,36799,37285,37901,38838,41394
zt8803_8970,5731,38062,38563,39082,40080,42303
zt8803_8971,1672,38284,38951,39762,41030,44096


Unnamed: 0_level_0,count,10%,25%,50%,75%,90%
colo_broker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
zt8802_8854,9390,36597,37052,37631,38192,39868
zt8803_8833,1035,37686,38629,42058,49474,55819
zt8803_8970,605,38022,38777,40198,44045,52368
zt8803_8971,303,39115,40014,42956,50037,55761


In [94]:
# Distinct account codes per (exchange, colo, mrstaat) on 20201106 for colos
# whose name starts with 'zt_88'.
(
    orderLog
    .loc[lambda df: (df['colo'].str[:5] == 'zt_88') & (df['date'] == 20201106)]
    .groupby(['exchange', 'colo', 'mrstaat'])['accCode']
    .unique()
)

exchange  colo      mrstaat
SSE       zt_88_02  1000.0                         [8886, 8854]
          zt_88_03  1000.0     [8970, 8971, 897002, 8833, 8943]
                    3000.0     [8970, 8971, 897002, 8943, 8833]
SZE       zt_88_02  1000.0                               [8854]
          zt_88_03  1000.0                   [8970, 8833, 8971]
                    3000.0                   [8970, 8833, 8971]
Name: accCode, dtype: object

In [95]:
# Distinct account codes per (exchange, colo, mrstaat) on 20201109 for colos
# whose name starts with 'zt_88'.
(
    orderLog
    .loc[lambda df: (df['colo'].str[:5] == 'zt_88') & (df['date'] == 20201109)]
    .groupby(['exchange', 'colo', 'mrstaat'])['accCode']
    .unique()
)

exchange  colo      mrstaat
SSE       zt_88_03  1000.0                     [897002, 8833]
                    3000.0                     [897002, 8833]
          zt_88_06  1000.0     [8970, 8971, 8886, 8854, 8943]
                    3000.0     [8971, 8970, 8886, 8943, 8854]
SZE       zt_88_03  1000.0                             [8833]
                    3000.0                             [8833]
          zt_88_06  1000.0                 [8970, 8854, 8971]
                    3000.0                 [8970, 8854, 8971]
Name: accCode, dtype: object