In [1]:
import pymongo
import pandas as pd
import numpy as np
import pickle
import datetime
import time
import gzip
import lzma
import pytz

def DB(host, db_name, user, passwd):
    """Connect to MongoDB on ``host`` and return the ``db_name`` database.

    The users 'admin' and 'root' authenticate against the ``admin`` database;
    any other user authenticates against ``db_name``.

    NOTE(review): ``user`` and ``passwd`` are interpolated into the URI as-is,
    so presumably callers must percent-escape reserved characters themselves
    -- confirm against the pymongo connection-string rules.
    """
    if user in ('admin', 'root'):
        auth_source = 'admin'
    else:
        auth_source = db_name
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_source)
    # maxPoolSize=None is handed to pymongo unchanged.
    return pymongo.MongoClient(uri, maxPoolSize=None)[db_name]

def read_memb_daily(db, name, start_date=None, end_date=None, skey=None, index_id=None, interval=None, col=None, return_sdi=True):
    """Read rows from collection ``name`` into a DataFrame.

    Parameters
    ----------
    db : mapping-like database handle; ``db[name]`` must expose ``find``.
    name : collection name.
    start_date, end_date : inclusive bounds applied to the ``date`` field
        (``$gte`` / ``$lte``); either may be omitted.
    skey, index_id, interval : optional lists used as ``$in`` filters on the
        field of the same name.
    col : optional iterable of field names to project. ``None`` returns
        every field except ``_id``.
    return_sdi : when ``col`` is given, also project ``skey``, ``date`` and
        ``interval``.

    Returns
    -------
    pandas.DataFrame
        Empty when nothing matches; otherwise sorted by whichever of
        ``date``, ``index_id``, ``skey`` are present in the result.
    """
    collection = db[name]

    # Build projection
    prj = {'_id': 0}
    if col is not None:
        # list(col) lets callers pass a tuple and never touches their object.
        wanted = list(col)
        if return_sdi:
            wanted = ['skey', 'date', 'interval'] + wanted
        for col_name in wanted:
            prj[col_name] = 1

    # Build query
    query = {}
    for field, values in (('skey', skey), ('index_id', index_id), ('interval', interval)):
        if values is not None:
            query[field] = {'$in': values}
    date_range = {}
    if start_date is not None:
        date_range['$gte'] = start_date
    if end_date is not None:
        date_range['$lte'] = end_date
    if date_range:
        query['date'] = date_range

    # Load data
    df = pd.DataFrame.from_records(collection.find(query, prj))
    if df.empty:
        return pd.DataFrame()

    # A projection may leave out some sort keys (e.g. 'index_id' is not part
    # of the return_sdi columns); sorting on a missing column raises KeyError,
    # so only sort by the keys that actually came back.
    sort_keys = [key for key in ('date', 'index_id', 'skey') if key in df.columns]
    if sort_keys:
        df = df.sort_values(by=sort_keys)
    return df

def read_beta_daily(db, name, start_date=None, end_date=None, skey=None, interval=None, col=None, return_sdi=True):
    """Read rows from collection ``name`` into a DataFrame.

    Parameters
    ----------
    db : mapping-like database handle; ``db[name]`` must expose ``find``.
    name : collection name.
    start_date, end_date : inclusive bounds applied to the ``date`` field
        (``$gte`` / ``$lte``); either may be omitted.
    skey, interval : optional lists used as ``$in`` filters on the field of
        the same name.
    col : optional iterable of field names to project. ``None`` returns
        every field except ``_id``.
    return_sdi : when ``col`` is given, also project ``skey`` and ``date``.

    Returns
    -------
    pandas.DataFrame
        Empty when nothing matches; otherwise sorted by whichever of
        ``date``, ``skey`` are present in the result.
    """
    collection = db[name]

    # Build projection
    prj = {'_id': 0}
    if col is not None:
        # list(col) lets callers pass a tuple and never touches their object.
        wanted = list(col)
        if return_sdi:
            wanted = ['skey', 'date'] + wanted
        for col_name in wanted:
            prj[col_name] = 1

    # Build query
    query = {}
    for field, values in (('skey', skey), ('interval', interval)):
        if values is not None:
            query[field] = {'$in': values}
    date_range = {}
    if start_date is not None:
        date_range['$gte'] = start_date
    if end_date is not None:
        date_range['$lte'] = end_date
    if date_range:
        query['date'] = date_range

    # Load data
    df = pd.DataFrame.from_records(collection.find(query, prj))
    if df.empty:
        return pd.DataFrame()

    # With return_sdi=False the projection may omit 'date'/'skey'; sorting on
    # a missing column raises KeyError, so only sort by keys that came back.
    sort_keys = [key for key in ('date', 'skey') if key in df.columns]
    if sort_keys:
        df = df.sort_values(by=sort_keys)
    return df

def build_query(start_date=None, end_date=None, index_id=None):
    """Build a MongoDB filter dict from optional date bounds and index ids.

    Parameters
    ----------
    start_date, end_date : inclusive bounds for the ``date`` field. Each may
        be an integer, an 8-character ``YYYYMMDD`` string, or a
        ``datetime.date`` / ``datetime.datetime``; all are converted to a
        plain ``int``.
    index_id : a single id or a list/tuple of ids; each is converted with
        ``int()``. A list/tuple becomes an ``$in`` filter. Falsy values
        (``None``, ``0``, empty list) add no filter.

    Returns
    -------
    dict
        The filter; empty when no argument was supplied.

    Raises
    ------
    ValueError
        A date string is not 8 characters long.
    TypeError
        A date has an unsupported type.
    """
    def parse_date(x):
        # bool is an int subclass; keep rejecting it as the exact-type check did.
        if isinstance(x, bool):
            raise TypeError("invalid date type: " + str(type(x)))
        if isinstance(x, (int, np.integer)):
            # Plain int so numpy scalars do not leak into the query.
            return int(x)
        if isinstance(x, str):
            if len(x) != 8:
                raise ValueError("date must be YYYYMMDD format")
            return int(x)
        # datetime.datetime is a subclass of datetime.date.
        if isinstance(x, datetime.date):
            # strftime returns a str, which has no .astype; use int().
            return int(x.strftime("%Y%m%d"))
        raise TypeError("invalid date type: " + str(type(x)))

    query = {}
    if start_date is not None or end_date is not None:
        query['date'] = {}
        if start_date is not None:
            query['date']['$gte'] = parse_date(start_date)
        if end_date is not None:
            query['date']['$lte'] = parse_date(end_date)

    if index_id:
        if isinstance(index_id, (list, tuple)):
            query['index_id'] = {'$in': [int(x) for x in index_id]}
        else:
            query['index_id'] = int(index_id)
    return query

def write_memb_data(db, name, df):
    """Append the rows of ``df`` that are not yet in collection ``name``.

    For every distinct ``index_id`` in ``df``:

    * if the collection already holds that ``index_id``, only rows whose
      ``date`` is strictly greater than the newest stored ``date`` for that
      index are kept;
    * otherwise every row for that index is kept.

    The kept rows are inserted with a single ``insert_many`` call. Nothing is
    inserted (and no error is raised) when ``df`` is empty or contains no new
    rows.

    Parameters
    ----------
    db : mapping-like database handle.
    name : collection name.
    df : pandas.DataFrame with at least ``index_id`` and ``date`` columns.
    """
    collection = db[name]
    # Query the stored ids once instead of once per loop iteration.
    stored_ids = collection.distinct('index_id')

    pending = []
    for symbol in df['index_id'].unique():
        if symbol in stored_ids:
            symbol = int(symbol)
            newest = collection.find({'index_id': {'$in': [symbol]}}).sort([('date', -1)]).limit(1)
            m_ax = next(iter(newest))['date']
            df2 = df[(df['index_id'] == symbol) & (df['date'] > m_ax)]
        else:
            print(symbol)
            df2 = df[(df['index_id'] == symbol)]
        print(df2)
        pending.append(df2)

    # pd.concat([]) raises, so bail out early on an empty input frame.
    if not pending:
        return
    records = pd.concat(pending).reset_index(drop=True).to_dict('records')
    # insert_many rejects an empty document list.
    if not records:
        return
    collection.insert_many(records)

def delete_memb_data(db, name, start_date=None, end_date=None, index_id=None):
    """Delete the documents of collection ``name`` matching the given filters.

    The filter comes from ``build_query(start_date, end_date, index_id)``.
    An empty filter is refused (a message is printed and nothing is deleted)
    so the whole collection cannot be wiped by accident. Always returns None.
    """
    target = db[name]
    criteria = build_query(start_date, end_date, index_id)
    if criteria:
        target.delete_many(criteria)
    else:
        print('cannot delete the whole table')
    return None


# Connection settings for the market-data MongoDB instance.
# NOTE(review): the credentials below are hard-coded in source; consider
# loading them from the environment or a secrets store instead.
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"

# Use the fully qualified option key: the abbreviated 'max_columns' is
# resolved by pattern matching and is rejected as ambiguous once more than
# one registered option contains that name.
pd.set_option('display.max_columns', 200)
db1 = DB("192.168.10.178", database_name, user, password)


import os
import glob
import datetime
import numpy as np
import pandas as pd

# Display limits. Fully qualified option keys are used because the
# abbreviated 'max_rows' / 'max_columns' are resolved by pattern matching and
# are rejected as ambiguous once several registered options share the name.
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', 100)

perc = [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]

# Inclusive processing window, as YYYYMMDD strings.
startDate = '20201106'
endDate = '20201216'

# Discover the dates for which a speedCompare_<date>.csv log exists and keep
# those inside the window (the comparison is a string comparison).
readPath = '/mnt/equityTradeLogs'
dataPathLs = np.array(glob.glob(os.path.join(readPath, 'speedCompare***.csv')))
dateLs = np.array([os.path.basename(i).split('_')[1].split('.')[0] for i in dataPathLs])
dateLs = dateLs[(dateLs >= startDate) & (dateLs <= endDate)]

# Index membership per date: secid -> indexCat. Members of 1000985 are
# relabelled as 1000852.
index = read_memb_daily(db1, 'index_memb', int(startDate), int(endDate), index_id=[1000300, 1000905, 1000852, 1000985])
index = index[['index_id', 'skey', 'date']].reset_index(drop=True)
index = index.rename(columns={'index_id':'indexCat', 'skey':'secid'})
index.loc[index['indexCat'] == 1000985, 'indexCat'] = 1000852
index['ID'] = index['secid']

# The betas used to be assembled from the stockBeta_L_{IC,IF,CSI}_60d.pkl
# files under /mnt/ShareWithServer; they are now read from the 'mktbeta'
# collection and reshaped to one row per (secid, date, indexCat).
d = read_beta_daily(db1, 'mktbeta', int(startDate), int(endDate))
d1 = d[['skey', 'beta_60d_IF', 'date']]
d1 = d1.rename(columns={'beta_60d_IF':"beta_60"})
d1['indexCat'] = 1000300
d2 = d[['skey', 'beta_60d_IC', 'date']]
d2 = d2.rename(columns={'beta_60d_IC':"beta_60"})
d2['indexCat'] = 1000905
d3 = d[['skey', 'beta_60d_CSI1000', 'date']]
d3 = d3.rename(columns={'beta_60d_CSI1000':"beta_60"})
d3['indexCat'] = 1000852
betaData = pd.concat([d1, d2, d3]).reset_index(drop=True)
# Convert the integer YYYYMMDD date to datetime.date so it can be compared
# with the per-file date inside the processing loop.
betaData['date'] = betaData['date'].apply(lambda x: datetime.datetime.strptime(str(x), '%Y%m%d').date())
betaData = betaData.rename(columns={'skey':'secid'})

# Per-date pipeline: clean the order log, attach index category / beta, align
# every order with the market-data stream, attach forward adjusted-mid and
# index-close values, and pickle the result.
for date in np.sort(dateLs):

    dateDate = datetime.datetime.strptime(date, '%Y%m%d').date()
    dateBetaData = betaData[betaData['date'] == dateDate]

    # ------------------------------------------------------------------
    # 1. Load the order log and report / drop duplicated ticks.
    # ------------------------------------------------------------------
    readPath = '/mnt/equityTradeLogs'
    orderLog = pd.read_csv(os.path.join(readPath, 'speedCompare_%s.csv'%date))
    orderLog = orderLog[~orderLog['vai'].isnull()]
    orderLog = orderLog.rename(columns={'mdClockAtArrival': 'caamd'})

    tickKeys = ['date', 'secid', 'vai', 'accCode', 'clockAtArrival', 'updateType',
                'orderDirection', 'absOrderSize']
    display('There are accounts with duplicated ticks:')
    display(orderLog[orderLog.duplicated(tickKeys, keep=False)]
            .groupby(['date', 'colo', 'accCode'])['ars'].size())
    orderLog = orderLog.drop_duplicates(tickKeys, keep='first')

    display('There are ticks with orderDirection 0')
    display(orderLog[orderLog['orderDirection'] == 0][['date', 'colo', 'accCode', \
                'secid', 'vai', 'updateType', 'sdd', 'orderDirection', 'absOrderSize', 'internalId', 'orderId']])

    # ------------------------------------------------------------------
    # 2. Sanity checks on insert rows (updateType == 0) and update rows.
    #    accCode 8856 is excluded from several checks and grouped without
    #    internalId below. NOTE(review): the reason is not visible here --
    #    presumably its internalId is unreliable; confirm.
    # ------------------------------------------------------------------
    orderKeys = ['date', 'colo', 'accCode', 'secid', 'orderDirection',
                 'absOrderSize', 'internalId']

    inserts = orderLog[orderLog['updateType'] == 0]
    assert(inserts[inserts.duplicated(['date', 'colo', 'accCode', 'secid', 'orderDirection',
                                       'vai', 'absOrderSize', 'internalId'], keep=False)].shape[0] == 0)

    normalInserts = orderLog[(orderLog['updateType'] == 0) & (orderLog['accCode'] != 8856)]
    try:
        assert(normalInserts[normalInserts.duplicated(orderKeys, keep=False)].shape[0] == 0)
    except AssertionError:
        # Duplicates are tolerated as long as they differ in 'sdd'.
        print('There are orders with all things same except sdd')
        print(normalInserts[normalInserts.duplicated(orderKeys, keep=False)])
        assert(normalInserts[normalInserts.duplicated(orderKeys + ['sdd'], keep=False)].shape[0] == 0)

    normalUpdates = orderLog[(orderLog['updateType'] != 0) & (orderLog['accCode'] != 8856)]
    orderIdCount = normalUpdates.groupby(orderKeys)['orderId'].nunique()
    try:
        assert(sum(orderIdCount != 1) == 0)
    except AssertionError:
        # Reported only; processing continues.
        print('There are orders with same internalId but different orderId other than accCode 8856 case')
        print(orderIdCount[orderIdCount > 1])

    # ------------------------------------------------------------------
    # 3. Propagate the insert row's 'vai' to the later updates of the same
    #    order. r1 = accCode 8856 (grouped without internalId), r2 = the
    #    rest (grouped with internalId).
    #    .copy() makes both frames independent of orderLog so the column
    #    assignments below do not raise SettingWithCopyWarning.
    #    NOTE(review): `.grouper.group_info` is deprecated in recent pandas;
    #    `ngroup()` is the likely replacement but may treat NaN keys
    #    differently -- confirm before switching.
    # ------------------------------------------------------------------
    r2 = orderLog[(orderLog['accCode'] != 8856) & (orderLog['orderDirection'] != 0)].copy()
    r1 = orderLog[(orderLog['accCode'] == 8856) & (orderLog['orderDirection'] != 0)].copy()
    r1['test'] = r1.groupby(['date', 'colo', 'accCode', 'secid',
                'orderDirection', 'absOrderSize']).grouper.group_info[0]
    r1 = r1.sort_values(by=['test', 'clockAtArrival'])
    r1.loc[r1['updateType'] != 0, 'vai'] = np.nan
    r1['vai'] = r1.groupby('test')['vai'].ffill()
    r2['test'] = r2.groupby(['date', 'colo', 'accCode', 'secid',
                'orderDirection', 'absOrderSize', 'internalId']).grouper.group_info[0]
    r2 = r2.sort_values(by=['test', 'clockAtArrival'])
    r2.loc[r2['updateType'] != 0, 'vai'] = np.nan
    r2['vai'] = r2.groupby('test')['vai'].ffill()
    assert(sum(r1[r1['updateType'] != 0].groupby(['test', 'vai'])['orderId'].nunique() != 1) == 0)
    r2OrderIds = r2[r2['updateType'] != 0].groupby(['test', 'vai'])['orderId'].nunique()
    try:
        assert(sum(r2OrderIds != 1) == 0)
    except AssertionError:
        # Reported only; processing continues.
        a = r2OrderIds[r2OrderIds != 1].reset_index()
        print(pd.merge(r2, a[['test', 'vai']], on=['test', 'vai'], how='inner')[['secid', 'accCode', 'colo', 'vai', 'updateType', 'sdd', 'internalId', 'orderId', 'absOrderSize', 'absFilledThisUpdate', 'absOrderSizeCumFilled', 'orderPrice', 'tradePrice']])
    orderLog = pd.concat([r1, r2])
    del r1
    del r2

    # ------------------------------------------------------------------
    # 4. Number the orders and keep only those whose first row is an insert
    #    and that contain exactly one insert row.
    # ------------------------------------------------------------------
    orderLog['order'] = orderLog.groupby(['date', 'colo', 'accCode', 'secid', 'vai', 'orderDirection', 'absOrderSize', 'internalId']).grouper.group_info[0]
    orderLog['firstUpdateType'] = orderLog.groupby(['order'])['updateType'].transform('first')
    orderLog['caamd'] = orderLog.groupby('order')['caamd'].transform('first')
    orderLog = orderLog[orderLog['firstUpdateType'] == 0]

    orderLog['insertNum'] = np.where(orderLog['updateType'] == 0, 1, 0)
    orderLog['insertNum'] = orderLog.groupby(['order'])['insertNum'].transform('sum')
    orderLog = orderLog[orderLog['insertNum'] == 1]

    # innerSeq is the row label at this point; it is the key used to join
    # the forward values back onto orderLog at the end of the iteration.
    orderLog['innerSeq'] = orderLog.index.values
    targetStockLs = orderLog['secid'].unique()
    orderLog['firstUpdateType'] = orderLog.groupby(['order'])['updateType'].transform('first')
    orderLog['firstClock'] = orderLog.groupby(['order'])['clockAtArrival'].transform('first')

    assert(orderLog[orderLog['firstUpdateType'] != 0].shape[0] == 0)

    # Attach the index category of each stock for this date and its beta.
    indexCatData = index[index['date'] == int(date)]
    orderLog = pd.merge(orderLog, indexCatData[['secid', 'indexCat']], how='left', on=['secid'], validate='many_to_one')
    orderLog = pd.merge(orderLog, dateBetaData[['secid', 'indexCat', 'beta_60']], how='left', on=['secid', 'indexCat'], validate='many_to_one')

    # ------------------------------------------------------------------
    # 5. Load the market-data logs. SH ids are StockID + 1000000 and SZ ids
    #    are StockID + 2000000. 'time' is rebuilt from character positions
    #    0-2, 3-5 and 6-8 of the time string plus '000' and forced to be
    #    non-decreasing per ID; only ticks at or after 93000000 whose
    #    cum_volume equals its running maximum are kept.
    #    NOTE(review): glob.glob returns paths in arbitrary order, so [-1]
    #    is not guaranteed to be the latest file when several match.
    # ------------------------------------------------------------------
    readPath = '/mnt/Kevin_zhenyu/rawData/logs_%s_***'%date
    mdDataSHPath = glob.glob(os.path.join(readPath, 'mdLog_SH***.csv'))[-1]
    mdDataSH = pd.read_csv(mdDataSHPath)
    mdDataSH['ID'] = mdDataSH['StockID'] + 1000000
    mdDataSH['time'] = mdDataSH.time.str.slice(0, 2) + mdDataSH.time.str.slice(3, 5) + mdDataSH.time.str.slice(6, 8) + '000'
    mdDataSH['time'] = mdDataSH['time'].astype('int64')
    mdDataSH['time'] = mdDataSH.groupby(['ID'])['time'].cummax()
    mdDataSH['max_cum_volume'] = mdDataSH.groupby(['StockID'])['cum_volume'].cummax()
    # Index ticks (StockID 300 / 852 / 905) are taken from the SH log before
    # the stock filters are applied.
    indexData = mdDataSH[mdDataSH['StockID'].isin([300, 852, 905])][['ID', 'sequenceNo', 'close']].reset_index(drop=True)
    mdDataSH = mdDataSH[mdDataSH['StockID'] >= 600000]
    mdDataSH = mdDataSH[(mdDataSH['cum_volume'] > 0) & (mdDataSH['time'] >= 93000000) &\
                        (mdDataSH['cum_volume'] == mdDataSH['max_cum_volume'])]
    mdDataSH = mdDataSH[['ID', 'clockAtArrival', 'sequenceNo', 'time', 'cum_volume', 'bid1p', 'ask1p', 'bid1q', 'ask1q', 'bid5q', 'ask5q']]

    mdDataSZPath = glob.glob(os.path.join(readPath, 'mdLog_SZ***.csv'))[-1]
    mdDataSZ = pd.read_csv(mdDataSZPath)
    mdDataSZ['ID'] = mdDataSZ['StockID'] + 2000000
    mdDataSZ['time'] = mdDataSZ.time.str.slice(0, 2) + mdDataSZ.time.str.slice(3, 5) + mdDataSZ.time.str.slice(6, 8) + '000'
    mdDataSZ['time'] = mdDataSZ['time'].astype('int64')
    mdDataSZ['time'] = mdDataSZ.groupby(['ID'])['time'].cummax()
    mdDataSZ['max_cum_volume'] = mdDataSZ.groupby(['StockID'])['cum_volume'].cummax()
    mdDataSZ = mdDataSZ[(mdDataSZ['cum_volume'] > 0) & (mdDataSZ['time'] >= 93000000) &\
                        (mdDataSZ['cum_volume'] == mdDataSZ['max_cum_volume'])]
    mdDataSZ = mdDataSZ[['ID', 'clockAtArrival', 'sequenceNo', 'time', 'cum_volume', 'bid1p', 'ask1p', 'bid1q', 'ask1q', 'bid5q', 'ask5q']]

    mdData = pd.concat([mdDataSH, mdDataSZ, indexData]).reset_index(drop=True)
    mdData = mdData.sort_values(by=['sequenceNo']).reset_index(drop=True)

    # Map every tick to an index category, carry the most recent index close
    # forward onto the stock ticks (in sequenceNo order), then drop the index
    # rows themselves.
    addData = pd.DataFrame({'indexCat': [1000300, 1000905, 1000852], 'ID': [1000300, 1000905, 1000852],
                            'secid':[1000300, 1000905, 1000852]})
    indexCatData = pd.concat([indexCatData, addData], sort=False).reset_index(drop=True)
    mdData = pd.merge(mdData, indexCatData, how='left', on=['ID'], validate='many_to_one')
    mdData = mdData[~mdData['indexCat'].isnull()].reset_index(drop=True)
    mdData['indexClose'] = np.where(mdData['ID'].isin([1000300, 1000852, 1000905]), mdData['close'], np.nan)
    mdData['indexClose'] = mdData.groupby(['indexCat'])['indexClose'].ffill()
    mdData = mdData[~mdData['ID'].isin([1000300, 1000852, 1000905])].reset_index(drop=True)

    # Size-weighted mid price; a zero quote on one side is replaced by the
    # price of the other side.
    mdData = mdData.sort_values(by=['ID', 'sequenceNo']).reset_index(drop=True)
    mdData['safeBid1p'] = np.where(mdData['bid1p'] == 0, mdData['ask1p'], mdData['bid1p'])
    mdData['safeAsk1p'] = np.where(mdData['ask1p'] == 0, mdData['bid1p'], mdData['ask1p'])
    mdData['adjMid'] = (mdData['safeBid1p']*mdData['ask1q'] + mdData['safeAsk1p']*mdData['bid1q'])/(mdData['bid1q'] + mdData['ask1q'])

    # Session flag: 1 for ticks at or after 130000000, otherwise 0.
    mdData['session'] = np.where(mdData['time'] >= 130000000, 1, 0)

    def findTmValue(clockLs, tm, method='L', buffer=0):
        """Return, per tick, the position of the tick ``tm - buffer`` away.

        ``clockLs`` is a sorted Series of clocks within one group.
        ``method='F'`` looks forward, ``method='L'`` looks backward.
        Positions are relative to the start of ``clockLs``; -1 marks ticks
        for which no usable target exists.
        """
        maxIx = len(clockLs)
        orignIx = np.arange(maxIx)
        if method == 'F':
            ix = np.searchsorted(clockLs, clockLs+(tm-buffer))
            ## if target future index is next tick, mask
            mask = (orignIx == (ix - 1))|(orignIx == ix)|(ix == maxIx)
        elif method == 'L':
            ## if target future index is last tick, mask
            ix = np.searchsorted(clockLs, clockLs-(tm-buffer))
            ix = ix - 1
            ix[ix<0] = 0
            ## !!!ATTENTION: model3 change
            mask = (orignIx == ix) | ((clockLs-(tm-buffer)).values < clockLs.values[0])
        else:
            # Previously an unknown method fell through to an unbound `ix`.
            raise ValueError("method must be 'F' or 'L'")
        ix[mask] = -1
        return ix

    # ------------------------------------------------------------------
    # 6. Forward look-ups per (ID, session): position of the tick roughly
    #    30 / 90 / 300 units ahead (tm*1e6 with a 5e5 buffer).
    #    NOTE(review): the 1e6 scaling suggests clockAtArrival is in
    #    microseconds -- confirm.
    # ------------------------------------------------------------------
    mdData = mdData.reset_index(drop=True)
    groupAllData = mdData.groupby(['ID', 'session'])
    mdData['sessionStartCLA'] = groupAllData['clockAtArrival'].transform('min')
    mdData['relativeClock'] = mdData['clockAtArrival'] - mdData['sessionStartCLA']
    mdData['trainFlag'] = np.where(mdData['relativeClock'] > 179.5*1e6, 1, 0)
    mdData['index'] = mdData.index.values
    mdData['sessionStartIx'] = groupAllData['index'].transform('min')
    for tm in [30, 90, 300]:
        tmCol = 'F{}s_ix'.format(tm)
        mdData[tmCol] = groupAllData['relativeClock'].transform(lambda x: findTmValue(x, tm*1e6, 'F', 5*1e5)).astype(int)
    nearLimit = ((mdData.ask5q.values == 0) | (mdData.bid5q.values == 0))

    # Group-relative position + group start = absolute row; rows whose
    # look-up is -1 get NaN.
    for tm in [30, 90, 300]:
        tmIx = mdData['F{}s_ix'.format(tm)].values + mdData['sessionStartIx'].values
        adjMid_tm = mdData['adjMid'].values[tmIx]
        adjMid_tm[mdData['F{}s_ix'.format(tm)].values == -1] = np.nan
        mdData['adjMid_F{}s'.format(tm)] = adjMid_tm

    for tm in [30, 90, 300]:
        tmIx = mdData['F{}s_ix'.format(tm)].values + mdData['sessionStartIx'].values
        adjMid_tm = mdData['indexClose'].values[tmIx]
        adjMid_tm[mdData['F{}s_ix'.format(tm)].values == -1] = np.nan
        mdData['indexClose_F{}s'.format(tm)] = adjMid_tm

    # ------------------------------------------------------------------
    # 7. Locate each order in the market-data stream: the order's 'vai' is
    #    matched against market-data cum_volume, taking the clock of the
    #    last tick with that cum_volume (or the nearest neighbour within the
    #    same stock when there is no exact match).
    # ------------------------------------------------------------------
    mdData = mdData[mdData['ID'].isin(targetStockLs)]
    mdStartPos = mdData.drop_duplicates(subset=['ID', 'cum_volume'], keep='last')
    mdStartPos = mdStartPos[['ID', 'cum_volume', 'clockAtArrival']].reset_index(drop=True)
    mdStartPos.columns = ['secid', 'vai', 'mdStartClock']
    mdStartPos['isOrder'] = 0
    tradeStartPos = orderLog[orderLog['updateType'] == 0][['secid', 'vai', 'order']].reset_index(drop=True)
    tradeStartPos['isOrder'] = 1
    tradeStartPos = pd.concat([mdStartPos, tradeStartPos], sort=False)
    tradeStartPos = tradeStartPos.sort_values(by=['secid', 'vai', 'isOrder'])
    tradeStartPos['mdStartClock'] = tradeStartPos.groupby(['secid'])['mdStartClock'].ffill()
    # bfill() is the supported spelling of the deprecated GroupBy.backfill().
    tradeStartPos['mdStartClock'] = tradeStartPos.groupby(['secid'])['mdStartClock'].bfill()
    tradeStartPos = tradeStartPos[tradeStartPos['isOrder'] == 1][['secid', 'vai', 'order', 'mdStartClock']]

    # Shift every order row onto the market-data clock.
    orderLog = pd.merge(orderLog, tradeStartPos[['order', 'mdStartClock']], how='left', on=['order'], validate='many_to_one')
    orderLog['mdClockAtArrival'] = orderLog['clockAtArrival'] - orderLog['caamd'] + orderLog['mdStartClock']

    # ------------------------------------------------------------------
    # 8. Interleave order rows with market-data ticks by clock and take the
    #    forward values from the neighbouring tick (next tick first, then
    #    previous tick as a fallback).
    # ------------------------------------------------------------------
    tradeData = orderLog[['secid', 'mdClockAtArrival', 'innerSeq']].reset_index(drop=True)
    tradeData.columns = ['ID', 'clockAtArrival', 'innerSeq']
    tradeData['isOrder'] = 1

    mdData = pd.concat([mdData, tradeData], sort=False)
    mdData = mdData.sort_values(by=['ID', 'clockAtArrival', 'isOrder', 'innerSeq']).reset_index(drop=True)
    for col in ['indexClose', 'adjMid_F30s', 'indexClose_F30s', 'adjMid_F90s', 'indexClose_F90s',
                'adjMid_F300s', 'indexClose_F300s']:
        mdData[col] = mdData.groupby(['ID'])[col].bfill()
        mdData[col] = mdData.groupby(['ID'])[col].ffill()

    tradeData = mdData[mdData['isOrder'] == 1][['ID', 'innerSeq', 'adjMid_F30s', 'adjMid_F90s', 'adjMid_F300s',
                                                'indexClose', 'indexClose_F30s', 'indexClose_F90s', 'indexClose_F300s']].reset_index(drop=True)
    tradeData = tradeData.rename(columns={'ID': 'secid'})
    orderLog = pd.merge(orderLog, tradeData, how='left', on=['secid', 'innerSeq'], validate='one_to_one')

    savePath = '/mnt/orderLog/ret'
    orderLog.to_pickle(os.path.join(savePath, 'orderLogWithRet_%s.pkl'%date))

  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

date      colo      accCode
20201106  zs_88_04  892402     11272
          zs_96_02  9655        1450
          zt_88_02  897102     20282
Name: ars, dtype: int64

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
81968,20201106,zs_94_03,9461,2002179.0,-1.0,7.0,50293.0,0.0,0.0,-1.0,-1.0
138507,20201106,zs_96_06,9758,2300653.0,-1.0,7.0,36673.0,0.0,0.0,-1.0,7.213082e+17
212878,20201106,zs_54_01,5470,2000565.0,-1.0,7.0,40926.0,0.0,0.0,-1.0,-1.0
214294,20201106,zs_54_01,5470,2000731.0,-1.0,1.0,53406.0,0.0,0.0,-1.0,868908300.0
216299,20201106,zs_54_01,5474,2300795.0,-1.0,1.0,36037.0,0.0,0.0,-1.0,868902100.0
221139,20201106,zs_54_01,5474,2300108.0,-1.0,1.0,47148.0,0.0,0.0,-1.0,868905700.0
226303,20201106,zs_94_05,9471,2000049.0,-1.0,7.0,35262.0,0.0,0.0,-1.0,-1.0
231049,20201106,zs_94_05,9471,2300724.0,-1.0,7.0,37303.0,0.0,0.0,-1.0,-1.0
484876,20201106,zs_64_01,6480,2002068.0,-1.0,1.0,35219.0,0.0,0.0,-1.0,282576.0
493165,20201106,zs_64_01,6480,2002660.0,-1.0,1.0,51063.0,0.0,0.0,-1.0,402333.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
70443,20201109,zs_94_05,9451,2300445.0,-1.0,7.0,53351.0,0.0,0.0,-1.0,-1.0
157840,20201109,zs_96_06,9758,2300725.0,-1.0,1.0,34844.0,0.0,0.0,-1.0,7.213082e+17
215068,20201109,zs_52_06,5269,2000037.0,-1.0,1.0,34991.0,0.0,0.0,-1.0,310326.0
216257,20201109,zs_52_06,5269,2300493.0,-1.0,1.0,35066.0,0.0,0.0,-1.0,330260.0
223179,20201109,zs_52_06,5269,2002666.0,-1.0,1.0,46881.0,0.0,0.0,-1.0,1143687.0
521575,20201109,zs_64_01,6480,2002527.0,-1.0,1.0,47449.0,0.0,0.0,-1.0,358816.0
529216,20201109,zs_52_08,5281,2000708.0,-1.0,1.0,40003.0,0.0,0.0,-1.0,1005424.0
596302,20201109,zs_96_08,9685,2000601.0,-1.0,1.0,48119.0,0.0,0.0,-1.0,7.221449e+17
599103,20201109,zs_96_08,9685,2000901.0,-1.0,1.0,52953.0,0.0,0.0,-1.0,7.221449e+17
1002197,20201109,zs_66_01,6631,2300201.0,-1.0,7.0,34247.0,0.0,0.0,-1.0,-1.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
62084,20201110,zs_94_05,9454,2300625.0,-1.0,7.0,37282.0,0.0,0.0,-1.0,-1.0
158084,20201110,zs_96_06,9756,2002735.0,-1.0,7.0,39611.0,0.0,0.0,-1.0,-1.0
180148,20201110,zs_52_06,5269,2300221.0,-1.0,1.0,37145.0,0.0,0.0,-1.0,737614.0
184353,20201110,zs_52_06,5269,2300545.0,-1.0,1.0,40900.0,0.0,0.0,-1.0,1108854.0
190148,20201110,zs_52_06,5269,2002623.0,-1.0,1.0,53405.0,0.0,0.0,-1.0,1698847.0
195224,20201110,zs_54_01,5470,2002003.0,-1.0,1.0,50178.0,0.0,0.0,-1.0,868908000.0
203048,20201110,zs_54_01,5474,2300174.0,-1.0,1.0,46863.0,0.0,0.0,-1.0,868905400.0
206404,20201110,zs_54_01,5474,2002003.0,-1.0,1.0,50178.0,0.0,0.0,-1.0,868908000.0
443164,20201110,zs_64_01,6480,2300540.0,-1.0,1.0,48146.0,0.0,0.0,-1.0,370777.0
503592,20201110,zs_96_08,9685,2300409.0,-1.0,7.0,36041.0,0.0,0.0,-1.0,-1.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
1333,20201111,zs_92_01,9208,2002261.0,-1.0,1.0,49722.0,0.0,0.0,-1.0,8250502000.0
3516,20201111,zs_92_02,9243,2000710.0,-1.0,1.0,51691.0,0.0,0.0,-1.0,8250503000.0
203382,20201111,zs_52_06,5269,2300493.0,-1.0,1.0,35301.0,0.0,0.0,-1.0,382832.0
218987,20201111,zs_54_01,5474,2300488.0,-1.0,1.0,35905.0,0.0,0.0,-1.0,868901700.0
224407,20201111,zs_54_01,5474,2002903.0,-1.0,1.0,40277.0,0.0,0.0,-1.0,868905300.0
226747,20201111,zs_54_01,5474,2002180.0,-1.0,1.0,49658.0,0.0,0.0,-1.0,868907400.0
513854,20201111,zs_52_08,5281,2002236.0,-1.0,1.0,34298.0,0.0,0.0,-1.0,82912.0
514664,20201111,zs_52_08,5281,2002558.0,-1.0,1.0,36367.0,0.0,0.0,-1.0,569116.0
572010,20201111,zs_96_08,9685,2002850.0,-1.0,1.0,47812.0,0.0,0.0,-1.0,7.221449e+17
679413,20201111,zs_52_09,5291,2002056.0,-1.0,1.0,38352.0,0.0,0.0,-1.0,835382.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

date      colo      accCode
20201112  zs_96_08  974101     27540
          zt_88_06  8971       25984
Name: ars, dtype: int64

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
457,20201112,zs_92_01,9208,2300222.0,-1.0,1.0,35292.0,0.0,0.0,-1.0,8250500000.0
1322,20201112,zs_92_01,9208,2002562.0,-1.0,1.0,47100.0,0.0,0.0,-1.0,8250502000.0
71831,20201112,zs_94_05,9454,2002779.0,-1.0,7.0,47503.0,0.0,0.0,-1.0,-1.0
193305,20201112,zs_52_06,5269,2300807.0,-1.0,1.0,37602.0,0.0,0.0,-1.0,664043.0
197427,20201112,zs_52_06,5269,2002931.0,-1.0,1.0,48231.0,0.0,0.0,-1.0,1112798.0
201067,20201112,zs_52_06,5269,2300076.0,-1.0,1.0,53586.0,0.0,0.0,-1.0,1496638.0
202546,20201112,zs_54_01,5470,2300809.0,-1.0,1.0,36139.0,0.0,0.0,-1.0,868902400.0
203274,20201112,zs_54_01,5470,2300154.0,-1.0,1.0,39433.0,0.0,0.0,-1.0,868904100.0
207037,20201112,zs_54_01,5474,2300479.0,-1.0,7.0,35937.0,0.0,0.0,-1.0,868901500.0
213858,20201112,zs_54_01,5474,2300721.0,-1.0,1.0,48499.0,0.0,0.0,-1.0,868906700.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
303,20201113,zs_92_01,9208,2000030.0,-1.0,1.0,35109.0,0.0,0.0,-1.0,8250501000.0
64584,20201113,zs_94_05,9454,2002779.0,-1.0,7.0,37485.0,0.0,0.0,-1.0,-1.0
138342,20201113,zs_96_06,9765,2002714.0,-1.0,7.0,34309.0,0.0,0.0,-1.0,-1.0
188026,20201113,zs_52_06,5269,2300410.0,-1.0,1.0,35024.0,0.0,0.0,-1.0,290745.0
190690,20201113,zs_52_06,5269,2300597.0,-1.0,7.0,35298.0,0.0,0.0,-1.0,-1.0
192784,20201113,zs_52_06,5269,2000566.0,-1.0,1.0,37313.0,0.0,0.0,-1.0,677552.0
195285,20201113,zs_52_06,5269,2300514.0,-1.0,1.0,40728.0,0.0,0.0,-1.0,970269.0
196721,20201113,zs_52_06,5269,2002321.0,-1.0,1.0,48006.0,0.0,0.0,-1.0,1117009.0
206277,20201113,zs_54_01,5470,2300796.0,-1.0,1.0,50426.0,0.0,0.0,-1.0,868908200.0
206762,20201113,zs_54_01,5470,2002613.0,-1.0,1.0,52448.0,0.0,0.0,-1.0,868909200.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
95945,20201116,zs_96_08,9754,2002821.0,-1.0,7.0,40356.0,0.0,0.0,-1.0,-1.0
192297,20201116,zs_52_06,5269,2002362.0,-1.0,1.0,37275.0,0.0,0.0,-1.0,644154.0
195560,20201116,zs_52_06,5269,2300688.0,-1.0,1.0,40339.0,0.0,0.0,-1.0,925152.0
198494,20201116,zs_52_06,5269,2002921.0,-1.0,1.0,48770.0,0.0,0.0,-1.0,1173793.0
199662,20201116,zs_52_06,5269,2002760.0,-1.0,1.0,50732.0,0.0,0.0,-1.0,1309682.0
200677,20201116,zs_52_06,5269,2300264.0,-1.0,1.0,52702.0,0.0,0.0,-1.0,1422141.0
200850,20201116,zs_52_06,5269,2002871.0,-1.0,1.0,52878.0,0.0,0.0,-1.0,1431586.0
205182,20201116,zs_54_01,5470,2002686.0,-1.0,1.0,47936.0,0.0,0.0,-1.0,868906700.0
205354,20201116,zs_54_01,5470,2300049.0,-1.0,1.0,48800.0,0.0,0.0,-1.0,868907300.0
205465,20201116,zs_54_01,5470,2000565.0,-1.0,1.0,49358.0,0.0,0.0,-1.0,868907600.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
13454,20201117,zt_94_06,9551,1600740.0,-1.0,1.0,-1.0,0.0,0.0,-1.0,2002655.0
13455,20201117,zt_94_06,9551,1603345.0,-1.0,1.0,-1.0,0.0,0.0,-1.0,2002682.0
181242,20201117,zs_52_06,5269,2002160.0,-1.0,1.0,35072.0,0.0,0.0,-1.0,308507.0
184598,20201117,zs_52_06,5269,2300671.0,-1.0,1.0,37399.0,0.0,0.0,-1.0,664932.0
193572,20201117,zs_54_01,5470,2300391.0,-1.0,1.0,53228.0,0.0,0.0,-1.0,868907900.0
455732,20201117,zs_64_01,6480,2300611.0,-1.0,1.0,34993.0,0.0,0.0,-1.0,25805.0
463868,20201117,zs_64_01,6480,2300627.0,-1.0,1.0,48873.0,0.0,0.0,-1.0,114881.0
519759,20201117,zs_96_08,9685,2002036.0,-1.0,7.0,39755.0,0.0,0.0,-1.0,-1.0
875012,20201117,zs_66_01,6631,2300321.0,-1.0,1.0,38434.0,0.0,0.0,-1.0,18128070000.0
876298,20201117,zs_66_01,6631,2000407.0,-1.0,1.0,48076.0,0.0,0.0,-1.0,18128090000.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
190345,20201118,zs_52_06,5269,2002136.0,-1.0,1.0,37186.0,0.0,0.0,-1.0,649746.0
194073,20201118,zs_52_06,5269,2300385.0,-1.0,1.0,40507.0,0.0,0.0,-1.0,970666.0
194691,20201118,zs_52_06,5269,2000423.0,-1.0,1.0,41259.0,0.0,0.0,-1.0,1037098.0
196409,20201118,zs_52_06,5269,2000782.0,-1.0,1.0,48702.0,0.0,0.0,-1.0,1220928.0
197284,20201118,zs_52_06,5269,2002659.0,-1.0,1.0,50428.0,0.0,0.0,-1.0,1358607.0
474507,20201118,zs_64_01,6480,2000526.0,-1.0,1.0,40692.0,0.0,0.0,-1.0,95488.0
532558,20201118,zs_96_08,9685,2300429.0,-1.0,7.0,35876.0,0.0,0.0,-1.0,-1.0
539005,20201118,zs_96_08,9685,2002345.0,-1.0,1.0,51606.0,0.0,0.0,-1.0,7.221449e+17
600948,20201118,zs_52_09,5290,2000417.0,-1.0,1.0,-1.0,0.0,0.0,-1.0,1045697.0
600949,20201118,zs_52_09,5290,2000417.0,-1.0,3.0,-1.0,0.0,0.0,-1.0,1045697.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
19231,20201119,zs_54_01,5470,2300729.0,-1.0,1.0,39412.0,0.0,0.0,-1.0,868904100.0
22469,20201119,zs_54_01,5470,2002540.0,-1.0,1.0,52564.0,0.0,0.0,-1.0,868908900.0
171266,20201119,zs_66_01,6631,2300722.0,-1.0,1.0,34398.0,0.0,0.0,-1.0,18128010000.0
171357,20201119,zs_66_01,6631,2002795.0,-1.0,1.0,34410.0,0.0,0.0,-1.0,18128010000.0
175059,20201119,zs_66_01,6631,2002140.0,-1.0,7.0,40002.0,0.0,0.0,-1.0,-1.0
176241,20201119,zs_66_01,6631,2002787.0,-1.0,7.0,47366.0,0.0,0.0,-1.0,-1.0
177235,20201119,zs_66_01,6631,2002398.0,-1.0,1.0,49284.0,0.0,0.0,-1.0,18128090000.0
244151,20201119,zs_54_01,5474,2002198.0,-1.0,1.0,53324.0,0.0,0.0,-1.0,868909300.0
293102,20201119,zs_66_01,6634,2300637.0,-1.0,1.0,38523.0,0.0,0.0,-1.0,18128060000.0
297172,20201119,zs_66_01,6634,2000668.0,-1.0,7.0,49602.0,0.0,0.0,-1.0,18128090000.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
21835,20201120,zs_64_01,6480,2002588.0,-1.0,1.0,40612.0,0.0,0.0,-1.0,84954.0
25023,20201120,zs_64_01,6480,2002548.0,-1.0,1.0,50434.0,0.0,0.0,-1.0,111989.0
26113,20201120,zs_64_01,6480,2002732.0,-1.0,1.0,52199.0,0.0,0.0,-1.0,127059.0
170473,20201120,zs_54_01,5470,2002571.0,-1.0,7.0,49023.0,0.0,0.0,-1.0,868907100.0
228163,20201120,zs_52_06,5269,2000610.0,-1.0,1.0,49400.0,0.0,0.0,-1.0,1086587.0
229621,20201120,zs_52_06,5269,2002735.0,-1.0,1.0,52599.0,0.0,0.0,-1.0,1288279.0
294884,20201120,zt_88_03,8833,2002967.0,-1.0,1.0,35559.0,0.0,0.0,-1.0,21000350.0
313955,20201120,zs_52_09,5291,2300459.0,-1.0,1.0,37825.0,0.0,0.0,-1.0,618183.0
552476,20201120,zs_66_01,6634,2300218.0,-1.0,1.0,34391.0,0.0,0.0,-1.0,18128010000.0
557909,20201120,zs_66_01,6634,2000558.0,-1.0,1.0,46910.0,0.0,0.0,-1.0,18128060000.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
72905,20201123,zs_52_06,5269,2000762.0,-1.0,1.0,35202.0,0.0,0.0,-1.0,334722.0
75082,20201123,zs_52_06,5269,2002909.0,-1.0,1.0,37294.0,0.0,0.0,-1.0,656427.0
75196,20201123,zs_52_06,5269,2002881.0,-1.0,1.0,37383.0,0.0,0.0,-1.0,664123.0
77423,20201123,zs_52_06,5269,2300331.0,-1.0,1.0,39750.0,0.0,0.0,-1.0,888709.0
79559,20201123,zs_52_06,5269,2002883.0,-1.0,1.0,47244.0,0.0,0.0,-1.0,1087281.0
81161,20201123,zs_52_06,5269,2300789.0,-1.0,1.0,48754.0,0.0,0.0,-1.0,1221986.0
100102,20201123,zs_54_01,5470,2002483.0,-1.0,1.0,47870.0,0.0,0.0,-1.0,868906400.0
183009,20201123,zs_96_06,9758,2300327.0,-1.0,1.0,36565.0,0.0,0.0,-1.0,7.213082e+17
265142,20201123,zs_52_09,5291,2002078.0,-1.0,1.0,53305.0,0.0,0.0,-1.0,1549135.0
393655,20201123,zs_66_01,6634,2002057.0,-1.0,1.0,38953.0,0.0,0.0,-1.0,18128060000.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
248252,20201124,zs_92_02,9243,2002413.0,-1.0,1.0,47653.0,0.0,0.0,-1.0,8250502000.0
292478,20201124,zs_54_01,5474,2002828.0,-1.0,1.0,39984.0,0.0,0.0,-1.0,868903800.0
292968,20201124,zs_54_01,5474,2000655.0,-1.0,1.0,40640.0,0.0,0.0,-1.0,868904100.0
293712,20201124,zs_54_01,5474,2300779.0,-1.0,1.0,46809.0,0.0,0.0,-1.0,868904500.0
293717,20201124,zs_54_01,5474,2000407.0,-1.0,1.0,46812.0,0.0,0.0,-1.0,868904600.0
404296,20201124,zs_66_01,6631,2002917.0,-1.0,1.0,34264.0,0.0,0.0,-1.0,18128000000.0
408194,20201124,zs_66_01,6631,2002090.0,-1.0,1.0,36665.0,0.0,0.0,-1.0,18128040000.0
421273,20201124,zs_96_08,9685,2002215.0,-1.0,1.0,51347.0,0.0,0.0,-1.0,7.221449e+17
474283,20201124,zs_54_01,5470,2300779.0,-1.0,1.0,46809.0,0.0,0.0,-1.0,868904500.0
573787,20201124,zs_66_01,6634,2300286.0,-1.0,1.0,34370.0,0.0,0.0,-1.0,18128010000.0


There are orders with all things same except sdd
        ApplSeqNum       aaa  absFilledThisUpdate  absOrderSize  \
342984   2320094.0  0.001460                  0.0         100.0   
344586         0.0  0.001793                  0.0         100.0   

        absOrderSizeCumFilled  accCode  ars         caamd  \
342984                    0.0   966701  1.0  1.606187e+15   
344586                    0.0   966701  1.0  1.606182e+15   

        cancellationPending         cfe         clock  clockAtArrival  \
342984                  0.0  3252543.10  1.606187e+15    1.606187e+15   
344586                  0.0  8782423.05  1.606182e+15    1.606182e+15   

        cumSharesBought  cumSharesBuyInserted  cumSharesSellInserted  \
342984              0.0                 100.0                    0.0   
344586              0.0                 100.0                    0.0   

        cumSharesSold      date  finalState         gfe       hee  \
342984            0.0  20201124         0.0  3252542.10  0.

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
32878,20201125,zs_52_06,5269,2002057.0,-1.0,1.0,37459.0,0.0,0.0,-1.0,657431.0
33615,20201125,zs_52_06,5269,2300509.0,-1.0,1.0,37823.0,0.0,0.0,-1.0,695486.0
34821,20201125,zs_52_06,5269,2002678.0,-1.0,1.0,39035.0,0.0,0.0,-1.0,822326.0
40122,20201125,zs_52_06,5269,2002571.0,-1.0,1.0,53533.0,0.0,0.0,-1.0,1556204.0
304483,20201125,zs_66_01,6634,2300642.0,-1.0,7.0,36827.0,0.0,0.0,-1.0,-1.0
307240,20201125,zs_66_01,6634,2300320.0,-1.0,1.0,40724.0,0.0,0.0,-1.0,18128070000.0
308225,20201125,zs_66_01,6634,2002743.0,-1.0,1.0,47275.0,0.0,0.0,-1.0,18128080000.0
391174,20201125,zs_52_06,5287,2300674.0,-1.0,1.0,47824.0,0.0,0.0,-1.0,1103285.0
399963,20201125,zs_54_01,5474,2300693.0,-1.0,1.0,36051.0,0.0,0.0,-1.0,868901700.0
445186,20201125,zs_66_01,6631,2002893.0,-1.0,1.0,50357.0,0.0,0.0,-1.0,18128090000.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
2605,20201126,zs_92_02,9243,2002433.0,-1.0,1.0,50418.0,0.0,0.0,-1.0,8250502000.0
123171,20201126,zs_96_06,9758,2300776.0,-1.0,7.0,36797.0,0.0,0.0,-1.0,-1.0
175825,20201126,zs_52_06,5269,2000762.0,-1.0,1.0,52425.0,0.0,0.0,-1.0,1416625.0
176240,20201126,zs_52_06,5269,2300648.0,-1.0,1.0,53704.0,0.0,0.0,-1.0,1502975.0
185722,20201126,zs_54_01,5474,2300334.0,-1.0,1.0,40167.0,0.0,0.0,-1.0,868904400.0


There are orders with all things same except sdd
        ApplSeqNum       aaa  absFilledThisUpdate  absOrderSize  \
823855   8329262.0  0.000178                  0.0         200.0   
834881   8183721.0  0.000822                  0.0         200.0   

        absOrderSizeCumFilled  accCode  ars         caamd  \
823855                    0.0     5386  1.0  1.606358e+15   
834881                    0.0     5386  1.0  1.606358e+15   

        cancellationPending         cfe         clock  clockAtArrival  \
823855                  0.0  1406389.96  1.606358e+15    1.606358e+15   
834881                  0.0      630.82  1.606358e+15    1.606358e+15   

        cumSharesBought  cumSharesBuyInserted  cumSharesSellInserted  \
823855              0.0                   0.0                 1300.0   
834881              0.0                   0.0                  200.0   

        cumSharesSold      date  finalState         gfe       hee  \
823855         1100.0  20201126         0.0  1406389.96 -0.

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
2852,20201127,zs_92_02,9243,2002139.0,-1.0,1.0,51778.0,0.0,0.0,-1.0,8250503000.0
2955,20201127,zs_92_02,9243,2300766.0,-1.0,1.0,53060.0,0.0,0.0,-1.0,8250503000.0
175559,20201127,zs_52_06,5269,2300575.0,-1.0,1.0,37249.0,0.0,0.0,-1.0,585703.0
178626,20201127,zs_52_06,5269,2300757.0,-1.0,1.0,39005.0,0.0,0.0,-1.0,753810.0
179178,20201127,zs_52_06,5269,2300759.0,-1.0,1.0,39690.0,0.0,0.0,-1.0,807882.0
184272,20201127,zs_52_06,5269,2300669.0,-1.0,7.0,52319.0,0.0,0.0,-1.0,1373632.0
185790,20201127,zs_52_06,5269,2300712.0,-1.0,1.0,34981.0,0.0,0.0,-1.0,244134.0
186747,20201127,zs_52_06,5269,2300606.0,-1.0,1.0,35034.0,0.0,0.0,-1.0,254774.0
186967,20201127,zs_52_06,5269,2300153.0,-1.0,1.0,35044.0,0.0,0.0,-1.0,258048.0
467733,20201127,zs_64_01,6480,2300649.0,-1.0,1.0,41119.0,0.0,0.0,-1.0,95653.0


There are orders with all things same except sdd
        ApplSeqNum       aaa  absFilledThisUpdate  absOrderSize  \
156489  11015022.0   0.00144                  0.0         900.0   
173291   8140893.0  0.001123                  0.0         900.0   
378788         0.0  0.001279                  0.0         100.0   
381889         0.0  0.000625                  0.0         100.0   
498016   3703455.0  0.001039                  0.0         100.0   
500426   6023487.0  0.000441                  0.0         100.0   
505808    292410.0  0.002062                  0.0         100.0   
507926   2546370.0  0.001065                  0.0         100.0   
551987         0.0  0.003066                  0.0         100.0   
560484         0.0  0.001568                  0.0         100.0   

        absOrderSizeCumFilled  accCode  ars         caamd  \
156489                    0.0     9756  1.0  1.606447e+15   
173291                    0.0     9756  1.0  1.606444e+15   
378788                    0.0 

There are orders with same internalId but different orderId other than accCode 8856 case
date      colo      accCode  secid      orderDirection  absOrderSize  internalId
20201127  zs_96_06  9756     2300329.0  -1.0            900.0         1043.0        2
          zt_96_09  966701   1603833.0   1.0            100.0         263.0         2
Name: orderId, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

date      colo      accCode
20201130  zs_92_02  9243       2150
Name: ars, dtype: int64

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
66734,20201130,zs_54_01,5456,2300109.0,-1.0,1.0,36103.0,0.0,0.0,-1.0,868901600.0
111999,20201130,zs_96_06,9758,2002838.0,-1.0,1.0,34722.0,0.0,0.0,-1.0,7.213082e+17
166531,20201130,zs_52_06,5269,2002571.0,-1.0,1.0,35051.0,0.0,0.0,-1.0,332877.0
167315,20201130,zs_52_06,5269,2002931.0,-1.0,1.0,35098.0,0.0,0.0,-1.0,340719.0
167942,20201130,zs_52_06,5269,2300782.0,-1.0,1.0,35206.0,0.0,0.0,-1.0,385113.0
168216,20201130,zs_52_06,5269,2300471.0,-1.0,1.0,35298.0,0.0,0.0,-1.0,407681.0
169853,20201130,zs_52_06,5269,2300201.0,-1.0,1.0,37369.0,0.0,0.0,-1.0,730655.0
191868,20201130,zs_54_01,5474,2002334.0,-1.0,1.0,47722.0,0.0,0.0,-1.0,868905400.0
191979,20201130,zs_54_01,5474,2300696.0,-1.0,7.0,47893.0,0.0,0.0,-1.0,-1.0
193135,20201130,zs_54_01,5474,2300436.0,-1.0,1.0,50317.0,0.0,0.0,-1.0,868906600.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
72367,20201201,zs_52_06,5269,2300402.0,-1.0,1.0,35269.0,0.0,0.0,-1.0,299645.0
81580,20201201,zs_52_06,5269,2002054.0,-1.0,1.0,53519.0,0.0,0.0,-1.0,1565274.0
552928,20201201,zs_54_01,5474,2002090.0,-1.0,7.0,39529.0,0.0,0.0,-1.0,868905360.0
553596,20201201,zs_54_01,5474,2002729.0,-1.0,1.0,39751.0,0.0,0.0,-1.0,868906141.0
556222,20201201,zs_54_01,5474,2300635.0,-1.0,1.0,47774.0,0.0,0.0,-1.0,868910469.0
558346,20201201,zs_54_01,5474,2300708.0,-1.0,1.0,52912.0,0.0,0.0,-1.0,868916039.0
689700,20201201,zs_52_09,5291,2002191.0,-1.0,1.0,47233.0,0.0,0.0,-1.0,1105797.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
41579,20201202,zs_64_01,6480,2002817.0,-1.0,1.0,41016.0,0.0,0.0,-1.0,85855.0
43116,20201202,zs_64_01,6480,2002116.0,-1.0,1.0,48949.0,0.0,0.0,-1.0,102655.0
45780,20201202,zs_64_01,6480,2000554.0,-1.0,1.0,53781.0,0.0,0.0,-1.0,137333.0
112034,20201202,zs_96_08,9685,2300793.0,-1.0,1.0,48112.0,0.0,0.0,-1.0,7.221449e+17
113709,20201202,zs_96_08,9685,2002167.0,-1.0,1.0,53797.0,0.0,0.0,-1.0,7.221449e+17
134328,20201202,zs_52_06,5269,2300225.0,-1.0,1.0,35013.0,0.0,0.0,-1.0,268032.0
134637,20201202,zs_52_06,5269,2300758.0,-1.0,1.0,35073.0,0.0,0.0,-1.0,277794.0
137476,20201202,zs_52_06,5269,2300575.0,-1.0,1.0,37360.0,0.0,0.0,-1.0,628938.0
138187,20201202,zs_52_06,5269,2002347.0,-1.0,1.0,38271.0,0.0,0.0,-1.0,728629.0
138599,20201202,zs_52_06,5269,2000893.0,-1.0,1.0,38864.0,0.0,0.0,-1.0,767161.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
36731,20201203,zs_66_01,6634,2300375.0,-1.0,1.0,34458.0,0.0,0.0,-1.0,18128000000.0
38786,20201203,zs_66_01,6634,2002909.0,-1.0,7.0,36669.0,0.0,0.0,-1.0,18128030000.0
40192,20201203,zs_66_01,6634,2002272.0,-1.0,1.0,39134.0,0.0,0.0,-1.0,18128050000.0
41836,20201203,zs_66_01,6634,2002902.0,-1.0,1.0,47300.0,0.0,0.0,-1.0,18128070000.0
152748,20201203,zs_52_06,5269,2002066.0,-1.0,1.0,39804.0,0.0,0.0,-1.0,824994.0
152960,20201203,zs_52_06,5269,2300530.0,-1.0,1.0,40086.0,0.0,0.0,-1.0,859494.0
164107,20201203,zs_54_01,5474,2002886.0,-1.0,1.0,49242.0,0.0,0.0,-1.0,868913400.0
225677,20201203,zs_96_08,9741,2300473.0,-1.0,7.0,47023.0,0.0,0.0,-1.0,-1.0
257806,20201203,zs_52_09,5291,2002839.0,-1.0,1.0,34402.0,0.0,0.0,-1.0,77334.0
335961,20201203,zs_94_05,9471,2300232.0,-1.0,7.0,39749.0,0.0,0.0,-1.0,-1.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
86935,20201204,zs_54_01,5474,2002553.0,-1.0,1.0,36111.0,0.0,0.0,-1.0,868904600.0
90859,20201204,zs_54_01,5474,2002297.0,-1.0,1.0,48345.0,0.0,0.0,-1.0,868911400.0
93127,20201204,zs_54_01,5474,2000828.0,-1.0,1.0,53258.0,0.0,0.0,-1.0,868915100.0
214148,20201204,zs_66_01,6634,2300742.0,-1.0,7.0,34534.0,0.0,0.0,-1.0,-1.0
217476,20201204,zs_66_01,6634,2002917.0,-1.0,1.0,41354.0,0.0,0.0,-1.0,18128060000.0
352720,20201204,zs_96_08,9685,2000403.0,-1.0,1.0,50390.0,0.0,0.0,-1.0,7.221449e+17
353712,20201204,zs_96_08,9685,2300662.0,-1.0,1.0,52879.0,0.0,0.0,-1.0,7.221449e+17
455242,20201204,zs_96_08,9741,2300656.0,-1.0,7.0,34827.0,0.0,0.0,-1.0,-1.0
471047,20201204,zs_64_01,6480,2000545.0,-1.0,7.0,35341.0,0.0,0.0,-1.0,-1.0
472575,20201204,zs_64_01,6480,2300782.0,-1.0,1.0,38724.0,0.0,0.0,-1.0,66461.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
141595,20201207,zs_94_05,9454,2300468.0,-1.0,7.0,37155.0,0.0,0.0,-1.0,-1.0
269827,20201207,zs_66_01,6634,2002177.0,-1.0,1.0,50844.0,0.0,0.0,-1.0,18128090000.0
270146,20201207,zs_66_01,6634,2002753.0,-1.0,1.0,51509.0,0.0,0.0,-1.0,18128090000.0
378464,20201207,zs_52_08,5281,2002410.0,-1.0,1.0,34461.0,0.0,0.0,-1.0,110580.0
429688,20201207,zs_54_01,5474,2000565.0,-1.0,1.0,52618.0,0.0,0.0,-1.0,868916400.0
516716,20201207,zs_52_06,5269,2002808.0,-1.0,1.0,34995.0,0.0,0.0,-1.0,234434.0
517457,20201207,zs_52_06,5269,2002553.0,-1.0,1.0,35149.0,0.0,0.0,-1.0,262748.0
522097,20201207,zs_52_06,5269,2300175.0,-1.0,1.0,40457.0,0.0,0.0,-1.0,872328.0
550502,20201207,zs_52_09,5291,2300296.0,-1.0,1.0,39960.0,0.0,0.0,-1.0,826252.0
558213,20201207,zs_96_08,9685,2000802.0,-1.0,7.0,40338.0,0.0,0.0,-1.0,-1.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
84883,20201208,zs_64_01,6480,2300218.0,-1.0,1.0,40630.0,0.0,0.0,-1.0,73412.0
87044,20201208,zs_64_01,6480,2300196.0,-1.0,1.0,53554.0,0.0,0.0,-1.0,122058.0
454834,20201208,zs_52_06,5269,2300782.0,-1.0,1.0,35050.0,0.0,0.0,-1.0,217249.0
460965,20201208,zs_52_06,5269,2300622.0,-1.0,1.0,49048.0,0.0,0.0,-1.0,1059312.0
470858,20201208,zs_54_01,5474,2002438.0,-1.0,1.0,39794.0,0.0,0.0,-1.0,868908065.0
470962,20201208,zs_54_01,5474,2300622.0,-1.0,1.0,39922.0,0.0,0.0,-1.0,868908139.0
471481,20201208,zs_54_01,5474,2002749.0,-1.0,1.0,40427.0,0.0,0.0,-1.0,868908385.0
471871,20201208,zs_54_01,5474,2002453.0,-1.0,1.0,41009.0,0.0,0.0,-1.0,868908869.0
473444,20201208,zs_54_01,5474,2300220.0,-1.0,1.0,51058.0,0.0,0.0,-1.0,868912010.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
76956,20201209,zs_94_05,9471,2300616.0,-1.0,7.0,53641.0,0.0,0.0,-1.0,-1.0
260470,20201209,zs_52_06,5269,2000619.0,-1.0,1.0,35273.0,0.0,0.0,-1.0,259441.0
262281,20201209,zs_52_06,5269,2300147.0,-1.0,1.0,37766.0,0.0,0.0,-1.0,601415.0
362206,20201209,zs_96_08,9685,2002850.0,-1.0,7.0,50350.0,0.0,0.0,-1.0,-1.0
393805,20201209,zs_96_06,9756,2300341.0,-1.0,7.0,41369.0,0.0,0.0,-1.0,-1.0
415058,20201209,zs_66_01,6634,2002829.0,-1.0,1.0,37423.0,0.0,0.0,-1.0,18128040000.0
418476,20201209,zs_66_01,6634,2000099.0,-1.0,7.0,48302.0,0.0,0.0,-1.0,-1.0
507009,20201209,zs_94_05,9451,2300686.0,-1.0,7.0,48736.0,0.0,0.0,-1.0,-1.0
521529,20201209,zs_94_05,9454,2002360.0,-1.0,7.0,38831.0,0.0,0.0,-1.0,-1.0
546824,20201209,zs_96_06,9758,2300455.0,-1.0,1.0,52893.0,0.0,0.0,-1.0,7.213082e+17


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
2758,20201210,zs_52_06,5269,2300720.0,-1.0,1.0,35416.0,0.0,0.0,-1.0,333829.0
7390,20201210,zs_52_06,5269,2300517.0,-1.0,1.0,47056.0,0.0,0.0,-1.0,934740.0
22355,20201210,zs_96_08,9685,2000016.0,-1.0,1.0,50979.0,0.0,0.0,-1.0,7.221449e+17
130441,20201210,zs_52_09,5291,2300166.0,-1.0,1.0,39867.0,0.0,0.0,-1.0,811907.0
169649,20201210,zs_96_06,9765,2000708.0,-1.0,7.0,52143.0,0.0,0.0,-1.0,-1.0
216597,20201210,zs_64_01,6480,2300403.0,-1.0,1.0,35028.0,0.0,0.0,-1.0,30769.0
220451,20201210,zs_64_01,6480,2002149.0,-1.0,1.0,40285.0,0.0,0.0,-1.0,83010.0
222064,20201210,zs_64_01,6480,2002699.0,-1.0,1.0,47449.0,0.0,0.0,-1.0,94562.0
224693,20201210,zs_64_01,6480,2002238.0,-1.0,1.0,51171.0,0.0,0.0,-1.0,124852.0
225037,20201210,zs_64_01,6480,2300196.0,-1.0,1.0,52003.0,0.0,0.0,-1.0,131152.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
13504,20201211,zs_64_01,6480,2300678.0,-1.0,1.0,38994.0,0.0,0.0,-1.0,78156.0
14622,20201211,zs_64_01,6480,2002749.0,-1.0,1.0,39987.0,0.0,0.0,-1.0,89136.0
14908,20201211,zs_64_01,6480,2002669.0,-1.0,1.0,40120.0,0.0,0.0,-1.0,91998.0
16206,20201211,zs_64_01,6480,2002469.0,-1.0,1.0,41172.0,0.0,0.0,-1.0,102377.0
18753,20201211,zs_64_01,6480,2000423.0,-1.0,1.0,50369.0,0.0,0.0,-1.0,140134.0
20244,20201211,zs_64_01,6480,2002472.0,-1.0,1.0,52950.0,0.0,0.0,-1.0,169689.0
20837,20201211,zs_64_01,6480,2300374.0,-1.0,1.0,53707.0,0.0,0.0,-1.0,174632.0
55769,20201211,zs_52_06,5269,2002154.0,-1.0,1.0,50947.0,0.0,0.0,-1.0,1383437.0
57045,20201211,zs_52_06,5269,2002678.0,-1.0,1.0,53490.0,0.0,0.0,-1.0,1571786.0
66650,20201211,zs_66_01,6634,2002742.0,-1.0,1.0,47706.0,0.0,0.0,-1.0,18128100000.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
128198,20201214,zt_96_09,965801,1601222.0,-1.0,7.0,-1.0,0.0,0.0,-1.0,824.0
167299,20201214,zs_64_01,6480,2300712.0,-1.0,1.0,34972.0,0.0,0.0,-1.0,24799.0
169881,20201214,zs_64_01,6480,2300807.0,-1.0,1.0,37208.0,0.0,0.0,-1.0,59008.0
173592,20201214,zs_64_01,6480,2002293.0,-1.0,1.0,47893.0,0.0,0.0,-1.0,103822.0
176868,20201214,zs_64_01,6480,2002411.0,-1.0,1.0,53411.0,0.0,0.0,-1.0,151687.0
183553,20201214,zs_52_06,5269,2300410.0,-1.0,1.0,35229.0,0.0,0.0,-1.0,305297.0
185543,20201214,zs_52_06,5269,2002890.0,-1.0,1.0,37463.0,0.0,0.0,-1.0,668323.0
185738,20201214,zs_52_06,5269,2002329.0,-1.0,1.0,37688.0,0.0,0.0,-1.0,696244.0
186088,20201214,zs_52_06,5269,2300480.0,-1.0,1.0,37834.0,0.0,0.0,-1.0,716215.0
187816,20201214,zs_52_06,5269,2002845.0,-1.0,1.0,39762.0,0.0,0.0,-1.0,901829.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
230167,20201215,zs_66_01,6634,2002412.0,-1.0,1.0,40795.0,0.0,0.0,-1.0,18128070000.0
231713,20201215,zs_66_01,6634,2300067.0,-1.0,1.0,48357.0,0.0,0.0,-1.0,18128080000.0
233896,20201215,zs_66_01,6634,2002801.0,-1.0,1.0,53394.0,0.0,0.0,-1.0,18128100000.0
522926,20201215,zs_52_06,5269,2300665.0,-1.0,1.0,38607.0,0.0,0.0,-1.0,723486.0
592871,20201215,zs_64_01,6480,2300189.0,-1.0,1.0,35124.0,0.0,0.0,-1.0,22522.0
601242,20201215,zs_64_01,6480,2300475.0,-1.0,1.0,53508.0,0.0,0.0,-1.0,135046.0


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  interactivity=interactivity, compiler=compiler, result=result)


'There are accounts with duplicated ticks:'

Series([], Name: ars, dtype: int64)

'There are ticks with orderDirection 0'

Unnamed: 0,date,colo,accCode,secid,vai,updateType,sdd,orderDirection,absOrderSize,internalId,orderId
83937,20201216,zs_54_01,5474,2002094.0,-1.0,7.0,36274.0,0.0,0.0,-1.0,-1.0
160339,20201216,zs_52_06,5269,2300410.0,-1.0,1.0,35172.0,0.0,0.0,-1.0,251947.0
163475,20201216,zs_52_06,5269,2300521.0,-1.0,1.0,38454.0,0.0,0.0,-1.0,660233.0
251992,20201216,zs_66_01,6634,2300272.0,-1.0,1.0,52636.0,0.0,0.0,-1.0,18128100000.0
259507,20201216,zs_64_01,6480,2002729.0,-1.0,1.0,47305.0,0.0,0.0,-1.0,88802.0
259615,20201216,zs_64_01,6480,2300151.0,-1.0,1.0,47491.0,0.0,0.0,-1.0,89865.0
260830,20201216,zs_64_01,6480,2000677.0,-1.0,1.0,49418.0,0.0,0.0,-1.0,100171.0
260910,20201216,zs_64_01,6480,2002956.0,-1.0,1.0,49543.0,0.0,0.0,-1.0,100739.0
261181,20201216,zs_64_01,6480,2300445.0,-1.0,1.0,50128.0,0.0,0.0,-1.0,103409.0
261199,20201216,zs_64_01,6480,2300789.0,-1.0,1.0,50137.0,0.0,0.0,-1.0,103361.0


There are orders with all things same except sdd
        ApplSeqNum       aaa  absFilledThisUpdate  absOrderSize  \
679151   3688220.0  0.000419                  0.0         100.0   
679310   3686264.0   0.00044                  0.0         100.0   
681571  14404537.0  0.000657                  0.0         100.0   
681724  18252604.0  0.001406                  0.0         100.0   

        absOrderSizeCumFilled  accCode  ars  caamd  cancellationPending  \
679151                    0.0   966301  0.0    0.0                  0.0   
679310                    0.0   966301  0.0    0.0                  0.0   
681571                    0.0   966301  0.0    0.0                  0.0   
681724                    0.0   966301  0.0    0.0                  0.0   

              cfe         clock  clockAtArrival  cumSharesBought  \
679151  568630.46  1.608083e+15    1.608083e+15              0.0   
679310  506777.82  1.608083e+15    1.608083e+15            100.0   
681571  198186.36  1.608097e+15    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
