### 1. load database data

In [1]:
import datetime
import gzip
import lzma
import numbers
import pickle
import time
import urllib.parse

import pandas as pd
import pymongo
import pytz


def DB(host, db_name, user, passwd):
    """Build a MongoDB URI from the given credentials and return a ``DBObj``.

    Args:
        host: Host (optionally ``host:port``) placed after the ``@`` in the URI.
        db_name: Database to open; also used as ``authSource`` unless the user
            is ``admin`` or ``root``, in which case ``admin`` is used.
        user: User name; percent-escaped before being placed in the URI.
        passwd: Password; percent-escaped before being placed in the URI.

    Returns:
        A ``DBObj`` connected with the generated URI.
    """
    auth_db = 'admin' if user in ('admin', 'root') else db_name
    # Reserved characters such as '@', ':', '/' or '%' in the credentials
    # would otherwise corrupt the URI, so both parts are percent-escaped.
    safe_user = urllib.parse.quote_plus(str(user))
    safe_passwd = urllib.parse.quote_plus(str(passwd))
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (safe_user, safe_passwd, host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    """Store and retrieve pandas DataFrames in one MongoDB database.

    A DataFrame is split per (date, symbol) and then into pieces of at most
    ``chunk_size`` rows. Each piece is pickled, compressed and inserted as one
    document with the fields ``ver``, ``data``, ``date``, ``symbol`` and
    ``start``.
    """

    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        """Open a client for ``uri`` and select the database ``db_name``.

        Args:
            uri: MongoDB connection URI.
            symbol_column: DataFrame column used as the per-document symbol.
            db_name: Name of the database to operate on.
        """
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000  # maximum number of rows stored per document
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        """Split a ``mongodb://user:password@host`` URI into its parts.

        Returns:
            A list of the pieces separated by ``:`` and ``@``, e.g.
            ``['user', 'password', 'example.com']``.
        """
        # mongodb://user:password@example.com
        trimmed = uri.strip().replace('mongodb://', '').strip('/')
        return trimmed.replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        """Drop the collection ``table_name``."""
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        """Rename the collection ``old_table`` to ``new_table``."""
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        """Replace the stored data for every (date, symbol) present in ``df``.

        Existing documents for each (date, symbol) pair are deleted before the
        new pieces are inserted. An empty ``df`` is a no-op.

        Args:
            table_name: Target collection.
            df: DataFrame containing ``self.date_column`` and
                ``self.symbol_column``.

        Raises:
            Exception: If ``df`` has no date column.
        """
        if len(df) == 0:
            return

        if self.date_column not in df.columns:
            raise Exception('DataFrame should contain date column')

        multi_date = len(df[self.date_column].unique()) > 1

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            date = str(df[self.date_column].iloc[0])
            # Group by the bare column name: a one-element list makes newer
            # pandas yield 1-tuples as keys. Cast to a plain int exactly like
            # the multi-date branch so both branches store the same type.
            for symbol, sub_df in df.groupby(self.symbol_column):
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        """Insert ``df`` as consecutive documents of at most ``chunk_size`` rows.

        Each document records the row offset of its piece in ``start`` so that
        ``read`` can restore the original order.
        """
        version = 1  # serialization format passed to ``ser``
        total = len(df)
        for start in range(0, total, self.chunk_size):
            end = min(start + self.chunk_size, total)
            df_seg = df[start:end]
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        """Translate date/symbol filters into a MongoDB query dict.

        Args:
            start_date: Inclusive lower bound; an 8-character string, an
                integer, or a ``datetime.date``/``datetime.datetime``
                (subclasses included).
            end_date: Inclusive upper bound, same accepted types.
            symbol: A single symbol or a list/tuple of symbols; each one is
                converted with ``int``. Falsy values add no symbol filter.

        Returns:
            The query dict; empty when no filter was given.

        Raises:
            Exception: For a date string that is not 8 characters long or a
                date of an unsupported type.
        """
        query = {}

        def parse_date(x):
            if isinstance(x, str):
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            # datetime.datetime is a subclass of datetime.date, so one check
            # covers both (and subclasses such as pandas.Timestamp).
            if isinstance(x, datetime.date):
                return x.strftime("%Y%m%d")
            # numbers.Integral also matches numpy integer scalars.
            if isinstance(x, numbers.Integral):
                return parse_date(str(int(x)))
            raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        if symbol:
            if isinstance(symbol, (list, tuple)):
                query['symbol'] = {'$in': [int(x) for x in symbol]}
            else:
                query['symbol'] = int(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        """Delete all documents matching the filters.

        Refuses to run (prints a message and returns ``None``) when no filter
        is given, so the whole table is never deleted by accident.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        """Load the documents matching the filters into a single DataFrame.

        Pieces are ordered by (symbol, date, start) before concatenation.

        Returns:
            The concatenated DataFrame, or ``None`` when no filter was given
            or nothing matched.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for doc in collection.find(query):
            doc['data'] = self.deser(doc['data'], doc['ver'])
            segs.append(doc)
        if not segs:
            return None
        segs.sort(key=lambda doc: (doc['symbol'], doc['date'], doc['start']))
        return pd.concat([doc['data'] for doc in segs], ignore_index=True)

    def list_tables(self):
        """Return the names of the collections in the database."""
        # ``collection_names`` was removed in PyMongo 4. Check on the class,
        # because attribute access on a Database instance returns a Collection
        # for any unknown name.
        if hasattr(type(self.db), 'list_collection_names'):
            return self.db.list_collection_names()
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        """Return the sorted distinct ``date`` values matching the filters.

        Missing bounds default to ``'00000000'`` / ``'99999999'``.
        """
        collection = self.db[table_name]
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        query = self.build_query(start_date, end_date, symbol)
        # Only the date field is projected.
        dates = {doc['date'] for doc in collection.find(query, {"date": 1, '_id': 0})}
        return sorted(dates)

    def ser(self, s, version):
        """Pickle ``s`` and compress it: version 1 uses gzip, version 2 lzma.

        Raises:
            Exception: For any other ``version``.
        """
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        """Inverse of ``ser``: decompress ``s`` and unpickle the payload.

        Raises:
            Exception: For an unknown ``version``.
        """
        # NOTE(review): pickle.loads executes arbitrary code embedded in the
        # payload; only use this against a database whose writers are trusted.
        if version == 1:
            return pickle.loads(gzip.decompress(s))
        elif version == 2:
            return pickle.loads(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    """Register a stand-in ``pandas.core.internals.managers`` on old pandas.

    When the installed pandas is older than 0.24 and the module
    ``pandas.core.internals.managers`` is not loaded, a synthetic module of
    that name exposing ``BlockManager`` is inserted into ``sys.modules``.
    On pandas 0.24 or newer, or when the version cannot be parsed, nothing
    is changed.
    """
    import re
    import sys
    from types import ModuleType

    # Compare versions numerically; comparing the raw strings is
    # lexicographic and mis-orders e.g. '0.9' against '0.24'.
    parsed = re.match(r'(\d+)\.(\d+)', pd.__version__)
    if parsed is None:
        return
    if tuple(int(part) for part in parsed.groups()) >= (0, 24):
        return

    from pandas.core.internals import BlockManager
    pkg_name = 'pandas.core.internals.managers'
    if pkg_name not in sys.modules:
        m = ModuleType(pkg_name)
        m.BlockManager = BlockManager
        sys.modules[pkg_name] = m
patch_pandas_pickle()


import pandas as pd
import random
import numpy as np
import glob
import pickle
import os
import datetime
import time
# Use the fully qualified option name: the short pattern "max_columns" matches
# more than one option on recent pandas and is rejected as ambiguous.
pd.set_option("display.max_columns", 200)


startDate = 20200221
endDate = 20200221
database_name = 'com_md_eq_cn'
# NOTE(review): these credentials are committed in plain text. They can be
# overridden through the environment; the literals should be removed and the
# password rotated once the environment variables are in place.
user = os.environ.get("MD_DB_USER", "zhenyuy")
password = os.environ.get("MD_DB_PASSWORD", "bnONBrzSMGoE")

db = DB("192.168.10.178", database_name, user, password)
SH = db.read('md_snapshot_l2', start_date=startDate, end_date=endDate)
if SH is None:
    # DBObj.read returns None when nothing matches the query.
    raise ValueError('no md_snapshot_l2 data between %s and %s' % (startDate, endDate))

# Split by skey range; rows with skey exactly 2000000 fall in neither frame.
# Copy the slices so the column assignments below act on independent frames.
SZ = SH[SH['skey'] > 2000000].copy()
SH = SH[SH['skey'] < 2000000].copy()
# 'num' combines skey and ordering into a single sortable key per snapshot.
SH['num'] = SH['skey'] * 10000 + SH['ordering']
SZ['num'] = SZ['skey'] * 10000 + SZ['ordering']

### 2. load 92 data

In [2]:
startDate = '20200221'
endDate = '20200221'

readPath = '/mnt/Kevin_zhenyu/rawData/logs_***_zs_92_01_day_data'
# Force a string dtype so an empty glob result still compares cleanly
# against the date strings below.
dataPathLs = np.array(glob.glob(readPath), dtype=str)
# The date is the second '_'-separated token of each directory name.
dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs], dtype=str)
dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]


# NOTE: SH1 is overwritten on every iteration, so only the last matching day
# is kept; with startDate == endDate that is a single day.
for dayPath in dataPathLs:
    path1 = np.array(glob.glob(dayPath + '/mdLog_SH_***'))
    if len(path1) == 0:
        raise FileNotFoundError('no mdLog_SH_* file found under ' + str(dayPath))
    SH1 = pd.read_csv(path1[0])
    # Keep only rows from source 4; copy so the assignments below do not
    # write into a slice of the frame returned by read_csv.
    SH1 = SH1[SH1['source'] == 4].copy()

    # Same key convention as the database frame: StockID offset by 1000000.
    SH1['skey'] = SH1['StockID'] + 1000000
    SH1 = SH1.rename(columns={"openPrice": "open"})
    # Once volume has traded, use the per-skey maximum of 'open' for the row.
    SH1["open"] = np.where(SH1["cum_volume"] > 0, SH1.groupby("skey")["open"].transform("max"), SH1["open"])
    # Strip ':' and '.' from the time string, convert to int and scale by 1000.
    SH1["time"] = SH1["time"].apply(lambda x: int((x.replace(':', "")).replace(".", "")) * 1000)

### 3. add 2 columns (sequenceNo, clockAtArrival)

In [6]:
# Preview the first five rows of skey 1600000 with time after 93000000000.
is_target_skey = SH1['skey'] == 1600000
is_after_cutoff = SH1['time'] > 93000000000
SH1[is_target_skey & is_after_cutoff].head(5)

Unnamed: 0,ms,clock,threadId,clockAtArrival,sequenceNo,source,StockID,exchange,time,cum_volume,cum_amount,close,__origTickSeq,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,skey
432708,09:30:22.792538,1582248622793551,10690,1582248622793522,3193669,4,600000,SH,93002000000,971978,10916220.94,11.24,399440548470074789,11.23,11.22,11.21,11.2,11.19,13222,11600,25000,85100,21000,11.24,11.25,11.26,11.27,11.28,18800,47900,19600,19700,24900,11.23,1600000
436182,09:30:25.997538,1582248625998647,10690,1582248625998564,3649023,4,600000,SH,93006000000,1100678,12362748.94,11.24,399457728339258789,11.24,11.23,11.22,11.21,11.2,47200,80922,32500,25000,60300,11.25,11.26,11.27,11.28,11.29,68900,20200,20700,24900,41900,11.23,1600000
440194,09:30:29.872538,1582248629872989,10690,1582248629872965,4173640,4,600000,SH,93009000000,1199478,13473702.94,11.24,399470613241146789,11.24,11.23,11.22,11.21,11.2,59400,188822,37500,26700,72700,11.25,11.26,11.27,11.28,11.29,50500,34000,20700,36600,42500,11.23,1600000
443505,09:30:32.760538,1582248632761524,10690,1582248632761106,4385471,4,600000,SH,93012000000,1254478,14092247.94,11.25,399483498143034789,11.24,11.23,11.22,11.21,11.2,45000,184022,38500,26800,72700,11.25,11.26,11.27,11.28,11.29,2400,34000,20700,36700,44500,11.23,1600000
446071,09:30:35.089538,1582248635090627,10690,1582248635090600,4487311,4,600000,SH,93015000000,1270578,14273311.94,11.25,399496383044922789,11.25,11.24,11.23,11.22,11.21,9700,40200,184322,38500,26800,11.26,11.27,11.28,11.29,11.3,33900,20700,44900,43000,22800,11.23,1600000


In [3]:
# Columns both feeds share; two rows are treated as the same snapshot only
# when every one of these values is equal.
cols = ['skey', 'time', 'cum_volume', 'cum_amount', "close",
        "bid1p", "bid2p", "bid3p", "bid4p", "bid5p",
        "bid1q", "bid2q", "bid3q", "bid4q", "bid5q",
        "ask1p", "ask2p", "ask3p", "ask4p", "ask5p",
        "ask1q", "ask2q", "ask3q", "ask4q", "ask5q",
        "open"]
# Keep the two columns to be transferred plus the join keys. Copy so the
# rounding below does not assign into a slice of the original frame.
SH1 = SH1[['clockAtArrival', 'sequenceNo'] + cols].copy()
# Round the float-valued keys of SH1 to two decimals before the exact-match merge.
for price_col in ['cum_amount', "close", 'open']:
    SH1[price_col] = SH1[price_col].round(2)
# Only keep securities that also exist in the database frame.
SH1 = SH1[SH1['skey'].isin(SH['skey'].unique())]
# Outer join: unmatched rows from either side survive with NaN in the
# other side's columns ('date' for SH1-only rows, 'sequenceNo' for SH-only).
re = pd.merge(SH, SH1, on=cols, how='outer')
# SH and SH1 can be deleted here to free memory if they are no longer needed.

In [4]:
# Row counts of the outer merge: total, then rows lacking / having each
# side's marker column ('sequenceNo' and 'date').
seq_missing = re['sequenceNo'].isnull()
date_missing = re['date'].isnull()
display(re.shape[0])
for row_mask in (seq_missing, date_missing, ~seq_missing, ~date_missing):
    display(re[row_mask].shape[0])

6831035

172773

42331

6658262

6788704

In [12]:
# --- p21: merged rows without a 'date' value, restricted to the arrival
# clock, the sequence number and the five-level book columns.
five_levels = range(1, 6)
p21_columns = (
    ['clockAtArrival_y', 'sequenceNo', 'skey', 'time', 'cum_volume', 'cum_amount', 'close']
    + ['bid%dp' % lv for lv in five_levels]
    + ['bid%dq' % lv for lv in five_levels]
    + ['ask%dp' % lv for lv in five_levels]
    + ['ask%dq' % lv for lv in five_levels]
    + ['open']
)
p21 = re[re['date'].isnull()][p21_columns]

# --- p22: merged rows without a 'sequenceNo' value, restricted to the full
# snapshot layout. Bid levels are listed 10 -> 1 and ask levels 1 -> 10 for
# price (p), quantity (q) and order count (n), followed by the 50 top-of-book
# queue quantities per side and the aggregate columns.
p22_columns = ["skey", "date", "time", "clockAtArrival_x", "datetime", "ordering", "has_missing",
               "cum_trades_cnt", "cum_volume", "cum_amount", "prev_close", "open", "high", "low", "close"]
for field in ('p', 'q', 'n'):
    p22_columns += ['bid%d%s' % (lv, field) for lv in range(10, 0, -1)]
    p22_columns += ['ask%d%s' % (lv, field) for lv in range(1, 11)]
for side in ('bid', 'ask'):
    p22_columns += ['%s1Top%dq' % (side, rank) for rank in range(1, 51)]
p22_columns += ["total_bid_quantity", "total_ask_quantity", "total_bid_vwap", "total_ask_vwap",
                "total_bid_orders", 'total_ask_orders', 'total_bid_levels', 'total_ask_levels',
                'bid_trade_max_duration', 'ask_trade_max_duration',
                'cum_canceled_buy_orders', 'cum_canceled_buy_volume', "cum_canceled_buy_amount",
                "cum_canceled_sell_orders", 'cum_canceled_sell_volume', "cum_canceled_sell_amount",
                'num']
p22 = re[re['sequenceNo'].isnull()][p22_columns]

# Rows matched on both sides of the outer merge (both marker columns set).
# The mask is computed once instead of three times over the full frame.
matched = re[re['sequenceNo'].notnull() & re['date'].notnull()]
num_is_dup = matched['num'].duplicated(keep=False)
p11 = matched[num_is_dup]    # 'num' values that matched more than one row
p12 = matched[~num_is_dup]   # 'num' values that matched exactly one row

# Pair duplicated rows up one-to-one: after sorting, keep a row only when its
# running position within its 'num' group equals its running position within
# its 'sequenceNo' group.
p11 = p11.sort_values(by=['num', 'sequenceNo'])
p11["order1"] = p11.groupby(["num"]).cumcount()
p11["order2"] = p11.groupby(["sequenceNo"]).cumcount()
p11 = p11[p11['order1'] == p11['order2']].drop(['order1', 'order2'], axis=1)
p1 = pd.concat([p11, p12])

# Rows without a full match: try again using only (skey, time) as the key.
p2 = pd.merge(p22, p21[['skey', 'time', 'clockAtArrival_y', 'sequenceNo']], on=['skey', 'time'], how='left')
re1 = pd.concat([p1, p2])
re1 = re1.sort_values(by='num')

# seq1: sequenceNo back-filled within each skey, then forward-filled.
# NOTE(review): the trailing ffill() runs on the whole column, not per skey,
# so a skey with no sequenceNo at all inherits the previous skey's value;
# confirm that this is intended.
re1['seq1'] = re1.groupby('skey')['sequenceNo'].bfill().ffill()
re1['count1'] = re1.groupby(['seq1']).cumcount()                       # position within the seq1 group
re1['count2'] = re1.groupby(['seq1'])['count1'].transform('nunique')   # size of the seq1 group
re1['max_seq'] = re1.groupby('skey')['sequenceNo'].transform('max')
# Offset of each row relative to the end of its seq1 group; groups carrying
# the skey's largest sequenceNo are shifted by one less than the others.
re1['count'] = np.where(re1['seq1'] != re1['max_seq'], re1['count1'] + 1 - re1['count2'], re1['count1'] - re1['count2'])
re1.drop(["max_seq", "count1", "count2"], axis=1, inplace=True)
# Flag rows whose sequenceNo had to be filled in.
re1['nan'] = np.where(re1['sequenceNo'].isnull(), 1, 0)

In [182]:
# Classify the unmatched rows by (skey, time) presence on each side.
dd = pd.merge(p21, p22, on=['skey', 'time'], how='outer')
# only in SH1
d1 = dd[dd['clockAtArrival_x'].isnull()][['skey', 'time']]
# only in SH
d2 = dd[dd['clockAtArrival_y'].isnull()][['skey', 'time']]

# NOTE: this reads the p2 built by an earlier cell and p2 is reassigned
# below, so re-running this cell on its own changes d1's input.
d1 = pd.merge(p2, d1, how='right')
d2 = pd.merge(p22, d2, how='right').drop_duplicates()
# SH-only rows have no counterpart to take these two columns from.
d2['clockAtArrival_y'] = np.nan
d2['sequenceNo'] = np.nan

# merge again which can combine now
d3 = pd.merge(p22, p21[['skey', 'time', 'clockAtArrival_y', 'sequenceNo']], on=['skey', 'time'])

display(d1.shape[0])
display(d2.shape[0])
display(d3.shape[0])

p2 = pd.concat([d1, d2, d3])
re1 = pd.concat([p1, p2])
# Restore the 'num' ordering, as is done where re1 is first built: the
# bfill/ffill of sequenceNo applied to re1 afterwards depends on row order.
re1 = re1.sort_values(by='num')

0

130442

42331

In [128]:
# For every unmatched SH1 row, find which shared columns differ from the
# database row(s) with the same (skey, time).
dff = pd.merge(p22, p21, on=['skey', 'time'], how='right')

# Order matters: it is the order in which names appear in the 'col' tuples.
compare_cols = ['cum_volume', 'cum_amount', 'open', 'close',
                'bid5p', 'bid4p', 'bid3p', 'bid2p', 'bid1p',
                'bid5q', 'bid4q', 'bid3q', 'bid2q', 'bid1q',
                'ask5p', 'ask4p', 'ask3p', 'ask2p', 'ask1p',
                'ask5q', 'ask4q', 'ask3q', 'ask2q', 'ask1q']
for name in compare_cols:
    # Mixing 'yes' with 0 makes np.where return strings ('yes' / '0').
    # NaN != NaN is True, so rows without a database match are flagged
    # in every column.
    dff[name] = np.where(dff[name + '_x'] != dff[name + '_y'], 'yes', 0)

# Only the flag columns can hold 'yes', so the row-wise scan is limited to them.
is_diff = dff[compare_cols].eq('yes')
dff['col'] = is_diff.apply(lambda row: tuple(row.index[row.values]), axis=1)
# Fully qualified name: the short pattern "max_rows" matches more than one
# option on recent pandas and is rejected as ambiguous.
pd.set_option("display.max_rows", 500)
dff.groupby('col')['skey'].size().reset_index().sort_values(by='skey', ascending=False)

Unnamed: 0,col,skey
162,"(close,)",12116
209,"(cum_volume, cum_amount)",4401
294,"(cum_volume, cum_amount, close)",3136
0,"(ask1q,)",2271
211,"(cum_volume, cum_amount, ask1q)",1734
50,"(bid1q,)",1687
234,"(cum_volume, cum_amount, bid1q)",1573
295,"(cum_volume, cum_amount, close, ask1q)",1455
326,"(cum_volume, cum_amount, close, bid1q)",1072
3,"(ask3q,)",889


In [40]:
# seq1: sequenceNo back-filled inside each skey, followed by a forward fill
# over the whole column.
re1['seq1'] = re1.groupby('skey')['sequenceNo'].bfill().ffill()
# Temporary helpers: running position inside each seq1 group, the number of
# distinct positions in that group, and the largest sequenceNo per skey.
re1['count1'] = re1.groupby(['seq1']).cumcount()
re1['count2'] = re1.groupby(['seq1'])['count1'].transform('nunique')
re1['max_seq'] = re1.groupby('skey')['sequenceNo'].transform('max')
# count: position relative to the end of the seq1 group; groups whose seq1
# equals the skey's largest sequenceNo get no +1 shift.
in_last_group = re1['seq1'] == re1['max_seq']
offset_from_end = re1['count1'] - re1['count2']
re1['count'] = np.where(in_last_group, offset_from_end, offset_from_end + 1)
re1.drop(["max_seq", "count1", "count2"], axis=1, inplace=True)
# nan: 1 where the row had no sequenceNo of its own, 0 otherwise.
re1['nan'] = np.where(re1['sequenceNo'].isnull(), 1, 0)

In [42]:
# Preview the first four rows of skey 1600000 from time 91622000000 onwards.
is_target_skey = re1['skey'] == 1600000
is_from_cutoff = re1['time'] >= 91622000000
re1[is_target_skey & is_from_cutoff].head(4)

Unnamed: 0,skey,date,time,clockAtArrival_x,datetime,ordering,has_missing,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,high,low,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,bid1Top1q,bid1Top2q,bid1Top3q,bid1Top4q,bid1Top5q,bid1Top6q,bid1Top7q,bid1Top8q,bid1Top9q,bid1Top10q,bid1Top11q,bid1Top12q,bid1Top13q,bid1Top14q,bid1Top15q,bid1Top16q,bid1Top17q,bid1Top18q,bid1Top19q,bid1Top20q,bid1Top21q,bid1Top22q,bid1Top23q,bid1Top24q,bid1Top25q,bid1Top26q,bid1Top27q,bid1Top28q,bid1Top29q,bid1Top30q,bid1Top31q,bid1Top32q,bid1Top33q,bid1Top34q,bid1Top35q,bid1Top36q,bid1Top37q,bid1Top38q,bid1Top39q,bid1Top40q,bid1Top41q,bid1Top42q,bid1Top43q,bid1Top44q,bid1Top45q,bid1Top46q,bid1Top47q,bid1Top48q,bid1Top49q,bid1Top50q,ask1Top1q,ask1Top2q,ask1Top3q,ask1Top4q,ask1Top5q,ask1Top6q,ask1Top7q,ask1Top8q,ask1Top9q,ask1Top10q,ask1Top11q,ask1Top12q,ask1Top13q,ask1Top14q,ask1Top15q,ask1Top16q,ask1Top17q,ask1Top18q,ask1Top19q,ask1Top20q,ask1Top21q,ask1Top22q,ask1Top23q,ask1Top24q,ask1Top25q,ask1Top26q,ask1Top27q,ask1Top28q,ask1Top29q,ask1Top30q,ask1Top31q,ask1Top32q,ask1Top33q,ask1Top34q,ask1Top35q,ask1Top36q,ask1Top37q,ask1Top38q,ask1Top39q,ask1Top40q,ask1Top41q,ask1Top42q,ask1Top43q,ask1Top44q,ask1Top45q,ask1Top46q,ask1Top47q,ask1Top48q,ask1Top49q,ask1Top50q,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels,bid_trade_max_duration,ask_trade_max_duration,cum_canceled_buy_orders,cum_canceled_buy_volume,cum_canceled_buy_amount,cum_canceled_sell_orders,cum_canceled_sell_volume,cum_canceled_sell_amount,num,clockAtArrival_y,sequenceNo,seq1,count,nan
56,1600000,20200221.0,91623000000,1582248000000000.0,2020-02-21 09:16:23,57.0,0.0,0.0,0,0.0,11.23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.35,12.35,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,2115700,2115700,1168720,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,16000000000.0,1582248000000000.0,1082009.0,1082009.0,0,0
57,1600000,20200221.0,91626000000,1582248000000000.0,2020-02-21 09:16:26,58.0,0.0,0.0,0,0.0,11.23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.35,12.35,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,2115700,2115700,1168720,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,16000000000.0,1582248000000000.0,1085066.0,1085066.0,0,0
0,1600000,20200221.0,91626000000,1582248000000000.0,2020-02-21 09:16:26,59.0,0.0,0.0,0,0.0,11.23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.35,12.35,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,2115700,2115700,1176020,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,16000000000.0,,,1088550.0,-1,1
59,1600000,20200221.0,91629000000,1582248000000000.0,2020-02-21 09:16:29,60.0,0.0,0.0,0,0.0,11.23,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.35,12.35,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,2115700,2115700,1310220,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-1.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,16000000000.0,1582248000000000.0,1088550.0,1088550.0,0,0
