In [1]:
import pymongo 
import io 
import pandas as pd 
import pickle 
import datetime 
import time 
import gzip 
import lzma 
import pytz 
import pyarrow as pa 
import pyarrow.parquet as pq 
import numpy as np 
import re

def DB(host, db_name, user, passwd):
    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)
    return DBObj(uri, db_name=db_name)

class DBObj(object):
    def __init__(self, uri, symbol_column='skey', db_name='white_db', version=3): 
        self.db_name = db_name 
        self.uri = uri 
        self.client = pymongo.MongoClient(self.uri) 
        self.db = self.client[self.db_name] 
        self.chunk_size = 20000 
        self.symbol_column = symbol_column 
        self.date_column = 'date' 
        self.version = version

    def parse_uri(self, uri): 
        # mongodb://user:password@example.com 
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}
        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("date must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid date type: " + str(type(x)))
        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)
        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)
        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)
        return query

    def read_tick(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name] 
        query = self.build_query(start_date, end_date, symbol) 
        if not query: 
            print('cannot read the whole table') 
            return None  
        segs = [] 
        for x in collection.find(query): 
            x['data'] = self.deser(x['data'], x['ver']) 
            segs.append(x) 
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start'])) 
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def read_daily(self, table_name, start_date=None, end_date=None, skey=None, index_id=None, interval=None, index_name=None, col=None, return_sdi=True): 
        collection = self.db[table_name]
        # Build projection 
        prj = {'_id': 0} 
        if col is not None: 
            if return_sdi: 
                col = ['skey', 'date', 'index_id'] + col 
            for col_name in col: 
                prj[col_name] = 1 
        # Build query 
        query = {} 
        if skey is not None: 
            query['skey'] = {'$in': skey} 
        if interval is not None: 
            query['interval'] = {'$in': interval} 
        if index_id is not None: 
            query['index_id'] = {'$in': index_id}    
        if index_name is not None:
            n = '' 
            for name in index_name: 
                try: 
                    name = re.compile('[\u4e00-\u9fff]+').findall(name)[0] 
                    if len(n) == 0: 
                        n = n = "|".join(name) 
                    else: 
                        n = n + '|' + "|".join(name) 
                except: 
                    if len(n) == 0: 
                        n = name 
                    else: 
                        n = n + '|' + name 
            query['index_name'] = {'$regex': n}
        if start_date is not None: 
            if end_date is not None: 
                query['date'] = {'$gte': start_date, '$lte': end_date} 
            else: 
                query['date'] = {'$gte': start_date} 
        elif end_date is not None: 
            query['date'] = {'$lte': end_date} 
        # Load data 
        cur = collection.find(query, prj) 
        df = pd.DataFrame.from_records(cur) 
        if df.empty: 
            df = pd.DataFrame() 
        else:
            if 'index_id' in df.columns:
                df = df.sort_values(by=['date', 'index_id', 'skey']).reset_index(drop=True)
            else:
                df = df.sort_values(by=['date','skey']).reset_index(drop=True)
        return df 
 

    def write(self, table_name, df):
        if len(df) == 0: return

        multi_date = False

        if self.date_column in df.columns:
            date = str(df.head(1)[self.date_column].iloc[0])
            multi_date = len(df[self.date_column].unique()) > 1
        else:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            for symbol, sub_df in df.groupby([self.symbol_column]):
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = self.version
            ser_data = self.ser(df_seg, version)
            seg = {'ver': version, 'data': ser_data, 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}

        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)

        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None
        collection.delete_many(query)

    def list_tables(self):
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        elif version == 3:
            # 32-bit number needs more space than 64-bit for parquet
            for col_name in s.columns:
                col = s[col_name]
                if col.dtype == np.int32:
                    s[col_name] = s[col_name].astype(np.int64)
                elif col.dtype == np.uint32:
                    s[col_name] = s[col_name].astype(np.uint64)
            tbl = pa.Table.from_pandas(s)
            f = io.BytesIO()
            pq.write_table(tbl, f, use_dictionary=False, compression='ZSTD', compression_level=0)
            f.seek(0)
            data = f.read()
            return data
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        print(version)
        def unpickle(s):
            return pickle.loads(s)
        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        elif version == 3:
            f = io.BytesIO()
            f.write(s)
            f.seek(0)
            return pq.read_table(f, use_threads=False).to_pandas()
        else:
            raise Exception('unknown version')

def patch_pandas_pickle():
    if pd.__version__ < '0.24':
        import sys
        from types import ModuleType
        from pandas.core.internals import BlockManager
        pkg_name = 'pandas.core.internals.managers'
        if pkg_name not in sys.modules:
            m = ModuleType(pkg_name)
            m.BlockManager = BlockManager
            sys.modules[pkg_name] = m
patch_pandas_pickle()


In [2]:
database_name = 'com_md_eq_cn'
user = 'zhenyuy'
password = 'bnONBrzSMGoE'

import sys

pd.set_option('max_columns', 200)
db1 = DB("192.168.10.178", database_name, user, password)

# pd.set_option('max_columns', 400)
# pd.set_option('max_rows', 400)
# startDate = 20181011
# endDate = 20181011
# test1 = db1.read('md_trade', start_date=startDate, end_date=endDate, symbol=2000034)
# test2 = db1.read('md_order', start_date=startDate, end_date=endDate, symbol=2000034)
# test = pd.concat([test1, test2]).sort_values(by='ApplSeqNum')
# test
# test2 = db1.read('md_snapshot_mbd', start_date=startDate, end_date=endDate, symbol=2002766)
# display(test2.head()[['skey', 'date', 'cum_volume', 'prev_close', 'open', 'close', 'cum_trades_cnt', 'bid10p', 'bid9p',
#                    'bid8p', 'bid7p', 'bid6p', 'bid5p', 'bid4p', 'bid3p', 'bid2p', 'bid1p', 'ask1p', 'ask2p',
#                    'ask3p', 'ask4p', 'ask5p', 'ask6p', 'ask7p', 'ask8p', 'ask9p', 'ask10p', 'bid10q', 'bid9q', 
#                    'bid8q', 'bid7q', 'bid6q', 'bid5q', 'bid4q', 'bid3q', 'bid2q', 'bid1q', 'ask1q', 'ask2q', 'ask3q', 
#                    'ask4q', 'ask5q', 'ask6q','ask7q', 'ask8q', 'ask9q', 'ask10q', 'bid10n', 'bid9n', 'bid8n',
#                    'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 'ask1n', 'ask2n', 'ask3n', 
#                    'ask4n', 'ask5n', 'ask6n', 'ask7n', 'ask8n', 'ask9n', 'ask10n', 'total_bid_quantity', 'total_ask_quantity']])

# startDate = 20200731
# endDate = 20200731

# sl = read_memb_daily(db, 'index_memb', index_id=[1000852], start_date=20170901, end_date=20201203)['skey'].unique()
# sl = sl[sl > 2000000]
pd.set_option('max_rows', 300)
test1 = db1.read_tick('md_snapshot_l2', start_date=20201106, end_date=20201106, symbol=2300903)
test1[(test1['cum_volume'] > 0) & (test1['time'] <= 145655000000) & (test1['ApplSeqNum'] == -1)][['time', 'skey', 'date', 'cum_volume', 'prev_close', 'open', 'close', 'cum_trades_cnt', 'bid10p', 'bid9p',
                   'bid8p', 'bid7p', 'bid6p', 'bid5p', 'bid4p', 'bid3p', 'bid2p', 'bid1p', 'ask1p', 'ask2p',
                   'ask3p', 'ask4p', 'ask5p', 'ask6p', 'ask7p', 'ask8p', 'ask9p', 'ask10p', 'bid10q', 'bid9q', 
                   'bid8q', 'bid7q', 'bid6q', 'bid5q', 'bid4q', 'bid3q', 'bid2q', 'bid1q', 'ask1q', 'ask2q', 'ask3q', 
                   'ask4q', 'ask5q', 'ask6q','ask7q', 'ask8q', 'ask9q', 'ask10q', 'bid10n', 'bid9n', 'bid8n',
                   'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 'ask1n', 'ask2n', 'ask3n', 
                   'ask4n', 'ask5n', 'ask6n', 'ask7n', 'ask8n', 'ask9n', 'ask10n', 'total_bid_quantity', 'total_ask_quantity']]
# test['ApplSeqNum'] = -1l2 = 
# db1.write('md_snapshot_l2', test)

3


Unnamed: 0,time,skey,date,cum_volume,prev_close,open,close,cum_trades_cnt,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,total_bid_quantity,total_ask_quantity
3008,132136000000,2300903,20201106,22589805,57.82,55.0,71.5,66723,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3009,132236000000,2300903,20201106,22589805,57.82,55.0,71.5,66723,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3010,132336000000,2300903,20201106,22589805,57.82,55.0,71.5,66723,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3011,132436000000,2300903,20201106,22589805,57.82,55.0,71.5,66723,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3012,132536000000,2300903,20201106,22589805,57.82,55.0,71.5,66723,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3013,132636000000,2300903,20201106,22589805,57.82,55.0,71.5,66723,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3014,132736000000,2300903,20201106,22589805,57.82,55.0,71.5,66723,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3015,132836000000,2300903,20201106,22589805,57.82,55.0,71.5,66723,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3016,132936000000,2300903,20201106,22589805,57.82,55.0,71.5,66723,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3017,133036000000,2300903,20201106,22589805,57.82,55.0,71.5,66723,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [366]:
test = db1.read_tick('md_snapshot_mbd', start_date=20201102, end_date=20201102, symbol=2300901)
test[['ApplSeqNum', 'time', 'skey', 'date', 'cum_volume', 'prev_close', 'open', 'close', 'cum_trades_cnt', 'bid10p', 'bid9p',
                   'bid8p', 'bid7p', 'bid6p', 'bid5p', 'bid4p', 'bid3p', 'bid2p', 'bid1p', 'ask1p', 'ask2p',
                   'ask3p', 'ask4p', 'ask5p', 'ask6p', 'ask7p', 'ask8p', 'ask9p', 'ask10p', 'bid10q', 'bid9q', 
                   'bid8q', 'bid7q', 'bid6q', 'bid5q', 'bid4q', 'bid3q', 'bid2q', 'bid1q', 'ask1q', 'ask2q', 'ask3q', 
                   'ask4q', 'ask5q', 'ask6q','ask7q', 'ask8q', 'ask9q', 'ask10q', 'bid10n', 'bid9n', 'bid8n',
                   'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 'ask1n', 'ask2n', 'ask3n', 
                   'ask4n', 'ask5n', 'ask6n', 'ask7n', 'ask8n', 'ask9n', 'ask10n', 'total_bid_quantity', 'total_ask_quantity']]

3
3
3
3
3
3
3
3
3


Unnamed: 0,ApplSeqNum,time,skey,date,cum_volume,prev_close,open,close,cum_trades_cnt,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,total_bid_quantity,total_ask_quantity
0,272958,92500000000,2300901,20201102,473385,35.38,37.88,37.88,824,37.60,37.69,37.70,37.72,37.73,37.74,37.80,37.81,37.88,399.00,37.89,37.95,37.97,37.98,37.99,38.0,38.10,38.18,38.20,38.21,10400,400,300,100,1400,500,16600,7500,24715,6200,900,100,200,400,400,13300,200,1000,700,200,6,1,2,1,2,1,11,1,1,1,3,1,2,1,2,39,1,1,1,1,3901015,719119
1,275188,93000000000,2300901,20201102,473385,35.38,37.88,37.88,824,37.60,37.69,37.70,37.72,37.73,37.74,37.80,37.81,37.88,399.00,37.89,37.95,37.97,37.98,37.99,38.0,38.10,38.18,38.20,38.21,10400,400,300,100,1400,500,16600,7500,24715,6200,900,100,200,400,400,13300,200,1000,700,200,6,1,2,1,2,1,11,1,1,1,3,1,2,1,2,39,1,1,1,1,3814415,719119
2,275189,93000000000,2300901,20201102,490585,35.38,37.88,38.20,876,37.60,37.69,37.70,37.72,37.73,37.74,37.80,37.81,37.88,38.20,38.21,38.22,38.23,38.26,38.28,38.3,38.33,38.35,38.37,38.38,10400,400,300,100,1400,500,16600,7500,24715,34700,200,200,100,100,100,1400,300,1100,500,3700,6,1,2,1,2,1,11,1,1,1,1,1,1,1,1,2,1,3,2,6,3842915,701919
3,275244,93000000000,2300901,20201102,490585,35.38,37.88,38.20,875,37.70,37.72,37.73,37.74,37.80,37.81,37.88,38.16,38.20,399.00,38.21,38.22,38.23,38.26,38.28,38.3,38.33,38.35,38.37,38.38,300,100,1400,500,16600,7500,24715,20100,28500,6200,200,200,100,100,100,1400,300,1100,500,3700,2,1,2,1,11,1,1,1,1,1,1,1,1,1,1,2,1,3,2,6,3863015,701919
4,275273,93000000000,2300901,20201102,490585,35.38,37.88,38.20,875,37.70,37.72,37.73,37.74,37.80,37.81,37.88,38.16,38.20,399.00,38.21,38.22,38.23,38.26,38.28,38.3,38.33,38.35,38.37,38.38,300,200,1400,500,16600,7500,24715,20100,28500,6200,200,200,100,100,100,1400,300,1100,500,3700,2,2,2,1,11,1,1,1,1,1,1,1,1,1,1,2,1,3,2,6,3863115,701919
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165373,23781075,145654630000,2300901,20201102,33651679,35.38,37.88,38.03,74286,37.87,37.88,37.90,37.91,37.92,37.93,37.95,37.96,37.98,37.99,38.03,38.04,38.07,38.08,38.09,38.1,38.11,38.15,38.20,38.21,5600,7900,25300,2200,4100,20700,6600,1000,200,8100,200,100,200,300,800,1500,100,100,3400,100,8,10,26,9,1,10,6,2,2,8,1,1,1,1,3,5,1,1,13,1,10567548,2029840
165374,23781115,145654660000,2300901,20201102,33651679,35.38,37.88,38.03,74286,37.87,37.88,37.90,37.91,37.92,37.93,37.95,37.96,37.98,37.99,38.03,38.04,38.07,38.08,38.09,38.1,38.11,38.15,38.20,38.21,5600,7900,25300,2200,4100,20700,6600,1000,200,8100,200,100,200,300,800,1500,100,100,3400,100,8,10,26,9,1,10,6,2,2,8,1,1,1,1,3,5,1,1,13,1,10567248,2029840
165375,23781197,145654680000,2300901,20201102,33651779,35.38,37.88,37.99,74287,37.87,37.88,37.90,37.91,37.92,37.93,37.95,37.96,37.98,37.99,38.03,38.04,38.07,38.08,38.09,38.1,38.11,38.15,38.20,38.21,5600,7900,25300,2200,4100,20700,6600,1000,200,8000,200,100,200,300,800,1500,100,100,3400,100,8,10,26,9,1,10,6,2,2,8,1,1,1,1,3,5,1,1,13,1,10567148,2029840
165376,23781458,145654810000,2300901,20201102,33651979,35.38,37.88,37.99,74288,37.87,37.88,37.90,37.91,37.92,37.93,37.95,37.96,37.98,37.99,38.03,38.04,38.07,38.08,38.09,38.1,38.11,38.15,38.20,38.21,5600,7900,25300,2200,4100,20700,6600,1000,200,7800,200,100,200,300,800,1500,100,100,3400,100,8,10,26,9,1,10,6,2,2,8,1,1,1,1,3,5,1,1,13,1,10566948,2029840


In [306]:
trade = pd.read_csv(r'\\192.168.10.34\trading\dailyRawData\20201211\logs_20201211_zs_96_03_day_pcap\mdTradePcap_SZ_20201211_0900.csv')
y = 20201211
trade = trade[trade['ID'] == 2300568]
trade["SecurityID"] = trade["ID"] - 2000000
trade = trade.rename(columns={"time":'TransactTime'})
trade['skey'] = trade['SecurityID'] + 2000000
trade["TradeBSFlag"] = 'N'
trade['date'] = int(y)
trade['time1'] = int(y) * 1000000000 + trade['TransactTime']
trade["TransactTime"] = trade['TransactTime'].astype('int64') * 1000
trade["clockAtArrival"] = trade["time1"].astype(str).apply(
    lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp() * 1e6))
trade.drop("time1", axis=1, inplace=True)
trade['datetime'] = trade["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x / 1e6))
trade['TradePrice'] = (trade['TradePrice'] / 10000).round(2)
trade = trade.rename(columns={"TradeQty":"trade_qty", "TradePrice":"trade_price", "ExecType":"trade_type", 'TransactTime':'time'})
trade["trade_flag"] = 0
trade["trade_type"] = np.where(trade["trade_type"] == 'F', 1, trade["trade_type"])
for col in ["skey", "date", "ApplSeqNum", "BidApplSeqNum", "OfferApplSeqNum", "trade_qty", "trade_type", "trade_flag"]:
    trade[col] = trade[col].astype('int32')
trade = trade.sort_values(by=['skey', 'ApplSeqNum']).reset_index(drop=True)
trade = trade[["skey", "date", "time", "clockAtArrival", "ApplSeqNum", "trade_type", "trade_flag",
                                             "trade_price", "trade_qty", "BidApplSeqNum", "OfferApplSeqNum"]]
print(trade['date'].iloc[0])
print("trade finished")
db1.write('md_trade', trade)

20201211
trade finished


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [11]:
order = pd.read_csv(r'L:\KR\data\quant360_data\2020\202007\20200727\SZ\order\000001.csv')
order['SecurityID'] = 1

order = order.rename(columns={"OrdType": "OrderType"})
order["date"] = order["TransactTime"].iloc[0]//1000000000
order["OrderType"] = np.where(order["OrderType"] == 'U', 3, order["OrderType"])
order["skey"] = order["SecurityID"] + 2000000
order["clockAtArrival"] = order["TransactTime"].astype(str).apply(lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp()*1e6))
order['datetime'] = order["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
order["time"] = (order['TransactTime'] - int(order['TransactTime'].iloc[0]//1000000000*1000000000)).astype(np.int64)*1000
order = order[order['ChannelNo'] != 4001]

for col in ["skey", "date", "ApplSeqNum", "OrderQty", "Side", "OrderType"]:
    order[col] = order[col].astype('int32')
#     for cols in ["Price"]:
#         print(cols)
#         print(order[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())

assert(order[((order["Side"] != 1) & (order["Side"] != 2)) | (order["OrderType"].isnull())].shape[0] == 0)

order = order.rename(columns={"Side":"order_side", "OrderType":"order_type", "Price":"order_price", "OrderQty":"order_qty"})
order = order[["skey", "date", "time", "clockAtArrival", "ApplSeqNum", "order_side", "order_type", "order_price",
                                             "order_qty"]]

print(order["date"].iloc[0])
print("order finished")

database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"

db1 = DB("192.168.10.178", database_name, user, password)
db1.write('md_order', order)

  interactivity=interactivity, compiler=compiler, result=result)


20200727
order finished


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [10]:
order[order['Side'] == 'F']

Unnamed: 0,OrderQty,OrderType,TransactTime,ExpirationDays,Side,ApplSeqNum,Contactor,SendingTime,Price,ChannelNo,ExpirationType,ContactInfo,ConfirmID,SecurityID,date,skey,clockAtArrival,datetime,time
115603,580000,,20200727144828620,182,F,31,,20200727144829000,2.0,4001,1,,,1,20200727,2000001,1595832508620000,2020-07-27 14:48:28.620,144828620000
115970,1000000,,20200727144922710,182,F,33,,20200727144922000,2.0,4001,1,,,1,20200727,2000001,1595832562710000,2020-07-27 14:49:22.710,144922710000


In [336]:
order[(order['ApplSeqNum'] < 272958) & (order['order_side'] == 2)]['order_qty'].sum() - \
trade[(trade['ApplSeqNum'] <= 272958) & (trade['OfferApplSeqNum'] != 0)]['trade_qty'].sum()

719119

In [337]:
order[(order['ApplSeqNum'] < 272958) & (order['order_side'] == 1)]['order_qty'].sum() - \
trade[(trade['ApplSeqNum'] <= 272958) & (trade['BidApplSeqNum'] != 0)]['trade_qty'].sum()

3901015

In [234]:
order[(order['ApplSeqNum'] < 349411) & (order['order_side'] == 2) & (~order['ApplSeqNum'].isin(trade[(trade['ApplSeqNum'] <= 349411) & (trade['OfferApplSeqNum'] != 0)]['OfferApplSeqNum'].unique()))]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,order_side,order_type,order_price,order_qty
0,2300909,20201124,91500020000,1606180500020000,491,2,2,93.78,700
1,2300909,20201124,91500030000,1606180500030000,1527,2,2,84.94,1200
2,2300909,20201124,91500030000,1606180500030000,1531,2,2,89.94,1200
3,2300909,20201124,91500030000,1606180500030000,1641,2,2,82.50,100
4,2300909,20201124,91500030000,1606180500030000,1724,2,2,82.30,100
...,...,...,...,...,...,...,...,...,...
926,2300909,20201124,92456150000,1606181096150000,304863,2,2,81.00,100
928,2300909,20201124,92457750000,1606181097750000,306402,2,2,78.77,1000
929,2300909,20201124,92458380000,1606181098380000,307231,2,2,81.99,100
930,2300909,20201124,92458620000,1606181098620000,307491,2,2,78.09,1200


In [253]:
order = pd.read_csv(r'\\192.168.10.34\random_backup\Kevin_zhenyu\KR_daily_data\20201106\SZ\order\300903.csv')
trade = pd.read_csv(r'\\192.168.10.34\random_backup\Kevin_zhenyu\KR_daily_data\20201106\SZ\tick\300903.csv')

In [321]:
trade[trade['ApplSeqNum'] >= 649857].head(50)

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
143,2300816,20201204,93028500000,1607045428500000,649857,1,0,125.88,100,649837,8174
144,2300816,20201204,93028500000,1607045428500000,649858,1,0,125.88,200,649837,161747
145,2300816,20201204,93028500000,1607045428500000,649859,1,0,125.88,200,649837,204641
146,2300816,20201204,93028500000,1607045428500000,649860,1,0,125.88,400,649837,234672
147,2300816,20201204,93028500000,1607045428500000,649861,1,0,125.88,1000,649837,356320
148,2300816,20201204,93028500000,1607045428500000,649862,1,0,125.88,500,649837,362495
149,2300816,20201204,93028500000,1607045428500000,649863,1,0,125.88,500,649837,427461
150,2300816,20201204,93028500000,1607045428500000,649864,4,0,0.0,1100,649837,0
151,2300816,20201204,93028670000,1607045428670000,650651,1,0,106.66,100,649875,650650
152,2300816,20201204,93029300000,1607045429300000,653088,1,0,106.66,100,649904,653087


In [329]:
trade[trade['ApplSeqNum'] > 653156].head(50)

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
154,2300816,20201204,93029320000,1607045429320000,653157,4,0,0.0,100,649909,0
155,2300816,20201204,93030660000,1607045430660000,657629,4,0,0.0,200,565937,0
156,2300816,20201204,93031530000,1607045431530000,662929,1,0,106.4,100,637268,662928
157,2300816,20201204,93031540000,1607045431540000,662978,4,0,0.0,100,657724,0
158,2300816,20201204,93031540000,1607045431540000,662979,4,0,0.0,100,653123,0
159,2300816,20201204,93031540000,1607045431540000,662983,4,0,0.0,100,657720,0
160,2300816,20201204,93031540000,1607045431540000,662986,4,0,0.0,100,653124,0
161,2300816,20201204,93031550000,1607045431550000,663008,4,0,0.0,100,657688,0
162,2300816,20201204,93031550000,1607045431550000,663009,4,0,0.0,100,653156,0
163,2300816,20201204,93031840000,1607045431840000,664153,1,0,106.55,100,662982,664152


In [328]:
trade[trade['BidApplSeqNum'] == 650194]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
8985,2300816,20201204,150000000000,1607065200000000,19741593,1,0,100.56,100,650194,19653356


In [327]:
order[order['ApplSeqNum'] >= 649837].head(50)

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,order_side,order_type,order_price,order_qty
213,2300816,20201204,93028500000,1607045428500000,649837,1,1,125.88,9400
214,2300816,20201204,93028510000,1607045428510000,649875,1,2,106.66,100
215,2300816,20201204,93028510000,1607045428510000,649904,1,2,106.66,100
216,2300816,20201204,93028510000,1607045428510000,649906,1,2,106.66,100
217,2300816,20201204,93028510000,1607045428510000,649909,1,2,106.66,100
218,2300816,20201204,93028560000,1607045428560000,650194,1,2,125.88,100
219,2300816,20201204,93028670000,1607045428670000,650650,2,2,106.38,100
220,2300816,20201204,93028740000,1607045428740000,650880,1,2,106.4,3200
221,2300816,20201204,93029300000,1607045429300000,653087,2,2,105.78,100
222,2300816,20201204,93029310000,1607045429310000,653123,1,2,106.39,100


In [347]:
order[order['ApplSeqNum'] == 276155]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,order_side,order_type,order_price,order_qty
1402,2300913,20201210,93000000000,1607563800000000,276155,1,2,48.0,100


In [346]:
trade[trade['ApplSeqNum'] == 1030149]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
3542,2300913,20201210,93156870000,1607563916870000,1030149,4,0,0.0,100,276155,0


In [3]:
database_name = 'com_md_eq_cn'
user = 'zhenyuy'
password = 'bnONBrzSMGoE'

import sys

pd.set_option('max_columns', 200)
db1 = DB("192.168.10.178", database_name, user, password)

order = db1.read_tick('md_order', start_date=20200814, end_date=20200814, symbol=2000725)
trade = db1.read_tick('md_trade', start_date=20200814, end_date=20200814, symbol=2000725)
print(order)
print(trade)

3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
           skey      date          time    clockAtArrival  ApplSeqNum  \
0       2000725  20200814   91500000000  1597367700000000         206   
1       2000725  20200814   91500000000  1597367700000000         559   
2       2000725  20200814   91500000000  1597367700000000         661   
3       2000725  20200814   91500000000  1597367700000000         800   
4       2000725  20200814   91500000000  1597367700000000         809   
...         ...       ...           ...               ...         ...   
239995  2000725  20200814  104329690000  1597373009690000    10678018   
239996  2000725  20200814  104329730000  1597373009730000    10678067   
239997  2000725  20200814  104329830000  1597373009830000    10678147   
239998  2000725  20200814  104329990000  1597373009990000    10678275   
239999  2000725  20200814  104330000000  1597373010000000    10678280   

        order_side  order_type  order_price  order_qty  
0     

In [29]:
set(trade['ApplSeqNum'].unique()) - set(pd.read_csv(r'E:\unzip_data\2017\SZ\20171130\tick\000651.csv')['ApplSeqNum'].unique())

{5883862}

In [30]:
trade[trade['ApplSeqNum'] == 5883862]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
64272,2000651,20171130,135013680000,1512021013680000,5883862,4,0,0.0,100,3582185,0


In [31]:
order[order['ApplSeqNum'] == 3582185]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,order_side,order_type,order_price,order_qty
51152,2000651,20171130,103948320000,1512009588320000,3582185,1,2,42.01,100


In [38]:
test[test['time'] <= 135015000000].tail()

Unnamed: 0,skey,date,time,clockAtArrival,ordering,has_missing,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,high,low,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,bid1Top1q,bid1Top2q,bid1Top3q,bid1Top4q,bid1Top5q,bid1Top6q,bid1Top7q,bid1Top8q,bid1Top9q,bid1Top10q,bid1Top11q,bid1Top12q,bid1Top13q,bid1Top14q,bid1Top15q,bid1Top16q,bid1Top17q,bid1Top18q,bid1Top19q,bid1Top20q,bid1Top21q,bid1Top22q,bid1Top23q,bid1Top24q,bid1Top25q,bid1Top26q,bid1Top27q,bid1Top28q,bid1Top29q,bid1Top30q,bid1Top31q,bid1Top32q,bid1Top33q,bid1Top34q,bid1Top35q,bid1Top36q,bid1Top37q,bid1Top38q,bid1Top39q,bid1Top40q,bid1Top41q,bid1Top42q,bid1Top43q,bid1Top44q,bid1Top45q,bid1Top46q,bid1Top47q,bid1Top48q,bid1Top49q,bid1Top50q,ask1Top1q,ask1Top2q,ask1Top3q,ask1Top4q,ask1Top5q,ask1Top6q,ask1Top7q,ask1Top8q,ask1Top9q,ask1Top10q,ask1Top11q,ask1Top12q,ask1Top13q,ask1Top14q,ask1Top15q,ask1Top16q,ask1Top17q,ask1Top18q,ask1Top19q,ask1Top20q,ask1Top21q,ask1Top22q,ask1Top23q,ask1Top24q,ask1Top25q,ask1Top26q,ask1Top27q,ask1Top28q,ask1Top29q,ask1Top30q,ask1Top31q,ask1Top32q,ask1Top33q,ask1Top34q,ask1Top35q,ask1Top36q,ask1Top37q,ask1Top38q,ask1Top39q,ask1Top40q,ask1Top41q,ask1Top42q,ask1Top43q,ask1Top44q,ask1Top45q,ask1Top46q,ask1Top47q,ask1Top48q,ask1Top49q,ask1Top50q,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels,bid_trade_max_duration,ask_trade_max_duration,cum_canceled_buy_orders,cum_canceled_buy_volume,cum_canceled_buy_amount,cum_canceled_sell_orders,cum_canceled_sell_volume,cum_canceled_sell_amount,ApplSeqNum
3608,2000651,20171130,135003000000,1512021003000000,3609,0,48415,34953178,1485074000.0,42.45,42.5,42.94,42.07,42.46,42.36,42.37,42.38,42.4,42.41,42.42,42.43,42.44,42.45,42.46,42.47,42.48,42.49,42.5,42.51,42.52,42.53,42.54,42.55,42.56,2700,4600,3000,9800,22400,8900,11300,4600,900,5500,1000,32100,2300,23100,1100,3400,30500,7900,31399,28900,11,6,4,6,5,7,4,6,3,6,1,14,6,33,5,6,12,5,30,19,3000,100,100,2000,100,200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5596800,6720683,41.48,43.91,0,0,0,0,0,0,0,0,0.0,0,0,0.0,5880195
3609,2000651,20171130,135006000000,1512021006000000,3610,0,48422,34957478,1485257000.0,42.45,42.5,42.94,42.07,42.46,42.36,42.37,42.38,42.4,42.41,42.42,42.43,42.44,42.45,42.46,42.47,42.48,42.49,42.5,42.51,42.52,42.53,42.54,42.55,42.56,2700,4600,3000,9900,22400,8900,11300,4600,900,5800,39600,31300,5400,23100,1100,3400,30500,7900,31399,28900,11,6,4,7,5,7,4,6,3,8,2,14,8,33,5,6,12,5,30,19,2500,100,100,2000,100,200,300,500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,39500,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5597200,6761783,41.48,43.9,0,0,0,0,0,0,0,0,0.0,0,0,0.0,5881246
3610,2000651,20171130,135009000000,1512021009000000,3611,0,48425,34958778,1485312000.0,42.45,42.5,42.94,42.07,42.47,42.36,42.37,42.38,42.4,42.41,42.42,42.43,42.44,42.45,42.46,42.47,42.48,42.49,42.5,42.51,42.52,42.53,42.54,42.55,42.56,2700,4600,3000,9900,22400,8900,11300,4600,900,5600,38700,31300,5400,23500,1100,3400,30500,7900,31899,28800,11,6,4,7,5,7,4,6,3,8,3,14,8,35,5,6,12,5,31,18,2300,100,100,2000,100,200,300,500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,38400,100,200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5597600,6760183,41.48,43.9,0,0,0,0,0,0,0,0,0.0,0,0,0.0,5882245
3611,2000651,20171130,135012000000,1512021012000000,3612,0,48428,34960378,1485380000.0,42.45,42.5,42.94,42.07,42.46,42.36,42.37,42.38,42.4,42.41,42.42,42.43,42.44,42.45,42.46,42.47,42.48,42.49,42.5,42.51,42.52,42.53,42.54,42.55,42.56,2700,4600,3000,9900,22400,8900,11300,4600,900,4200,38800,30400,5400,22500,1100,3400,30000,7900,32399,28800,11,6,4,7,5,7,4,6,3,10,4,14,8,35,5,6,11,5,32,18,700,100,100,2000,100,200,300,500,100,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,38400,100,200,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5596200,6758383,41.48,43.9,0,0,0,0,0,0,0,0,0.0,0,0,0.0,5883299
3612,2000651,20171130,135015000000,1512021015000000,3613,0,48439,34964278,1485546000.0,42.45,42.5,42.94,42.07,42.46,42.36,42.37,42.38,42.4,42.41,42.42,42.43,42.44,42.45,42.46,42.47,42.48,42.49,42.5,42.51,42.52,42.53,42.54,42.55,42.56,2700,4600,3000,9900,22400,8900,11300,4600,900,300,38800,29200,5400,22400,1100,3400,30000,7900,32399,28800,11,6,4,7,5,7,4,6,3,3,4,13,8,34,5,6,11,5,32,18,100,100,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,38400,100,200,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5592200,6756083,41.48,43.9,0,0,0,0,0,0,0,0,0.0,0,0,0.0,-1


In [49]:
test[(test['cum_volume'] <= 34964278) & (test['cum_volume'] >= 34960378)]

Unnamed: 0,skey,date,time,clockAtArrival,ordering,ApplSeqNum,bbo_improve,pass_filter,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels
91687,2000651,20171130,135011340000,1512021011340000,91688,5883136,1,0,48428,34960378,1485380000.0,42.45,42.5,42.46,42.36,42.37,42.38,42.4,42.41,42.42,42.43,42.44,42.45,42.46,42.47,42.48,42.49,42.5,42.51,42.52,42.53,42.54,42.55,42.56,2700,4600,3000,9900,22400,8900,11300,4600,900,4100,38700,30400,5400,22500,1100,3400,30000,7900,32399,28800,11,6,4,7,5,7,4,6,3,9,3,14,8,35,5,6,11,5,32,18,5596000,6758083,41.479849,43.901467,6977,7070,335,397
91688,2000651,20171130,135011470000,1512021011470000,91689,5883175,0,-1,48428,34960378,1485380000.0,42.45,42.5,42.46,42.36,42.37,42.38,42.4,42.41,42.42,42.43,42.44,42.45,42.46,42.47,42.48,42.49,42.5,42.51,42.52,42.53,42.54,42.55,42.56,2700,4600,3000,9900,22400,8900,11300,4600,900,4200,38700,30400,5400,22500,1100,3400,30000,7900,32399,28800,11,6,4,7,5,7,4,6,3,10,3,14,8,35,5,6,11,5,32,18,5596100,6758083,41.479866,43.901467,6978,7070,335,397
91689,2000651,20171130,135011560000,1512021011560000,91690,5883212,0,-1,48428,34960378,1485380000.0,42.45,42.5,42.46,42.36,42.37,42.38,42.4,42.41,42.42,42.43,42.44,42.45,42.46,42.47,42.48,42.49,42.5,42.51,42.52,42.53,42.54,42.55,42.56,2700,4600,3000,9900,22400,8900,11300,4600,900,4200,38800,30400,5400,22500,1100,3400,30000,7900,32399,28800,11,6,4,7,5,7,4,6,3,10,4,14,8,35,5,6,11,5,32,18,5596100,6758183,41.479866,43.901446,6978,7071,335,397
91690,2000651,20171130,135011810000,1512021011810000,91691,5883298,0,-1,48428,34960378,1485380000.0,42.45,42.5,42.46,42.36,42.37,42.38,42.4,42.41,42.42,42.43,42.44,42.45,42.46,42.47,42.48,42.49,42.5,42.51,42.52,42.53,42.54,42.55,42.56,2700,4600,3000,9900,22400,8900,11300,4600,900,4200,38800,30400,5400,22500,1100,3400,30000,7900,32399,28800,11,6,4,7,5,7,4,6,3,10,4,14,8,35,5,6,11,5,32,18,5596100,6758383,41.479866,43.901529,6978,7072,335,397
91691,2000651,20171130,135011820000,1512021011820000,91692,5883299,0,-1,48428,34960378,1485380000.0,42.45,42.5,42.46,42.36,42.37,42.38,42.4,42.41,42.42,42.43,42.44,42.45,42.46,42.47,42.48,42.49,42.5,42.51,42.52,42.53,42.54,42.55,42.56,2700,4600,3000,9900,22400,8900,11300,4600,900,4200,38800,30400,5400,22500,1100,3400,30000,7900,32399,28800,11,6,4,7,5,7,4,6,3,10,4,14,8,35,5,6,11,5,32,18,5596200,6758383,41.479881,43.901529,6979,7072,335,397
91692,2000651,20171130,135012740000,1512021012740000,91693,5883568,0,-1,48428,34960378,1485380000.0,42.45,42.5,42.46,42.36,42.37,42.38,42.4,42.41,42.42,42.43,42.44,42.45,42.46,42.47,42.48,42.49,42.5,42.51,42.52,42.53,42.54,42.55,42.56,2700,4600,3000,9900,22400,8900,11300,4600,900,4200,38800,30400,5400,22500,1100,3400,30000,7900,32399,28800,11,6,4,7,5,7,4,6,3,10,4,14,8,35,5,6,11,5,32,18,5596200,6758483,41.479881,43.90151,6979,7073,335,397
91693,2000651,20171130,135012760000,1512021012760000,91694,5883576,0,-1,48428,34960378,1485380000.0,42.45,42.5,42.46,42.36,42.37,42.38,42.4,42.41,42.42,42.43,42.44,42.45,42.46,42.47,42.48,42.49,42.5,42.51,42.52,42.53,42.54,42.55,42.56,2700,4600,3000,9900,22400,8900,11300,4600,900,4200,38800,30400,5400,22400,1100,3400,30000,7900,32399,28800,11,6,4,7,5,7,4,6,3,10,4,14,8,34,5,6,11,5,32,18,5596200,6758383,41.479881,43.901531,6979,7072,335,397
91694,2000651,20171130,135013420000,1512021013420000,91695,5883785,0,-1,48428,34960378,1485380000.0,42.45,42.5,42.46,42.36,42.37,42.38,42.4,42.41,42.42,42.43,42.44,42.45,42.46,42.47,42.48,42.49,42.5,42.51,42.52,42.53,42.54,42.55,42.56,2700,4600,3000,9900,22400,8900,11300,4600,900,4200,38800,29200,5400,22400,1100,3400,30000,7900,32399,28800,11,6,4,7,5,7,4,6,3,10,4,13,8,34,5,6,11,5,32,18,5596200,6757183,41.479881,43.901784,6979,7071,335,397
91695,2000651,20171130,135013500000,1512021013500000,91696,5883797,1,0,48432,34961578,1485431000.0,42.45,42.5,42.46,42.36,42.37,42.38,42.4,42.41,42.42,42.43,42.44,42.45,42.46,42.47,42.48,42.49,42.5,42.51,42.52,42.53,42.54,42.55,42.56,2700,4600,3000,9900,22400,8900,11300,4600,900,3000,38800,29200,5400,22400,1100,3400,30000,7900,32399,28800,11,6,4,7,5,7,4,6,3,7,4,13,8,34,5,6,11,5,32,18,5595000,6757183,41.479671,43.901784,6976,7071,335,397
91696,2000651,20171130,135013680000,1512021013680000,91697,5883862,0,-1,48432,34961578,1485431000.0,42.45,42.5,42.46,42.36,42.37,42.38,42.4,42.41,42.42,42.43,42.44,42.45,42.46,42.47,42.48,42.49,42.5,42.51,42.52,42.53,42.54,42.55,42.56,2700,4600,3000,9900,22400,8900,11300,4600,900,3000,38800,29200,5400,22400,1100,3400,30000,7900,32399,28800,11,6,4,7,5,7,4,6,3,7,4,13,8,34,5,6,11,5,32,18,5594900,6757183,41.479661,43.901784,6975,7071,335,397


In [50]:
mbd[(mbd['ApplSeqNum'] >= 5883136) & (mbd['ApplSeqNum'] <= 5884277)]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,order_side,order_type,order_price,order_qty,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
80150,2000651,20171130,135011340000,1512021011340000,5883136,2.0,2.0,42.46,500.0,,,,,,
64264,2000651,20171130,135011340000,1512021011340000,5883137,,,,,1.0,0.0,42.46,500.0,5872941.0,5883136.0
80151,2000651,20171130,135011470000,1512021011470000,5883175,1.0,2.0,42.46,100.0,,,,,,
80152,2000651,20171130,135011560000,1512021011560000,5883212,2.0,2.0,42.47,100.0,,,,,,
80153,2000651,20171130,135011810000,1512021011810000,5883298,2.0,2.0,46.7,200.0,,,,,,
80154,2000651,20171130,135011820000,1512021011820000,5883299,1.0,2.0,42.3,100.0,,,,,,
80155,2000651,20171130,135012740000,1512021012740000,5883568,2.0,2.0,42.65,100.0,,,,,,
64265,2000651,20171130,135012760000,1512021012760000,5883576,,,,,4.0,0.0,0.0,100.0,0.0,5705952.0
64266,2000651,20171130,135013420000,1512021013420000,5883785,,,,,4.0,0.0,0.0,1200.0,0.0,5876543.0
80156,2000651,20171130,135013500000,1512021013500000,5883797,2.0,2.0,42.46,1200.0,,,,,,


In [47]:
order[order['ApplSeqNum'] == 5876543]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,order_side,order_type,order_price,order_qty
80079,2000651,20171130,134952830000,1512020992830000,5876543,2,2,42.48,1200


In [375]:
test = db1.read_tick('md_order', start_date=20200813, end_date=20200813, symbol=2000021)
test

3


Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,order_side,order_type,order_price,order_qty
0,2000021,20200813,91500020000,1597281300020000,1005,2,2,27.00,200
1,2000021,20200813,91500030000,1597281300030000,1108,2,2,25.95,1000
2,2000021,20200813,91500030000,1597281300030000,1182,2,2,27.70,400
3,2000021,20200813,91500040000,1597281300040000,2274,2,2,25.62,500
4,2000021,20200813,91500040000,1597281300040000,2409,2,2,25.52,400
...,...,...,...,...,...,...,...,...,...
19995,2000021,20200813,102114780000,1597285274780000,9134892,2,2,27.50,200
19996,2000021,20200813,102114870000,1597285274870000,9135003,1,2,25.25,500
19997,2000021,20200813,102115320000,1597285275320000,9135851,1,2,25.27,2000
19998,2000021,20200813,102115860000,1597285275860000,9136869,1,2,25.29,2200


In [15]:
np.sort(db1.read_tick('md_snapshot_l2', 20170103, 20201211, symbol=1600000)['date'].unique())

3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3


array([20170103, 20170104, 20170105, 20170106, 20170109, 20170110,
       20170111, 20170112, 20170113, 20170116, 20170117, 20170118,
       20170119, 20170120, 20170123, 20170124, 20170125, 20170126,
       20170203, 20170206, 20170207, 20170208, 20170209, 20170210,
       20170213, 20170214, 20170215, 20170216, 20170217, 20170220,
       20170221, 20170222, 20170223, 20170224, 20170227, 20170228,
       20170301, 20170302, 20170303, 20170306, 20170307, 20170308,
       20170309, 20170310, 20170313, 20170314, 20170315, 20170316,
       20170317, 20170320, 20170321, 20170322, 20170323, 20170324,
       20170327, 20170328, 20170329, 20170330, 20170331, 20170405,
       20170406, 20170407, 20170410, 20170411, 20170412, 20170413,
       20170414, 20170417, 20170418, 20170419, 20170420, 20170421,
       20170424, 20170425, 20170426, 20170427, 20170428, 20170502,
       20170503, 20170504, 20170505, 20170508, 20170509, 20170510,
       20170511, 20170512, 20170515, 20170516, 20170517, 20170

In [16]:
import sys
sys.getsizeof(test) / (1024**3) * 4000

28.70655059814453

In [369]:
order[order['order_price'] == 399]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,order_side,order_type,order_price,order_qty
1732,2300901,20201102,92420700000,1604280260700000,211237,1,2,399.0,6200


In [372]:
trade[trade['BidApplSeqNum'] == 211237]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
952,2300901,20201102,93000020000,1604280600020000,276136,4,0,0.0,6200,211237,0


In [370]:
order[order['order_price'] >= 399]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,order_side,order_type,order_price,order_qty
374,2300901,20201102,91501210000,1604279701210000,60166,2,2,30898.0,100
375,2300901,20201102,91501210000,1604279701210000,60167,2,2,30898.0,100
376,2300901,20201102,91501210000,1604279701210000,60168,2,2,30898.0,100
377,2300901,20201102,91501210000,1604279701210000,60169,2,2,30898.0,100
378,2300901,20201102,91501210000,1604279701210000,60170,2,2,30898.0,100
379,2300901,20201102,91501210000,1604279701210000,60171,2,2,30898.0,100
380,2300901,20201102,91501210000,1604279701210000,60172,2,2,30898.0,100
381,2300901,20201102,91501210000,1604279701210000,60173,2,2,30898.0,100
382,2300901,20201102,91501210000,1604279701210000,60174,2,2,30898.0,100
383,2300901,20201102,91501210000,1604279701210000,60175,2,2,30898.0,100


In [64]:
test[test['ApplSeqNum'] >= 19211505].head(10)
# OrderedDict([(18769334, 2100), (18773050, 1000), (18774306, 100), (18775693, 100), (18779756, 2000), (18785645, 1000), (18800971, 10600)

Unnamed: 0,ApplSeqNum,BidApplSeqNum,OfferApplSeqNum,clockAtArrival,date,datetime,order_price,order_qty,order_side,order_type,skey,time,trade_flag,trade_price,trade_qty,trade_type
21211,19211505,,,1582263645700000,20200221,2020-02-21 13:40:45.700,18.5,9300.0,2.0,2.0,2000672,134045700000,,,,
18333,19211506,18769334.0,19211505.0,1582263645700000,20200221,2020-02-21 13:40:45.700,,,,,2000672,134045700000,0.0,18.5,2100.0,1.0
18334,19211507,18773050.0,19211505.0,1582263645700000,20200221,2020-02-21 13:40:45.700,,,,,2000672,134045700000,0.0,18.5,1000.0,1.0
18335,19211508,18774306.0,19211505.0,1582263645700000,20200221,2020-02-21 13:40:45.700,,,,,2000672,134045700000,0.0,18.5,100.0,1.0
18336,19211509,18775693.0,19211505.0,1582263645700000,20200221,2020-02-21 13:40:45.700,,,,,2000672,134045700000,0.0,18.5,100.0,1.0
18337,19211510,18779756.0,19211505.0,1582263645700000,20200221,2020-02-21 13:40:45.700,,,,,2000672,134045700000,0.0,18.5,2000.0,1.0
18338,19211511,18800971.0,19211505.0,1582263645700000,20200221,2020-02-21 13:40:45.700,,,,,2000672,134045700000,0.0,18.5,4000.0,1.0
21212,19211713,,,1582263645820000,20200221,2020-02-21 13:40:45.820,18.49,3397.0,2.0,2.0,2000672,134045820000,,,,
18339,19211714,18800971.0,19211713.0,1582263645820000,20200221,2020-02-21 13:40:45.820,,,,,2000672,134045820000,0.0,18.5,3397.0,1.0
21213,19211737,,,1582263645820000,20200221,2020-02-21 13:40:45.820,18.5,200.0,2.0,2.0,2000672,134045820000,,,,


In [74]:
test[(test['ApplSeqNum'] >= 18785645) & (test['order_price'] <= 18.5) & (test['order_side'] == 2) 
     & (test['ApplSeqNum'] <= 19211505)]

Unnamed: 0,ApplSeqNum,BidApplSeqNum,OfferApplSeqNum,clockAtArrival,date,datetime,order_price,order_qty,order_side,order_type,skey,time,trade_flag,trade_price,trade_qty,trade_type
20746,18790757,,,1582263328840000,20200221,2020-02-21 13:35:28.840,18.5,500.0,2.0,2.0,2000672,133528840000,,,,
20748,18793558,,,1582263330830000,20200221,2020-02-21 13:35:30.830,18.5,11700.0,2.0,2.0,2000672,133530830000,,,,
20756,18801986,,,1582263336240000,20200221,2020-02-21 13:35:36.240,18.5,500.0,2.0,2.0,2000672,133536240000,,,,
20757,18802313,,,1582263336440000,20200221,2020-02-21 13:35:36.440,18.5,200.0,2.0,2.0,2000672,133536440000,,,,
21155,19147204,,,1582263607070000,20200221,2020-02-21 13:40:07.070,18.5,200.0,2.0,2.0,2000672,134007070000,,,,
21162,19156222,,,1582263612150000,20200221,2020-02-21 13:40:12.150,18.5,400.0,2.0,2.0,2000672,134012150000,,,,
21187,19187574,,,1582263630810000,20200221,2020-02-21 13:40:30.810,18.5,900.0,2.0,2.0,2000672,134030810000,,,,
21190,19190331,,,1582263632130000,20200221,2020-02-21 13:40:32.130,18.5,2000.0,2.0,2.0,2000672,134032130000,,,,
21191,19191244,,,1582263632810000,20200221,2020-02-21 13:40:32.810,18.5,1000.0,2.0,2.0,2000672,134032810000,,,,
21194,19193667,,,1582263634290000,20200221,2020-02-21 13:40:34.290,18.5,3000.0,2.0,2.0,2000672,134034290000,,,,


In [73]:
re1 = test[(test['ApplSeqNum'] >= 18785645) & (test['order_price'] <= 18.5) & (test['order_side'] == 2) 
     & (test['ApplSeqNum'] <= 19211505)].groupby('ApplSeqNum')['order_qty'].sum().reset_index()
re2 = test[test['OfferApplSeqNum'].isin(test[(test['ApplSeqNum'] >= 18785645) & (test['order_price'] <= 18.5) & (test['order_side'] == 2) 
     & (test['ApplSeqNum'] <= 19211505)]['ApplSeqNum'].unique())].groupby('OfferApplSeqNum')['trade_qty'].sum().reset_index()
pd.merge(re1, re2, left_on='ApplSeqNum', right_on='OfferApplSeqNum')


Unnamed: 0,ApplSeqNum,order_qty,OfferApplSeqNum,trade_qty
0,18790757,500.0,18790757.0,500.0
1,18793558,11700.0,18793558.0,11700.0
2,18801986,500.0,18801986.0,500.0
3,18802313,200.0,18802313.0,200.0
4,19147204,200.0,19147204.0,200.0
5,19156222,400.0,19156222.0,400.0
6,19187574,900.0,19187574.0,900.0
7,19190331,2000.0,19190331.0,2000.0
8,19191244,1000.0,19191244.0,1000.0
9,19193667,3000.0,19193667.0,3000.0


In [31]:
test[test['ApplSeqNum'] >= 19215866].head(20)

Unnamed: 0,ApplSeqNum,BidApplSeqNum,OfferApplSeqNum,clockAtArrival,date,datetime,order_price,order_qty,order_side,order_type,skey,time,trade_flag,trade_price,trade_qty,trade_type
21217,19215866,,,1582263648560000,20200221,2020-02-21 13:40:48.560,18.5,10000.0,2.0,2.0,2000672,134048560000,,,,
18343,19215867,18800971.0,19215866.0,1582263648560000,20200221,2020-02-21 13:40:48.560,,,,,2000672,134048560000,0.0,18.5,2803.0,1.0
18344,19215868,18807469.0,19215866.0,1582263648560000,20200221,2020-02-21 13:40:48.560,,,,,2000672,134048560000,0.0,18.5,2600.0,1.0
18345,19215869,18870054.0,19215866.0,1582263648560000,20200221,2020-02-21 13:40:48.560,,,,,2000672,134048560000,0.0,18.5,100.0,1.0
18346,19215870,18876400.0,19215866.0,1582263648560000,20200221,2020-02-21 13:40:48.560,,,,,2000672,134048560000,0.0,18.5,4497.0,1.0
21218,19216086,,,1582263648700000,20200221,2020-02-21 13:40:48.700,18.5,400.0,2.0,2.0,2000672,134048700000,,,,
18347,19216087,18876400.0,19216086.0,1582263648700000,20200221,2020-02-21 13:40:48.700,,,,,2000672,134048700000,0.0,18.5,400.0,1.0
21219,19216251,,,1582263648800000,20200221,2020-02-21 13:40:48.800,18.51,11500.0,2.0,2.0,2000672,134048800000,,,,
21220,19216938,,,1582263649150000,20200221,2020-02-21 13:40:49.150,18.5,300.0,1.0,2.0,2000672,134049150000,,,,
21221,19218287,,,1582263649980000,20200221,2020-02-21 13:40:49.980,18.51,2200.0,1.0,2.0,2000672,134049980000,,,,


In [59]:
test[test['BidApplSeqNum'] == 18785645]

Unnamed: 0,ApplSeqNum,BidApplSeqNum,OfferApplSeqNum,clockAtArrival,date,datetime,order_price,order_qty,order_side,order_type,skey,time,trade_flag,trade_price,trade_qty,trade_type


In [58]:
pd.set_option('max_rows', 200)
test[test['ApplSeqNum'] >= 18785645].head(200)

Unnamed: 0,ApplSeqNum,BidApplSeqNum,OfferApplSeqNum,clockAtArrival,date,datetime,order_price,order_qty,order_side,order_type,skey,time,trade_flag,trade_price,trade_qty,trade_type
20740,18785645,,,1582263325530000,20200221,2020-02-21 13:35:25.530,18.5,1000.0,1.0,2.0,2000672,133525530000,,,,
20741,18785654,,,1582263325530000,20200221,2020-02-21 13:35:25.530,18.42,1000.0,1.0,2.0,2000672,133525530000,,,,
20742,18786139,,,1582263325870000,20200221,2020-02-21 13:35:25.870,18.51,600.0,2.0,2.0,2000672,133525870000,,,,
17913,18786140,18778859.0,18786139.0,1582263325870000,20200221,2020-02-21 13:35:25.870,,,,,2000672,133525870000,0.0,18.51,100.0,1.0
20743,18786646,,,1582263326160000,20200221,2020-02-21 13:35:26.160,18.51,100.0,1.0,2.0,2000672,133526160000,,,,
17914,18786647,18786646.0,18786139.0,1582263326160000,20200221,2020-02-21 13:35:26.160,,,,,2000672,133526160000,0.0,18.51,100.0,1.0
20744,18787175,,,1582263326560000,20200221,2020-02-21 13:35:26.560,18.51,700.0,1.0,2.0,2000672,133526560000,,,,
17915,18787176,18787175.0,18786139.0,1582263326560000,20200221,2020-02-21 13:35:26.560,,,,,2000672,133526560000,0.0,18.51,400.0,1.0
17916,18787571,0.0,15869791.0,1582263326780000,20200221,2020-02-21 13:35:26.780,,,,,2000672,133526780000,0.0,0.0,600.0,4.0
20745,18787840,,,1582263327020000,20200221,2020-02-21 13:35:27.020,18.48,500.0,1.0,2.0,2000672,133527020000,,,,


In [147]:
startDate = 20170302
endDate = 20170302
kk = db1.read('md_trade', start_date = startDate, end_date = endDate, symbol=2002850)
kk

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
0,2002850,20170302,91500620000,1488417300620000,2017-03-02 09:15:00.620,24080,4,0,0.0,4000,24072,0
1,2002850,20170302,91501270000,1488417301270000,2017-03-02 09:15:01.270,43822,4,0,0.0,300,43813,0
2,2002850,20170302,91501460000,1488417301460000,2017-03-02 09:15:01.460,48443,4,0,0.0,500,48436,0
3,2002850,20170302,91501560000,1488417301560000,2017-03-02 09:15:01.560,49973,4,0,0.0,1000,49964,0
4,2002850,20170302,91501850000,1488417301850000,2017-03-02 09:15:01.850,55926,4,0,0.0,800,55496,0
...,...,...,...,...,...,...,...,...,...,...,...,...
19024,2002850,20170302,145652670000,1488437812670000,2017-03-02 14:56:52.670,9951669,4,0,0.0,1900,9895257,0
19025,2002850,20170302,145653240000,1488437813240000,2017-03-02 14:56:53.240,9952374,4,0,0.0,500,1682,0
19026,2002850,20170302,145656150000,1488437816150000,2017-03-02 14:56:56.150,9955739,4,0,0.0,300,2467495,0
19027,2002850,20170302,145658260000,1488437818260000,2017-03-02 14:56:58.260,9958257,4,0,0.0,200,9928247,0


In [117]:
test[(test['ApplSeqNum'] >= 5388941) & (test['time'] <= 111419000000)]

Unnamed: 0,ApplSeqNum,BidApplSeqNum,OfferApplSeqNum,clockAtArrival,date,datetime,order_price,order_qty,order_side,order_type,skey,time,trade_flag,trade_price,trade_qty,trade_type
5302,5388941,,,1539227640030000,20181011,2018-10-11 11:14:00.030,14.96,3900.0,2.0,2.0,2000034,111400030000,,,,
5171,5388942,5384249.0,5388941.0,1539227640030000,20181011,2018-10-11 11:14:00.030,,,,,2000034,111400030000,0.0,14.96,3900.0,1.0
5303,5389119,,,1539227640350000,20181011,2018-10-11 11:14:00.350,14.96,1000.0,2.0,2.0,2000034,111400350000,,,,
5172,5389120,5384249.0,5389119.0,1539227640350000,20181011,2018-10-11 11:14:00.350,,,,,2000034,111400350000,0.0,14.96,1000.0,1.0
5304,5389138,,,1539227640390000,20181011,2018-10-11 11:14:00.390,14.96,3200.0,2.0,2.0,2000034,111400390000,,,,
5173,5389139,5384249.0,5389138.0,1539227640390000,20181011,2018-10-11 11:14:00.390,,,,,2000034,111400390000,0.0,14.96,3200.0,1.0
5305,5389343,,,1539227640700000,20181011,2018-10-11 11:14:00.700,14.96,600.0,1.0,2.0,2000034,111400700000,,,,
5306,5389354,,,1539227640710000,20181011,2018-10-11 11:14:00.710,14.96,100.0,1.0,2.0,2000034,111400710000,,,,
5307,5389395,,,1539227640790000,20181011,2018-10-11 11:14:00.790,14.96,400.0,1.0,2.0,2000034,111400790000,,,,
5174,5389415,5389343.0,0.0,1539227640820000,20181011,2018-10-11 11:14:00.820,,,,,2000034,111400820000,0.0,0.0,600.0,4.0


In [120]:
test[test['ApplSeqNum'] == 5384249]

Unnamed: 0,ApplSeqNum,BidApplSeqNum,OfferApplSeqNum,clockAtArrival,date,datetime,order_price,order_qty,order_side,order_type,skey,time,trade_flag,trade_price,trade_qty,trade_type
5287,5384249,,,1539227630880000,20181011,2018-10-11 11:13:50.880,14.96,856900.0,1.0,2.0,2000034,111350880000,,,,


In [119]:
test[(test['time'] >= 111417000000) & (test['order_side'] == 1) & (test['order_price'] == 14.96)]

Unnamed: 0,ApplSeqNum,BidApplSeqNum,OfferApplSeqNum,clockAtArrival,date,datetime,order_price,order_qty,order_side,order_type,skey,time,trade_flag,trade_price,trade_qty,trade_type
5432,5401660,,,1539227663450000,20181011,2018-10-11 11:14:23.450,14.96,521400.0,1.0,2.0,2000034,111423450000,,,,
5461,5403866,,,1539227667370000,20181011,2018-10-11 11:14:27.370,14.96,1100.0,1.0,2.0,2000034,111427370000,,,,
5505,5406807,,,1539227672360000,20181011,2018-10-11 11:14:32.360,14.96,12700.0,1.0,2.0,2000034,111432360000,,,,
5559,5408844,,,1539227675250000,20181011,2018-10-11 11:14:35.250,14.96,200.0,1.0,2.0,2000034,111435250000,,,,
5607,5411510,,,1539227680410000,20181011,2018-10-11 11:14:40.410,14.96,100.0,1.0,2.0,2000034,111440410000,,,,
5684,5414935,,,1539227686970000,20181011,2018-10-11 11:14:46.970,14.96,100.0,1.0,2.0,2000034,111446970000,,,,
5831,5421961,,,1539227700700000,20181011,2018-10-11 11:15:00.700,14.96,200.0,1.0,2.0,2000034,111500700000,,,,
6155,5436003,,,1539227729410000,20181011,2018-10-11 11:15:29.410,14.96,100.0,1.0,2.0,2000034,111529410000,,,,
6305,5442184,,,1539227742340000,20181011,2018-10-11 11:15:42.340,14.96,1400.0,1.0,2.0,2000034,111542340000,,,,
6405,5448434,,,1539227755580000,20181011,2018-10-11 11:15:55.580,14.96,10700.0,1.0,2.0,2000034,111555580000,,,,


In [113]:
test[(test['order_price'] == 14.96) & (test['order_side'] == 2) & (test['time'] >= 111351000000)]

Unnamed: 0,ApplSeqNum,BidApplSeqNum,OfferApplSeqNum,clockAtArrival,date,datetime,order_price,order_qty,order_side,order_type,skey,time,trade_flag,trade_price,trade_qty,trade_type
5299,5388507,,,1539227639010000,20181011,2018-10-11 11:13:59.010,14.96,400.0,2.0,2.0,2000034,111359010000,,,,
5302,5388941,,,1539227640030000,20181011,2018-10-11 11:14:00.030,14.96,3900.0,2.0,2.0,2000034,111400030000,,,,
5303,5389119,,,1539227640350000,20181011,2018-10-11 11:14:00.350,14.96,1000.0,2.0,2.0,2000034,111400350000,,,,
5304,5389138,,,1539227640390000,20181011,2018-10-11 11:14:00.390,14.96,3200.0,2.0,2.0,2000034,111400390000,,,,
5308,5389536,,,1539227640970000,20181011,2018-10-11 11:14:00.970,14.96,2100.0,2.0,2.0,2000034,111400970000,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11572,8571451,,,1539239730950000,20181011,2018-10-11 14:35:30.950,14.96,1400.0,2.0,2.0,2000034,143530950000,,,,
11645,8649454,,,1539239901920000,20181011,2018-10-11 14:38:21.920,14.96,13300.0,2.0,2.0,2000034,143821920000,,,,
11887,8833351,,,1539240218360000,20181011,2018-10-11 14:43:38.360,14.96,200.0,2.0,2.0,2000034,144338360000,,,,
12346,9082057,,,1539240647970000,20181011,2018-10-11 14:50:47.970,14.96,500.0,2.0,2.0,2000034,145047970000,,,,


In [53]:
test[test['ApplSeqNum'] == 5384249]

Unnamed: 0,ApplSeqNum,BidApplSeqNum,OfferApplSeqNum,clockAtArrival,date,datetime,order_price,order_qty,order_side,order_type,skey,time,trade_flag,trade_price,trade_qty,trade_type
5287,5384249,,,1539227630880000,20181011,2018-10-11 11:13:50.880,14.96,856900.0,1.0,2.0,2000034,111350880000,,,,


In [55]:
    def delete_tick(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None
        collection.delete_many(query)

856900.0

In [9]:
order = db1.read('md_order', start_date = startDate, end_date = endDate, symbol=list(sl))
order.groupby('skey')['date'].size().sort_values(ascending=False)

skey
2002185    285830
2002151    255299
2002610    251067
2002400    229651
2002600    195164
            ...  
2000752      1101
2002766      1082
2002586       951
2002188       840
2002499       560
Name: date, Length: 1012, dtype: int64

In [1]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        # mongodb://user:password@example.com
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        if len(df) == 0: return

        multi_date = False

        if self.date_column in df.columns:
            date = str(df.head(1)[self.date_column].iloc[0])
            multi_date = len(df[self.date_column].unique()) > 1
        else:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            for symbol, sub_df in df.groupby([self.symbol_column]):
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = 1
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}

        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)

        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for x in collection.find(query):
            x['data'] = self.deser(x['data'], x['ver'])
            segs.append(x)
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start']))
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def list_tables(self):
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        def unpickle(s):
            return pickle.loads(s)

        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    if pd.__version__ < '0.24':
        import sys
        from types import ModuleType
        from pandas.core.internals import BlockManager
        pkg_name = 'pandas.core.internals.managers'
        if pkg_name not in sys.modules:
            m = ModuleType(pkg_name)
            m.BlockManager = BlockManager
            sys.modules[pkg_name] = m
patch_pandas_pickle()

def dailyDB(host, db_name, user, passwd):
    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    url = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)
    client = pymongo.MongoClient(url, maxPoolSize=None)
    db = client[db_name]
    return db

def read_stock_daily(db, name, start_date=None, end_date=None, skey=None, index_name=None, interval=None, col=None, return_sdi=True):
    collection = db[name]
    # Build projection
    prj = {'_id': 0}
    if col is not None:
        if return_sdi:
            col = ['skey', 'date'] + col
        for col_name in col:
            prj[col_name] = 1

    # Build query
    query = {}
    if skey is not None:
        query['skey'] = {'$in': skey}
    if index_name is not None:
        query['index_name'] = {'$in': index_name}
    if start_date is not None:
        if end_date is not None:
            query['date'] = {'$gte': start_date, '$lte': end_date}
        else:
            query['date'] = {'$gte': start_date}
    elif end_date is not None:
        query['date'] = {'$lte': end_date}

    # Load data
    cur = collection.find(query, prj)
    df = pd.DataFrame.from_records(cur)
    if df.empty:
        df = pd.DataFrame()
    else:
        df = df.sort_values(by=['date', 'skey'])
    return df   

def read_memb_daily(db, name, start_date=None, end_date=None, skey=None, index_id=None, interval=None, col=None, return_sdi=True):
    collection = db[name]
    # Build projection
    prj = {'_id': 0}
    if col is not None:
        if return_sdi:
            col = ['skey', 'date', 'index_id'] + col
        for col_name in col:
            prj[col_name] = 1

    # Build query
    query = {}
    if skey is not None:
        query['skey'] = {'$in': skey}
    if index_id is not None:
        query['index_id'] = {'$in': index_id}
    if interval is not None:
        query['interval'] = {'$in': interval}
    if start_date is not None:
        if end_date is not None:
            query['date'] = {'$gte': start_date, '$lte': end_date}
        else:
            query['date'] = {'$gte': start_date}
    elif end_date is not None:
        query['date'] = {'$lte': end_date}

    # Load data
    cur = collection.find(query, prj)
    df = pd.DataFrame.from_records(cur)
    if df.empty:
        df = pd.DataFrame()
    else:
        df = df.sort_values(by=['date', 'index_id', 'skey'])
    return df 



import pandas as pd
import random
import numpy as np
import glob
import pickle
import os
import datetime
import time
import sys
pd.set_option("max_columns", 200)


startDate = '20200102'
endDate = '20200630'
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"

db1 = DB("192.168.10.178", database_name, user, password)
db2 = dailyDB("192.168.10.178", database_name, user, password)
startTm = datetime.datetime.now()
data1 = db1.read('md_snapshot_l2', start_date=str(startDate), end_date=str(endDate), symbol=2002185)
print(datetime.datetime.now() - startTm)
print(sys.getsizeof(data1) / (1024 ** 3))
startTm = datetime.datetime.now()
data2 = db1.read('md_snapshot_mbd', start_date=str(startDate), end_date=str(endDate), symbol=2002185)
print(datetime.datetime.now() - startTm)
print(sys.getsizeof(data2) / (1024 ** 3))

0:00:05.179154
0.549378015100956
0:01:49.007772
17.994836151599884


In [7]:
data2.head()

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,ApplSeqNum,bbo_improve,pass_filter,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels
0,2002185,20200102,92500000000,1577928300000000,2020-01-02 09:25:00.000,1,257062,1,2,550,1907597,14364205.41,7.47,7.53,7.53,7.44,7.45,7.46,7.47,7.48,7.49,7.5,7.51,7.52,7.53,7.54,7.55,7.56,7.57,7.58,7.59,7.6,7.61,7.62,7.63,4200,50400,16100,113900,116000,100000,572800,36000,45200,64303,294990,37500,15896,8600,87400,52559,21700,6600,230200,259700,4,20,10,55,25,7,80,14,20,30,59,8,5,8,22,19,23,3,21,19,3889003,5089161,7.293204,7.923305,914,1168,78,69
1,2002185,20200102,93000000000,1577928600000000,2020-01-02 09:30:00.000,2,280728,1,2,694,2625900,19754671.0,7.47,7.53,7.5,7.4,7.41,7.42,7.43,7.44,7.45,7.46,7.47,7.48,7.49,7.5,7.54,7.55,7.56,7.57,7.58,7.59,7.6,7.61,7.62,171100,78300,31300,2500,4200,50400,16100,113900,116000,100000,281697,294990,37500,15896,8600,87400,52559,21700,6600,230200,33,22,10,4,4,20,10,55,25,7,1,59,8,5,8,22,19,23,3,21,3170700,5370858,7.245349,7.901103,770,1169,74,70
2,2002185,20200102,93000000000,1577928600000000,2020-01-02 09:30:00.000,3,281001,0,-1,694,2625900,19754671.0,7.47,7.53,7.5,7.4,7.41,7.42,7.43,7.44,7.45,7.46,7.47,7.48,7.49,7.5,7.54,7.55,7.56,7.57,7.58,7.59,7.6,7.61,7.62,171100,78300,31300,2500,4200,50400,16100,113900,116000,100000,281697,294990,37500,15896,8600,87400,52559,21700,6600,230200,33,22,10,4,4,20,10,55,25,7,1,59,8,5,8,22,19,23,3,21,3170700,5383758,7.245349,7.901172,770,1170,74,70
3,2002185,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,4,281204,0,-1,694,2625900,19754671.0,7.47,7.53,7.5,7.4,7.41,7.42,7.43,7.44,7.45,7.46,7.47,7.48,7.49,7.5,7.54,7.55,7.56,7.57,7.58,7.59,7.6,7.61,7.62,171100,78300,31300,2500,4200,50400,16100,113900,116000,100000,281697,294990,37500,15896,8600,97400,52559,21700,6600,230200,33,22,10,4,4,20,10,55,25,7,1,59,8,5,8,23,19,23,3,21,3170700,5393758,7.245349,7.900577,770,1171,74,70
4,2002185,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,5,281219,1,0,695,2626400,19758421.0,7.47,7.53,7.5,7.4,7.41,7.42,7.43,7.44,7.45,7.46,7.47,7.48,7.49,7.5,7.54,7.55,7.56,7.57,7.58,7.59,7.6,7.61,7.62,171100,78300,31300,2500,4200,50400,16100,113900,116000,100000,281197,294990,37500,15896,8600,97400,52559,21700,6600,230200,33,22,10,4,4,20,10,55,25,7,1,59,8,5,8,23,19,23,3,21,3170700,5393258,7.245349,7.900614,770,1171,74,70


In [41]:
sl = read_stock_daily(db, 'mdbar1d_tr', start_date=20201207, end_date=20201207)['skey'].unique()
len(sl[sl > 2000000])

2320

In [42]:
num

15.05027436092496

In [33]:
test1 = db1.read('md_trade', start_date=startDate, end_date=endDate, symbol=[2300621])
test1['cum_volume'] = np.where(test1['trade_type'] == 1, test1['trade_qty'], 0)
test1['cum_volume'] = test1['cum_volume'].cumsum()
test1.head(50)

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum,cum_volume
0,2300621,20200102,91900820000,1577927940820000,2020-01-02 09:19:00.820,165999,4,0,0.0,16600,157364,0,0
1,2300621,20200102,91924490000,1577927964490000,2020-01-02 09:19:24.490,169980,4,0,0.0,5000,89898,0,0
2,2300621,20200102,93000220000,1577928600220000,2020-01-02 09:30:00.220,321619,1,0,10.47,1000,321618,315029,1000
3,2300621,20200102,93000220000,1577928600220000,2020-01-02 09:30:00.220,321620,1,0,10.49,1000,321618,46705,2000
4,2300621,20200102,93000220000,1577928600220000,2020-01-02 09:30:00.220,321970,4,0,0.0,500,0,253500,2000
5,2300621,20200102,93000230000,1577928600230000,2020-01-02 09:30:00.230,322775,4,0,0.0,700,0,253638,2000
6,2300621,20200102,93000370000,1577928600370000,2020-01-02 09:30:00.370,334118,1,0,10.49,300,321618,334117,2300
7,2300621,20200102,93000520000,1577928600520000,2020-01-02 09:30:00.520,343057,1,0,10.49,2700,321618,343056,5000
8,2300621,20200102,93000520000,1577928600520000,2020-01-02 09:30:00.520,343058,1,0,10.49,900,332164,343056,5900
9,2300621,20200102,93000580000,1577928600580000,2020-01-02 09:30:00.580,346709,1,0,10.48,2400,346708,343056,8300


In [32]:
test1 = db1.read('md_order', start_date=startDate, end_date=endDate, symbol=[2300621])
test1[test1['ApplSeqNum'] >= 321618].head(50)

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,order_side,order_type,order_price,order_qty
121,2300621,20200102,93000220000,1577928600220000,2020-01-02 09:30:00.220,321618,1,2,10.49,5000
122,2300621,20200102,93000230000,1577928600230000,2020-01-02 09:30:00.230,322283,1,2,10.41,2300
123,2300621,20200102,93000340000,1577928600340000,2020-01-02 09:30:00.340,332164,1,2,10.49,900
124,2300621,20200102,93000370000,1577928600370000,2020-01-02 09:30:00.370,334117,2,2,10.48,300
125,2300621,20200102,93000520000,1577928600520000,2020-01-02 09:30:00.520,343056,2,2,10.48,6000
126,2300621,20200102,93000580000,1577928600580000,2020-01-02 09:30:00.580,346708,1,2,10.5,5000
127,2300621,20200102,93000860000,1577928600860000,2020-01-02 09:30:00.860,363536,2,2,10.83,3500
128,2300621,20200102,93000870000,1577928600870000,2020-01-02 09:30:00.870,363942,2,2,11.44,2000
129,2300621,20200102,93000990000,1577928600990000,2020-01-02 09:30:00.990,371569,2,2,10.68,600
130,2300621,20200102,93001040000,1577928601040000,2020-01-02 09:30:01.040,373551,1,2,10.42,5000


In [27]:
re[re['skey'] == 1600157]

Unnamed: 0,date,skey,index_id,index_name,weight,lag
45,20200102,1600157,3011031,AMAC 采矿,1.59,
7381,20200103,1600157,3011031,AMAC 采矿,1.56,
14717,20200106,1600157,3011031,AMAC 采矿,1.54,
22055,20200107,1600157,3011031,AMAC 采矿,1.62,
29392,20200108,1600157,3011031,AMAC 采矿,1.56,
36729,20200109,1600157,3011031,AMAC 采矿,1.61,
44066,20200110,1600157,3011031,AMAC 采矿,1.61,
51403,20200113,1600157,3011031,AMAC 采矿,1.61,
58740,20200114,1600157,3011031,AMAC 采矿,1.61,
66078,20200115,1600157,3011031,AMAC 采矿,1.62,


In [32]:
data = pd.read_pickle(r'E:\daily_AMAC_table.pkl')
data['date'] = data.intdate.map(lambda x: datetime.datetime(1899, 12, 30) + datetime.timedelta(int(x)))
data[(data['ID'] == 'SH600157') & (data['date'] >= datetime.datetime(2020, 4, 1)) & (data['date'] <= datetime.datetime(2020, 5, 1))]

Unnamed: 0,intdate,AMACCode,ID,date
2435185,43922,CSIH11031,SH600157,2020-04-01
2439075,43923,CSIH11031,SH600157,2020-04-02
2442965,43924,CSIH11031,SH600157,2020-04-03
2446855,43928,CSIH11031,SH600157,2020-04-07
2450745,43929,CSIH11031,SH600157,2020-04-08
2454635,43930,CSIH11031,SH600157,2020-04-09
2458525,43931,CSIH11031,SH600157,2020-04-10
2462415,43934,CSIH11031,SH600157,2020-04-13
2466305,43935,CSIH11031,SH600157,2020-04-14
2470195,43936,CSIH11031,SH600157,2020-04-15


In [47]:
import TSLPy3
import lzma
import pytz
import numpy as np

def updateAShare(date):
    TRDate = str(date)
    tsstr = """
           BegT:=%s;
           EndT:=%s;
           SetSysParam(pn_stock(),'SH000001');
           SetSysParam(PN_Cycle(),cy_day());
           dateArr:=MarketTradeDayQk(BegT,EndT);
           r:=array();
           for nI:=0 to length(dateArr)-1 do
           begin
             echo dateArr[nI];
             t:= getabkbydate('A股',dateArr[nI]);
             r:=r union2 t;
           end;
           r:= select [0] as 'StockID' from `r end;
           r := select * from r order by ['StockID'] end;
           return r;
            """%(TRDate + 'T', TRDate + 'T + 0.99')
    stockList = pd.DataFrame(TSLPy3.RemoteExecute(tsstr,[],{})[1])
    stockList.columns = list(pd.Series(stockList.columns).str.decode('GBK'))
    stockList['StockID'] = stockList['StockID'].str.decode('GBK')
    stockList['skey'] = np.where(stockList['StockID'].str[:2] == 'SH', 1000000 + stockList['StockID'].str[2:].astype(int),
                                2000000 + stockList['StockID'].str[2:].astype(int))
    stockList['date'] = int(TRDate)
    return stockList

dl = [20200828, 20200814]
total_stock = []
for d in dl:
    data = updateAShare(d)
    total_stock += [data]
total_stock = pd.concat(total_stock, sort=False)
total_stock[total_stock['date'] == 20200828]['skey'].nunique()

3958

In [13]:
data["tn_update"] = data.groupby("skey")["cum_trades_cnt"].apply(lambda x: x-x.shift(1))
display(data[data['tn_update'] != 0].groupby(['skey'])['date'].size())
display(data[data['tn_update'] == 0].groupby(['skey'])['date'].size())

skey
1600612    1323
Name: date, dtype: int64

skey
1600612    1644
Name: date, dtype: int64

In [4]:
data[data['tn_update'] != 0].groupby(['skey'])['date'].size()

skey
2000622    524
2300597     33
2300599     45
Name: date, dtype: int64

In [5]:
data[data['tn_update'] == 0].groupby(['skey'])['date'].size()

skey
2000622     961
2300597    2508
2300599    2725
Name: date, dtype: int64

In [8]:
data[(data['skey'] == 2300597) & (data['tn_update'] != 0) & (data['time'] >= 100000000000)].groupby("skey")["time"].min()

skey
2300597    100012000000
Name: time, dtype: int64

In [6]:
data[(data['ordering'] >= 1230) & (data['ordering'] <= 1240) & (data['skey'] == 1600539)]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,has_missing,cum_trades_cnt,cum_volume,cum_amount,...,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q
1229,1600539,20151214,130819000000,1450069699000000,2015-12-14 13:08:19,1230,0,914,2240347,28275654,...,6100,9001,2300,10000,32700,1000,15000,2700,17900,3900
1230,1600539,20151214,130825000000,1450069705000000,2015-12-14 13:08:25,1231,0,914,2240347,28275654,...,6100,9001,2300,10000,32700,1000,15000,2700,17900,3900
1231,1600539,20151214,130937000000,1450069777000000,2015-12-14 13:09:37,1232,1,925,2259047,28510471,...,6300,6100,9001,2300,10000,32700,1000,15000,2700,17900
1232,1600539,20151214,130943000000,1450069783000000,2015-12-14 13:09:43,1233,0,925,2259047,28510471,...,6300,1000,6100,9001,2300,10000,32700,1000,15000,2700
1233,1600539,20151214,130946000000,1450069786000000,2015-12-14 13:09:46,1234,0,926,2260047,28523011,...,6300,1000,6100,9001,2300,10000,32700,1000,15000,2700
1234,1600539,20151214,130955000000,1450069795000000,2015-12-14 13:09:55,1235,0,926,2260047,28523011,...,6300,6100,9001,2300,10000,32700,1000,15000,2700,17900
1235,1600539,20151214,131019000000,1450069819000000,2015-12-14 13:10:19,1236,0,928,2262047,28548091,...,3000,6300,6100,9001,2300,10000,32700,1000,15000,2700
1236,1600539,20151214,131031000000,1450069831000000,2015-12-14 13:10:31,1237,0,928,2262047,28548091,...,3000,6300,6100,9001,2300,10000,32700,1000,15000,2700
1237,1600539,20151214,131040000000,1450069840000000,2015-12-14 13:10:40,1238,0,929,2262147,28549345,...,2900,6300,6100,9001,2300,10000,32700,1000,15000,2700
1238,1600539,20151214,131046000000,1450069846000000,2015-12-14 13:10:46,1239,0,930,2263647,28568155,...,1400,6300,6100,9001,2300,10000,32700,1000,15000,2700


In [7]:
data["tn_update"] = data.groupby("skey")["cum_trades_cnt"].apply(lambda x: x-x.shift(1))
f2 = data[(data["time"] >= 130000000000) & (data["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
f2 = f2.rename(columns={"time": "time2"})
data = pd.merge(data, f2, on="skey", how="left")
data[(data["time"] > 93000000000) & (data["time"] < 145700000000) & (data["time"] != data["time2"]) & (data["tn_update"] != 0)]\
    .groupby("skey")["tn_update"].apply(lambda x: x.describe([0.99])["99%"]).round(0).reset_index()

Unnamed: 0,skey,tn_update
0,1600539,9.0


In [88]:
order['cum_sell_aggLimit_improveNBBO_trade_amount'].astype(str).apply(lambda x: len(x.split('.')[1])).unique()

array([1, 2], dtype=int64)

In [31]:
order['order_type'].unique()

array([2, 3, 1], dtype=int64)

In [7]:
pd.set_option('max_rows', 200)
data.dtypes

date            int64
weight        float64
skey            int64
index_id        int64
index_name     object
dtype: object

In [8]:
order.groupby('order_type')['order_side'].size()

order_type
1       348
2    107634
3         8
Name: order_side, dtype: int64

In [11]:
order[order['order_type'] == 1]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,order_side,order_type,order_price,order_qty
5448,2000002,20200904,93000270000,1599183000270000,2020-09-04 09:30:00.270,504189,2,1,0.0,500
5740,2000002,20200904,93001340000,1599183001340000,2020-09-04 09:30:01.340,590057,2,1,0.0,1900
5999,2000002,20200904,93002640000,1599183002640000,2020-09-04 09:30:02.640,664809,1,1,30.59,300
6024,2000002,20200904,93002770000,1599183002770000,2020-09-04 09:30:02.770,670820,2,1,0.0,500
6040,2000002,20200904,93002850000,1599183002850000,2020-09-04 09:30:02.850,674902,2,1,1.0,2000
6096,2000002,20200904,93003090000,1599183003090000,2020-09-04 09:30:03.090,688352,2,1,0.0,4800
6115,2000002,20200904,93003180000,1599183003180000,2020-09-04 09:30:03.180,692716,2,1,0.0,1500
6134,2000002,20200904,93003250000,1599183003250000,2020-09-04 09:30:03.250,696629,2,1,0.0,700
6150,2000002,20200904,93003370000,1599183003370000,2020-09-04 09:30:03.370,702283,2,1,1.0,400
6211,2000002,20200904,93003670000,1599183003670000,2020-09-04 09:30:03.670,713804,2,1,0.0,100


In [18]:
trade[trade['OfferApplSeqNum'] == 688352]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
1069,2000002,20200904,93003090000,1599183003090000,2020-09-04 09:30:03.090,688353,1,0,27.58,4600,686255,688352
1070,2000002,20200904,93003100000,1599183003100000,2020-09-04 09:30:03.100,688638,1,0,27.58,200,688637,688352


In [24]:
order[(order['ApplSeqNum'] <= 688364) & (order['ApplSeqNum'] >= 688301)]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,order_side,order_type,order_price,order_qty
6095,2000002,20200904,93003090000,1599183003090000,2020-09-04 09:30:03.090,688301,2,2,27.56,500
6096,2000002,20200904,93003090000,1599183003090000,2020-09-04 09:30:03.090,688352,2,1,0.0,4800
6097,2000002,20200904,93003090000,1599183003090000,2020-09-04 09:30:03.090,688364,2,2,27.62,500


In [27]:
trade[(trade['ApplSeqNum'] <= 688364) & (trade['ApplSeqNum'] >= 688301)]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
1068,2000002,20200904,93003090000,1599183003090000,2020-09-04 09:30:03.090,688302,1,0,27.58,500,686255,688301
1069,2000002,20200904,93003090000,1599183003090000,2020-09-04 09:30:03.090,688353,1,0,27.58,4600,686255,688352


In [26]:
data[data['ApplSeqNum'] >= 688114].head()

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,ApplSeqNum,bbo_improve,pass_filter,cum_volume,cum_amount,prev_close,open,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,bid10qInsert,bid9qInsert,bid8qInsert,bid7qInsert,bid6qInsert,bid5qInsert,bid4qInsert,bid3qInsert,bid2qInsert,bid1qInsert,ask1qInsert,ask2qInsert,ask3qInsert,ask4qInsert,ask5qInsert,ask6qInsert,ask7qInsert,ask8qInsert,ask9qInsert,ask10qInsert,bid10qCancel,bid9qCancel,bid8qCancel,bid7qCancel,bid6qCancel,bid5qCancel,bid4qCancel,bid3qCancel,bid2qCancel,bid1qCancel,ask1qCancel,ask2qCancel,ask3qCancel,ask4qCancel,ask5qCancel,ask6qCancel,ask7qCancel,ask8qCancel,ask9qCancel,ask10qCancel,bid10sCancel,bid9sCancel,bid8sCancel,bid7sCancel,bid6sCancel,bid5sCancel,bid4sCancel,bid3sCancel,bid2sCancel,bid1sCancel,ask1sCancel,ask2sCancel,ask3sCancel,ask4sCancel,ask5sCancel,ask6sCancel,ask7sCancel,ask8sCancel,ask9sCancel,ask10sCancel,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels,cum_buy_market_order_volume,cum_sell_market_order_volume,cum_buy_market_order_amount,cum_sell_market_order_amount,cum_buy_market_trade_volume,cum_sell_market_trade_volume,cum_buy_market_trade_amount,cum_sell_market_trade_amount,cum_buy_aggLimit_onNBBO_order_volume,cum_sell_aggLimit_onNBBO_order_volume,cum_buy_aggLimit_onNBBO_order_amount,cum_sell_aggLimit_onNBBO_order_amount,cum_buy_aggLimit_onNBBO_trade_volume,cum_sell_aggLimit_onNBBO_trade_volume,cum_buy_aggLimit_onNBBO_trade_amount,cum_sell_aggLimit_onNBBO_trade_amount,cum_buy_aggLimit_improveNBBO_order_volume,cum_sell_aggLimit_improveNBBO_order_volume,cum_buy_aggLimit_improveNBBO_order_amount,cum_sell_aggLimit_improveNBBO_order_amount,cum_buy_aggLimit_improveNBBO_trade_volume,cum_sell_aggLimit_improveNBBO_trade_volume,cum_buy_aggLimit_improveNBBO_trade_amount,cum_sell_aggLimit_improveNBBO_trade_amount
841,2000002,20200904,93003090000,1599183003090000,2020-09-04 09:30:03.090,842,688114,0,-1,1481600,40841920.0,27.81,27.56,27.58,27.49,27.5,27.51,27.52,27.53,27.54,27.55,27.56,27.57,27.58,27.65,27.66,27.67,27.68,27.69,27.7,27.71,27.72,27.73,27.74,4200,107600,57200,12100,21200,21300,68900,92000,461500,5100,12700,2000,500,8200,3600,25400,10000,7200,73200,17500,4,118,44,13,13,4,37,27,57,1,5,2,2,3,4,20,1,4,7,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3233200,6098740,27.083899,28.762871,1954,3067,192,269,300,4900,8280.0,135142.0,300,4500,8280.0,124106.0,825400,43900,22757408.0,1210052.0,166500,43800,4591395.0,1207294.0,33300,468600,925218.0,12891767.0,32600,468600,899521.0,12919656.0
842,2000002,20200904,93003090000,1599183003090000,2020-09-04 09:30:03.090,843,688301,1,0,1482100,40855710.0,27.81,27.56,27.58,27.49,27.5,27.51,27.52,27.53,27.54,27.55,27.56,27.57,27.58,27.65,27.66,27.67,27.68,27.69,27.7,27.71,27.72,27.73,27.74,4200,107600,57200,12100,21200,21300,68900,92000,461500,4600,12700,2000,500,8200,3600,25400,10000,7200,73200,17500,4,118,44,13,13,4,37,27,57,1,5,2,2,3,4,20,1,4,7,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3232700,6098740,27.083823,28.762871,1954,3067,192,269,300,4900,8280.0,135142.0,300,4500,8280.0,124106.0,825400,43900,22757408.0,1210052.0,166500,43800,4591395.0,1207294.0,33300,469100,925218.0,12905547.0,32600,469100,899521.0,12933446.0
843,2000002,20200904,93003090000,1599183003090000,2020-09-04 09:30:03.090,844,688364,1,2,1486700,40982578.0,27.81,27.56,27.58,27.48,27.49,27.5,27.51,27.52,27.53,27.54,27.55,27.56,27.57,27.58,27.62,27.65,27.66,27.67,27.68,27.69,27.7,27.71,27.72,31900,4200,107600,57200,12100,21200,21300,68900,92000,461500,200,500,12700,2000,500,8200,3600,25400,10000,7200,22,4,118,44,13,13,4,37,27,57,1,1,5,2,2,3,4,20,1,4,0,0,0,0,0,0,0,0,0,0,200,500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3228100,6099440,27.083115,28.762739,1953,3069,191,271,300,9700,8280.0,267526.0,300,9100,8280.0,250974.0,825400,43900,22757408.0,1210052.0,166500,43800,4591395.0,1207294.0,33300,469100,925218.0,12905547.0,32600,469100,899521.0,12933446.0
844,2000002,20200904,93003100000,1599183003100000,2020-09-04 09:30:03.100,845,688637,1,2,1486900,40988094.0,27.81,27.56,27.58,27.49,27.5,27.51,27.52,27.53,27.54,27.55,27.56,27.57,27.6,27.62,27.65,27.66,27.67,27.68,27.69,27.7,27.71,27.72,27.73,4200,107600,57200,12100,21200,21300,68900,92000,461500,17400,500,12700,2000,500,8200,3600,25400,10000,7200,73200,4,118,44,13,13,4,37,27,57,1,1,5,2,2,3,4,20,1,4,7,0,0,0,0,0,0,0,0,0,17400,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3245500,6099240,27.085887,28.762777,1954,3068,192,270,300,9700,8280.0,267526.0,300,9100,8280.0,250974.0,825400,43900,22757408.0,1210052.0,166500,43800,4591395.0,1207294.0,50900,469100,1410978.0,12905547.0,32800,469100,905037.0,12933446.0
845,2000002,20200904,93003100000,1599183003100000,2020-09-04 09:30:03.100,846,688663,0,-1,1486900,40988094.0,27.81,27.56,27.58,27.49,27.5,27.51,27.52,27.53,27.54,27.55,27.56,27.57,27.6,27.62,27.65,27.66,27.67,27.68,27.69,27.7,27.71,27.72,27.73,4200,107600,57200,12100,21200,21300,68900,92000,461500,17400,500,12700,2000,500,8200,3600,25400,10000,7200,73200,4,118,44,13,13,4,37,27,57,1,1,5,2,2,3,4,20,1,4,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3246500,6099240,27.085768,28.762777,1955,3068,192,270,300,9700,8280.0,267526.0,300,9100,8280.0,250974.0,825400,43900,22757408.0,1210052.0,166500,43800,4591395.0,1207294.0,50900,469100,1410978.0,12905547.0,32800,469100,905037.0,12933446.0


In [6]:
data = pd.read_pickle(r'L:\ShareWithServer\result\300313.pkl')
data

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,ApplSeqNum,bbo_improve,pass_filter,cum_volume,cum_amount,prev_close,open,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,bid10qInsert,bid9qInsert,bid8qInsert,bid7qInsert,bid6qInsert,bid5qInsert,bid4qInsert,bid3qInsert,bid2qInsert,bid1qInsert,ask1qInsert,ask2qInsert,ask3qInsert,ask4qInsert,ask5qInsert,ask6qInsert,ask7qInsert,ask8qInsert,ask9qInsert,ask10qInsert,bid10qCancel,bid9qCancel,bid8qCancel,bid7qCancel,bid6qCancel,bid5qCancel,bid4qCancel,bid3qCancel,bid2qCancel,bid1qCancel,ask1qCancel,ask2qCancel,ask3qCancel,ask4qCancel,ask5qCancel,ask6qCancel,ask7qCancel,ask8qCancel,ask9qCancel,ask10qCancel,bid10sCancel,bid9sCancel,bid8sCancel,bid7sCancel,bid6sCancel,bid5sCancel,bid4sCancel,bid3sCancel,bid2sCancel,bid1sCancel,ask1sCancel,ask2sCancel,ask3sCancel,ask4sCancel,ask5sCancel,ask6sCancel,ask7sCancel,ask8sCancel,ask9sCancel,ask10sCancel,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels,cum_buy_market_order_volume,cum_sell_market_order_volume,cum_buy_market_order_amount,cum_sell_market_order_amount,cum_buy_market_trade_volume,cum_sell_market_trade_volume,cum_buy_market_trade_amount,cum_sell_market_trade_amount,cum_buy_aggLimit_onNBBO_order_volume,cum_sell_aggLimit_onNBBO_order_volume,cum_buy_aggLimit_onNBBO_order_amount,cum_sell_aggLimit_onNBBO_order_amount,cum_buy_aggLimit_onNBBO_trade_volume,cum_sell_aggLimit_onNBBO_trade_volume,cum_buy_aggLimit_onNBBO_trade_amount,cum_sell_aggLimit_onNBBO_trade_amount,cum_buy_aggLimit_improveNBBO_order_volume,cum_sell_aggLimit_improveNBBO_order_volume,cum_buy_aggLimit_improveNBBO_order_amount,cum_sell_aggLimit_improveNBBO_order_amount,cum_buy_aggLimit_improveNBBO_trade_volume,cum_sell_aggLimit_improveNBBO_trade_volume,cum_buy_aggLimit_improveNBBO_trade_amount,cum_sell_aggLimit_improveNBBO_trade_amount
0,2300313,20200824,92500000000,1598232300000000,2020-08-24 09:25:00.000,1,399168,1,2,1341200,1.071619e+07,7.76,7.99,7.99,7.86,7.88,7.89,7.90,7.91,7.92,7.95,7.96,7.97,7.98,7.99,8.0,8.01,8.03,8.1,8.12,8.15,8.16,8.18,8.2,6800,2200,1000,1400,6600,3800,13300,200,900,200,9700,377900,600,1300,1000,1000,400,1400,1100,9100,4,3,1,4,2,5,1,1,1,2,3,78,1,1,1,1,1,2,2,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1217300,1540600,7.017999,8.697529,395,386,121,62,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.00,0,0,0.00,0.00,0,0,0.0,0.00,0,0,0.00,0.00
1,2300313,20200824,93000000000,1598232600000000,2020-08-24 09:30:00.000,2,413677,1,2,1347300,1.076471e+07,7.76,7.99,7.95,7.83,7.84,7.85,7.86,7.88,7.89,7.90,7.91,7.92,7.95,7.99,8.0,8.01,8.03,8.1,8.12,8.15,8.16,8.18,8.2,66400,1100,3100,6800,2200,1000,1400,6600,3800,8500,9700,377900,600,1300,1000,1000,400,1400,1100,9100,2,1,4,4,3,1,4,2,5,1,3,78,1,1,1,1,1,2,2,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1211200,1540600,7.013284,8.697529,391,386,118,62,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.00,0,0,0.00,0.00,0,6100,0.0,48068.00,0,6100,0.00,48521.00
2,2300313,20200824,93000020000,1598232600020000,2020-08-24 09:30:00.020,3,414926,1,2,1348800,1.077669e+07,7.76,7.99,7.99,7.83,7.84,7.85,7.86,7.88,7.89,7.90,7.91,7.92,7.95,7.99,8.0,8.01,8.03,8.1,8.12,8.15,8.16,8.18,8.2,66400,1100,3100,6800,2200,1000,1400,6600,3800,8500,8200,377900,600,1300,1000,1000,400,1400,1100,9100,2,1,4,4,3,1,4,2,5,1,2,78,1,1,1,1,1,2,2,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1211200,1539100,7.013284,8.698218,391,385,118,62,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.00,0,0,0.00,0.00,1500,6100,12090.0,48068.00,1500,6100,11985.00,48521.00
3,2300313,20200824,93000020000,1598232600020000,2020-08-24 09:30:00.020,4,415978,1,0,1350000,1.078628e+07,7.76,7.99,7.99,7.83,7.84,7.85,7.86,7.88,7.89,7.90,7.91,7.92,7.95,7.99,8.0,8.01,8.03,8.1,8.12,8.15,8.16,8.18,8.2,66400,1100,3100,6800,2200,1000,1400,6600,3800,8500,7000,377900,600,1300,1000,1000,400,1400,1100,9100,2,1,4,4,3,1,4,2,5,1,2,78,1,1,1,1,1,2,2,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1211200,1537900,7.013284,8.698771,391,385,118,62,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.00,0,0,0.00,0.00,2700,6100,21690.0,48068.00,2700,6100,21573.00,48521.00
4,2300313,20200824,93000020000,1598232600020000,2020-08-24 09:30:00.020,5,416043,1,2,1350600,1.079108e+07,7.76,7.99,7.99,7.83,7.84,7.85,7.86,7.88,7.89,7.90,7.91,7.92,7.95,7.99,8.0,8.01,8.03,8.1,8.12,8.15,8.16,8.18,8.2,66400,1100,3100,6800,2200,1000,1400,6600,3800,8500,6400,377900,600,1300,1000,1000,400,1400,1100,9100,2,1,4,4,3,1,4,2,5,1,2,78,1,1,1,1,1,2,2,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1211200,1537300,7.013284,8.699048,391,385,118,62,0,0,0.0,0.0,0,0,0.0,0.0,0,0,0.0,0.00,0,0,0.00,0.00,3300,6100,26490.0,48068.00,3300,6100,26367.00,48521.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54662,2300313,20200824,145649740000,1598252209740000,2020-08-24 14:56:49.740,54663,24217189,0,-1,27509223,2.398161e+08,7.76,7.99,9.31,9.22,9.23,9.24,9.25,9.26,9.27,9.28,9.29,9.30,9.31,0.00,0.0,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.0,4600,100,200,22100,4500,6800,16200,11100,57600,5018499,0,0,0,0,0,0,0,0,0,0,4,1,2,9,4,11,14,12,48,2241,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19400,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,180614.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6776599,0,8.899220,0.000000,3309,0,221,0,446800,375500,3924037.0,3284433.0,362000,291900,3158815.0,2570334.0,4255900,4717831,37580441.0,42467537.01,3304676,3339831,29242341.84,30138037.01,10009900,10089116,87737231.0,87046995.04,8997100,8962616,78602258.72,77630687.74
54663,2300313,20200824,145650510000,1598252210510000,2020-08-24 14:56:50.510,54664,24218851,0,-1,27509223,2.398161e+08,7.76,7.99,9.31,9.22,9.23,9.24,9.25,9.26,9.27,9.28,9.29,9.30,9.31,0.00,0.0,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.0,4600,100,200,22100,4500,6800,16200,11100,57600,5017399,0,0,0,0,0,0,0,0,0,0,4,1,2,9,4,11,14,12,48,2240,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1100,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10241.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6775499,0,8.899153,0.000000,3308,0,221,0,446800,375500,3924037.0,3284433.0,362000,291900,3158815.0,2570334.0,4255900,4717831,37580441.0,42467537.01,3304676,3339831,29242341.84,30138037.01,10009900,10089116,87737231.0,87046995.04,8997100,8962616,78602258.72,77630687.74
54664,2300313,20200824,145650510000,1598252210510000,2020-08-24 14:56:50.510,54665,24218852,0,-1,27509223,2.398161e+08,7.76,7.99,9.31,9.22,9.23,9.24,9.25,9.26,9.27,9.28,9.29,9.30,9.31,0.00,0.0,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.0,4600,100,200,22100,4500,6800,16200,11100,57600,5016099,0,0,0,0,0,0,0,0,0,0,4,1,2,9,4,11,14,12,48,2239,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1300,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12103.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6774199,0,8.899075,0.000000,3307,0,221,0,446800,375500,3924037.0,3284433.0,362000,291900,3158815.0,2570334.0,4255900,4717831,37580441.0,42467537.01,3304676,3339831,29242341.84,30138037.01,10009900,10089116,87737231.0,87046995.04,8997100,8962616,78602258.72,77630687.74
54665,2300313,20200824,145652330000,1598252212330000,2020-08-24 14:56:52.330,54666,24222529,0,-1,27509223,2.398161e+08,7.76,7.99,9.31,9.22,9.23,9.24,9.25,9.26,9.27,9.28,9.29,9.30,9.31,0.00,0.0,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.0,4600,100,200,22100,4500,6800,16200,11100,57600,5015599,0,0,0,0,0,0,0,0,0,0,4,1,2,9,4,11,14,12,48,2238,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,500,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4655.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6773699,0,8.899044,0.000000,3306,0,221,0,446800,375500,3924037.0,3284433.0,362000,291900,3158815.0,2570334.0,4255900,4717831,37580441.0,42467537.01,3304676,3339831,29242341.84,30138037.01,10009900,10089116,87737231.0,87046995.04,8997100,8962616,78602258.72,77630687.74


In [91]:
data[data['bid1qCancel'] != 0]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,ApplSeqNum,bbo_improve,pass_filter,cum_volume,cum_amount,prev_close,open,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,bid10qInsert,bid9qInsert,bid8qInsert,bid7qInsert,bid6qInsert,bid5qInsert,bid4qInsert,bid3qInsert,bid2qInsert,bid1qInsert,ask1qInsert,ask2qInsert,ask3qInsert,ask4qInsert,ask5qInsert,ask6qInsert,ask7qInsert,ask8qInsert,ask9qInsert,ask10qInsert,bid10qCancel,bid9qCancel,bid8qCancel,bid7qCancel,bid6qCancel,bid5qCancel,bid4qCancel,bid3qCancel,bid2qCancel,bid1qCancel,ask1qCancel,ask2qCancel,ask3qCancel,ask4qCancel,ask5qCancel,ask6qCancel,ask7qCancel,ask8qCancel,ask9qCancel,ask10qCancel,bid10sCancel,bid9sCancel,bid8sCancel,bid7sCancel,bid6sCancel,bid5sCancel,bid4sCancel,bid3sCancel,bid2sCancel,bid1sCancel,ask1sCancel,ask2sCancel,ask3sCancel,ask4sCancel,ask5sCancel,ask6sCancel,ask7sCancel,ask8sCancel,ask9sCancel,ask10sCancel,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels,cum_buy_market_order_volume,cum_sell_market_order_volume,cum_buy_market_order_amount,cum_sell_market_order_amount,cum_buy_market_trade_volume,cum_sell_market_trade_volume,cum_buy_market_trade_amount,cum_sell_market_trade_amount,cum_buy_aggLimit_onNBBO_order_volume,cum_sell_aggLimit_onNBBO_order_volume,cum_buy_aggLimit_onNBBO_order_amount,cum_sell_aggLimit_onNBBO_order_amount,cum_buy_aggLimit_onNBBO_trade_volume,cum_sell_aggLimit_onNBBO_trade_volume,cum_buy_aggLimit_onNBBO_trade_amount,cum_sell_aggLimit_onNBBO_trade_amount,cum_buy_aggLimit_improveNBBO_order_volume,cum_sell_aggLimit_improveNBBO_order_volume,cum_buy_aggLimit_improveNBBO_order_amount,cum_sell_aggLimit_improveNBBO_order_amount,cum_buy_aggLimit_improveNBBO_trade_volume,cum_sell_aggLimit_improveNBBO_trade_volume,cum_buy_aggLimit_improveNBBO_trade_amount,cum_sell_aggLimit_improveNBBO_trade_amount
1193,2300313,20200824,93002970000,1598232602970000,2020-08-24 09:30:02.970,1194,605287,0,-1,2590900,2.065643e+07,7.76,7.99,8.00,7.84,7.85,7.86,7.88,7.90,7.92,7.96,7.97,7.98,7.99,8.00,8.01,8.03,8.07,8.08,8.09,8.10,8.12,8.15,8.16,300,200,2000,4800,1500,300,10600,2100,12200,63200,31100,900,3700,8200,1800,1700,3200,1100,1600,2200,2,1,2,3,4,1,7,2,6,45,7,2,2,1,1,4,6,2,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,400,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3196.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1512600,1499000,7.086954,8.911091,594,366,133,72,900,0,7171.0,0.0,900,0,7171.0,0.0,82900,16200,660411.0,129236.00,82700,15800,658851.00,126040.00,764300,448400,6118497.0,3536264.00,761700,304500,6067612.00,2410074.00
2159,2300313,20200824,93014580000,1598232614580000,2020-08-24 09:30:14.580,2160,776065,0,-1,3140400,2.508588e+07,7.76,7.99,8.02,7.89,7.90,7.91,7.92,7.95,7.96,7.97,7.98,7.99,8.00,8.02,8.03,8.04,8.07,8.09,8.10,8.13,8.14,8.17,8.28,5900,30400,100,35800,3200,29900,2700,31500,341600,352000,1600,11400,1000,500,2900,3400,8200,1600,300,16500,3,19,1,4,2,21,3,14,186,72,1,1,1,1,1,1,1,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1600,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12800.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2658400,1720600,7.407672,8.985201,1031,355,143,70,4300,50900,34991.0,411235.0,4300,11100,34999.0,90232.0,102900,30400,820792.0,242727.00,102700,30000,819232.00,239531.00,825400,684800,6612213.0,5423183.00,822800,534400,6557849.00,4251571.00
2301,2300313,20200824,93020070000,1598232620070000,2020-08-24 09:30:20.070,2302,815710,1,2,3229500,2.581484e+07,7.76,7.99,8.38,7.99,8.00,8.01,8.02,8.03,8.04,8.07,8.08,8.10,8.28,8.38,8.39,8.40,8.43,8.45,8.46,8.47,8.48,8.49,8.50,316200,343100,24300,24600,48800,6300,6700,9200,8000,1400,9500,122700,2500,1000,15900,1900,45900,34200,13000,112100,181,72,7,5,20,2,8,2,1,1,3,3,2,1,3,1,4,6,2,28,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1000,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8300.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2736600,1636000,7.426815,9.021991,1075,327,152,55,29700,52800,240725.0,426554.0,29700,13000,243620.0,105551.0,116100,30400,926878.0,242727.00,111800,30000,892385.00,239531.00,836200,698500,6699574.0,5532862.00,833600,548100,6644862.00,4361800.00
3256,2300313,20200824,93047500000,1598232647500000,2020-08-24 09:30:47.500,3257,991470,0,-1,4228600,3.399178e+07,7.76,7.99,8.16,7.92,7.95,7.96,7.97,7.98,7.99,8.00,8.01,8.02,8.03,8.16,8.18,8.19,8.20,8.28,8.29,8.30,8.31,8.32,8.33,35800,3200,25500,2700,31000,290200,343500,15900,10300,28100,4400,300,56800,15000,54300,2100,70600,100,900,12400,4,2,16,3,13,163,73,20,7,9,2,2,4,2,10,3,21,1,1,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2000,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,16060.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2598000,1957200,7.399193,8.866252,1031,406,147,70,34400,91200,279505.0,738813.0,34400,50100,282400.0,407209.0,186700,46500,1505785.0,374258.00,181700,37400,1465553.00,299794.00,1258600,1120400,10199667.0,8957609.00,1224400,919000,9853653.00,7391827.00
3619,2300313,20200824,93058950000,1598232658950000,2020-08-24 09:30:58.950,3620,1060376,0,-1,4542400,3.656714e+07,7.76,7.99,8.20,8.08,8.09,8.10,8.11,8.12,8.14,8.15,8.16,8.17,8.20,8.24,8.25,8.28,8.29,8.30,8.32,8.33,8.34,8.35,8.36,200,34900,29600,900,8300,22100,15000,38100,1700,36100,13600,4600,10000,32000,80700,900,4700,40000,1700,200,1,14,5,1,2,1,1,13,1,22,2,2,3,1,23,1,4,1,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1100,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,9020.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2749200,1855500,7.454955,8.899215,1084,389,159,67,38800,91300,315874.0,739638.0,38800,50200,318769.0,408034.0,242500,54700,1966066.0,441772.00,207700,45400,1679107.00,365664.00,1413000,1271500,11468420.0,10191040.00,1348300,1061600,10868790.00,8563278.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54662,2300313,20200824,145649740000,1598252209740000,2020-08-24 14:56:49.740,54663,24217189,0,-1,27509223,2.398161e+08,7.76,7.99,9.31,9.22,9.23,9.24,9.25,9.26,9.27,9.28,9.29,9.30,9.31,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,4600,100,200,22100,4500,6800,16200,11100,57600,5018499,0,0,0,0,0,0,0,0,0,0,4,1,2,9,4,11,14,12,48,2241,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,19400,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,180614.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6776599,0,8.899220,0.000000,3309,0,221,0,446800,375500,3924037.0,3284433.0,362000,291900,3158815.0,2570334.0,4255900,4717831,37580441.0,42467537.01,3304676,3339831,29242341.84,30138037.01,10009900,10089116,87737231.0,87046995.04,8997100,8962616,78602258.72,77630687.74
54663,2300313,20200824,145650510000,1598252210510000,2020-08-24 14:56:50.510,54664,24218851,0,-1,27509223,2.398161e+08,7.76,7.99,9.31,9.22,9.23,9.24,9.25,9.26,9.27,9.28,9.29,9.30,9.31,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,4600,100,200,22100,4500,6800,16200,11100,57600,5017399,0,0,0,0,0,0,0,0,0,0,4,1,2,9,4,11,14,12,48,2240,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1100,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10241.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6775499,0,8.899153,0.000000,3308,0,221,0,446800,375500,3924037.0,3284433.0,362000,291900,3158815.0,2570334.0,4255900,4717831,37580441.0,42467537.01,3304676,3339831,29242341.84,30138037.01,10009900,10089116,87737231.0,87046995.04,8997100,8962616,78602258.72,77630687.74
54664,2300313,20200824,145650510000,1598252210510000,2020-08-24 14:56:50.510,54665,24218852,0,-1,27509223,2.398161e+08,7.76,7.99,9.31,9.22,9.23,9.24,9.25,9.26,9.27,9.28,9.29,9.30,9.31,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,4600,100,200,22100,4500,6800,16200,11100,57600,5016099,0,0,0,0,0,0,0,0,0,0,4,1,2,9,4,11,14,12,48,2239,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1300,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12103.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6774199,0,8.899075,0.000000,3307,0,221,0,446800,375500,3924037.0,3284433.0,362000,291900,3158815.0,2570334.0,4255900,4717831,37580441.0,42467537.01,3304676,3339831,29242341.84,30138037.01,10009900,10089116,87737231.0,87046995.04,8997100,8962616,78602258.72,77630687.74
54665,2300313,20200824,145652330000,1598252212330000,2020-08-24 14:56:52.330,54666,24222529,0,-1,27509223,2.398161e+08,7.76,7.99,9.31,9.22,9.23,9.24,9.25,9.26,9.27,9.28,9.29,9.30,9.31,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,4600,100,200,22100,4500,6800,16200,11100,57600,5015599,0,0,0,0,0,0,0,0,0,0,4,1,2,9,4,11,14,12,48,2238,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,500,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4655.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6773699,0,8.899044,0.000000,3306,0,221,0,446800,375500,3924037.0,3284433.0,362000,291900,3158815.0,2570334.0,4255900,4717831,37580441.0,42467537.01,3304676,3339831,29242341.84,30138037.01,10009900,10089116,87737231.0,87046995.04,8997100,8962616,78602258.72,77630687.74


In [48]:
re = pd.merge(data, data1, on=list(data1.columns[~data1.columns.isin(['datetime', 'total_bid_vwap', 'total_ask_vwap'])]), how='outer')

In [226]:
data[data['ordering'] <= 26].tail()

Unnamed: 0.1,Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,ApplSeqNum,bbo_improve,pass_filter,cum_volume,cum_amount,prev_close,open,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,bid10qInsert,bid9qInsert,bid8qInsert,bid7qInsert,bid6qInsert,bid5qInsert,bid4qInsert,bid3qInsert,bid2qInsert,bid1qInsert,ask1qInsert,ask2qInsert,ask3qInsert,ask4qInsert,ask5qInsert,ask6qInsert,ask7qInsert,ask8qInsert,ask9qInsert,ask10qInsert,bid10qCancel,bid9qCancel,bid8qCancel,bid7qCancel,bid6qCancel,bid5qCancel,bid4qCancel,bid3qCancel,bid2qCancel,bid1qCancel,ask1qCancel,ask2qCancel,ask3qCancel,ask4qCancel,ask5qCancel,ask6qCancel,ask7qCancel,ask8qCancel,ask9qCancel,ask10qCancel,bid10sCancel,bid9sCancel,bid8sCancel,bid7sCancel,bid6sCancel,bid5sCancel,bid4sCancel,bid3sCancel,bid2sCancel,bid1sCancel,ask1sCancel,ask2sCancel,ask3sCancel,ask4sCancel,ask5sCancel,ask6sCancel,ask7sCancel,ask8sCancel,ask9sCancel,ask10sCancel,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels,cum_buy_market_order_volume,cum_sell_market_order_volume,cum_buy_market_order_amount,cum_sell_market_order_amount,cum_buy_market_trade_volume,cum_sell_market_trade_volume,cum_buy_market_trade_amount,cum_sell_market_trade_amount,cum_buy_aggLimit_onNBBO_order_volume,cum_sell_aggLimit_onNBBO_order_volume,cum_buy_aggLimit_onNBBO_order_amount,cum_sell_aggLimit_onNBBO_order_amount,cum_buy_aggLimit_onNBBO_trade_volume,cum_sell_aggLimit_onNBBO_trade_volume,cum_buy_aggLimit_onNBBO_trade_amount,cum_sell_aggLimit_onNBBO_trade_amount,cum_buy_aggLimit_improveNBBO_order_volume,cum_sell_aggLimit_improveNBBO_order_volume,cum_buy_aggLimit_improveNBBO_order_amount,cum_sell_aggLimit_improveNBBO_order_amount,cum_buy_aggLimit_improveNBBO_trade_volume,cum_sell_aggLimit_improveNBBO_trade_volume,cum_buy_aggLimit_improveNBBO_trade_amount,cum_sell_aggLimit_improveNBBO_trade_amount
21,21,2300601,20200731,93000120000,1596159000120000,2020-07-31 09:30:00.120,22,435703,1,2,67717,15168156.0,226.65,224.0,224.0,222.23,222.3,222.31,222.5,223.0,223.01,223.04,223.1,223.23,224.0,224.01,224.02,224.03,224.04,225.0,225.01,226.0,226.01,226.54,226.64,2000,1000,100,100,6500,300,400,1000,700,1183,100,100,100,100,100,2400,600,100,100,3800,6,1,1,1,6,1,1,1,1,1,1,1,1,1,1,2,3,1,1,5,0,0,0,0,0,0,0,0,0,1183,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,123583,111500,217.837785,237.696422,170,152,87,79,0,0,0.0,0.0,0,0,0.0,0.0,9000,0,2016000.0,0.0,7817,0,1751008.0,0.0,400,1200,95103.0,267492.0,400,1200,89600.0,268348.0
22,22,2300601,20200731,93000130000,1596159000130000,2020-07-31 09:30:00.130,23,436593,0,-1,67717,15168156.0,226.65,224.0,224.0,222.23,222.3,222.31,222.5,223.0,223.01,223.04,223.1,223.23,224.0,224.01,224.02,224.03,224.04,225.0,225.01,226.0,226.01,226.54,226.64,2000,1000,100,100,6500,300,400,1000,700,1183,100,100,100,100,100,2400,600,100,100,3800,6,1,1,1,6,1,1,1,1,1,1,1,1,1,1,2,3,1,1,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,123483,111500,217.834317,237.696422,169,152,87,79,0,0,0.0,0.0,0,0,0.0,0.0,9000,0,2016000.0,0.0,7817,0,1751008.0,0.0,400,1200,95103.0,267492.0,400,1200,89600.0,268348.0
23,23,2300601,20200731,93000130000,1596159000130000,2020-07-31 09:30:00.130,24,436912,1,2,67817,15190557.0,226.65,224.0,224.01,222.23,222.3,222.31,222.5,223.0,223.01,223.04,223.1,223.23,224.0,224.02,224.03,224.04,225.0,225.01,226.0,226.01,226.54,226.64,226.65,2000,1000,100,100,6500,300,400,1000,700,1183,100,100,100,100,2400,600,100,100,3800,300,6,1,1,1,6,1,1,1,1,1,1,1,1,1,2,3,1,1,5,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,123483,111400,217.834317,237.708707,169,151,87,78,0,0,0.0,0.0,0,0,0.0,0.0,9000,0,2016000.0,0.0,7817,0,1751008.0,0.0,500,1200,117553.0,267492.0,500,1200,112001.0,268348.0
24,24,2300601,20200731,93000130000,1596159000130000,2020-07-31 09:30:00.130,25,436942,1,0,67917,15212957.0,226.65,224.0,224.0,222.23,222.3,222.31,222.5,223.0,223.01,223.04,223.1,223.23,224.0,224.02,224.03,224.04,225.0,225.01,226.0,226.01,226.54,226.64,226.65,2000,1000,100,100,6500,300,400,1000,700,1083,100,100,100,100,2400,600,100,100,3800,300,6,1,1,1,6,1,1,1,1,1,1,1,1,1,2,3,1,1,5,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,123383,111400,217.82932,237.708707,169,151,87,78,0,0,0.0,0.0,0,0,0.0,0.0,9000,0,2016000.0,0.0,7817,0,1751008.0,0.0,500,1300,117553.0,289815.0,500,1300,112001.0,290748.0
25,25,2300601,20200731,93000130000,1596159000130000,2020-07-31 09:30:00.130,26,437219,1,2,68217,15280166.0,226.65,224.0,224.04,222.23,222.3,222.31,222.5,223.0,223.01,223.04,223.1,223.23,224.0,225.0,225.01,226.0,226.01,226.54,226.64,226.65,227.0,227.88,227.9,2000,1000,100,100,6500,300,400,1000,700,1083,100,2400,600,100,100,3800,300,1800,100,200,6,1,1,1,6,1,1,1,1,1,1,2,3,1,1,5,3,4,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,123383,111100,217.82932,237.745644,169,148,87,75,0,0,0.0,0.0,0,0,0.0,0.0,9000,0,2016000.0,0.0,7817,0,1751008.0,0.0,800,1300,185056.0,289815.0,900,1200,201610.0,268348.0


In [215]:
order[order['ApplSeqNum'] == 437219]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,order_side,order_type,order_price,order_qty
504,2300601,20200731,93000130000,1596159000130000,2020-07-31 09:30:00.130,437219,1,2,225.01,300


In [229]:
order[order['ApplSeqNum'] == 435703]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,order_side,order_type,order_price,order_qty
501,2300601,20200731,93000120000,1596159000120000,2020-07-31 09:30:00.120,435703,1,2,224.0,9000


In [228]:
trade[trade['BidApplSeqNum'] == 435703]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
167,2300601,20200731,93000120000,1596159000120000,2020-07-31 09:30:00.120,435704,1,0,224.0,2917,435703,317312
168,2300601,20200731,93000120000,1596159000120000,2020-07-31 09:30:00.120,435705,1,0,224.0,2500,435703,317319
169,2300601,20200731,93000120000,1596159000120000,2020-07-31 09:30:00.120,435706,1,0,224.0,1300,435703,317328
170,2300601,20200731,93000120000,1596159000120000,2020-07-31 09:30:00.120,435707,1,0,224.0,600,435703,325670
171,2300601,20200731,93000120000,1596159000120000,2020-07-31 09:30:00.120,435708,1,0,224.0,500,435703,429977
174,2300601,20200731,93000130000,1596159000130000,2020-07-31 09:30:00.130,436943,1,0,224.0,100,435703,436942
180,2300601,20200731,93000150000,1596159000150000,2020-07-31 09:30:00.150,439035,1,0,224.0,1083,435703,439034


In [225]:
trade[(trade['ApplSeqNum'] >= 436942) & (trade['ApplSeqNum'] <= 437219)]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
174,2300601,20200731,93000130000,1596159000130000,2020-07-31 09:30:00.130,436943,1,0,224.0,100,435703,436942


In [141]:
order[(order['ApplSeqNum'] <= 17972382) & (order['ApplSeqNum'] >= 17971996)]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,order_side,order_type,order_price,order_qty
38106,2300601,20200731,131543430000,1596172543430000,2020-07-31 13:15:43.430,17971996,1,2,221.15,100
38107,2300601,20200731,131543890000,1596172543890000,2020-07-31 13:15:43.890,17972371,1,1,249.32,5000


In [121]:
order[order['ApplSeqNum'] == 352978]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,order_side,order_type,order_price,order_qty
446,2300601,20200731,92451740000,1596158691740000,2020-07-31 09:24:51.740,352978,1,2,223.1,1000


In [122]:
trade[trade['BidApplSeqNum'] == 352978]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
202,2300601,20200731,93000280000,1596159000280000,2020-07-31 09:30:00.280,460425,1,0,223.1,500,352978,460423
208,2300601,20200731,93000350000,1596159000350000,2020-07-31 09:30:00.350,468970,1,0,223.1,100,352978,468969
209,2300601,20200731,93000350000,1596159000350000,2020-07-31 09:30:00.350,468975,4,0,0.0,400,352978,0


In [90]:
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"

pd.set_option('max_columns', 200)
db1 = DB("192.168.10.178", database_name, user, password)
order = db1.read('md_order', 20200731, 20200731, symbol=[2300601])

In [42]:
trade[trade['BidApplSeqNum'] == 391691]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
393,2002049,20200731,93000090000,1596159000090000,2020-07-31 09:30:00.090,391692,1,0,125.25,100,391691,24807


In [40]:
order[order['ApplSeqNum'] == 391691]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,order_side,order_type,order_price,order_qty
2956,2002049,20200731,93000090000,1596159000090000,2020-07-31 09:30:00.090,391691,1,2,136.38,100


In [10]:
data1.groupby('order_type')['order_price'].size()

order_type
1      1691
2    381131
3        54
Name: order_price, dtype: int64

In [15]:
pd.set_option('max_rows', 400)
data1.dtypes

skey                                                   int32
date                                                   int32
time                                                   int64
clockAtArrival                                         int64
datetime                                      datetime64[ns]
ordering                                               int32
ApplSeqNum                                             int32
bbo_improve                                            int32
pass_filter                                            int32
cum_volume                                             int64
cum_amount                                           float64
prev_close                                           float64
open                                                 float64
close                                                float64
bid10p                                               float64
bid9p                                                float64
bid8p                   

In [8]:
pd.set_option('max_rows', 200)
data1.head(100)

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
0,2000006,20201015,91502200000,1602724502200000,2020-10-15 09:15:02.200,90945,4,0,0.0,200,90943,0
1,2000006,20201015,91505590000,1602724505590000,2020-10-15 09:15:05.590,128773,4,0,0.0,500,112504,0
2,2000006,20201015,91506070000,1602724506070000,2020-10-15 09:15:06.070,132322,4,0,0.0,100,50890,0
3,2000006,20201015,91516990000,1602724516990000,2020-10-15 09:15:16.990,147890,4,0,0.0,1000,50995,0
4,2000006,20201015,91525550000,1602724525550000,2020-10-15 09:15:25.550,148598,4,0,0.0,5000,0,111046
5,2000006,20201015,91539720000,1602724539720000,2020-10-15 09:15:39.720,149938,4,0,0.0,100,113595,0
6,2000006,20201015,91635560000,1602724595560000,2020-10-15 09:16:35.560,157360,4,0,0.0,200,0,76135
7,2000006,20201015,91645060000,1602724605060000,2020-10-15 09:16:45.060,158894,4,0,0.0,3500,0,156279
8,2000006,20201015,91645110000,1602724605110000,2020-10-15 09:16:45.110,158907,4,0,0.0,5000,0,153902
9,2000006,20201015,91645150000,1602724605150000,2020-10-15 09:16:45.150,158915,4,0,0.0,5000,0,152750


In [67]:
datetime.datetime.fromtimestamp(1577928300930000/1e6).strftime("%Y-%m-%d %H:%M:%S %f")

'2020-01-02 09:25:00 930000'

In [51]:
datetime.datetime.strptime('20200612083048000', '%Y%m%d%H%M%S%f').timestamp() * 1e6

1591921848000000.0

In [50]:
import numpy as np
data1['datetime'].apply(lambda x: np.int64((x + datetime.timedelta(hours=-8)).timestamp() * 1e6))

0       1591921848000000
1       1591921908000000
2       1591921968000000
3       1591922028000000
4       1591922088000000
              ...       
5037    1591949403000000
5038    1591949463000000
5039    1591949523000000
5040    1591949583000000
5041    1591949643000000
Name: datetime, Length: 5042, dtype: int64

In [15]:
d = pd.read_csv(r"F:\data\20200929\logs_20200929_zt_88_03_day_88data\mdLog_SH_20200929_0900.csv")

In [3]:
pd.set_option('max_rows', 300)
data1.dtypes

skey                                                   int32
date                                                   int32
time                                                   int64
clockAtArrival                                         int64
datetime                                      datetime64[ns]
ordering                                               int32
ApplSeqNum                                             int32
bbo_improve                                            int32
pass_filter                                            int32
cum_volume                                             int64
cum_amount                                           float64
prev_close                                           float64
open                                                 float64
close                                                float64
bid10p                                               float64
bid9p                                                float64
bid8p                   

In [4]:
import sys
display(sys.getsizeof(data1) / (1024 ** 3))
display(sys.getsizeof(data1[['bid1qList']]) / (1024 ** 3))
display(sys.getsizeof(data1['bid1qList'].apply(lambda x: x[:500])) / (1024 ** 3))
display(sys.getsizeof(data1['bid1qList'].apply(lambda x: ','.join(map(str, x)))) / (1024 ** 3))

13.10368887335062

12.48599537461996

0.2984924539923668

7.385638653300703

In [5]:
import sys
sys.getsizeof(data1) / (1024 ** 3)

13.10368887335062

In [6]:
data1['bid1qList'] = data1['bid1qList'].apply(lambda x: x[:500])
sys.getsizeof(data1) / (1024 ** 2)

938.1744155883789

In [9]:
sys.getsizeof(data1) / (1024 ** 3)

0.922108419239521

In [8]:
data1['bid1qlen'] = data1['bid1qList'].apply(lambda x: len(x))
data1['bid2qlen'] = data1['bid2qList'].apply(lambda x: len(x))
data1['bid3qlen'] = data1['bid3qList'].apply(lambda x: len(x))
data1['bid4qlen'] = data1['bid4qList'].apply(lambda x: len(x))
data1['bid5qlen'] = data1['bid5qList'].apply(lambda x: len(x))
data1['bid6qlen'] = data1['bid6qList'].apply(lambda x: len(x))
data1['bid7qlen'] = data1['bid7qList'].apply(lambda x: len(x))
data1['bid8qlen'] = data1['bid8qList'].apply(lambda x: len(x))
data1['bid9qlen'] = data1['bid9qList'].apply(lambda x: len(x))
data1['bid10qlen'] = data1['bid10qList'].apply(lambda x: len(x))


In [18]:
data1['bid10qlen'].unique()

array([100], dtype=int64)

In [65]:
data1['bid4qlen'].describe([0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]).astype('int64')

count    234064
mean         43
std          44
min           1
5%            7
10%          11
20%          16
30%          21
40%          27
50%          32
60%          39
70%          47
80%          61
90%          88
95%         117
99%         231
max         610
Name: bid4qlen, dtype: int64

In [3]:
data1.groupby('pass_filter')['date'].size()

pass_filter
-1    12768
 0     1276
 1      621
Name: date, dtype: int64

In [54]:
data1.groupby('pass_filter')['date'].size()

pass_filter
-1    12768
 0     1276
 1      621
Name: date, dtype: int64

In [23]:
data1[data1['bbo_improve'] == 0]['pass_filter'].unique()

array([-1], dtype=int64)

In [115]:
k1 = data2.groupby('BidApplSeqNum')['trade_qty'].sum().reset_index()
k1 = k1.rename(columns={'trade_qty':"total_qty"})
k2 = data2.drop_duplicates('BidApplSeqNum', keep='last')
k1 = pd.merge(k1, k2, on='BidApplSeqNum')
k1 = k1[k1['BidApplSeqNum'] != 0]

re = pd.merge(data1, k1, left_on='ApplSeqNum', right_on='BidApplSeqNum', how='outer')
re[re['trade_qty'].isnull()]

display(re[re['trade_qty'].isnull()].shape[0])
display(re[~re['trade_qty'].isnull()].shape[0])

11988

9466

In [119]:
import numpy as np
re['wait_time'] = np.where(~re['trade_qty'].isnull(), re['clockAtArrival_y'] - re['clockAtArrival_x'], 10000000000)
re

Unnamed: 0,skey_x,date_x,time_x,clockAtArrival_x,datetime_x,ApplSeqNum_x,order_side,order_type,order_price,order_qty,BidApplSeqNum,total_qty,skey_y,date_y,time_y,clockAtArrival_y,datetime_y,ApplSeqNum_y,trade_type,trade_flag,trade_price,trade_qty,OfferApplSeqNum,wait_time
0,2002385,20200102,91500000000,1577927700000000,2020-01-02 09:15:00.000,1,2,2,5.48,7500,,,,,,,NaT,,,,,,,1.000000e+10
1,2002385,20200102,91500000000,1577927700000000,2020-01-02 09:15:00.000,4,1,2,5.48,202000,4.0,202000.0,2002385.0,20200102.0,9.250000e+10,1.577928e+15,2020-01-02 09:25:00.000,277512.0,1.0,0.0,5.48,50950.0,234042.0,6.000000e+08
2,2002385,20200102,91500000000,1577927700000000,2020-01-02 09:15:00.000,8,1,2,5.48,912500,8.0,912500.0,2002385.0,20200102.0,9.250000e+10,1.577928e+15,2020-01-02 09:25:00.000,277685.0,1.0,0.0,5.48,59600.0,7995.0,6.000000e+08
3,2002385,20200102,91500000000,1577927700000000,2020-01-02 09:15:00.000,14,1,2,5.48,765900,14.0,765900.0,2002385.0,20200102.0,9.194274e+10,1.577928e+15,2020-01-02 09:19:42.740,173130.0,4.0,0.0,0.00,765900.0,0.0,2.827400e+08
4,2002385,20200102,91500000000,1577927700000000,2020-01-02 09:15:00.000,15,1,2,5.48,981900,15.0,981900.0,2002385.0,20200102.0,9.250000e+10,1.577928e+15,2020-01-02 09:25:00.000,277895.0,1.0,0.0,5.48,350.0,154316.0,6.000000e+08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21449,2002385,20200102,145953200000,1577948393200000,2020-01-02 14:59:53.200,17671141,1,2,5.48,300,,,,,,,NaT,,,,,,,1.000000e+10
21450,2002385,20200102,145954130000,1577948394130000,2020-01-02 14:59:54.130,17671470,1,2,5.48,7000,,,,,,,NaT,,,,,,,1.000000e+10
21451,2002385,20200102,145954140000,1577948394140000,2020-01-02 14:59:54.140,17671476,1,2,5.48,200,,,,,,,NaT,,,,,,,1.000000e+10
21452,2002385,20200102,145955180000,1577948395180000,2020-01-02 14:59:55.180,17671818,1,2,5.48,300,,,,,,,NaT,,,,,,,1.000000e+10


In [83]:
data2[data2['trade_type'] == 1]['time'].describe([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]).astype('int64')

count            8953
mean     100813481544
std       15255085448
min       92500000000
10%       92500000000
20%       93330060000
30%       93643690000
40%       93806838000
50%       94354940000
60%       94624820000
70%       94751746000
80%      102350502000
90%      131051958000
max      150000000000
Name: time, dtype: int64

In [76]:
re[re['trade_qty1']  !=  re['order_qty']]

Unnamed: 0,skey_x,date_x,time_x,clockAtArrival_x,datetime_x,ApplSeqNum_x,order_side,order_type,order_price,order_qty,skey_y,date_y,time_y,clockAtArrival_y,datetime_y,ApplSeqNum_y,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum,trade_qty1
0,2002385.0,20200102.0,9.150000e+10,1.577928e+15,2020-01-02 09:15:00,4.0,1.0,2.0,5.48,202000.0,2002385.0,20200102.0,9.250000e+10,1.577928e+15,2020-01-02 09:25:00.000,277407.0,1.0,0.0,5.48,3000.0,4.0,295.0,3000.0
1,2002385.0,20200102.0,9.150000e+10,1.577928e+15,2020-01-02 09:15:00,4.0,1.0,2.0,5.48,202000.0,2002385.0,20200102.0,9.250000e+10,1.577928e+15,2020-01-02 09:25:00.000,277408.0,1.0,0.0,5.48,100.0,4.0,196333.0,3100.0
2,2002385.0,20200102.0,9.150000e+10,1.577928e+15,2020-01-02 09:15:00,4.0,1.0,2.0,5.48,202000.0,2002385.0,20200102.0,9.250000e+10,1.577928e+15,2020-01-02 09:25:00.000,277409.0,1.0,0.0,5.48,17800.0,4.0,226344.0,20900.0
3,2002385.0,20200102.0,9.150000e+10,1.577928e+15,2020-01-02 09:15:00,4.0,1.0,2.0,5.48,202000.0,2002385.0,20200102.0,9.250000e+10,1.577928e+15,2020-01-02 09:25:00.000,277410.0,1.0,0.0,5.48,1700.0,4.0,246108.0,22600.0
4,2002385.0,20200102.0,9.150000e+10,1.577928e+15,2020-01-02 09:15:00,4.0,1.0,2.0,5.48,202000.0,2002385.0,20200102.0,9.250000e+10,1.577928e+15,2020-01-02 09:25:00.000,277411.0,1.0,0.0,5.48,300.0,4.0,136533.0,22900.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17587,,,,,NaT,,,,,,2002385.0,20200102.0,1.456524e+11,1.577948e+15,2020-01-02 14:56:52.380,17581916.0,4.0,0.0,0.00,200.0,14511644.0,0.0,200.0
17588,,,,,NaT,,,,,,2002385.0,20200102.0,1.456533e+11,1.577948e+15,2020-01-02 14:56:53.300,17583345.0,4.0,0.0,0.00,400.0,16343554.0,0.0,400.0
17589,,,,,NaT,,,,,,2002385.0,20200102.0,1.456553e+11,1.577948e+15,2020-01-02 14:56:55.280,17586452.0,4.0,0.0,0.00,100.0,13654373.0,0.0,100.0
17590,,,,,NaT,,,,,,2002385.0,20200102.0,1.456554e+11,1.577948e+15,2020-01-02 14:56:55.440,17586637.0,4.0,0.0,0.00,200.0,15279416.0,0.0,200.0


In [66]:
k2 = data1[(data1['time'] <= 94736560000) & (data1['order_side'] == 2)]
k2

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,order_side,order_type,order_price,order_qty
0,2002385,20200102,91500000000,1577927700000000,2020-01-02 09:15:00.000,1,2,2,5.48,7500
40,2002385,20200102,91500000000,1577927700000000,2020-01-02 09:15:00.000,295,2,2,4.48,3000
70,2002385,20200102,91500010000,1577927700010000,2020-01-02 09:15:00.010,582,2,2,5.12,1000
102,2002385,20200102,91500020000,1577927700020000,2020-01-02 09:15:00.020,1157,2,2,5.00,100
103,2002385,20200102,91500020000,1577927700020000,2020-01-02 09:15:00.020,1158,2,2,5.06,100
...,...,...,...,...,...,...,...,...,...,...
11629,2002385,20200102,94734140000,1577929654140000,2020-01-02 09:47:34.140,3325995,2,2,5.48,4600
11633,2002385,20200102,94734810000,1577929654810000,2020-01-02 09:47:34.810,3327012,2,2,5.48,1000
11636,2002385,20200102,94735950000,1577929655950000,2020-01-02 09:47:35.950,3328815,2,2,5.48,1000
11637,2002385,20200102,94735970000,1577929655970000,2020-01-02 09:47:35.970,3328847,2,2,5.48,1000


In [37]:
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"

pd.set_option('max_columns', 200)
db1 = DB("192.168.10.178", database_name, user, password)
data1 = db1.read('md_order', 20200820, 20200820)
re = data1.groupby('skey')['time'].size().sort_values()

skey
2000679       597
2002692       655
2002513       695
2002200       754
2000780       833
            ...  
2002108    268762
2000725    286165
2002400    322085
2002241    341575
2002506    412091
Name: time, Length: 2235, dtype: int64

In [43]:
re.describe()

count      2235.000000
mean      20866.561521
std       26766.769040
min         597.000000
25%        7090.000000
50%       13274.000000
75%       24758.500000
max      412091.000000
Name: time, dtype: float64

In [51]:
re[re < 20000]

skey
2000679      597
2002692      655
2002513      695
2002200      754
2000780      833
           ...  
2300296    19933
2300091    19947
2300548    19964
2300132    19966
2300850    19966
Name: time, Length: 1511, dtype: int64

In [53]:
data1 = db1.read('md_snapshot_l2', 20200820, 20200820, 2300296)
data1

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,has_missing,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,high,low,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,bid1Top1q,bid1Top2q,bid1Top3q,bid1Top4q,bid1Top5q,bid1Top6q,bid1Top7q,bid1Top8q,bid1Top9q,bid1Top10q,bid1Top11q,bid1Top12q,bid1Top13q,bid1Top14q,bid1Top15q,bid1Top16q,bid1Top17q,bid1Top18q,bid1Top19q,bid1Top20q,bid1Top21q,bid1Top22q,bid1Top23q,bid1Top24q,bid1Top25q,bid1Top26q,bid1Top27q,bid1Top28q,bid1Top29q,bid1Top30q,bid1Top31q,bid1Top32q,bid1Top33q,bid1Top34q,bid1Top35q,bid1Top36q,bid1Top37q,bid1Top38q,bid1Top39q,bid1Top40q,bid1Top41q,bid1Top42q,bid1Top43q,bid1Top44q,bid1Top45q,bid1Top46q,bid1Top47q,bid1Top48q,bid1Top49q,bid1Top50q,ask1Top1q,ask1Top2q,ask1Top3q,ask1Top4q,ask1Top5q,ask1Top6q,ask1Top7q,ask1Top8q,ask1Top9q,ask1Top10q,ask1Top11q,ask1Top12q,ask1Top13q,ask1Top14q,ask1Top15q,ask1Top16q,ask1Top17q,ask1Top18q,ask1Top19q,ask1Top20q,ask1Top21q,ask1Top22q,ask1Top23q,ask1Top24q,ask1Top25q,ask1Top26q,ask1Top27q,ask1Top28q,ask1Top29q,ask1Top30q,ask1Top31q,ask1Top32q,ask1Top33q,ask1Top34q,ask1Top35q,ask1Top36q,ask1Top37q,ask1Top38q,ask1Top39q,ask1Top40q,ask1Top41q,ask1Top42q,ask1Top43q,ask1Top44q,ask1Top45q,ask1Top46q,ask1Top47q,ask1Top48q,ask1Top49q,ask1Top50q,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels,bid_trade_max_duration,ask_trade_max_duration,cum_canceled_buy_orders,cum_canceled_buy_volume,cum_canceled_buy_amount,cum_canceled_sell_orders,cum_canceled_sell_volume,cum_canceled_sell_amount
0,2300296,20200820,90109000000,1597885269000000,2020-08-20 09:01:09,1,0,0,0,0.000000e+00,6.58,0.0,0.00,0.00,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00,0.00,0,0,0,0,0,0,0,0,0.0,0,0,0.0
1,2300296,20200820,90209000000,1597885329000000,2020-08-20 09:02:09,2,0,0,0,0.000000e+00,6.58,0.0,0.00,0.00,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00,0.00,0,0,0,0,0,0,0,0,0.0,0,0,0.0
2,2300296,20200820,90309000000,1597885389000000,2020-08-20 09:03:09,3,0,0,0,0.000000e+00,6.58,0.0,0.00,0.00,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00,0.00,0,0,0,0,0,0,0,0,0.0,0,0,0.0
3,2300296,20200820,90409000000,1597885449000000,2020-08-20 09:04:09,4,0,0,0,0.000000e+00,6.58,0.0,0.00,0.00,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00,0.00,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4,2300296,20200820,90509000000,1597885509000000,2020-08-20 09:05:09,5,0,0,0,0.000000e+00,6.58,0.0,0.00,0.00,0.0,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00,0.00,0,0,0,0,0,0,0,0,0.0,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4678,2300296,20200820,145936000000,1597906776000000,2020-08-20 14:59:36,4679,0,12479,29821188,1.949731e+08,6.58,6.5,6.63,6.47,6.5,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,6.50,6.5,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,0,0,386300,386300,122647,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00,0.00,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4679,2300296,20200820,145945000000,1597906785000000,2020-08-20 14:59:45,4680,0,12479,29821188,1.949731e+08,6.58,6.5,6.63,6.47,6.5,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,6.50,6.5,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,0,0,411700,411700,112747,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00,0.00,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4680,2300296,20200820,145954000000,1597906794000000,2020-08-20 14:59:54,4681,0,12479,29821188,1.949731e+08,6.58,6.5,6.63,6.47,6.5,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,6.50,6.5,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,0,0,431500,431500,99147,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00,0.00,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4681,2300296,20200820,150003000000,1597906803000000,2020-08-20 15:00:03,4682,0,12612,30260588,1.978292e+08,6.58,6.5,6.63,6.47,6.5,6.4,6.41,6.42,6.43,6.44,6.45,6.46,6.47,6.48,6.49,6.5,6.51,6.52,6.53,6.54,6.55,6.56,6.57,6.58,6.59,208600,138700,89500,106400,70400,421900,259100,147370,204300,430097,116247,72300,135000,118400,129450,33400,35400,61588,124700,51400,82,34,24,20,21,108,78,49,82,102,17,16,15,20,14,11,14,21,31,15,1897,4200,400,1000,44000,6000,200,6800,5000,1000,200,2500,500,200,2000,1000,3000,500,1500,23300,12500,1000,8500,1000,300,11700,500,1000,2000,300,500,1000,1000,500,2000,2000,30000,200,1300,1000,1000,2000,2000,1900,5000,600,2000,3000,6500,100,4647,40600,300,5500,12300,400,10000,3000,500,1200,3000,17200,100,10000,4300,200,3000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3294767,6609335,6.37,6.89,0,0,0,0,0,0,0,0,0.0,0,0,0.0


In [11]:
kk = pd.read_csv(r'\\192.168.10.34\trading\dailyRawData\20200921\logs_20200921_zs_96_03_day_pcap\mdTradePcap_SZ_20200921_0900.csv')

In [12]:
kk[kk['ID'] == 2000001]

Unnamed: 0,clockAtArrival,sequenceNo,ID,time,ApplSeqNum,ExecType,TradePrice,TradeQty,TradeMoney,BidApplSeqNum,OfferApplSeqNum
142,1600650918406443,694810,2000001,91500050,3038,4,0,100,0,3035,0
143,1600650918406443,694812,2000001,91500050,3040,4,0,100,0,0,3039
537,1600650919690807,975430,2000001,91500910,56888,4,0,100,0,0,56887
728,1600650920069260,1056449,2000001,91501240,72312,4,0,100,0,23337,0
829,1600650920279388,1100163,2000001,91501460,81280,4,0,100,0,80790,0
...,...,...,...,...,...,...,...,...,...,...,...
47632133,1600671618850303,238876953,2000001,150000000,24371732,F,158600,500,79300000,24359833,24369801
47632134,1600671618850303,238876954,2000001,150000000,24371733,F,158600,200,31720000,24362569,24369801
47632135,1600671618850303,238876955,2000001,150000000,24371734,F,158600,1216,192857600,24364428,24369801
47632136,1600671618850303,238876956,2000001,150000000,24371735,F,158600,784,124342400,24364428,23943565


In [4]:
data1['max_time'] = data1.groupby('skey')['time'].transform('max')
data1[data1['max_time'] == 145312630000]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum,max_time
2524818,1600265,20200922,93003080000,1600738203080000,2020-09-22 09:30:03.080,34624,1,2,20.62,500,116743,196610,145312630000
2524819,1600265,20200922,93006060000,1600738206060000,2020-09-22 09:30:06.060,44254,1,1,20.66,400,226493,35248,145312630000
2524820,1600265,20200922,93006060000,1600738206060000,2020-09-22 09:30:06.060,44255,1,1,20.66,500,226493,35950,145312630000
2524821,1600265,20200922,93354210000,1600738434210000,2020-09-22 09:33:54.210,194260,1,2,20.63,500,408626,500404,145312630000
2524822,1600265,20200922,93354210000,1600738434210000,2020-09-22 09:33:54.210,194261,1,2,20.63,2000,462683,500404,145312630000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2524902,1600265,20200922,145151830000,1600757511830000,2020-09-22 14:51:51.830,2718921,1,2,20.67,68,4634452,4767267,145312630000
2524903,1600265,20200922,145151830000,1600757511830000,2020-09-22 14:51:51.830,2718922,1,2,20.66,432,2090019,4767267,145312630000
2524904,1600265,20200922,145221710000,1600757541710000,2020-09-22 14:52:21.710,2726914,1,2,20.66,500,2090019,4779229,145312630000
2524905,1600265,20200922,145244330000,1600757564330000,2020-09-22 14:52:44.330,2731997,1,2,20.66,400,2090019,4787920,145312630000


In [30]:
(set(data[data['skey'] == 2300088]['BidApplSeqNum'].unique()) | set(data[data['skey'] == 2300088]['OfferApplSeqNum'].unique())) - \
set(data1[data1['skey'] == 2300088]['ApplSeqNum'].unique())

{0}

In [32]:
data[data['skey'] == 2300088]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
0,2300088,20200824,91500400000,1598231700400000,2020-08-24 09:15:00.400,25557,4,0,0.00,2500,0,13611
1,2300088,20200824,91501640000,1598231701640000,2020-08-24 09:15:01.640,82723,4,0,0.00,1000,60831,0
2,2300088,20200824,91501690000,1598231701690000,2020-08-24 09:15:01.690,84932,4,0,0.00,1500,0,82873
3,2300088,20200824,91502490000,1598231702490000,2020-08-24 09:15:02.490,106544,4,0,0.00,1500,82340,0
4,2300088,20200824,91502510000,1598231702510000,2020-08-24 09:15:02.510,107014,4,0,0.00,500,85868,0
...,...,...,...,...,...,...,...,...,...,...,...,...
129274,2300088,20200824,150000000000,1598252400000000,2020-08-24 15:00:00.000,21965158,1,0,11.89,1000,21917658,21856241
129275,2300088,20200824,150000000000,1598252400000000,2020-08-24 15:00:00.000,21965159,1,0,11.89,300,21917658,21856389
129276,2300088,20200824,150000000000,1598252400000000,2020-08-24 15:00:00.000,21965160,1,0,11.89,1700,21917703,21856389
129277,2300088,20200824,150000000000,1598252400000000,2020-08-24 15:00:00.000,21965161,1,0,11.89,500,21917799,21856389


In [33]:
data1[data1['skey'] == 2300088]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,order_side,order_type,order_price,order_qty
0,2300088,20200824,91500010000,1598231700010000,2020-08-24 09:15:00.010,1526,1,2,11.31,100
1,2300088,20200824,91500030000,1598231700030000,2020-08-24 09:15:00.030,1743,2,2,12.85,600
2,2300088,20200824,91500030000,1598231700030000,2020-08-24 09:15:00.030,1866,2,2,13.00,100
3,2300088,20200824,91500030000,1598231700030000,2020-08-24 09:15:00.030,1889,2,2,12.98,1400
4,2300088,20200824,91500030000,1598231700030000,2020-08-24 09:15:00.030,1923,2,2,13.50,12000
...,...,...,...,...,...,...,...,...,...,...
147484,2300088,20200824,145958930000,1598252398930000,2020-08-24 14:59:58.930,21918727,2,2,12.02,1000
147485,2300088,20200824,145959120000,1598252399120000,2020-08-24 14:59:59.120,21918899,2,2,11.88,2000
147486,2300088,20200824,145959430000,1598252399430000,2020-08-24 14:59:59.430,21919130,2,2,11.79,7400
147487,2300088,20200824,145959600000,1598252399600000,2020-08-24 14:59:59.600,21919235,2,2,11.84,1000


In [21]:
kk[(kk['ID'] == 1000016) & (kk['time'] >= 145900000)].head(20)

Unnamed: 0,clockAtArrival,sequenceNo,ID,time,cum_volume,cum_amount,close,open,prevClose
691077,1600325943644492,46748979,1000016,145900810,23113348,638786687440000,32624513,32861485,32955836
691293,1600325948592781,46757132,1000016,145905780,23113348,638786687440000,32624513,32861485,32955836
691502,1600325953554871,46765913,1000016,145910480,23113348,638786687440000,32624513,32861485,32955836
691719,1600325958651908,46774437,1000016,145915850,23113348,638786687440000,32624513,32861485,32955836
691937,1600325963624995,46782665,1000016,145920580,23113348,638786687440000,32624513,32861485,32955836
692124,1600325966601673,46787883,1000016,145920580,23113348,638786687440000,32624513,32861485,32955836
692148,1600325968595206,46791515,1000016,145925430,23113348,638786687440000,32624513,32861485,32955836
692372,1600325973557492,46799020,1000016,145930720,23113348,638786687440000,32624513,32861485,32955836
692592,1600325978653830,46807654,1000016,145935710,23113348,638786687440000,32624513,32861485,32955836
692802,1600325983620301,46816529,1000016,145940430,23113348,638786687440000,32624513,32861485,32955836


In [20]:
pd.concat([data[(data['OfferApplSeqNum'] == 6105809) & (data['skey'] == 2300882)],
           data[(data['OfferApplSeqNum'] == 8505225) & (data['skey'] == 2300883)],
           data[(data['OfferApplSeqNum'] == 8603913) & (data['skey'] == 2300882)],
           data[(data['OfferApplSeqNum'] == 9492008) & (data['skey'] == 2300882)],
           data[(data['OfferApplSeqNum'] == 8568511) & (data['skey'] == 2300885)],
           data[(data['OfferApplSeqNum'] == 10194961) & (data['skey'] == 2300883)],
           data[(data['OfferApplSeqNum'] == 12879098) & (data['skey'] == 2300883)],
           data[(data['OfferApplSeqNum'] == 12921318) & (data['skey'] == 2300883)],
           data[(data['OfferApplSeqNum'] == 13724717) & (data['skey'] == 2300883)],
           data[(data['OfferApplSeqNum'] == 16132262) & (data['skey'] == 2300883)]])

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
11567,2300882,20200915,95203990000,1600134723990000,2020-09-15 09:52:03.990,6273101,4,0,0.0,2000,0,6105809
110126,2300883,20200915,102851710000,1600136931710000,2020-09-15 10:28:51.710,10484766,4,0,0.0,200,0,8505225
18803,2300882,20200915,101028300000,1600135828300000,2020-09-15 10:10:28.300,8658712,4,0,0.0,4400,0,8603913
20767,2300882,20200915,102115200000,1600136475200000,2020-09-15 10:21:15.200,9791613,4,0,0.0,2500,0,9492008
188425,2300885,20200915,101626480000,1600136186480000,2020-09-15 10:16:26.480,8620501,4,0,0.0,100,0,8568511
134714,2300883,20200915,105728330000,1600138648330000,2020-09-15 10:57:28.330,12899365,4,0,0.0,700,0,12879098
134997,2300883,20200915,105838300000,1600138718300000,2020-09-15 10:58:38.300,12971831,4,0,0.0,700,0,12921318
151512,2300883,20200915,133816690000,1600148296690000,2020-09-15 13:38:16.690,17944352,4,0,0.0,100,0,13724717
154173,2300883,20200915,135108950000,1600149068950000,2020-09-15 13:51:08.950,18981628,4,0,0.0,4900,0,16132262


In [31]:
pd.concat([data[(data['ApplSeqNum'] == 6105809) & (data['skey'] == 2300882)],
           data[(data['ApplSeqNum'] == 8505225) & (data['skey'] == 2300883)],
           data[(data['ApplSeqNum'] == 8603913) & (data['skey'] == 2300882)],
           data[(data['ApplSeqNum'] == 9492008) & (data['skey'] == 2300882)],
           data[(data['ApplSeqNum'] == 8568511) & (data['skey'] == 2300885)],
           data[(data['ApplSeqNum'] == 10194961) & (data['skey'] == 2300883)],
           data[(data['ApplSeqNum'] == 12879098) & (data['skey'] == 2300883)],
           data[(data['ApplSeqNum'] == 12921318) & (data['skey'] == 2300883)],
           data[(data['ApplSeqNum'] == 13724717) & (data['skey'] == 2300883)],
           data[(data['ApplSeqNum'] == 16132262) & (data['skey'] == 2300883)]])

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,order_side,order_type,order_price,order_qty
49852152,2300882,20200915,95105520000,1600134665520000,2020-09-15 09:51:05.520,6105809,2,2,500000.0,2000
49952920,2300883,20200915,100859150000,1600135739150000,2020-09-15 10:08:59.150,8505225,2,2,999999.0,200
49860280,2300882,20200915,101002660000,1600135802660000,2020-09-15 10:10:02.660,8603913,2,2,100000000.0,4400
49862228,2300882,20200915,101834500000,1600136314500000,2020-09-15 10:18:34.500,9492008,2,2,999999.0,2500
50045287,2300885,20200915,101559340000,1600136159340000,2020-09-15 10:15:59.340,8568511,2,2,460000.0,100
49957639,2300883,20200915,102519500000,1600136719500000,2020-09-15 10:25:19.500,10194961,2,2,888888.0,100
49985985,2300883,20200915,105712330000,1600138632330000,2020-09-15 10:57:12.330,12879098,2,2,888888.0,700
49986218,2300883,20200915,105749100000,1600138669100000,2020-09-15 10:57:49.100,12921318,2,2,888888.0,700
49990228,2300883,20200915,111037920000,1600139437920000,2020-09-15 11:10:37.920,13724717,2,2,999999.0,100
50000209,2300883,20200915,131413150000,1600146853150000,2020-09-15 13:14:13.150,16132262,2,2,900000.0,4900


In [1]:
import pandas as pd
data = pd.read_csv(r'F:\data\mdTradeLog_20200915_0844.csv')

  interactivity=interactivity, compiler=compiler, result=result)


In [2]:
pd.concat([data[(data['OfferApplSeqNum'] == 6105809) & (data['SecurityID'] == 300882)],
           data[(data['OfferApplSeqNum'] == 8505225) & (data['SecurityID'] == 300883)],
           data[(data['OfferApplSeqNum'] == 8603913) & (data['SecurityID'] == 300882)],
           data[(data['OfferApplSeqNum'] == 9492008) & (data['SecurityID'] == 300882)],
           data[(data['OfferApplSeqNum'] == 8568511) & (data['SecurityID'] == 300885)],
           data[(data['OfferApplSeqNum'] == 10194961) & (data['SecurityID'] == 300883)],
           data[(data['OfferApplSeqNum'] == 12879098) & (data['SecurityID'] == 300883)],
           data[(data['OfferApplSeqNum'] == 12921318) & (data['SecurityID'] == 300883)],
           data[(data['OfferApplSeqNum'] == 13724717) & (data['SecurityID'] == 300883)],
           data[(data['OfferApplSeqNum'] == 16132262) & (data['SecurityID'] == 300883)]])

Unnamed: 0,clockAtArrival,sequenceNo,exchId,securityType,__isRepeated,TransactTime,ChannelNo,ApplSeqNum,SecurityID,secid,mdSource,ExecType,TradeBSFlag,__origTickSeq,TradePrice,TradeQty,TradeMoney,BidApplSeqNum,OfferApplSeqNum
14155688,1600134776576415,41531007,2,1,0,95203990,2011,6273101,300882,2300882,12,4,N,-1,0,2000,0,0,6105809
25786435,1600136984318427,75274995,2,1,0,102851710,2011,10484766,300883,2300883,12,4,N,-1,0,200,0,0,8505225
20676122,1600135880908998,60167986,2,1,0,101028300,2011,8658712,300882,2300882,12,4,N,-1,0,4400,0,0,8603913
23894868,1600136527804621,69522893,2,1,0,102115200,2011,9791613,300882,2300882,12,4,N,-1,0,2500,0,0,9492008
22442436,1600136239089005,65342560,2,1,0,101626480,2013,8620501,300885,2300885,12,4,N,-1,0,100,0,0,8568511
32229831,1600138700957730,95201689,2,1,0,105728330,2011,12899365,300883,2300883,12,4,N,-1,0,700,0,0,12879098
32427718,1600138770928929,95871281,2,1,0,105838300,2011,12971831,300883,2300883,12,4,N,-1,0,700,0,0,12921318
46187593,1600148349419384,144415160,2,1,0,133816690,2011,17944352,300883,2300883,12,4,N,-1,0,100,0,0,13724717
49142335,1600149121686304,153361827,2,1,0,135108950,2011,18981628,300883,2300883,12,4,N,-1,0,4900,0,0,16132262


In [11]:
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"

pd.set_option('max_columns', 200)
db1 = DB("192.168.10.178", database_name, user, password)
dd = db1.read('md_snapshot_l2', start_date=20200813, end_date=20200813, symbol=[2000002])
dd.tail(100)

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,has_missing,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,high,low,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,bid1Top1q,bid1Top2q,bid1Top3q,bid1Top4q,bid1Top5q,bid1Top6q,bid1Top7q,bid1Top8q,bid1Top9q,bid1Top10q,bid1Top11q,bid1Top12q,bid1Top13q,bid1Top14q,bid1Top15q,bid1Top16q,bid1Top17q,bid1Top18q,bid1Top19q,bid1Top20q,bid1Top21q,bid1Top22q,bid1Top23q,bid1Top24q,bid1Top25q,bid1Top26q,bid1Top27q,bid1Top28q,bid1Top29q,bid1Top30q,bid1Top31q,bid1Top32q,bid1Top33q,bid1Top34q,bid1Top35q,bid1Top36q,bid1Top37q,bid1Top38q,bid1Top39q,bid1Top40q,bid1Top41q,bid1Top42q,bid1Top43q,bid1Top44q,bid1Top45q,bid1Top46q,bid1Top47q,bid1Top48q,bid1Top49q,bid1Top50q,ask1Top1q,ask1Top2q,ask1Top3q,ask1Top4q,ask1Top5q,ask1Top6q,ask1Top7q,ask1Top8q,ask1Top9q,ask1Top10q,ask1Top11q,ask1Top12q,ask1Top13q,ask1Top14q,ask1Top15q,ask1Top16q,ask1Top17q,ask1Top18q,ask1Top19q,ask1Top20q,ask1Top21q,ask1Top22q,ask1Top23q,ask1Top24q,ask1Top25q,ask1Top26q,ask1Top27q,ask1Top28q,ask1Top29q,ask1Top30q,ask1Top31q,ask1Top32q,ask1Top33q,ask1Top34q,ask1Top35q,ask1Top36q,ask1Top37q,ask1Top38q,ask1Top39q,ask1Top40q,ask1Top41q,ask1Top42q,ask1Top43q,ask1Top44q,ask1Top45q,ask1Top46q,ask1Top47q,ask1Top48q,ask1Top49q,ask1Top50q,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels,bid_trade_max_duration,ask_trade_max_duration,cum_canceled_buy_orders,cum_canceled_buy_volume,cum_canceled_buy_amount,cum_canceled_sell_orders,cum_canceled_sell_volume,cum_canceled_sell_amount
4838,2000002,20200813,145309000000,1597301589000000,2020-08-13 14:53:09,4839,0,70073,75280989,2152427000.0,28.71,28.83,29.06,28.31,28.6,28.51,28.52,28.53,28.54,28.55,28.56,28.57,28.58,28.59,28.6,28.61,28.62,28.63,28.64,28.65,28.66,28.67,28.68,28.69,28.7,30379,10400,22300,10200,81020,69200,49800,95238,166043,75000,88800,65700,33500,52630,97700,42900,20300,48200,49100,96000,26,17,19,23,84,55,49,67,60,24,6,47,41,41,75,44,13,49,31,100,3600,100,3400,1000,1200,400,200,100,20400,1800,11500,20000,100,200,300,200,1400,200,100,1000,1700,200,5100,800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,87500,300,300,100,300,300,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5565592,18946418,27.99,29.72,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4839,2000002,20200813,145312000000,1597301592000000,2020-08-13 14:53:12,4840,0,70083,75287389,2152610000.0,28.71,28.83,29.06,28.31,28.61,28.51,28.52,28.53,28.54,28.55,28.56,28.57,28.58,28.59,28.6,28.61,28.62,28.63,28.64,28.65,28.66,28.67,28.68,28.69,28.7,30379,10400,22300,10200,80220,64200,48900,94938,189843,76800,87300,65400,33500,53230,97200,42900,20300,48200,49100,96100,26,17,19,23,83,54,47,66,64,29,7,47,41,42,74,44,13,49,31,101,2700,1200,400,200,100,20400,1800,11500,20000,100,200,300,200,1400,200,100,1000,1700,200,5100,800,100,1500,1000,1700,500,200,400,1800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,85500,300,300,100,300,300,500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5575992,18945218,28.0,29.72,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4840,2000002,20200813,145315000000,1597301595000000,2020-08-13 14:53:15,4841,0,70099,75311289,2153293000.0,28.71,28.83,29.06,28.31,28.6,28.51,28.52,28.53,28.54,28.55,28.56,28.57,28.58,28.59,28.6,28.61,28.62,28.63,28.64,28.65,28.66,28.67,28.68,28.69,28.7,30379,10400,22300,10000,79820,63200,48900,94538,132343,61900,87100,65500,34100,55730,97200,43300,20300,48200,49100,96300,26,17,19,22,82,54,47,65,64,33,10,48,42,44,74,45,13,49,31,102,2200,1800,11500,20000,100,200,300,200,1400,200,100,1000,1700,200,5100,800,100,1500,1000,1700,500,200,400,1800,500,100,2500,700,800,1300,900,200,900,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,84400,300,300,100,300,300,500,500,300,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5497592,18945818,27.99,29.72,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4841,2000002,20200813,145318000000,1597301598000000,2020-08-13 14:53:18,4842,0,70120,75403389,2155928000.0,28.71,28.83,29.06,28.31,28.61,28.52,28.53,28.54,28.55,28.56,28.57,28.58,28.59,28.6,28.61,28.62,28.63,28.64,28.65,28.66,28.67,28.68,28.69,28.7,28.71,10400,22300,10000,79520,61200,47900,94138,136943,80100,14600,65500,34300,55730,97200,43300,20300,48200,49100,96300,30700,17,19,22,80,52,46,64,64,42,4,48,43,44,74,45,13,49,31,102,32,13800,200,100,500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2800,1000,400,100,100,100,9000,500,4000,8200,300,300,1500,9000,1200,200,200,300,2000,100,4000,1500,300,5000,200,500,500,100,100,1000,100,1500,400,400,100,300,500,400,100,1000,600,2000,600,100,500,1900,400,100,0,0,5522192,18858918,27.99,29.72,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4842,2000002,20200813,145321000000,1597301601000000,2020-08-13 14:53:21,4843,0,70138,75453189,2157353000.0,28.71,28.83,29.06,28.31,28.61,28.52,28.53,28.54,28.55,28.56,28.57,28.58,28.59,28.6,28.61,28.62,28.63,28.64,28.65,28.66,28.67,28.68,28.69,28.7,28.71,10400,22300,10000,78520,61100,48500,86638,127043,82700,20000,65500,34300,56430,101000,43300,24700,48200,49100,96300,30700,17,19,22,79,51,47,63,63,45,5,48,43,45,75,45,14,49,31,102,32,10300,3300,700,5000,700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2800,1000,400,100,100,100,9000,500,4000,8200,300,300,1500,9000,1200,200,200,300,2000,100,4000,1500,300,5000,200,500,500,100,100,1000,100,1500,400,400,100,300,500,400,100,1000,600,2000,600,100,500,1900,400,100,0,0,5505292,18863818,27.99,29.72,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4843,2000002,20200813,145324000000,1597301604000000,2020-08-13 14:53:24,4844,0,70163,75496889,2158603000.0,28.71,28.83,29.06,28.31,28.61,28.52,28.53,28.54,28.55,28.56,28.57,28.58,28.59,28.6,28.61,28.62,28.63,28.64,28.65,28.66,28.67,28.68,28.69,28.7,28.71,10400,22100,9800,78120,60800,48500,86938,124243,146400,35500,32700,34000,56430,98300,43300,24700,51100,49100,96300,26400,17,18,21,78,50,47,64,59,55,11,35,42,45,75,45,14,50,31,102,31,1700,700,5000,700,19200,100,1000,1700,1400,1100,2900,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5500,200,200,300,2000,100,4000,1500,300,5000,200,500,500,100,100,1000,100,1500,400,400,100,300,500,400,100,1000,600,2000,600,100,500,1900,400,100,200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5577892,18825618,28.0,29.73,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4844,2000002,20200813,145327000000,1597301607000000,2020-08-13 14:53:27,4845,0,70173,75503889,2158804000.0,28.71,28.83,29.06,28.31,28.61,28.52,28.53,28.54,28.55,28.56,28.57,28.58,28.59,28.6,28.61,28.62,28.63,28.64,28.65,28.66,28.67,28.68,28.69,28.7,28.71,10400,22100,9800,76720,60800,48500,70338,122443,148400,33300,31400,34400,56430,96400,43300,24700,51100,49100,95300,31400,17,18,21,76,50,47,59,59,58,14,38,42,45,75,45,14,50,31,101,32,2500,700,19200,100,1000,1700,1400,1100,2900,500,1000,600,400,200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3400,200,200,300,2000,100,4000,1500,300,5000,200,500,500,100,100,1000,100,1500,400,400,100,300,500,400,100,1000,600,2000,600,100,500,1900,400,100,200,300,100,400,0,0,0,0,0,0,0,0,0,0,0,0,5556292,18819818,28.0,29.73,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4845,2000002,20200813,145330000000,1597301610000000,2020-08-13 14:53:30,4846,0,70221,75541989,2159894000.0,28.71,28.83,29.06,28.31,28.62,28.53,28.54,28.55,28.56,28.57,28.58,28.59,28.6,28.61,28.62,28.63,28.64,28.65,28.66,28.67,28.68,28.69,28.7,28.71,28.72,22100,9800,74720,60800,44600,68338,121943,161700,53700,43000,34400,56930,96000,43300,24700,51100,49100,96000,31400,39400,18,21,74,50,45,59,58,59,20,1,42,46,74,45,14,50,31,103,32,15,43000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1000,2000,400,2000,300,5000,2000,300,100,700,400,1500,500,1000,500,1000,500,100,200,100,800,300,1000,100,400,1000,400,1000,500,100,400,400,500,400,400,300,500,800,4000,600,200,700,0,0,0,0,0,0,0,0,5623192,18789218,28.0,29.73,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4846,2000002,20200813,145333000000,1597301613000000,2020-08-13 14:53:33,4847,0,70231,75564289,2160532000.0,28.71,28.83,29.06,28.31,28.62,28.53,28.54,28.55,28.56,28.57,28.58,28.59,28.6,28.61,28.62,28.63,28.64,28.65,28.66,28.67,28.68,28.69,28.7,28.71,28.72,22100,9800,71320,60800,44500,66838,119343,161500,56000,21700,35400,56930,96000,43300,24700,201100,49100,96000,31400,39400,18,21,72,50,44,57,55,60,24,2,45,46,74,45,14,51,31,103,32,15,20700,1000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1000,2000,400,2000,300,5000,2000,300,100,700,400,1500,500,1000,500,1000,500,100,200,100,800,300,1000,100,400,1000,400,1000,500,100,400,400,500,400,400,300,500,800,4000,600,200,700,300,200,500,0,0,0,0,0,5589992,18926918,28.0,29.72,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4847,2000002,20200813,145336000000,1597301616000000,2020-08-13 14:53:36,4848,0,70240,75577889,2160922000.0,28.71,28.83,29.06,28.31,28.63,28.53,28.54,28.55,28.56,28.57,28.58,28.59,28.6,28.61,28.62,28.63,28.64,28.65,28.66,28.67,28.68,28.69,28.7,28.71,28.72,22100,9800,71320,60800,48500,58638,117043,159000,59000,25100,21500,56230,96000,43300,24700,201100,49100,96000,31400,39400,18,21,72,50,45,56,52,59,27,10,40,45,74,45,14,51,31,103,32,15,17300,1000,100,1000,1300,1300,1000,200,1800,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,500,2000,300,100,700,400,1500,500,1000,500,1000,500,100,200,100,800,300,1000,100,400,1000,400,1000,500,100,400,400,500,400,400,300,500,800,600,200,700,300,200,500,300,0,0,0,0,0,0,0,0,0,0,5585892,18904818,28.0,29.72,0,0,0,0,0,0,0,0,0.0,0,0,0.0


In [71]:
dd1 = pd.read_csv(r'A:\KR_daily_data\20200813\SZ\order\000002.csv')
dd1

Unnamed: 0,OrderQty,OrdType,TransactTime,ExpirationDays,Side,ApplSeqNum,Contactor,SendingTime,Price,ChannelNo,ExpirationType,ContactInfo,ConfirmID
0,300,2,20200813091500000,0,2,102,,20200813091500000,29.00,2014,0,,
1,300,2,20200813091500010,0,2,136,,20200813091500000,30.69,2014,0,,
2,100,2,20200813091500020,0,2,249,,20200813091500000,29.26,2014,0,,
3,500,2,20200813091500030,0,2,292,,20200813091500000,30.15,2014,0,,
4,200,2,20200813091500030,0,2,351,,20200813091500000,30.59,2014,0,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
117174,100,2,20200813145958870,0,1,22331275,,20200813145958000,28.62,2014,0,,
117175,2400,2,20200813145959040,0,2,22331387,,20200813145959000,25.84,2014,0,,
117176,1500,2,20200813145959110,0,2,22331451,,20200813145959000,28.60,2014,0,,
117177,91100,2,20200813145959400,0,2,22331644,,20200813145959000,28.54,2014,0,,


In [75]:
import datetime
datetime.datetime.fromtimestamp(1597301999660000/1e6).strftime("%Y-%m-%d %H:%M:%S %f")

'2020-08-13 14:59:59 660000'

In [155]:
for cols in oo.columns[oo.columns != 'datetime']:
    try:
        assert(oo[oo[cols] < 0].shape[0] == 0)
    except:
        print(cols)

time


In [116]:
        thisDate_str = '20191230'
        mdOrderLog = db1.read('md_order', start_date=thisDate_str, end_date=thisDate_str)
        mdTradeLog = db1.read('md_trade', start_date=thisDate_str, end_date=thisDate_str)
        re = mdTradeLog.groupby('skey')['date'].count().reset_index().sort_values(by='date', ascending=False)
        re = re.rename(columns={"date": "count"})
        re1 = mdOrderLog.groupby('skey')['date'].count().reset_index().sort_values(by='date', ascending=False)
        re1 = re1.rename(columns={'date': "count1"})
        re = pd.merge(re, re1, on='skey')
        re['cc'] = re['count'] + re['count1']
        re = re.sort_values(by='cc', ascending=False)
        test_list = re['skey'].values

In [124]:
len(test_list) - 1600

581

In [125]:
test_list[:581]

array([2000725, 2300059, 2002185, 2002797, 2000858, 2002079, 2002600,
       2002261, 2000063, 2002351, 2002456, 2000009, 2002291, 2000413,
       2000927, 2000002, 2002475, 2000955, 2000568, 2002241, 2000776,
       2300033, 2300088, 2000100, 2002939, 2002463, 2002714, 2000750,
       2300014, 2002460, 2002157, 2300346, 2002152, 2002655, 2000001,
       2002466, 2002273, 2000021, 2000651, 2002415, 2000333, 2002510,
       2002131, 2000723, 2002129, 2000977, 2000818, 2300136, 2002681,
       2000878, 2002945, 2300251, 2000783, 2300303, 2000066, 2002926,
       2002966, 2002230, 2300429, 2300750, 2000166, 2002436, 2300433,
       2002065, 2002156, 2002236, 2000425, 2000563, 2002048, 2300456,
       2002027, 2002340, 2300315, 2300073, 2002385, 2000040, 2002036,
       2300803, 2300296, 2000686, 2002673, 2002384, 2002049, 2300567,
       2000401, 2002733, 2300498, 2000823, 2000673, 2002555, 2000338,
       2002304, 2002428, 2300450, 2002547, 2002969, 2000876, 2000807,
       2002552, 2002

In [47]:
oo

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,cum_volume,cum_amount,close,bid30p,...,ask1Top49q,ask1Top50q,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels
0,2000006,20190102,-1989280512,1546392300000000,2019-01-02 09:25:00.000,113130,3300,17094.00,5.18,0.00,...,0,0,141600,364900,4.878750,5.571362,40,120,27,43
1,2000006,20190102,-1489280512,1546392600000000,2019-01-02 09:30:00.000,131232,3300,17094.00,5.18,0.00,...,0,0,151600,364900,4.895323,5.571362,41,120,27,43
2,2000006,20190102,-1489270512,1546392600010000,2019-01-02 09:30:00.010,131364,3300,17094.00,5.18,0.00,...,0,0,151800,364900,4.895711,5.571362,42,120,28,43
3,2000006,20190102,-1489240512,1546392600040000,2019-01-02 09:30:00.040,133151,3300,17094.00,5.18,0.00,...,0,0,151800,365300,4.895711,5.570955,42,121,28,44
4,2000006,20190102,-1489180512,1546392600100000,2019-01-02 09:30:00.100,137729,3300,17094.00,5.18,0.00,...,0,0,153300,365300,4.897123,5.570955,43,121,29,44
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1510589,2000006,20190807,145622090000,1565160982090000,2019-08-07 14:56:22.090,12030055,3433100,16926256.01,4.91,4.51,...,0,0,536300,2007237,4.811149,5.215554,157,286,48,73
1510590,2000006,20190807,145633380000,1565160993380000,2019-08-07 14:56:33.380,12044157,3434137,16931347.68,4.91,4.52,...,0,0,545263,2006200,4.812774,5.215712,158,285,48,73
1510591,2000006,20190807,145642150000,1565161002150000,2019-08-07 14:56:42.150,12054856,3434137,16931347.68,4.91,4.52,...,0,0,545263,2010700,4.812774,5.215140,158,286,48,73
1510592,2000006,20190807,145642520000,1565161002520000,2019-08-07 14:56:42.520,12055329,3434137,16931347.68,4.91,4.52,...,0,0,546163,2010700,4.812934,5.215140,159,286,48,73


In [61]:
pd.set_option('max_rows', 400)
oo.dtypes

skey                           int32
date                           int32
time                           int64
clockAtArrival                 int64
datetime              datetime64[ns]
ApplSeqNum                     int32
cum_volume                     int64
cum_amount                   float64
close                        float64
bid30p                       float64
bid29p                       float64
bid28p                       float64
bid27p                       float64
bid26p                       float64
bid25p                       float64
bid24p                       float64
bid23p                       float64
bid22p                       float64
bid21p                       float64
bid20p                       float64
bid19p                       float64
bid18p                       float64
bid17p                       float64
bid16p                       float64
bid15p                       float64
bid14p                       float64
bid13p                       float64
b

In [58]:
oo[oo['time'] < 0]['datetime'].astype(str).apply(lambda x: int((x.split(' ')[1].replace(':', "")).replace(".", ""))).astype('int64') * 1000

4550      92500000000
4551      93000000000
4552      93000000000
4553      93000000000
4554      93000010000
             ...     
12563    145600540000
12564    145604750000
12565    145606350000
12566    145626560000
12567    145653430000
Name: datetime, Length: 4831, dtype: int64

In [60]:
oo[oo['time'] < 0]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,cum_volume,cum_amount,close,bid30p,...,ask1Top49q,ask1Top50q,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels


In [7]:
startDate = 20190906
endDate = 20190906
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"

db = DB("192.168.10.178", database_name, user, password)
db.read('md_snapshot_mbd', start_date=startDate, end_date=endDate, symbol=[2000725])


Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,cum_volume,cum_amount,close,bid30p,...,ask1Top49q,ask1Top50q,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels
0,2000725,20190906,92500000000,1567733100000000,2019-09-06 09:25:00.000,268476,10212500,3.952238e+07,3.87,3.57,...,0,0,35042400,76272356,3.773194,4.046316,3536,8103,41,36
1,2000725,20190906,93000000000,1567733400000000,2019-09-06 09:30:00.000,297930,10212500,3.952238e+07,3.87,3.57,...,0,0,35542400,76272356,3.774274,4.046316,3537,8103,41,36
2,2000725,20190906,93000000000,1567733400000000,2019-09-06 09:30:00.000,297944,10212500,3.952238e+07,3.87,3.57,...,0,0,35542400,76322356,3.774274,4.046221,3537,8104,41,36
3,2000725,20190906,93000000000,1567733400000000,2019-09-06 09:30:00.000,298451,10389500,4.020736e+07,3.87,3.57,...,0,0,35542400,76322356,3.774274,4.046221,3537,8097,41,36
4,2000725,20190906,93000010000,1567733400010000,2019-09-06 09:30:00.010,298800,10389600,4.020775e+07,3.87,3.57,...,0,0,35542400,76145356,3.774274,4.046630,3537,8097,41,36
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
336402,2000725,20190906,145654930000,1567753014930000,2019-09-06 14:56:54.930,16219356,956417744,3.717933e+09,3.91,3.62,...,1000,900,102561134,267375645,3.828454,4.035447,15174,39318,46,31
336403,2000725,20190906,145654960000,1567753014960000,2019-09-06 14:56:54.960,16219394,956417744,3.717933e+09,3.91,3.62,...,1000,900,102561134,267367845,3.828454,4.035450,15174,39317,46,31
336404,2000725,20190906,145654970000,1567753014970000,2019-09-06 14:56:54.970,16219397,956418344,3.717935e+09,3.91,3.62,...,1000,900,102561134,267367845,3.828454,4.035450,15174,39317,46,31
336405,2000725,20190906,145654990000,1567753014990000,2019-09-06 14:56:54.990,16219425,956418344,3.717935e+09,3.91,3.62,...,1000,900,102560534,267370845,3.828453,4.035449,15174,39318,46,31


In [6]:
order[(order['skey'] == 1000016) & (order['cum_volume'] != 0)]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,cum_volume,cum_amount,open,close
2399,1000016,20200102,92517000000,1577928317000000,2020-01-02 09:25:17,2400,688011,1.731303e+09,3073.9313,3073.9313
2400,1000016,20200102,92518000000,1577928318000000,2020-01-02 09:25:18,2401,688011,1.731303e+09,3073.9313,3073.9313
2401,1000016,20200102,92519000000,1577928319000000,2020-01-02 09:25:19,2402,688011,1.731303e+09,3073.9313,3073.9313
2402,1000016,20200102,92520000000,1577928320000000,2020-01-02 09:25:20,2403,688011,1.731303e+09,3073.9313,3073.9313
2403,1000016,20200102,92521000000,1577928321000000,2020-01-02 09:25:21,2404,688011,1.731303e+09,3073.9313,3073.9313
...,...,...,...,...,...,...,...,...,...,...
26215,1000016,20200102,155918000000,1577951958000000,2020-01-02 15:59:18,26216,50036392,9.477373e+10,3073.9313,3090.8331
26216,1000016,20200102,155919000000,1577951959000000,2020-01-02 15:59:19,26217,50036392,9.477373e+10,3073.9313,3090.8331
26217,1000016,20200102,155920000000,1577951960000000,2020-01-02 15:59:20,26218,50036392,9.477373e+10,3073.9313,3090.8331
26218,1000016,20200102,155921000000,1577951961000000,2020-01-02 15:59:21,26219,50036392,9.477373e+10,3073.9313,3090.8331


In [4]:
t1 = trade.groupby('skey')['BidApplSeqNum'].unique().reset_index()
t2 = trade.groupby('skey')['OfferApplSeqNum'].unique().reset_index()
t3 = order.groupby('skey')['ApplSeqNum'].unique().reset_index()
t = pd.merge(t1, t2, on='skey', how='outer')
display(t[(t['BidApplSeqNum'].isnull()) | (t['OfferApplSeqNum'].isnull())])
t['union'] = [list(set(a) | set(b)) for a, b in zip(t.BidApplSeqNum, t.OfferApplSeqNum)]
t = pd.merge(t, t3, on='skey', how='outer')
display(t[(t['BidApplSeqNum'].isnull()) | (t['OfferApplSeqNum'].isnull()) | (t['ApplSeqNum'].isnull())])
t['less'] = [len(set(a) - set(b)) for a, b in zip(t.union, t.ApplSeqNum)]
t[t['less'] > 1]

Unnamed: 0,skey,BidApplSeqNum,OfferApplSeqNum


Unnamed: 0,skey,BidApplSeqNum,OfferApplSeqNum,union,ApplSeqNum


Unnamed: 0,skey,BidApplSeqNum,OfferApplSeqNum,union,ApplSeqNum,less
0,2000001,"[0, 74980, 90334, 44360, 92313, 92479, 95476, ...","[14212, 15972, 28633, 0, 41597, 88737, 96980, ...","[0, 3407876, 2359301, 7077894, 4849671, 196608...","[649, 731, 768, 959, 960, 961, 991, 1003, 1004...",64275
1,2000002,"[3569, 27558, 0, 49540, 101729, 103335, 86408,...","[0, 13927, 76500, 4683, 65468, 58440, 41960, 7...","[0, 720896, 3670017, 983052, 851987, 851989, 5...","[309, 310, 327, 509, 518, 645, 1119, 1183, 123...",21489
2,2000004,"[94087, 94522, 0, 97964, 99489, 42266, 11799, ...","[0, 49102, 99289, 18427, 18422, 97901, 7767, 1...","[0, 958464, 2342916, 3014670, 6897678, 7069712...","[360, 2301, 2322, 2959, 3180, 3215, 7767, 9380...",3180
3,2000005,"[0, 71510, 102045, 3325, 3326, 3327, 42540, 77...","[41795, 43886, 14273, 63807, 112409, 169464, 0...","[0, 3506178, 5054467, 6225923, 3997703, 334234...","[119, 406, 680, 2849, 2882, 3325, 3326, 3327, ...",3619
4,2000006,"[0, 3998, 128627, 128970, 120404, 129602, 1285...","[115337, 0, 136066, 156344, 171607, 171716, 16...","[0, 4456448, 8093696, 4030467, 8077312, 458752...","[1203, 1204, 1205, 1206, 1208, 1209, 1210, 121...",6720
...,...,...,...,...,...,...
1804,2300670,"[16976, 0, 46923, 99099, 115324, 78948, 132875...","[0, 59527, 48747, 87585, 62353, 107155, 99333,...","[0, 1310720, 7798784, 524298, 5046283, 8060940...","[1137, 1341, 1455, 1465, 1494, 2333, 2995, 326...",21006
1805,2300671,"[227, 3597, 979, 29142, 52757, 16469, 36594, 3...","[0, 88140, 3963, 156118, 34597, 163900, 22007,...","[0, 1, 2, 233474, 204800, 3, 980995, 1570818, ...","[1, 2, 3, 8, 10, 11, 13, 14, 15, 17, 19, 22, 2...",469
1806,2300672,"[1237, 22963, 24540, 9486, 41701, 73641, 84583...","[0, 48940, 241469, 1913603, 2893906, 4555174, ...","[94208, 1, 0, 8, 24585, 151561, 11, 12, 13, 12...","[1, 2, 4, 8, 11, 12, 13, 23, 24, 27, 31, 40, 4...",461
1807,2300673,"[177, 909, 65890, 74401, 98938, 60, 38, 22, 0,...","[0, 45508, 45440, 12455, 95773, 120109, 123304...","[0, 1, 2050, 16384, 3, 5, 6, 14343, 8, 9, 11, ...","[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16...",204


In [7]:
import numpy as np
import glob
import os
startTm = datetime.datetime.now()
readPath = 'A:\\day_stock\\***'
dataPathLs = np.array(glob.glob(readPath))
dataPathLs = dataPathLs[[np.array([os.path.basename(i).split('.')[0][:2] == 'SZ' for i in dataPathLs])]]
db = pd.DataFrame()
for p in dataPathLs:
    dayData = pd.read_csv(p, compression='gzip')
    db = pd.concat([db, dayData])
print(datetime.datetime.now() - startTm)

  import sys


0:03:54.931921


In [33]:
startDate = 20200525
endDate = 20200525
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"
db1 = DB("192.168.10.223", database_name, user, password)
OrderLog1 = db1.read('md_order', start_date=startDate, end_date=endDate)

da_te = str(OrderLog1["date"].iloc[0]) 
da_te = da_te[:4] + '-' + da_te[4:6] + '-' + da_te[6:8]
db1 = db[db["date"] == da_te]
sl = (db1["ID"].str[2:].astype(int) + 2000000).unique()
del db1
try:
    assert(len(set(sl) - set(OrderLog1["skey"].unique())) == 0)
except:
    print(set(sl) - set(OrderLog1["skey"].unique()))

{2300835, 2002990}


In [32]:
startDate = 20181228
endDate = 20181228
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"
db1 = DB("192.168.10.223", database_name, user, password)
TradeLogSZ1 = db1.read('md_trade', start_date=startDate, end_date=endDate)
TradeLogSZ1 = TradeLogSZ1[TradeLogSZ1['skey'] > 2000000]
TradeLogSZ1['trade_money'] = (TradeLogSZ1['trade_price'] * TradeLogSZ1['trade_qty']).round(2)
display(TradeLogSZ1['trade_money'].astype(str).apply(lambda x: len(x.split('.')[1])).unique())

da_te = str(TradeLogSZ1["date"].iloc[0]) 
da_te = da_te[:4] + '-' + da_te[4:6] + '-' + da_te[6:8]
db1 = db[db["date"] == da_te]
sl = (db1["ID"].str[2:].astype(int) + 2000000).unique()
db1["max_volume"] = db1.groupby("ID")["d_volume"].transform("max")
db1["max_amount"] = db1.groupby("ID")["d_amount"].transform("max")
t1 = db1.groupby("ID")["max_volume", "max_amount"].first().reset_index()
del db1
t1["skey"] = t1["ID"].str[2:].astype(int) + 2000000
trade1 = TradeLogSZ1[TradeLogSZ1["trade_type"] == 1].groupby("skey")["trade_qty"].sum().reset_index()
trade1.columns=["skey", "cum_volume"]
trade2 = TradeLogSZ1[TradeLogSZ1["trade_type"] == 1].groupby("skey")["trade_money"].sum().reset_index()
trade2.columns=["skey", "cum_amount"]
t2 = pd.merge(trade1, trade2, on="skey")
re = pd.merge(t1, t2, on="skey", how="outer")
try:
    assert(t1.shape[0] == t2.shape[0])
    assert(re[re["cum_volume"] != re["max_volume"]].shape[0] == 0)
    assert(re[re["cum_amount"].round(2) != re["max_amount"]].shape[0] == 0)
except:
    display(set(t1["skey"]) - set(t2["skey"]))
    display(re[re["cum_volume"] != re["max_volume"]])
    display(re[re["cum_amount"].round(2) != re["max_amount"]])

array([1, 2], dtype=int64)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


{2001914}

Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
456,SZ001914,1709301.0,12813473.94,2001914,,


Unnamed: 0,ID,max_volume,max_amount,skey,cum_volume,cum_amount
456,SZ001914,1709301.0,12813473.94,2001914,,


In [25]:
startDate = 20200101
endDate = 20201231
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"

db = DB("192.168.10.178", database_name, user, password)
data = db.read('md_index', start_date=startDate, end_date=endDate, symbol=[1000300])
display(len(data['date'].unique()))
data['date'].unique()

97

array([20200102, 20200103, 20200106, 20200107, 20200108, 20200109,
       20200110, 20200113, 20200114, 20200115, 20200116, 20200117,
       20200120, 20200121, 20200122, 20200123, 20200203, 20200204,
       20200205, 20200206, 20200207, 20200210, 20200211, 20200212,
       20200213, 20200214, 20200217, 20200218, 20200219, 20200220,
       20200221, 20200224, 20200225, 20200226, 20200227, 20200228,
       20200302, 20200303, 20200304, 20200305, 20200306, 20200309,
       20200310, 20200311, 20200312, 20200313, 20200316, 20200317,
       20200318, 20200319, 20200320, 20200323, 20200324, 20200325,
       20200326, 20200327, 20200330, 20200331, 20200401, 20200402,
       20200403, 20200407, 20200408, 20200409, 20200410, 20200413,
       20200414, 20200415, 20200416, 20200417, 20200420, 20200421,
       20200422, 20200423, 20200424, 20200427, 20200428, 20200429,
       20200430, 20200506, 20200507, 20200508, 20200511, 20200512,
       20200513, 20200514, 20200515, 20200518, 20200519, 20200

In [26]:
kk = pd.read_csv(r'A:\day_stock\SH600000.csv.gz', compression='gzip')
kk['date'] = kk['date'].apply(lambda x: int(x.split('-')[0] + x.split('-')[1] + x.split('-')[2]))
display(len(kk[(kk['date'] >= 20200102) & (kk['date'] <= 20200529)]['date'].unique()))
kk[(kk['date'] >= 20200102) & (kk['date'] <= 20200529)]['date'].unique()

97

array([20200102, 20200103, 20200106, 20200107, 20200108, 20200109,
       20200110, 20200113, 20200114, 20200115, 20200116, 20200117,
       20200120, 20200121, 20200122, 20200123, 20200203, 20200204,
       20200205, 20200206, 20200207, 20200210, 20200211, 20200212,
       20200213, 20200214, 20200217, 20200218, 20200219, 20200220,
       20200221, 20200224, 20200225, 20200226, 20200227, 20200228,
       20200302, 20200303, 20200304, 20200305, 20200306, 20200309,
       20200310, 20200311, 20200312, 20200313, 20200316, 20200317,
       20200318, 20200319, 20200320, 20200323, 20200324, 20200325,
       20200326, 20200327, 20200330, 20200331, 20200401, 20200402,
       20200403, 20200407, 20200408, 20200409, 20200410, 20200413,
       20200414, 20200415, 20200416, 20200417, 20200420, 20200421,
       20200422, 20200423, 20200424, 20200427, 20200428, 20200429,
       20200430, 20200506, 20200507, 20200508, 20200511, 20200512,
       20200513, 20200514, 20200515, 20200518, 20200519, 20200

In [57]:
import numpy as np
display(len(np.sort(data['date'].unique())))
np.sort(data['date'].unique())

646

array([20170901, 20170904, 20170905, 20170906, 20170907, 20170908,
       20170911, 20170912, 20170913, 20170914, 20170915, 20170918,
       20170919, 20170920, 20170921, 20170922, 20170925, 20170926,
       20170927, 20170928, 20170929, 20171009, 20171010, 20171011,
       20171012, 20171013, 20171016, 20171017, 20171018, 20171019,
       20171020, 20171023, 20171024, 20171025, 20171026, 20171027,
       20171030, 20171031, 20171101, 20171102, 20171103, 20171106,
       20171107, 20171108, 20171109, 20171110, 20171113, 20171114,
       20171115, 20171116, 20171117, 20171120, 20171121, 20171122,
       20171123, 20171124, 20171127, 20171128, 20171129, 20171130,
       20171201, 20171204, 20171205, 20171206, 20171207, 20171208,
       20171211, 20171212, 20171213, 20171214, 20171215, 20171218,
       20171219, 20171220, 20171221, 20171222, 20171225, 20171226,
       20171227, 20171228, 20171229, 20180102, 20180103, 20180104,
       20180105, 20180108, 20180109, 20180110, 20180111, 20180

In [63]:
data['order_type'].unique()

array([2, 1, 3], dtype=int64)

In [61]:
data['order_price'].astype(str).apply(lambda x: len(str(x.split('.')[1]))).max()

2

In [58]:
data['datetime'].iloc[4]

Timestamp('2020-01-02 09:15:00.020000')

In [55]:
data['time'][data['time']%100000 != 0].iloc[0]

91500010000

In [8]:
1e-6 * 1000000

1.0

In [20]:
data = pd.concat([data, data])
data = pd.concat([data, data[data['skey'] < 2000050]])

In [21]:
import sys
sys.getsizeof(data)/(1024**3)

4.403027199208736

In [22]:
data.memory_usage().sum()/ (1024**3)

4.403027176856995

In [23]:
data.to_pickle(r'A:\temp\database speed\data.pkl')

In [13]:
startDate = 20200102
endDate = 20200102

db = DB("mongodb://user_rw:faa96dfc@192.168.10.223")
db.delete('trade', start_date=startDate, end_date=endDate)

In [14]:
startTm = datetime.datetime.now()
db = DB("mongodb://user_rw:faa96dfc@192.168.10.223")
db.write('trade', data)
print(datetime.datetime.now() - startTm)

0:01:29.324015


In [15]:
startDate = 20200102
endDate = 20200102
db = DB("mongodb://user_rw:faa96dfc@192.168.10.223")
startTm = datetime.datetime.now()
data1 = db.read('trade', start_date=startDate, end_date=endDate)
print(datetime.datetime.now() - startTm)
sys.getsizeof(data1)/(1024**3)

0:00:34.429008


3.8526489213109016

In [24]:
kk = pd.read_csv(r'A:\day_stock\SZ000001.csv.gz', compression='gzip')
display(len(kk[(kk['date'] >= '2020-01-01') & (kk['date'] <= '2020-05-29')]['date'].unique()))
kk[(kk['date'] >= '2020-01-01') & (kk['date'] <= '2020-05-29')]['date'].unique()

97

array(['2020-01-02', '2020-01-03', '2020-01-06', '2020-01-07',
       '2020-01-08', '2020-01-09', '2020-01-10', '2020-01-13',
       '2020-01-14', '2020-01-15', '2020-01-16', '2020-01-17',
       '2020-01-20', '2020-01-21', '2020-01-22', '2020-01-23',
       '2020-02-03', '2020-02-04', '2020-02-05', '2020-02-06',
       '2020-02-07', '2020-02-10', '2020-02-11', '2020-02-12',
       '2020-02-13', '2020-02-14', '2020-02-17', '2020-02-18',
       '2020-02-19', '2020-02-20', '2020-02-21', '2020-02-24',
       '2020-02-25', '2020-02-26', '2020-02-27', '2020-02-28',
       '2020-03-02', '2020-03-03', '2020-03-04', '2020-03-05',
       '2020-03-06', '2020-03-09', '2020-03-10', '2020-03-11',
       '2020-03-12', '2020-03-13', '2020-03-16', '2020-03-17',
       '2020-03-18', '2020-03-19', '2020-03-20', '2020-03-23',
       '2020-03-24', '2020-03-25', '2020-03-26', '2020-03-27',
       '2020-03-30', '2020-03-31', '2020-04-01', '2020-04-02',
       '2020-04-03', '2020-04-07', '2020-04-08', '2020-

In [54]:
# database_name = 'com_md_eq_cn'
# user = "zhenyuy"
# password = "bnONBrzSMGoE"

# db = DB("192.168.10.223", database_name, user, password)
# db.write('md_trade', mdData)

In [28]:
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"

db = DB("192.168.10.178", database_name, user, password)
db.write('md_snapshot_l2', SH)

In [17]:
import pandas as pd
import random
import numpy as np
import glob
import pickle
import os
import datetime
import time
pd.set_option("max_columns", 200)

# startTm = datetime.datetime.now()
# readPath = 'A:\\day_stock\\***'
# dataPathLs = np.array(glob.glob(readPath))
# dataPathLs = dataPathLs[[np.array([os.path.basename(i).split('.')[0][:2] == 'SH' for i in dataPathLs])]]
# db = pd.DataFrame()
# for p in dataPathLs:
#     dayData = pd.read_csv(p, compression='gzip')
#     db = pd.concat([db, dayData])
# print(datetime.datetime.now() - startTm)

startDate = 20170714
endDate = 20170714
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"

db = DB("192.168.10.178", database_name, user, password)
SZ = db.read('md_snapshot_l2', start_date=startDate, end_date=endDate)
SZ = SZ[SZ['skey'] > 2000000]
# date = pd.DataFrame(pd.date_range(start='2019-06-10 08:30:00', end='2019-06-10 18:00:00', freq='s'), columns=["Orig"])
# date["time"] = date["Orig"].apply(lambda x: int(x.strftime("%H%M%S"))*1000)
# date["group"] = date["time"]//10000
# SZ["group"] = SZ["time"]//10000000
# gl = date[((date["time"] >= 93000000) & (date["time"] <= 113000000))|((date["time"] >= 130000000) & (date["time"] < 150000000))]["group"].unique()
# l = set(gl) - set(SZ["group"].unique())
# SZ["has_missing1"] = 0 
# if len(l) != 0:
#     print("massive missing")
#     print(np.sort(list(l)))
#     SZ["order"] = SZ.groupby(["skey", "time"]).cumcount()
#     for i in l:
#         SZ["t"] = SZ[SZ["group"] > i].groupby("skey")["time"].transform("min")
#         SZ["has_missing1"] = np.where((SZ["time"] == SZ["t"]) & (SZ["order"] == 0), 1, SZ["has_missing1"])
#     SZ.drop(["order", "t", "group"], axis=1, inplace=True)   
# else:
#     print("no massive missing")
#     SZ.drop(["group"], axis=1, inplace=True)
# SH["has_missing"] = SH["has_missing"].astype('int32')
# SH = SH[["skey", "date", "time", "clockAtArrival", "datetime", "ordering", "has_missing", "cum_trades_cnt", "cum_volume", "cum_amount", "prev_close",
#                         "open", "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p','bid2p','bid1p',
#                         'ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p', 'bid10q','bid9q','bid8q',
#                          'bid7q','bid6q','bid5q','bid4q','bid3q','bid2q','bid1q', 'ask1q','ask2q','ask3q','ask4q','ask5q','ask6q',
#                          'ask7q','ask8q','ask9q','ask10q', 'bid10n', 'bid9n', 'bid8n', 'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 
#                          'ask1n', 'ask2n', 'ask3n', 'ask4n', 'ask5n', 'ask6n','ask7n', 'ask8n', 'ask9n', 'ask10n','bid1Top1q','bid1Top2q','bid1Top3q','bid1Top4q','bid1Top5q','bid1Top6q',
#     'bid1Top7q','bid1Top8q','bid1Top9q','bid1Top10q','bid1Top11q','bid1Top12q','bid1Top13q','bid1Top14q','bid1Top15q','bid1Top16q','bid1Top17q','bid1Top18q',
#     'bid1Top19q','bid1Top20q','bid1Top21q','bid1Top22q','bid1Top23q','bid1Top24q','bid1Top25q','bid1Top26q','bid1Top27q','bid1Top28q','bid1Top29q',
#     'bid1Top30q','bid1Top31q','bid1Top32q','bid1Top33q','bid1Top34q','bid1Top35q','bid1Top36q','bid1Top37q','bid1Top38q','bid1Top39q','bid1Top40q',
#     'bid1Top41q','bid1Top42q','bid1Top43q','bid1Top44q','bid1Top45q','bid1Top46q','bid1Top47q','bid1Top48q','bid1Top49q','bid1Top50q', 'ask1Top1q',
#     'ask1Top2q','ask1Top3q','ask1Top4q','ask1Top5q','ask1Top6q','ask1Top7q','ask1Top8q','ask1Top9q','ask1Top10q','ask1Top11q','ask1Top12q','ask1Top13q',
#     'ask1Top14q','ask1Top15q','ask1Top16q','ask1Top17q','ask1Top18q','ask1Top19q','ask1Top20q','ask1Top21q','ask1Top22q','ask1Top23q',
#     'ask1Top24q','ask1Top25q','ask1Top26q','ask1Top27q','ask1Top28q','ask1Top29q','ask1Top30q','ask1Top31q','ask1Top32q','ask1Top33q',
#     'ask1Top34q','ask1Top35q','ask1Top36q','ask1Top37q','ask1Top38q','ask1Top39q','ask1Top40q','ask1Top41q','ask1Top42q','ask1Top43q',
#     'ask1Top44q','ask1Top45q','ask1Top46q','ask1Top47q','ask1Top48q','ask1Top49q','ask1Top50q',"total_bid_quantity", "total_ask_quantity","total_bid_vwap", "total_ask_vwap",
#     "total_bid_orders",'total_ask_orders','total_bid_levels', 'total_ask_levels', 'bid_trade_max_duration', 'ask_trade_max_duration', 'cum_canceled_buy_orders', 'cum_canceled_buy_volume',
#     "cum_canceled_buy_amount", "cum_canceled_sell_orders", 'cum_canceled_sell_volume',"cum_canceled_sell_amount"]]

# display(SH["date"].iloc[0])
# print("SH finished")

In [39]:
startDate = 20200102
endDate = 20200102
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"

db = DB("192.168.10.178", database_name, user, password)
SH = db.read('md_index', start_date=startDate, end_date=endDate)
SH

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,cum_volume,cum_amount,prev_close,open,high,low,close
0,1000016,20200102,84509000000,1577925909000000,2020-01-02 08:45:09.000,1,0,0.000000e+00,0.0000,0.0000,0.0000,0.0000,0.0000
1,1000016,20200102,84509000000,1577925909000000,2020-01-02 08:45:09.000,2,0,0.000000e+00,3063.2190,0.0000,0.0000,0.0000,3063.2190
2,1000016,20200102,84609000000,1577925969000000,2020-01-02 08:46:09.000,3,0,0.000000e+00,3063.2190,0.0000,0.0000,0.0000,3063.2190
3,1000016,20200102,84609000000,1577925969000000,2020-01-02 08:46:09.000,4,0,0.000000e+00,3063.2190,0.0000,0.0000,0.0000,3063.2190
4,1000016,20200102,84709000000,1577926029000000,2020-01-02 08:47:09.000,5,0,0.000000e+00,3063.2190,0.0000,0.0000,0.0000,3063.2190
...,...,...,...,...,...,...,...,...,...,...,...,...,...
14796,1000905,20200102,151209540000,1577949129540000,2020-01-02 15:12:09.540,3699,145894405,1.368138e+11,5267.6622,5306.6677,5374.9082,5288.1675,5366.1375
14797,1000905,20200102,151209540000,1577949129540000,2020-01-02 15:12:09.540,3700,145894405,1.368138e+11,5267.6622,5306.6677,5374.9082,5288.1675,5366.1375
14798,1000905,20200102,151209540000,1577949129540000,2020-01-02 15:12:09.540,3701,145894405,1.368138e+11,5267.6622,5306.6677,5374.9082,5288.1675,5366.1375
14799,1000905,20200102,151209540000,1577949129540000,2020-01-02 15:12:09.540,3702,145894405,1.368138e+11,5267.6622,5306.6677,5374.9082,5288.1675,5366.1375


In [42]:
SH[(SH['open'] != 0) & (SH['time'] > 93000000000)]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,cum_volume,cum_amount,prev_close,open,high,low,close
200,1000016,20200102,93000760000,1577928600760000,2020-01-02 09:30:00.760,201,1041090,2.578057e+09,3063.2190,3073.9313,3075.5886,3073.9313,3073.9313
201,1000016,20200102,93005550000,1577928605550000,2020-01-02 09:30:05.550,202,1431161,3.463197e+09,3063.2190,3073.9313,3075.9588,3073.9313,3075.9247
202,1000016,20200102,93010740000,1577928610740000,2020-01-02 09:30:10.740,203,1577049,3.762053e+09,3063.2190,3073.9313,3077.3654,3073.9313,3075.9350
203,1000016,20200102,93015910000,1577928615910000,2020-01-02 09:30:15.910,204,1691872,4.010343e+09,3063.2190,3073.9313,3077.3702,3073.9313,3077.2202
204,1000016,20200102,93015910000,1577928615910000,2020-01-02 09:30:15.910,205,1691872,4.010343e+09,3063.2190,3073.9313,3077.3702,3073.9313,3077.2202
...,...,...,...,...,...,...,...,...,...,...,...,...,...
14796,1000905,20200102,151209540000,1577949129540000,2020-01-02 15:12:09.540,3699,145894405,1.368138e+11,5267.6622,5306.6677,5374.9082,5288.1675,5366.1375
14797,1000905,20200102,151209540000,1577949129540000,2020-01-02 15:12:09.540,3700,145894405,1.368138e+11,5267.6622,5306.6677,5374.9082,5288.1675,5366.1375
14798,1000905,20200102,151209540000,1577949129540000,2020-01-02 15:12:09.540,3701,145894405,1.368138e+11,5267.6622,5306.6677,5374.9082,5288.1675,5366.1375
14799,1000905,20200102,151209540000,1577949129540000,2020-01-02 15:12:09.540,3702,145894405,1.368138e+11,5267.6622,5306.6677,5374.9082,5288.1675,5366.1375


In [9]:
SH[SH['has_missing'] == 1]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,has_missing,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,high,low,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,bid1Top1q,bid1Top2q,bid1Top3q,bid1Top4q,bid1Top5q,bid1Top6q,bid1Top7q,bid1Top8q,bid1Top9q,bid1Top10q,bid1Top11q,bid1Top12q,bid1Top13q,bid1Top14q,bid1Top15q,bid1Top16q,bid1Top17q,bid1Top18q,bid1Top19q,bid1Top20q,bid1Top21q,bid1Top22q,bid1Top23q,bid1Top24q,bid1Top25q,bid1Top26q,bid1Top27q,bid1Top28q,bid1Top29q,bid1Top30q,bid1Top31q,bid1Top32q,bid1Top33q,bid1Top34q,bid1Top35q,bid1Top36q,bid1Top37q,bid1Top38q,bid1Top39q,bid1Top40q,bid1Top41q,bid1Top42q,bid1Top43q,bid1Top44q,bid1Top45q,bid1Top46q,bid1Top47q,bid1Top48q,bid1Top49q,bid1Top50q,ask1Top1q,ask1Top2q,ask1Top3q,ask1Top4q,ask1Top5q,ask1Top6q,ask1Top7q,ask1Top8q,ask1Top9q,ask1Top10q,ask1Top11q,ask1Top12q,ask1Top13q,ask1Top14q,ask1Top15q,ask1Top16q,ask1Top17q,ask1Top18q,ask1Top19q,ask1Top20q,ask1Top21q,ask1Top22q,ask1Top23q,ask1Top24q,ask1Top25q,ask1Top26q,ask1Top27q,ask1Top28q,ask1Top29q,ask1Top30q,ask1Top31q,ask1Top32q,ask1Top33q,ask1Top34q,ask1Top35q,ask1Top36q,ask1Top37q,ask1Top38q,ask1Top39q,ask1Top40q,ask1Top41q,ask1Top42q,ask1Top43q,ask1Top44q,ask1Top45q,ask1Top46q,ask1Top47q,ask1Top48q,ask1Top49q,ask1Top50q,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels,bid_trade_max_duration,ask_trade_max_duration,cum_canceled_buy_orders,cum_canceled_buy_volume,cum_canceled_buy_amount,cum_canceled_sell_orders,cum_canceled_sell_volume,cum_canceled_sell_amount
4750956,2000001,20170623,110121000000,1498186881000000,2017-06-23 11:01:21,1865,1,7640,23953504,2.209757e+08,9.25,9.23,9.26,9.19,9.24,9.15,9.16,9.17,9.18,9.19,9.20,9.21,9.22,9.23,9.24,9.25,9.26,9.27,9.28,9.29,9.30,9.31,9.32,9.33,9.34,813100,581800,485700,891000,1482484,1596900,552641,574800,229786,348083,85968,279408,407866,308260,330968,545520,97260,148480,266200,466740,202,167,130,265,187,257,140,62,31,47,6,21,55,71,91,123,33,51,44,58,5883,5200,200,3800,1200,2000,20000,100,1100,3000,2000,100,2100,23800,900,1100,400,2000,1000,1000,10000,100000,8700,100,5000,1000,100,700,100,200,100000,600,6100,1200,400,3400,1000,1800,1600,600,2000,800,5700,2500,1300,10000,6300,0,0,0,47768,10000,3000,4400,4300,16500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11284194,16949550,9.14,9.59,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4755850,2000002,20170623,110121000000,1498186881000000,2017-06-23 11:01:21,1871,1,52906,64218955,1.462345e+09,21.88,21.84,23.65,21.80,23.63,23.54,23.55,23.56,23.57,23.58,23.59,23.60,23.61,23.62,23.63,23.64,23.65,23.66,23.67,23.68,23.69,23.70,23.71,23.72,23.73,25300,27700,11600,3000,18600,60700,47365,2600,18700,23300,900,75600,153700,50600,81800,99600,93900,5300,15800,85100,21,14,8,6,14,18,40,3,14,2,2,57,46,12,69,32,87,7,5,5,22500,800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,200,700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6059097,9045396,22.28,23.99,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4760496,2000004,20170623,110121000000,1498186881000000,2017-06-23 11:01:21,1595,1,2625,1505710,3.847116e+07,26.05,25.87,25.96,25.31,25.46,25.36,25.37,25.38,25.39,25.40,25.41,25.42,25.43,25.44,25.45,25.46,25.49,25.50,25.51,25.52,25.55,25.56,25.58,25.59,25.60,5300,1700,2000,3300,18500,2600,3700,34000,7700,2800,14707,400,1400,1000,200,2200,4900,2000,200,2400,3,4,6,4,39,6,3,6,9,9,2,1,2,1,1,3,3,1,1,3,600,200,100,100,300,100,300,100,1000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12007,2700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,375200,344730,24.82,26.89,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4764402,2000005,20170623,110121000000,1498186881000000,2017-06-23 11:01:21,1400,1,1509,3027100,1.596420e+07,5.33,5.27,5.33,5.24,5.30,5.20,5.21,5.22,5.23,5.24,5.25,5.26,5.27,5.28,5.29,5.30,5.31,5.32,5.33,5.34,5.35,5.36,5.37,5.38,5.39,132800,42100,50200,83600,244300,212600,111200,65400,98800,10200,35600,28300,36800,34600,28500,39930,14600,11700,49300,8300,91,26,24,30,46,67,40,20,17,2,5,8,11,11,13,19,4,4,13,9,8200,2000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7400,16100,6700,400,5000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1488500,1899034,5.18,5.56,0,0,0,0,0,0,0,0,0.0,0,0,0.0
4768125,2000006,20170623,110121000000,1498186881000000,2017-06-23 11:01:21,1648,1,2383,4976891,4.243135e+07,8.50,8.50,8.58,8.45,8.56,8.46,8.47,8.48,8.49,8.50,8.51,8.52,8.53,8.54,8.55,8.56,8.57,8.58,8.59,8.60,8.61,8.62,8.63,8.64,8.65,38000,26300,44000,13900,156300,70500,26400,51900,31200,39800,400,27201,53000,49600,86400,36500,20200,28000,59400,94200,22,11,24,9,49,20,11,9,7,14,2,9,11,23,24,8,7,6,6,31,600,3100,1000,10000,1000,100,2100,300,4900,100,100,500,15000,1000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,300,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1818500,3219081,8.30,8.96,0,0,0,0,0,0,0,0,0.0,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10996071,2300663,20170623,110118000000,1498186878000000,2017-06-23 11:01:18,1885,1,46776,15944602,4.424766e+08,27.17,28.01,28.93,26.13,26.88,26.76,26.77,26.78,26.79,26.80,26.81,26.82,26.83,26.85,26.86,26.87,26.88,26.90,26.92,26.94,26.95,26.96,26.97,26.98,26.99,1300,240,600,1100,4046,1140,4207,5247,1000,200,500,780,2500,400,1000,2200,500,380,1500,2994,1,2,4,2,9,3,3,4,3,1,1,2,5,2,2,5,1,2,3,9,200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,965515,2275366,25.25,28.87,0,0,0,0,0,0,0,0,0.0,0,0,0.0
10999773,2300665,20170623,110124000000,1498186884000000,2017-06-23 11:01:24,658,1,405,156200,4.889060e+06,28.45,31.30,31.30,31.30,31.30,30.18,30.30,30.45,30.77,30.81,30.86,31.11,31.13,31.29,31.30,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,800,800,800,800,800,800,800,1800,7800,4775100,0,0,0,0,0,0,0,0,0,0,2,2,2,2,1,2,2,5,8,398,0,0,0,0,0,0,0,0,0,0,910200,995600,220300,44000,190000,356400,58300,50000,377700,108800,185500,94900,118100,185300,34700,2000,97300,3300,5000,11100,22200,33000,1000,50000,300,300,2100,600,900,100,1700,600,400,600,400,200,700,13000,300,1000,200,300,400,100,600,2900,30000,6000,10000,4700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4798800,0,31.29,0.00,0,0,0,0,0,0,0,0,0.0,0,0,0.0
11001552,2300666,20170623,110127000000,1498186887000000,2017-06-23 11:01:27,1152,1,79,36828,4.364118e+05,10.77,11.85,11.85,11.85,11.85,11.75,11.76,11.78,11.79,11.80,11.81,11.82,11.83,11.84,11.85,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,500,1600,8600,3700,4400,5000,1200,2100,129600,18341472,0,0,0,0,0,0,0,0,0,0,1,2,3,3,9,3,2,5,38,2337,0,0,0,0,0,0,0,0,0,0,89672,992400,46200,111100,13300,997400,112200,189800,78100,344300,33300,1300,180400,500,45100,26000,129300,51800,129700,500,129900,99900,82500,171300,129500,1000000,33600,129200,19100,126800,122200,99800,3000,100,9700,2400,3300,33700,49700,800,6000,12000,11100,7000,42100,366900,5000,46400,1000,500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18875672,0,11.83,0.00,0,0,0,0,0,0,0,0,0.0,0,0,0.0
11003371,2300667,20170623,110121000000,1498186881000000,2017-06-23 11:01:21,523,1,2,600,1.359600e+04,20.60,22.66,22.66,22.66,22.66,22.00,22.06,22.07,22.30,22.50,22.60,22.61,22.64,22.65,22.66,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,6100,7300,500,500,1000,4000,2000,500,1700,2033300,0,0,0,0,0,0,0,0,0,0,4,4,2,1,1,4,1,1,5,546,0,0,0,0,0,0,0,0,0,0,26700,59900,42800,2400,40000,43900,12700,3100,33300,11300,900,4900,14400,5300,23400,9200,8900,130000,3700,184300,49700,3000,10000,49800,11100,20400,1000,5000,10200,1400,1000,4400,5600,1300,23700,1600,4300,800,29100,2900,500,500,2100,1100,1000,1800,10000,21300,3600,22900,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2152100,0,22.57,0.00,0,0,0,0,0,0,0,0,0.0,0,0,0.0


In [41]:
SH['date'].iloc[0]

20160315

In [None]:
SH['has_missing'].unique()

In [56]:
pd.set_option('max_rows', 200)
SH.dtypes

skey                                 int32
date                                 int32
time                                 int64
clockAtArrival                       int64
datetime                    datetime64[ns]
ordering                             int32
has_missing                          int32
cum_trades_cnt                       int32
cum_volume                           int64
cum_amount                         float64
prev_close                         float64
open                               float64
high                               float64
low                                float64
close                              float64
bid10p                             float64
bid9p                              float64
bid8p                              float64
bid7p                              float64
bid6p                              float64
bid5p                              float64
bid4p                              float64
bid3p                              float64
bid2p      

In [33]:
SH[(SH['group'] > 13134) & (SH['skey'] == 1600000)]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,has_missing,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,high,low,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,bid1Top1q,bid1Top2q,bid1Top3q,bid1Top4q,bid1Top5q,bid1Top6q,bid1Top7q,bid1Top8q,bid1Top9q,bid1Top10q,bid1Top11q,bid1Top12q,bid1Top13q,bid1Top14q,bid1Top15q,bid1Top16q,bid1Top17q,bid1Top18q,bid1Top19q,bid1Top20q,bid1Top21q,bid1Top22q,bid1Top23q,bid1Top24q,bid1Top25q,bid1Top26q,bid1Top27q,bid1Top28q,bid1Top29q,bid1Top30q,bid1Top31q,bid1Top32q,bid1Top33q,bid1Top34q,bid1Top35q,bid1Top36q,bid1Top37q,bid1Top38q,bid1Top39q,bid1Top40q,bid1Top41q,bid1Top42q,bid1Top43q,bid1Top44q,bid1Top45q,bid1Top46q,bid1Top47q,bid1Top48q,bid1Top49q,bid1Top50q,ask1Top1q,ask1Top2q,ask1Top3q,ask1Top4q,ask1Top5q,ask1Top6q,ask1Top7q,ask1Top8q,ask1Top9q,ask1Top10q,ask1Top11q,ask1Top12q,ask1Top13q,ask1Top14q,ask1Top15q,ask1Top16q,ask1Top17q,ask1Top18q,ask1Top19q,ask1Top20q,ask1Top21q,ask1Top22q,ask1Top23q,ask1Top24q,ask1Top25q,ask1Top26q,ask1Top27q,ask1Top28q,ask1Top29q,ask1Top30q,ask1Top31q,ask1Top32q,ask1Top33q,ask1Top34q,ask1Top35q,ask1Top36q,ask1Top37q,ask1Top38q,ask1Top39q,ask1Top40q,ask1Top41q,ask1Top42q,ask1Top43q,ask1Top44q,ask1Top45q,ask1Top46q,ask1Top47q,ask1Top48q,ask1Top49q,ask1Top50q,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels,bid_trade_max_duration,ask_trade_max_duration,cum_canceled_buy_orders,cum_canceled_buy_volume,cum_canceled_buy_amount,cum_canceled_sell_orders,cum_canceled_sell_volume,cum_canceled_sell_amount,has_missing1,group,order
2988,1600000,20160104,131416000000,1451884456000000,2016-01-04 13:14:16,2989,0,20740,37500614,6.699145e+08,18.27,18.28,18.28,17.55,17.90,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000,0.000,0,0,0,0,2633,5588,0,0,0.000000e+00,0,0,0.000000e+00,0,13141,0
2989,1600000,20160104,131516000000,1451884516000000,2016-01-04 13:15:16,2990,0,20740,37500614,6.699145e+08,18.27,18.28,18.28,17.55,17.90,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000,0.000,0,0,0,0,2633,5588,0,0,0.000000e+00,0,0,0.000000e+00,0,13151,0
2990,1600000,20160104,131616000000,1451884576000000,2016-01-04 13:16:16,2991,0,20740,37500614,6.699145e+08,18.27,18.28,18.28,17.55,17.90,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000,0.000,0,0,0,0,2633,5588,0,0,0.000000e+00,0,0,0.000000e+00,0,13161,0
2991,1600000,20160104,131716000000,1451884636000000,2016-01-04 13:17:16,2992,0,20740,37500614,6.699145e+08,18.27,18.28,18.28,17.55,17.90,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000,0.000,0,0,0,0,2633,5588,0,0,0.000000e+00,0,0,0.000000e+00,0,13171,0
2992,1600000,20160104,131816000000,1451884696000000,2016-01-04 13:18:16,2993,0,20740,37500614,6.699145e+08,18.27,18.28,18.28,17.55,17.90,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.000,0.000,0,0,0,0,2633,5588,0,0,0.000000e+00,0,0,0.000000e+00,0,13181,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3248,1600000,20160104,145959000000,1451890799000000,2016-01-04 14:59:59,3249,0,22358,42240610,7.544258e+08,18.27,18.28,18.28,17.55,17.73,17.62,17.63,17.64,17.65,17.66,17.67,17.68,17.69,17.7,17.71,17.73,17.75,17.78,17.8,17.81,17.85,17.86,17.87,17.88,17.89,21900,18100,2700,91900,49700,20700,78300,6600,24937,71500,27200,223506,25000,2000,43000,500,2000,2000,10800,15900,17,12,5,55,47,13,34,9,16,8,6,4,3,1,3,1,1,3,4,12,61900,2000,500,1100,1000,2000,2300,700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,700,13000,6000,4900,1000,1600,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3743537,8020664,17.155,19.016,1739,2720,115,225,3116,5588,3497,11035453,1.934477e+08,2863,17484861,3.163791e+08,0,14595,28
3249,1600000,20160104,145959000000,1451890799000000,2016-01-04 14:59:59,3250,0,22358,42240610,7.544258e+08,18.27,18.28,18.28,17.55,17.73,17.62,17.63,17.64,17.65,17.66,17.67,17.68,17.69,17.7,17.71,17.73,17.75,17.78,17.8,17.81,17.85,17.86,17.87,17.88,17.89,21900,18100,2700,91900,49700,20700,78300,6600,24937,71500,27200,223506,25000,2000,43000,500,2000,2000,10800,15900,17,12,5,55,47,13,34,9,16,8,6,4,3,1,3,1,1,3,4,12,61900,2000,500,1100,1000,2000,2300,700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,700,13000,6000,4900,1000,1600,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3743537,8020664,17.155,19.016,1739,2720,115,225,3116,5588,3497,11035453,1.934477e+08,2863,17484861,3.163791e+08,0,14595,29
3250,1600000,20160104,145959000000,1451890799000000,2016-01-04 14:59:59,3251,0,22358,42240610,7.544258e+08,18.27,18.28,18.28,17.55,17.73,17.62,17.63,17.64,17.65,17.66,17.67,17.68,17.69,17.7,17.71,17.73,17.75,17.78,17.8,17.81,17.85,17.86,17.87,17.88,17.89,21900,18100,2700,91900,49700,20700,78300,6600,24937,71500,27200,223506,25000,2000,43000,500,2000,2000,10800,15900,17,12,5,55,47,13,34,9,16,8,6,4,3,1,3,1,1,3,4,12,61900,2000,500,1100,1000,2000,2300,700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,700,13000,6000,4900,1000,1600,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3743537,8020664,17.155,19.016,1739,2720,115,225,3116,5588,3497,11035453,1.934477e+08,2863,17484861,3.163791e+08,0,14595,30
3251,1600000,20160104,145959000000,1451890799000000,2016-01-04 14:59:59,3252,0,22358,42240610,7.544258e+08,18.27,18.28,18.28,17.55,17.73,17.62,17.63,17.64,17.65,17.66,17.67,17.68,17.69,17.7,17.71,17.73,17.75,17.78,17.8,17.81,17.85,17.86,17.87,17.88,17.89,21900,18100,2700,91900,49700,20700,78300,6600,24937,71500,27200,223506,25000,2000,43000,500,2000,2000,10800,15900,17,12,5,55,47,13,34,9,16,8,6,4,3,1,3,1,1,3,4,12,61900,2000,500,1100,1000,2000,2300,700,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,700,13000,6000,4900,1000,1600,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3743537,8020664,17.155,19.016,1739,2720,115,225,3116,5588,3497,11035453,1.934477e+08,2863,17484861,3.163791e+08,0,14595,31


In [23]:
SH[(SH['has_missing'] == 1) & (SH['skey'] == 1600000)]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,has_missing,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,high,low,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,bid1Top1q,bid1Top2q,bid1Top3q,bid1Top4q,bid1Top5q,bid1Top6q,bid1Top7q,bid1Top8q,bid1Top9q,bid1Top10q,bid1Top11q,bid1Top12q,bid1Top13q,bid1Top14q,bid1Top15q,bid1Top16q,bid1Top17q,bid1Top18q,bid1Top19q,bid1Top20q,bid1Top21q,bid1Top22q,bid1Top23q,bid1Top24q,bid1Top25q,bid1Top26q,bid1Top27q,bid1Top28q,bid1Top29q,bid1Top30q,bid1Top31q,bid1Top32q,bid1Top33q,bid1Top34q,bid1Top35q,bid1Top36q,bid1Top37q,bid1Top38q,bid1Top39q,bid1Top40q,bid1Top41q,bid1Top42q,bid1Top43q,bid1Top44q,bid1Top45q,bid1Top46q,bid1Top47q,bid1Top48q,bid1Top49q,bid1Top50q,ask1Top1q,ask1Top2q,ask1Top3q,ask1Top4q,ask1Top5q,ask1Top6q,ask1Top7q,ask1Top8q,ask1Top9q,ask1Top10q,ask1Top11q,ask1Top12q,ask1Top13q,ask1Top14q,ask1Top15q,ask1Top16q,ask1Top17q,ask1Top18q,ask1Top19q,ask1Top20q,ask1Top21q,ask1Top22q,ask1Top23q,ask1Top24q,ask1Top25q,ask1Top26q,ask1Top27q,ask1Top28q,ask1Top29q,ask1Top30q,ask1Top31q,ask1Top32q,ask1Top33q,ask1Top34q,ask1Top35q,ask1Top36q,ask1Top37q,ask1Top38q,ask1Top39q,ask1Top40q,ask1Top41q,ask1Top42q,ask1Top43q,ask1Top44q,ask1Top45q,ask1Top46q,ask1Top47q,ask1Top48q,ask1Top49q,ask1Top50q,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels,bid_trade_max_duration,ask_trade_max_duration,cum_canceled_buy_orders,cum_canceled_buy_volume,cum_canceled_buy_amount,cum_canceled_sell_orders,cum_canceled_sell_volume,cum_canceled_sell_amount,has_missing1,group
3135,1600000,20160104,133516000000,1451885716000000,2016-01-04 13:35:16,3136,1,22358,42240610,754425800.0,18.27,18.28,18.28,17.55,17.73,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,3116,5588,0,0,0.0,0,0,0.0,1,13351


In [27]:
SH[(SH['skey'] == 1600000) & (SH["time"] >= 131317000000)].head(3)

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,has_missing,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,high,low,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,bid1Top1q,bid1Top2q,bid1Top3q,bid1Top4q,bid1Top5q,bid1Top6q,bid1Top7q,bid1Top8q,bid1Top9q,bid1Top10q,bid1Top11q,bid1Top12q,bid1Top13q,bid1Top14q,bid1Top15q,bid1Top16q,bid1Top17q,bid1Top18q,bid1Top19q,bid1Top20q,bid1Top21q,bid1Top22q,bid1Top23q,bid1Top24q,bid1Top25q,bid1Top26q,bid1Top27q,bid1Top28q,bid1Top29q,bid1Top30q,bid1Top31q,bid1Top32q,bid1Top33q,bid1Top34q,bid1Top35q,bid1Top36q,bid1Top37q,bid1Top38q,bid1Top39q,bid1Top40q,bid1Top41q,bid1Top42q,bid1Top43q,bid1Top44q,bid1Top45q,bid1Top46q,bid1Top47q,bid1Top48q,bid1Top49q,bid1Top50q,ask1Top1q,ask1Top2q,ask1Top3q,ask1Top4q,ask1Top5q,ask1Top6q,ask1Top7q,ask1Top8q,ask1Top9q,ask1Top10q,ask1Top11q,ask1Top12q,ask1Top13q,ask1Top14q,ask1Top15q,ask1Top16q,ask1Top17q,ask1Top18q,ask1Top19q,ask1Top20q,ask1Top21q,ask1Top22q,ask1Top23q,ask1Top24q,ask1Top25q,ask1Top26q,ask1Top27q,ask1Top28q,ask1Top29q,ask1Top30q,ask1Top31q,ask1Top32q,ask1Top33q,ask1Top34q,ask1Top35q,ask1Top36q,ask1Top37q,ask1Top38q,ask1Top39q,ask1Top40q,ask1Top41q,ask1Top42q,ask1Top43q,ask1Top44q,ask1Top45q,ask1Top46q,ask1Top47q,ask1Top48q,ask1Top49q,ask1Top50q,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels,bid_trade_max_duration,ask_trade_max_duration,cum_canceled_buy_orders,cum_canceled_buy_volume,cum_canceled_buy_amount,cum_canceled_sell_orders,cum_canceled_sell_volume,cum_canceled_sell_amount,has_missing1,group
2987,1600000,20160104,131317000000,1451884397000000,2016-01-04 13:13:17,2988,0,20740,37500614,669914508.4,18.27,18.28,18.28,17.55,17.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,2633,5588,0,0,0.0,0,0,0.0,0,13131
2988,1600000,20160104,131416000000,1451884456000000,2016-01-04 13:14:16,2989,0,20740,37500614,669914508.4,18.27,18.28,18.28,17.55,17.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,2633,5588,0,0,0.0,0,0,0.0,0,13141
2989,1600000,20160104,131516000000,1451884516000000,2016-01-04 13:15:16,2990,0,20740,37500614,669914508.4,18.27,18.28,18.28,17.55,17.9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,0,2633,5588,0,0,0.0,0,0,0.0,0,13151


In [19]:
df1 = pd.read_csv(r'A:\rawData\logs_20200102_zs_92_01_day_data\mdLog_SZ_20200102_0836.csv')
df2 = pd.read_csv(r'A:\rawData\logs_20200102_zs_92_01_day_data\mdLog_SH_20200102_0836.csv')

### df2[df2['StockID'] < 600000]['source'].unique()

In [25]:
df2[df2['StockID'] < 600000]['StockID'].unique()

array([ 16, 300, 852, 905], dtype=int64)

In [37]:
set(df1['ID'].unique()) - set(db[db['date'] == '2020-01-02']['ID'].unique())

{'SZ000029', 'SZ002115', 'SZ002450', 'SZ002552', 'SZ300269', 'SZ300391'}

In [38]:
set(db[db['date'] == '2020-01-02']['ID'].unique()) - set(df1['ID'].unique())

{'SZ002972', 'SZ300811'}

In [28]:
startTm = datetime.datetime.now()
readPath = r'\\192.168.10.30\Kevin_zhenyu\day_stock\***'
dataPathLs = np.array(glob.glob(readPath))
dataPathLs = dataPathLs[[np.array([os.path.basename(i).split('.')[0][:2] == 'SZ' for i in dataPathLs])]]
db = pd.DataFrame()
for p in dataPathLs:
    dayData = pd.read_csv(p, compression='gzip')
    db = pd.concat([db, dayData])
print(datetime.datetime.now() - startTm)

  after removing the cwd from sys.path.


0:03:47.870446


In [51]:
for i in range(3, 4):
    print(i)

3


In [59]:
x['kk'] = range(1,4)
x['pp'] = [2, np.nan, 4]
x.sort_values(by='pp')

Unnamed: 0,0,kk,pp
0,1,1,2.0
2,3,3,4.0
1,2,2,
