In [135]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    """Build a MongoDB URI from the credentials and return a ``DBObj`` for it.

    Args:
        host: Host (optionally ``host:port``) placed after the ``@`` in the URI.
        db_name: Database the returned ``DBObj`` operates on.
        user: User name. ``'admin'`` and ``'root'`` authenticate against the
            ``admin`` database; every other user authenticates against
            ``db_name``.
        passwd: Password for ``user``.

    Returns:
        A ``DBObj`` constructed with the URI and ``db_name``.
    """
    # Local import keeps this definition self-contained.
    from urllib.parse import quote_plus

    auth_db = 'admin' if user in ('admin', 'root') else db_name
    # Reserved characters such as '@', ':' or '/' inside the user name or the
    # password would otherwise corrupt the URI, so percent-escape both.
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (
        quote_plus(str(user)), quote_plus(str(passwd)), host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    """Store pandas DataFrames in MongoDB as compressed, pickled segments.

    Every document written by this class carries the fields ``ver``
    (serialization version), ``data`` (compressed pickle of a DataFrame
    slice), ``date`` (string), ``symbol`` (int) and ``start`` (row offset of
    the slice inside its (date, symbol) group).
    """

    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        """Create the MongoDB client for ``uri`` and select ``db_name``.

        Args:
            uri: MongoDB connection URI.
            symbol_column: DataFrame column whose value becomes the ``symbol``
                field of the stored documents.
            db_name: Name of the database to operate on.
        """
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000  # maximum number of rows per stored segment
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        """Split ``mongodb://user:password@example.com`` into its parts.

        Returns:
            The list obtained by removing the scheme and surrounding slashes
            and splitting on ``:`` and ``@``, e.g.
            ``['user', 'password', 'example.com']``.
        """
        stripped = uri.strip().replace('mongodb://', '').strip('/')
        return stripped.replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        """Drop the collection ``table_name``."""
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        """Rename the collection ``old_table`` to ``new_table``."""
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        """Replace the stored data of every (date, symbol) group found in ``df``.

        For each group the existing documents with that ``date``/``symbol``
        are deleted and the group is re-inserted in chunks. An empty ``df`` is
        a no-op.

        Args:
            table_name: Target collection.
            df: DataFrame containing the date column and the symbol column.

        Raises:
            Exception: If ``df`` has no date column.
        """
        if len(df) == 0:
            return
        if self.date_column not in df.columns:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        # One code path for single- and multi-date frames: keys are always
        # stored as ``str`` date / ``int`` symbol, which is exactly what
        # build_query() produces when the data is read back.
        for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
            date = str(date)
            symbol = int(symbol)
            collection.delete_many({'date': date, 'symbol': symbol})
            self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        """Insert ``df`` as consecutive segments of at most ``chunk_size`` rows."""
        version = 1
        total = len(df)
        for start in range(0, total, self.chunk_size):
            end = min(start + self.chunk_size, total)
            df_seg = df[start:end]  # positional row slice
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        """Build the MongoDB filter for a date range and/or symbol selection.

        Args:
            start_date: Inclusive lower bound. Accepts an 8-character string,
                an integer (including numpy integers) or a
                ``datetime.date``/``datetime.datetime``.
            end_date: Inclusive upper bound, same accepted types.
            symbol: A single symbol or a list/tuple/array of symbols; every
                value is converted with ``int``. Falsy values (``None``, ``0``,
                empty sequences) add no symbol filter.

        Returns:
            A dict usable as a pymongo filter; empty when nothing was given.

        Raises:
            Exception: On a date string that is not 8 characters long or on an
                unsupported date type.
        """
        import numbers  # local import: only this method needs it

        def parse_date(x):
            if isinstance(x, str):
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return str(x)
            if isinstance(x, datetime.date):  # also covers datetime.datetime
                return x.strftime("%Y%m%d")
            if isinstance(x, numbers.Integral) and not isinstance(x, bool):
                return parse_date(str(int(x)))
            raise Exception("invalid `date` type: " + str(type(x)))

        query = {}

        date_filter = {}
        if start_date is not None:
            date_filter['$gte'] = parse_date(start_date)
        if end_date is not None:
            date_filter['$lte'] = parse_date(end_date)
        if date_filter:
            query['date'] = date_filter

        # numpy arrays/scalars and pandas Series expose ``tolist``; converting
        # first avoids the ambiguous truth value of an array in the test below.
        if hasattr(symbol, 'tolist'):
            symbol = symbol.tolist()

        if symbol:
            if isinstance(symbol, (list, tuple)):
                query['symbol'] = {'$in': [int(x) for x in symbol]}
            else:
                query['symbol'] = int(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        """Delete the documents matching the given date range / symbol.

        Refuses (prints a message and returns ``None``) when no filter is
        given, so the whole table cannot be deleted through this method.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        """Load and concatenate the segments matching the given filter.

        Segments are ordered by (symbol, date, start) before concatenation.

        Returns:
            The concatenated DataFrame with a fresh index, or ``None`` when no
            filter was given (a message is printed) or nothing matched.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for doc in collection.find(query):
            doc['data'] = self.deser(doc['data'], doc['ver'])
            segs.append(doc)
        if not segs:
            return None
        segs.sort(key=lambda doc: (doc['symbol'], doc['date'], doc['start']))
        return pd.concat([doc['data'] for doc in segs], ignore_index=True)

    def list_tables(self):
        """Return the names of the collections in the database."""
        try:
            return self.db.list_collection_names()
        except TypeError:
            # On a PyMongo release without list_collection_names() the
            # attribute lookup yields a Collection, and calling it raises
            # TypeError; fall back to the legacy method there.
            return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        """Return the sorted distinct ``date`` values stored for the filter.

        Missing bounds default to ``'00000000'`` / ``'99999999'``.
        """
        collection = self.db[table_name]
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        query = self.build_query(start_date, end_date, symbol)
        dates = {doc['date'] for doc in collection.find(query, {"date": 1, '_id': 0})}
        return sorted(dates)

    def ser(self, s, version):
        """Pickle ``s`` and compress it: version 1 uses gzip, version 2 lzma.

        Raises:
            Exception: On an unknown ``version``.
        """
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        """Inverse of ``ser``: decompress ``s`` and unpickle the payload.

        Raises:
            Exception: On an unknown ``version``.
        """
        # NOTE(review): pickle.loads executes arbitrary code embedded in the
        # payload; only use this against databases whose writers are trusted.
        if version == 1:
            return pickle.loads(gzip.decompress(s))
        elif version == 2:
            return pickle.loads(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    """Register a stand-in ``pandas.core.internals.managers`` on old pandas.

    When the installed pandas is older than 0.24 and that module is not
    already in ``sys.modules``, a module exposing ``BlockManager`` is
    registered under that name (presumably so pickles that reference
    ``pandas.core.internals.managers.BlockManager`` can be loaded -- confirm
    against the data being read). Does nothing on pandas 0.24 or newer.
    """
    import re

    # Compare numerically: as plain strings '0.9' sorts after '0.24', which
    # would skip the patch on some old releases.
    parsed = re.match(r'(\d+)\.(\d+)', pd.__version__)
    if parsed is None:
        return  # unrecognized version string: leave pandas untouched
    if (int(parsed.group(1)), int(parsed.group(2))) >= (0, 24):
        return

    import sys
    from types import ModuleType
    from pandas.core.internals import BlockManager

    pkg_name = 'pandas.core.internals.managers'
    if pkg_name not in sys.modules:
        module = ModuleType(pkg_name)
        module.BlockManager = BlockManager
        sys.modules[pkg_name] = module


patch_pandas_pickle()

def dailyDB(host, db_name, user, passwd):
    """Connect to MongoDB and return the database object named ``db_name``.

    Args:
        host: Host (optionally ``host:port``) placed after the ``@`` in the URI.
        db_name: Database to return.
        user: User name. ``'admin'`` and ``'root'`` authenticate against the
            ``admin`` database; every other user authenticates against
            ``db_name``.
        passwd: Password for ``user``.

    Returns:
        ``client[db_name]`` of a ``pymongo.MongoClient`` created with
        ``maxPoolSize=None``.
    """
    # Local import keeps this definition self-contained.
    from urllib.parse import quote_plus

    auth_db = 'admin' if user in ('admin', 'root') else db_name
    # Reserved characters such as '@', ':' or '/' inside the user name or the
    # password would otherwise corrupt the URI, so percent-escape both.
    url = 'mongodb://%s:%s@%s/?authSource=%s' % (
        quote_plus(str(user)), quote_plus(str(passwd)), host, auth_db)
    client = pymongo.MongoClient(url, maxPoolSize=None)
    return client[db_name]

def read_stock_daily(db, name, start_date=None, end_date=None, skey=None, index_name=None, interval=None, col=None, return_sdi=True):
    """Query collection ``name`` of ``db`` and return the rows as a DataFrame.

    Args:
        db: Object indexable by collection name; the collection must provide
            ``find(query, projection)``.
        name: Collection name.
        start_date: Inclusive lower bound for the ``date`` field, or None.
        end_date: Inclusive upper bound for the ``date`` field, or None.
        skey: Values for an ``$in`` filter on ``skey``, or None.
        index_name: Values for an ``$in`` filter on ``index_name``, or None.
        interval: Accepted for signature compatibility; not used here.
        col: Column name or sequence of column names to project; None returns
            every field except ``_id``.
        return_sdi: When ``col`` is given, also project ``skey`` and ``date``.

    Returns:
        The matching rows sorted by ``date`` then ``skey`` (restricted to
        whichever of those columns are present); an empty DataFrame when
        nothing matches.
    """
    collection = db[name]

    # Projection: always hide Mongo's _id, optionally restrict the columns.
    projection = {'_id': 0}
    if col is not None:
        wanted = [col] if isinstance(col, str) else list(col)
        if return_sdi:
            wanted = ['skey', 'date'] + wanted
        for col_name in wanted:
            projection[col_name] = 1

    # Filter
    query = {}
    if skey is not None:
        query['skey'] = {'$in': skey}
    if index_name is not None:
        query['index_name'] = {'$in': index_name}
    date_range = {}
    if start_date is not None:
        date_range['$gte'] = start_date
    if end_date is not None:
        date_range['$lte'] = end_date
    if date_range:
        query['date'] = date_range

    # Load
    df = pd.DataFrame.from_records(collection.find(query, projection))
    if df.empty:
        return pd.DataFrame()

    # With ``return_sdi=False`` the sort keys may have been projected away;
    # sorting by an absent column would raise KeyError.
    sort_keys = [key for key in ('date', 'skey') if key in df.columns]
    if sort_keys:
        df = df.sort_values(by=sort_keys)
    return df

def read_memb_daily(db, name, start_date=None, end_date=None, skey=None, index_id=None, interval=None, col=None, return_sdi=True):
    """Query collection ``name`` of ``db`` and return the rows as a DataFrame.

    Args:
        db: Object indexable by collection name; the collection must provide
            ``find(query, projection)``.
        name: Collection name.
        start_date: Inclusive lower bound for the ``date`` field, or None.
        end_date: Inclusive upper bound for the ``date`` field, or None.
        skey: Values for an ``$in`` filter on ``skey``, or None.
        index_id: Values for an ``$in`` filter on ``index_id``, or None.
        interval: Values for an ``$in`` filter on ``interval``, or None.
        col: Column name or sequence of column names to project; None returns
            every field except ``_id``.
        return_sdi: When ``col`` is given, also project ``skey``, ``date`` and
            ``index_id``.

    Returns:
        The matching rows sorted by ``date``, ``index_id`` then ``skey``
        (restricted to whichever of those columns are present); an empty
        DataFrame when nothing matches.
    """
    collection = db[name]

    # Projection: always hide Mongo's _id, optionally restrict the columns.
    projection = {'_id': 0}
    if col is not None:
        wanted = [col] if isinstance(col, str) else list(col)
        if return_sdi:
            wanted = ['skey', 'date', 'index_id'] + wanted
        for col_name in wanted:
            projection[col_name] = 1

    # Filter
    query = {}
    if skey is not None:
        query['skey'] = {'$in': skey}
    if index_id is not None:
        query['index_id'] = {'$in': index_id}
    if interval is not None:
        query['interval'] = {'$in': interval}
    date_range = {}
    if start_date is not None:
        date_range['$gte'] = start_date
    if end_date is not None:
        date_range['$lte'] = end_date
    if date_range:
        query['date'] = date_range

    # Load
    df = pd.DataFrame.from_records(collection.find(query, projection))
    if df.empty:
        return pd.DataFrame()

    # With ``return_sdi=False`` the sort keys may have been projected away;
    # sorting by an absent column would raise KeyError.
    sort_keys = [key for key in ('date', 'index_id', 'skey') if key in df.columns]
    if sort_keys:
        df = df.sort_values(by=sort_keys)
    return df



import pandas as pd
import random
import numpy as np
import glob
import pickle
import os
import datetime
import time
# Use the fully qualified option name: the short pattern "max_columns" can
# match more than one option on newer pandas releases, which makes
# set_option raise.
pd.set_option("display.max_columns", 200)

year = "2020"
startDate = '20200106'
endDate = '20200214'
database_name = 'com_md_eq_cn'
# NOTE(review): credentials are committed in plain text. They can now be
# overridden through the environment; rotate them and drop the fallbacks.
user = os.environ.get("MD_DB_USER", "zhenyuy")
password = os.environ.get("MD_DB_PASSWORD", "bnONBrzSMGoE")

startTm = datetime.datetime.now()
db1 = DB("192.168.10.178", database_name, user, password)
db2 = dailyDB("192.168.10.178", database_name, user, password)
# (date, secid) pairs for which no 'md_snapshot_mbd' data was found although
# the date is not the listing date recorded in `ss`.
save = {}
save['date'] = []
save['secid'] = []
# The dates to process are the dates present in the order log of skey 2000001.
mdOrderLog = db1.read('md_order', start_date=startDate, end_date=endDate, symbol=[2000001])
datelist = mdOrderLog['date'].unique()
ss = pd.read_csv('/mnt/ShareWithServer/result/shangshi.csv')
# Codes ending in 'SZ' map to 2000000 + code, all others to 1000000 + code.
ss['skey'] = np.where(ss['证券代码'].str[-2:] == 'SZ', ss['证券代码'].str[:6].astype(int) + 2000000, ss['证券代码'].str[:6].astype(int) + 1000000)
# Characters 0-3, 5-6 and 8-9 of the listing-date string -> integer YYYYMMDD.
ss['date'] = (ss['上市日期'].str[:4] + ss['上市日期'].str[5:7] + ss['上市日期'].str[8:10]).astype(int)
print(datetime.datetime.now() - startTm)

startTm = datetime.datetime.now()

# Loop-invariant inputs, computed once instead of once per date.
memb_skeys = read_memb_daily(db2, 'index_memb', index_id=[1000852], start_date=20170901, end_date=20201203)['skey'].unique()
memb_skeys = memb_skeys[memb_skeys > 2000000]  # keep only skeys above 2000000
# Columns an L2 row and an mbd row must agree on to be considered the same
# snapshot.
cols = ['skey', 'date', 'cum_volume', 'prev_close', 'open', 'close', 'cum_trades_cnt', 'bid10p', 'bid9p',
        'bid8p', 'bid7p', 'bid6p', 'bid5p', 'bid4p', 'bid3p', 'bid2p', 'bid1p', 'ask1p', 'ask2p',
        'ask3p', 'ask4p', 'ask5p', 'ask6p', 'ask7p', 'ask8p', 'ask9p', 'ask10p', 'bid10q', 'bid9q',
        'bid8q', 'bid7q', 'bid6q', 'bid5q', 'bid4q', 'bid3q', 'bid2q', 'bid1q', 'ask1q', 'ask2q', 'ask3q',
        'ask4q', 'ask5q', 'ask6q', 'ask7q', 'ask8q', 'ask9q', 'ask10q', 'bid10n', 'bid9n', 'bid8n',
        'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 'ask1n', 'ask2n', 'ask3n',
        'ask4n', 'ask5n', 'ask6n', 'ask7n', 'ask8n', 'ask9n', 'ask10n', 'total_bid_quantity', 'total_ask_quantity']
report_cols = ['skey', 'date', 'cum_volume', 'close', 'bid1p', 'bid2p', 'bid1q', 'bid2q', 'ask1p', 'ask2p', 'ask1q', 'ask2q']

for d in datelist:
    print(d)
    data1 = db1.read('md_snapshot_l2', start_date=str(d), end_date=str(d), symbol=list(memb_skeys))
    sl1 = data1['skey'].unique()
    op = read_stock_daily(db2, 'mdbar1d_tr', start_date=int(d), end_date=int(d))
    for s in sl1:
        mbd = db1.read('md_snapshot_mbd', start_date=str(d), end_date=str(d), symbol=s)
        if mbd is None:
            # Missing mbd data is only recorded when `d` is not the date
            # stored for this skey in `ss`.
            if ss[ss['skey'] == s]['date'].iloc[0] == d:
                continue
            save['date'].append(d)
            save['secid'].append(s)
            print(s)
            continue
        if mbd.shape[1] != 83:
            # Report only; processing of this symbol continues.
            print('mdb data column unupdated')
            print(s)
        op1 = op[op['skey'] == s]['open'].iloc[0]
        l2 = data1[data1['skey'] == s]
        # Explicit raise instead of `assert`, so the check survives `python -O`.
        if not (mbd[mbd['cum_volume'] > 0]['open'].iloc[0] == op1):
            raise AssertionError('mbd open differs from daily open for skey %s on %s' % (s, d))
#         try:
#             assert(mbd['open'].iloc[0] == op1)
#         except:
#             print(s)
#             print(mbd['open'].iloc[0])
#             print(op1)
#             assert((len(str(op1).split('.')[1]) <= 2) & (op1 == l2[l2['open'] != 0]['open'].iloc[0]))
#             mbd['open'] = op1
#             db1.write('md_snapshot_mbd', mbd)
        mbd = mbd.drop_duplicates(cols, keep='first')
        mbd = mbd[cols+['ApplSeqNum']]
        if l2.shape[1] == 192:
            # presumably drops an ApplSeqNum column left by an earlier run of
            # this cell -- TODO confirm
            l2 = l2[l2.columns[:-1]]
        # Attach the ApplSeqNum of the first mbd row matching each L2 row.
        rl2 = pd.merge(l2, mbd, on=cols, how='left')
        unmatched = rl2[(rl2['ApplSeqNum'].isnull()) & (rl2['cum_volume'] > 0) & (rl2['time'] <= 145655000000)]
        if unmatched.shape[0] != 0:
            # Report only; unmatched rows are written with ApplSeqNum = -1.
            print(unmatched[report_cols])
        rl2.loc[rl2['ApplSeqNum'].isnull(), 'ApplSeqNum'] = -1
        rl2['ApplSeqNum'] = rl2['ApplSeqNum'].astype('int32')
        # The left merge must not change the row count; never write otherwise.
        if rl2.shape[0] != l2.shape[0]:
            raise AssertionError('merge changed the row count for skey %s on %s' % (s, d))
        db1.write('md_snapshot_l2', rl2)
print(datetime.datetime.now() - startTm)

0:00:01.268798
20200114
         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
4501  2002168  20200114    20654120   8.76   8.76   8.75  13200  10988   8.77   

      ask2p  ask1q  ask2q  
4501   8.78  10500  42700  
        skey      date  cum_volume  close  bid1p  bid2p  bid1q    bid2q  \
338  2002501  20200114    15896227   2.04   2.05   2.04   6200  1491398   

     ask1p  ask2p  ask1q  ask2q  
338    0.0    0.0      0      0  
         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
2282  2300204  20200114     4229268   13.0  13.01   13.0  52300  36200    0.0   

      ask2p  ask1q  ask2q  
2282    0.0      0      0  
20200115
20200116
         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
3100  2002175  20200116    16610800   2.13    0.0    0.0      0      0   2.13   

      ask2p  ask1q   ask2q  
3100   2.14  12080  249300  
         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \

         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
541   2002557  20200203     1374500  32.31    0.0    0.0      0      0  32.31   
795   2002557  20200203     1936200  32.33    0.0    0.0      0      0  32.32   
857   2002557  20200203     2023100  32.31    0.0    0.0      0      0  32.31   
1231  2002557  20200203     2522700  32.31    0.0    0.0      0      0  32.31   

      ask2p  ask1q  ask2q  
541   32.33    100    400  
795   32.33    800    465  
857   32.32   1400    800  
1231  32.33   1000   1000  
         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
1598  2002616  20200203     6066400   7.58    0.0    0.0      0      0   7.57   
1693  2002616  20200203     6158200   7.58    0.0    0.0      0      0   7.57   
1695  2002616  20200203     6159000   7.58    0.0    0.0      0      0   7.57   
1744  2002616  20200203     6238800   7.57    0.0    0.0      0      0   7.57   
1903  2002616  20200203     6384000   7.57    0.0

        skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
672  2300676  20200203    16571228  87.45  87.45  87.44   1000  28141    0.0   

     ask2p  ask1q  ask2q  
672    0.0      0      0  
         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
1917  2300702  20200203     2011476  50.96    0.0    0.0      0      0  50.96   
2151  2300702  20200203     2149976  50.96    0.0    0.0      0      0  50.96   
2338  2300702  20200203     2369476  50.97    0.0    0.0      0      0  50.96   

      ask2p  ask1q  ask2q  
1917  50.97    100  13547  
2151  50.97    200   2600  
2338  50.97    200   1300  
        skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
198  2300747  20200203      561700  91.72    0.0    0.0      0      0  91.72   
221  2300747  20200203      594500  91.71    0.0    0.0      0      0  91.71   
247  2300747  20200203      628300  91.71    0.0    0.0      0      0  91.71   

     ask2p  ask1q  ask2q  


20200214
         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
2594  2002188  20200214     6983900    4.5    0.0    0.0      0      0   4.49   

      ask2p   ask1q  ask2q  
2594    4.5  208200  53700  
2:07:42.847446


In [217]:
# Display the 'mdbar1d_tr' row(s) of skey 2000700 for the single date `d`.
d = 20200221
read_stock_daily(db2, 'mdbar1d_tr', start_date=int(d), end_date=int(d), skey=[2000700])

Unnamed: 0,skey,date,time,name,trade_status,listed_days,open,high,low,close,closeL1,yclose,ztClose,dtClose,dayReturn,volume,amount,buy_volume,sell_volume,TORate,allZT,hasZT,isZT,allDT,hasDT,isDT,isST,VWAP,TWAP,marketValue,marketShares,totalShares,SW1_name,SW1_code,SW2_code,SW3_code,index_name,index_weight,exchange
0,2000700,20200221,180000000000,模塑科技,1,5397,13.86,14.89,13.58,14.1,14.54,14.54,15.99,13.09,-0.030261,193834363,2746598000.0,96645878.0,97188485.0,0.24199,0,0,0,0,0,0,0,14.169821,14.27265,11294140000.0,801002514.0,910973188.0,申万汽车,SW801880,SW801093,SW850921,CSIRest,0.026016,SZSE


In [241]:
# Load one symbol's 'md_snapshot_l2' rows (data1) and 'md_snapshot_mbd' rows
# (data2) for a single date.
d = 20200221
s = 2300223
data1 = db1.read('md_snapshot_l2', start_date=str(d), end_date=str(d), symbol=s)
data2 = db1.read('md_snapshot_mbd', start_date=str(d), end_date=str(d), symbol=s)
# Display the L2 rows with positive cum_volume and time <= 145655000000.
data1[(data1['cum_volume'] > 0) & (data1['time'] <= 145655000000)]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,has_missing,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,high,low,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,bid1Top1q,bid1Top2q,bid1Top3q,bid1Top4q,bid1Top5q,bid1Top6q,bid1Top7q,bid1Top8q,bid1Top9q,bid1Top10q,bid1Top11q,bid1Top12q,bid1Top13q,bid1Top14q,bid1Top15q,bid1Top16q,bid1Top17q,bid1Top18q,bid1Top19q,bid1Top20q,bid1Top21q,bid1Top22q,bid1Top23q,bid1Top24q,bid1Top25q,bid1Top26q,bid1Top27q,bid1Top28q,bid1Top29q,bid1Top30q,bid1Top31q,bid1Top32q,bid1Top33q,bid1Top34q,bid1Top35q,bid1Top36q,bid1Top37q,bid1Top38q,bid1Top39q,bid1Top40q,bid1Top41q,bid1Top42q,bid1Top43q,bid1Top44q,bid1Top45q,bid1Top46q,bid1Top47q,bid1Top48q,bid1Top49q,bid1Top50q,ask1Top1q,ask1Top2q,ask1Top3q,ask1Top4q,ask1Top5q,ask1Top6q,ask1Top7q,ask1Top8q,ask1Top9q,ask1Top10q,ask1Top11q,ask1Top12q,ask1Top13q,ask1Top14q,ask1Top15q,ask1Top16q,ask1Top17q,ask1Top18q,ask1Top19q,ask1Top20q,ask1Top21q,ask1Top22q,ask1Top23q,ask1Top24q,ask1Top25q,ask1Top26q,ask1Top27q,ask1Top28q,ask1Top29q,ask1Top30q,ask1Top31q,ask1Top32q,ask1Top33q,ask1Top34q,ask1Top35q,ask1Top36q,ask1Top37q,ask1Top38q,ask1Top39q,ask1Top40q,ask1Top41q,ask1Top42q,ask1Top43q,ask1Top44q,ask1Top45q,ask1Top46q,ask1Top47q,ask1Top48q,ask1Top49q,ask1Top50q,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels,bid_trade_max_duration,ask_trade_max_duration,cum_canceled_buy_orders,cum_canceled_buy_volume,cum_canceled_buy_amount,cum_canceled_sell_orders,cum_canceled_sell_volume,cum_canceled_sell_amount,ApplSeqNum
101,2300223,20200221,92503000000,1582248303000000,2020-02-21 09:25:03,102,0,226,114376,1.469732e+07,129.23,128.5,128.50,128.5,128.50,128.14,128.20,128.22,128.23,128.41,128.42,128.43,128.45,128.48,128.50,128.63,128.88,128.89,128.93,128.94,128.95,129.00,129.11,129.22,129.23,500,1500,100,700,16400,1000,200,100,600,324,300,300,300,100,800,400,500,400,1200,500,1,1,1,1,28,1,2,1,1,1,1,3,1,1,1,1,3,1,1,3,324,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,300,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,163024,171300,124.69,136.10,0,0,0,0,0,0,0,0,0.0,0,0,0.0,368324
102,2300223,20200221,92603000000,1582248363000000,2020-02-21 09:26:03,103,0,226,114376,1.469732e+07,129.23,128.5,128.50,128.5,128.50,128.14,128.20,128.22,128.23,128.41,128.42,128.43,128.45,128.48,128.50,128.63,128.88,128.89,128.93,128.94,128.95,129.00,129.11,129.22,129.23,500,1500,100,700,16400,1000,200,100,600,324,300,300,300,100,800,400,500,400,1200,500,1,1,1,1,28,1,2,1,1,1,1,3,1,1,1,1,3,1,1,3,324,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,300,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,163024,171300,124.69,136.10,0,0,0,0,0,0,0,0,0.0,0,0,0.0,368324
103,2300223,20200221,92703000000,1582248423000000,2020-02-21 09:27:03,104,0,226,114376,1.469732e+07,129.23,128.5,128.50,128.5,128.50,128.14,128.20,128.22,128.23,128.41,128.42,128.43,128.45,128.48,128.50,128.63,128.88,128.89,128.93,128.94,128.95,129.00,129.11,129.22,129.23,500,1500,100,700,16400,1000,200,100,600,324,300,300,300,100,800,400,500,400,1200,500,1,1,1,1,28,1,2,1,1,1,1,3,1,1,1,1,3,1,1,3,324,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,300,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,163024,171300,124.69,136.10,0,0,0,0,0,0,0,0,0.0,0,0,0.0,368324
104,2300223,20200221,92803000000,1582248483000000,2020-02-21 09:28:03,105,0,226,114376,1.469732e+07,129.23,128.5,128.50,128.5,128.50,128.14,128.20,128.22,128.23,128.41,128.42,128.43,128.45,128.48,128.50,128.63,128.88,128.89,128.93,128.94,128.95,129.00,129.11,129.22,129.23,500,1500,100,700,16400,1000,200,100,600,324,300,300,300,100,800,400,500,400,1200,500,1,1,1,1,28,1,2,1,1,1,1,3,1,1,1,1,3,1,1,3,324,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,300,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,163024,171300,124.69,136.10,0,0,0,0,0,0,0,0,0.0,0,0,0.0,368324
105,2300223,20200221,92903000000,1582248543000000,2020-02-21 09:29:03,106,0,226,114376,1.469732e+07,129.23,128.5,128.50,128.5,128.50,128.14,128.20,128.22,128.23,128.41,128.42,128.43,128.45,128.48,128.50,128.63,128.88,128.89,128.93,128.94,128.95,129.00,129.11,129.22,129.23,500,1500,100,700,16400,1000,200,100,600,324,300,300,300,100,800,400,500,400,1200,500,1,1,1,1,28,1,2,1,1,1,1,3,1,1,1,1,3,1,1,3,324,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,300,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,163024,171300,124.69,136.10,0,0,0,0,0,0,0,0,0.0,0,0,0.0,368324
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4928,2300223,20200221,145642000000,1582268202000000,2020-02-21 14:56:42,4929,0,33188,9990949,1.284319e+09,129.23,128.5,131.46,126.1,128.29,128.01,128.02,128.05,128.10,128.12,128.18,128.19,128.20,128.21,128.23,128.29,128.30,128.40,128.43,128.44,128.46,128.47,128.48,128.49,128.50,200,200,1100,500,1200,2800,900,3400,1500,100,4800,1100,1000,4700,300,1000,1500,1500,4300,5000,1,2,6,5,3,4,3,5,2,1,1,2,1,1,1,1,1,4,2,16,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,465778,731529,125.07,133.97,0,0,0,0,0,0,0,0,0.0,0,0,0.0,24774600
4929,2300223,20200221,145645000000,1582268205000000,2020-02-21 14:56:45,4930,0,33208,10001649,1.285691e+09,129.23,128.5,131.46,126.1,128.30,127.98,128.00,128.01,128.02,128.05,128.10,128.12,128.18,128.19,128.20,128.30,128.33,128.39,128.40,128.43,128.44,128.46,128.47,128.48,128.49,600,5700,200,200,1100,500,1200,2800,900,300,1000,400,100,1000,4700,300,1000,1500,1500,4000,2,12,1,2,6,5,3,4,3,1,3,1,1,1,1,1,1,1,4,1,300,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,800,100,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,457878,726429,125.03,134.01,0,0,0,0,0,0,0,0,0.0,0,0,0.0,24781286
4930,2300223,20200221,145648000000,1582268208000000,2020-02-21 14:56:48,4931,0,33223,10004849,1.286102e+09,129.23,128.5,131.46,126.1,128.25,127.95,127.98,128.00,128.01,128.02,128.05,128.10,128.12,128.18,128.19,128.25,128.30,128.33,128.39,128.40,128.43,128.44,128.46,128.47,128.48,100,600,5600,200,200,1100,500,1200,2800,600,7200,100,500,800,1500,4700,300,1000,1500,1500,1,2,11,1,2,6,5,3,4,3,1,1,2,4,4,1,1,1,1,4,200,200,200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7200,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,457078,733829,125.03,133.95,0,0,0,0,0,0,0,0,0.0,0,0,0.0,24789094
4931,2300223,20200221,145651000000,1582268211000000,2020-02-21 14:56:51,4932,0,33225,10005749,1.286217e+09,129.23,128.5,131.46,126.1,128.21,127.98,128.00,128.01,128.02,128.05,128.10,128.12,128.18,128.19,128.20,128.21,128.23,128.25,128.30,128.33,128.39,128.40,128.41,128.42,128.43,600,5600,200,200,1100,500,1100,2800,600,500,3600,500,6500,1600,500,800,1600,200,100,4700,2,11,1,2,6,5,2,4,3,1,1,2,1,2,2,4,5,1,1,1,500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3600,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,457578,738929,125.03,133.91,0,0,0,0,0,0,0,0,0.0,0,0,0.0,24795128


In [230]:
# Display the L2 rows whose cum_volume equals 6517347.
data1[data1['cum_volume'] == 6517347]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,has_missing,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,high,low,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,bid1Top1q,bid1Top2q,bid1Top3q,bid1Top4q,bid1Top5q,bid1Top6q,bid1Top7q,bid1Top8q,bid1Top9q,bid1Top10q,bid1Top11q,bid1Top12q,bid1Top13q,bid1Top14q,bid1Top15q,bid1Top16q,bid1Top17q,bid1Top18q,bid1Top19q,bid1Top20q,bid1Top21q,bid1Top22q,bid1Top23q,bid1Top24q,bid1Top25q,bid1Top26q,bid1Top27q,bid1Top28q,bid1Top29q,bid1Top30q,bid1Top31q,bid1Top32q,bid1Top33q,bid1Top34q,bid1Top35q,bid1Top36q,bid1Top37q,bid1Top38q,bid1Top39q,bid1Top40q,bid1Top41q,bid1Top42q,bid1Top43q,bid1Top44q,bid1Top45q,bid1Top46q,bid1Top47q,bid1Top48q,bid1Top49q,bid1Top50q,ask1Top1q,ask1Top2q,ask1Top3q,ask1Top4q,ask1Top5q,ask1Top6q,ask1Top7q,ask1Top8q,ask1Top9q,ask1Top10q,ask1Top11q,ask1Top12q,ask1Top13q,ask1Top14q,ask1Top15q,ask1Top16q,ask1Top17q,ask1Top18q,ask1Top19q,ask1Top20q,ask1Top21q,ask1Top22q,ask1Top23q,ask1Top24q,ask1Top25q,ask1Top26q,ask1Top27q,ask1Top28q,ask1Top29q,ask1Top30q,ask1Top31q,ask1Top32q,ask1Top33q,ask1Top34q,ask1Top35q,ask1Top36q,ask1Top37q,ask1Top38q,ask1Top39q,ask1Top40q,ask1Top41q,ask1Top42q,ask1Top43q,ask1Top44q,ask1Top45q,ask1Top46q,ask1Top47q,ask1Top48q,ask1Top49q,ask1Top50q,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels,bid_trade_max_duration,ask_trade_max_duration,cum_canceled_buy_orders,cum_canceled_buy_volume,cum_canceled_buy_amount,cum_canceled_sell_orders,cum_canceled_sell_volume,cum_canceled_sell_amount,ApplSeqNum
3316,2300223,20200221,133606000000,1582263366000000,2020-02-21 13:36:06,3317,0,21435,6517347,841435200.0,129.23,128.5,131.46,127.1,128.0,127.81,127.82,127.85,127.86,127.88,127.89,127.9,127.95,127.98,127.99,128.0,128.2,128.23,128.26,128.27,128.29,128.3,128.31,128.48,128.5,3900,500,100,200,600,400,2400,100,3200,1740,2897,100,100,300,5700,200,11300,1000,2200,2600,6,1,1,1,2,3,6,1,9,8,1,1,1,1,1,1,2,1,2,1,440,100,500,100,300,100,100,100,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2897,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,442081,907676,125.02,135.23,0,0,0,0,0,0,0,0,0.0,0,0,0.0,-1


In [231]:
# Use the fully qualified option name: the short pattern 'max_rows' can match
# more than one option on newer pandas releases, which makes set_option raise.
pd.set_option('display.max_rows', 200)
# Display up to 80 de-duplicated mbd rows with cum_volume 6517347, restricted
# to the comparison columns plus ApplSeqNum.
data2[(data2['cum_volume'] == 6517347)].drop_duplicates(keep='first').head(80)[cols+['ApplSeqNum']]

Unnamed: 0,skey,date,cum_volume,prev_close,open,close,cum_trades_cnt,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,total_bid_quantity,total_ask_quantity,ApplSeqNum
45381,2300223,20200221,6517347,129.23,128.5,128.0,21435,127.81,127.82,127.85,127.86,127.88,127.89,127.9,127.95,127.98,127.99,128.0,128.23,128.26,128.27,128.29,128.3,128.31,128.48,128.5,128.66,3900,500,100,200,600,400,2400,100,3200,1740,2897,100,300,5700,200,11300,1000,2200,2600,100,6,1,1,1,2,3,6,1,9,8,1,1,1,1,1,2,1,2,1,1,441881,907576,17566894
45382,2300223,20200221,6517347,129.23,128.5,128.0,21435,127.81,127.82,127.85,127.86,127.88,127.89,127.9,127.95,127.98,127.99,128.0,128.2,128.23,128.26,128.27,128.29,128.3,128.31,128.48,128.5,3900,500,100,200,600,400,2400,100,3200,1740,2897,100,100,300,5700,200,11300,1000,2200,2600,6,1,1,1,2,3,6,1,9,8,1,1,1,1,1,1,2,1,2,1,441881,907676,17567351
45383,2300223,20200221,6517347,129.23,128.5,128.0,21435,127.81,127.82,127.85,127.86,127.88,127.89,127.9,127.95,127.98,127.99,128.0,128.2,128.23,128.26,128.27,128.29,128.3,128.31,128.48,128.5,3900,500,100,200,600,400,2400,100,3200,1740,2897,100,100,300,5700,200,11300,1000,2200,2600,6,1,1,1,2,3,6,1,9,8,1,1,1,1,1,1,2,1,2,1,442281,907676,17567430


In [237]:
# Combine the order and trade logs of skey 2002747 for 20200120 into a single
# frame ordered by ApplSeqNum, then display the first 20 rows at or after
# time 145649340000.
order = db1.read('md_order', start_date=20200120, end_date=20200120, symbol=2002747)
trade = db1.read('md_trade', start_date=20200120, end_date=20200120, symbol=2002747)
kk = pd.concat([order, trade])
kk = kk.sort_values(by='ApplSeqNum')
kk = kk.reset_index(drop=True)
kk.loc[kk['time'] >= 145649340000].head(20)

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,order_side,order_type,order_price,order_qty,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
25074,2002747,20200120,145649340000,1579503409340000,2020-01-20 14:56:49.340,16324301,1.0,2.0,11.86,100.0,,,,,,
25075,2002747,20200120,145649880000,1579503409880000,2020-01-20 14:56:49.880,16325146,2.0,1.0,1.0,900.0,,,,,,
25076,2002747,20200120,145649880000,1579503409880000,2020-01-20 14:56:49.880,16325147,,,,,1.0,0.0,11.87,400.0,16322823.0,16325146.0
25077,2002747,20200120,145656780000,1579503416780000,2020-01-20 14:56:56.780,16335115,,,,,4.0,0.0,0.0,10000.0,16311173.0,0.0
25078,2002747,20200120,145701010000,1579503421010000,2020-01-20 14:57:01.010,16340173,1.0,2.0,11.87,108200.0,,,,,,
25079,2002747,20200120,145702430000,1579503422430000,2020-01-20 14:57:02.430,16341127,2.0,2.0,11.89,1000.0,,,,,,
25080,2002747,20200120,145705040000,1579503425040000,2020-01-20 14:57:05.040,16343256,2.0,2.0,11.52,100.0,,,,,,
25081,2002747,20200120,145705060000,1579503425060000,2020-01-20 14:57:05.060,16343291,1.0,2.0,12.22,100.0,,,,,,
25082,2002747,20200120,145706090000,1579503426090000,2020-01-20 14:57:06.090,16343976,1.0,2.0,11.87,1000.0,,,,,,
25083,2002747,20200120,145707880000,1579503427880000,2020-01-20 14:57:07.880,16344961,1.0,2.0,11.9,2000.0,,,,,,


In [207]:
# Display the trades whose OfferApplSeqNum equals 22830536.
trade[trade['OfferApplSeqNum'] == 22830536]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
254747,2000700,20200221,143832410000,1582267112410000,2020-02-21 14:38:32.410,22837649,1,0,14.2,2000,22837635,22830536


In [136]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    """Build a ``DBObj`` connected to ``db_name`` on ``host``.

    The authentication database is ``admin`` for the users ``admin`` and
    ``root`` and ``db_name`` for everybody else.

    Args:
        host: MongoDB host (optionally ``host:port``).
        db_name: Name of the database to open.
        user: User name, either raw or already percent-encoded.
        passwd: Password, either raw or already percent-encoded.

    Returns:
        A ``DBObj`` bound to ``db_name``.
    """
    from urllib.parse import quote_plus, unquote_plus

    auth_db = 'admin' if user in ('admin', 'root') else db_name
    # Reserved characters ('@', ':', '/') in the credentials would otherwise
    # corrupt the URI.  Decoding before encoding makes the step idempotent, so
    # credentials that callers already percent-encoded keep working unchanged.
    safe_user = quote_plus(unquote_plus(str(user)))
    safe_passwd = quote_plus(unquote_plus(str(passwd)))
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (safe_user, safe_passwd, host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    """Store pandas DataFrames in MongoDB as compressed, pickled row chunks.

    Every stored document holds one chunk of at most ``chunk_size`` rows that
    belong to a single (date, symbol) pair.  A document has the fields
    ``ver`` (serialisation version), ``data`` (compressed pickle), ``date``
    (string), ``symbol`` (int) and ``start`` (row offset of the chunk inside
    its (date, symbol) group).
    """

    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        """Open a client for ``uri`` and select the database ``db_name``.

        Args:
            uri: MongoDB connection URI.
            symbol_column: DataFrame column that holds the symbol id.
            db_name: Name of the database to use.
        """
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        # Maximum number of DataFrame rows stored in one document.
        self.chunk_size = 20000
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        """Split ``uri`` into tokens, treating ':' and '@' as separators.

        The ``mongodb://`` prefix and surrounding '/' characters are removed
        first, e.g. ``mongodb://user:password@example.com`` becomes
        ``['user', 'password', 'example.com']``.
        """
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        """Drop the collection ``table_name``."""
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        """Rename the collection ``old_table`` to ``new_table``."""
        self.db[old_table].rename(new_table)

    def _iter_partitions(self, df):
        """Yield ``(date, symbol, sub_df)`` for every (date, symbol) group.

        ``date`` is always a ``str`` and ``symbol`` a plain ``int`` so that
        the stored keys have the same types ``build_query`` produces and can
        be encoded as BSON (numpy scalars cannot).
        """
        grouped = df.groupby([self.date_column, self.symbol_column])
        for (date, symbol), sub_df in grouped:
            yield str(date), int(symbol), sub_df

    def write(self, table_name, df):
        """Replace the stored data of every (date, symbol) group in ``df``.

        For each group the existing documents are deleted and the group's
        rows are re-inserted in chunks.  An empty ``df`` is a no-op.

        Args:
            table_name: Target collection.
            df: DataFrame containing the date column and the symbol column.

        Raises:
            Exception: If ``df`` has no date column.
        """
        if len(df) == 0:
            return
        if self.date_column not in df.columns:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        # A single-date frame is just the one-date case of the same grouping,
        # so both cases share one code path and one key normalisation.
        for date, symbol, sub_df in self._iter_partitions(df):
            collection.delete_many({'date': date, 'symbol': symbol})
            self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        """Insert ``df`` into ``collection`` as chunks of ``chunk_size`` rows.

        Each chunk is serialised with version 1 and stored together with
        ``date``, ``symbol`` and the chunk's starting row offset.
        """
        version = 1
        for start in range(0, len(df), self.chunk_size):
            # iloc keeps the slice positional whatever the index type is.
            df_seg = df.iloc[start:start + self.chunk_size]
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        """Build the MongoDB filter for a date range and/or symbol selection.

        Args:
            start_date: Inclusive lower bound; a ``YYYYMMDD`` string, an
                integer (including numpy integers) or a ``date``/``datetime``.
            end_date: Inclusive upper bound, same accepted types.
            symbol: A single symbol (integer or numeric string) or an
                iterable of symbols (list, tuple, set, numpy array, ...).
                ``None`` and empty/zero values add no symbol filter.

        Returns:
            A filter dict; empty when no constraint was given.

        Raises:
            ValueError: If a date string is not 8 digits.
            TypeError: If a date has an unsupported type.
        """
        import numbers

        def parse_date(x):
            if isinstance(x, str):
                if len(x) != 8 or not x.isdigit():
                    raise ValueError("`date` must be YYYYMMDD format")
                return x
            # datetime.datetime (and pandas Timestamp) subclass datetime.date.
            if isinstance(x, datetime.date):
                return x.strftime("%Y%m%d")
            if isinstance(x, numbers.Integral) and not isinstance(x, bool):
                return parse_date(str(int(x)))
            raise TypeError("invalid `date` type: " + str(type(x)))

        query = {}

        date_range = {}
        if start_date is not None:
            date_range['$gte'] = parse_date(start_date)
        if end_date is not None:
            date_range['$lte'] = parse_date(end_date)
        if date_range:
            query['date'] = date_range

        if symbol is not None:
            is_scalar = isinstance(symbol, (str, bytes)) or not hasattr(symbol, '__iter__')
            if is_scalar:
                if symbol:
                    query['symbol'] = int(symbol)
            else:
                # int() turns numpy scalars into BSON-encodable Python ints.
                symbols = [int(x) for x in symbol]
                if symbols:
                    query['symbol'] = {'$in': symbols}

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        """Delete the documents matching the given date range / symbols.

        Refuses to run (prints a message, returns ``None``) when no
        constraint is given, so a whole table is never deleted by accident.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        """Load the rows matching the given date range / symbols.

        Returns:
            One DataFrame with all matching chunks concatenated in
            (symbol, date, start) order and a fresh index, or ``None`` when
            nothing matches or no constraint was given.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = list(collection.find(query))
        if not segs:
            return None
        segs.sort(key=lambda seg: (seg['symbol'], seg['date'], seg['start']))
        frames = [self.deser(seg['data'], seg['ver']) for seg in segs]
        return pd.concat(frames, ignore_index=True)

    def list_tables(self):
        """Return the names of all collections in the database."""
        # Database.collection_names() was removed in PyMongo 4;
        # list_collection_names() is its replacement.
        return self.db.list_collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        """Return the sorted distinct ``date`` values of matching documents."""
        collection = self.db[table_name]
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        query = self.build_query(start_date, end_date, symbol)
        dates = {x['date'] for x in collection.find(query, {"date": 1, '_id': 0})}
        return sorted(dates)

    def ser(self, s, version):
        """Pickle ``s`` and compress it (version 1: gzip, version 2: lzma).

        Raises:
            Exception: If ``version`` is unknown.
        """
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        """Decompress and unpickle a payload produced by ``ser``.

        Raises:
            Exception: If ``version`` is unknown.
        """
        # NOTE(review): pickle.loads executes arbitrary code; only use this
        # against databases whose writers are trusted.
        if version == 1:
            return pickle.loads(gzip.decompress(s))
        elif version == 2:
            return pickle.loads(lzma.decompress(s))
        else:
            raise Exception('unknown version')

def _pandas_version_tuple(version):
    """Return the leading ``(major, minor)`` integers of a version string."""
    numbers_found = []
    for piece in version.split('.')[:2]:
        digits = ''
        for ch in piece:
            if not ch.isdigit():
                break
            digits += ch
        numbers_found.append(int(digits) if digits else 0)
    while len(numbers_found) < 2:
        numbers_found.append(0)
    return tuple(numbers_found)


def patch_pandas_pickle():
    """Let pandas < 0.24 resolve ``pandas.core.internals.managers``.

    On such versions a stand-in module exposing ``BlockManager`` is
    registered in ``sys.modules`` under that name (unless one is already
    present), so pickles that reference ``BlockManager`` through that module
    path can be loaded.  Newer pandas versions are left untouched.
    """
    # Compare numerically: as plain strings '0.9' would sort after '0.24'.
    if _pandas_version_tuple(pd.__version__) < (0, 24):
        import sys
        from types import ModuleType
        from pandas.core.internals import BlockManager
        pkg_name = 'pandas.core.internals.managers'
        if pkg_name not in sys.modules:
            m = ModuleType(pkg_name)
            m.BlockManager = BlockManager
            sys.modules[pkg_name] = m


patch_pandas_pickle()

def dailyDB(host, db_name, user, passwd):
    """Return the raw pymongo database ``db_name`` on ``host``.

    The authentication database is ``admin`` for the users ``admin`` and
    ``root`` and ``db_name`` for everybody else.  The client is created with
    ``maxPoolSize=None``.

    Args:
        host: MongoDB host (optionally ``host:port``).
        db_name: Name of the database to open.
        user: User name, either raw or already percent-encoded.
        passwd: Password, either raw or already percent-encoded.

    Returns:
        The pymongo database object for ``db_name``.
    """
    from urllib.parse import quote_plus, unquote_plus

    auth_db = 'admin' if user in ('admin', 'root') else db_name
    # Reserved characters ('@', ':', '/') in the credentials would otherwise
    # corrupt the URI.  Decoding before encoding makes the step idempotent, so
    # credentials that callers already percent-encoded keep working unchanged.
    safe_user = quote_plus(unquote_plus(str(user)))
    safe_passwd = quote_plus(unquote_plus(str(passwd)))
    url = 'mongodb://%s:%s@%s/?authSource=%s' % (safe_user, safe_passwd, host, auth_db)
    client = pymongo.MongoClient(url, maxPoolSize=None)
    return client[db_name]

def read_stock_daily(db, name, start_date=None, end_date=None, skey=None, index_name=None, interval=None, col=None, return_sdi=True):
    """Read per-stock daily records from collection ``name`` as a DataFrame.

    Args:
        db: Mapping of collection name to a collection object with ``find``.
        name: Collection to read.
        start_date: Inclusive lower bound on ``date`` (optional).
        end_date: Inclusive upper bound on ``date`` (optional).
        skey: Sequence of ``skey`` values to keep (optional).
        index_name: Sequence of ``index_name`` values to keep (optional).
        interval: Accepted for signature compatibility; not used here.
        col: Columns to return; ``None`` returns every field except ``_id``.
        return_sdi: When ``col`` is given, also return ``skey`` and ``date``.

    Returns:
        The matching rows sorted by (date, skey), or an empty DataFrame when
        nothing matches.
    """
    sort_keys = ['date', 'skey']
    collection = db[name]

    # Build projection
    prj = {'_id': 0}
    helper_cols = []
    if col is not None:
        col = list(col)
        if return_sdi:
            col = ['skey', 'date'] + col
        else:
            # The sort keys are still needed to order the rows; fetch them
            # and drop them again before returning.
            helper_cols = [key for key in sort_keys if key not in col]
        for col_name in col + helper_cols:
            prj[col_name] = 1

    # Build query
    query = {}
    if skey is not None:
        query['skey'] = {'$in': skey}
    if index_name is not None:
        query['index_name'] = {'$in': index_name}
    date_range = {}
    if start_date is not None:
        date_range['$gte'] = start_date
    if end_date is not None:
        date_range['$lte'] = end_date
    if date_range:
        query['date'] = date_range

    # Load data
    df = pd.DataFrame.from_records(collection.find(query, prj))
    if df.empty:
        return pd.DataFrame()
    df = df.sort_values(by=sort_keys)
    if helper_cols:
        df = df.drop(columns=helper_cols)
    return df

def read_memb_daily(db, name, start_date=None, end_date=None, skey=None, index_id=None, interval=None, col=None, return_sdi=True):
    """Read index-membership daily records from ``name`` as a DataFrame.

    Args:
        db: Mapping of collection name to a collection object with ``find``.
        name: Collection to read.
        start_date: Inclusive lower bound on ``date`` (optional).
        end_date: Inclusive upper bound on ``date`` (optional).
        skey: Sequence of ``skey`` values to keep (optional).
        index_id: Sequence of ``index_id`` values to keep (optional).
        interval: Sequence of ``interval`` values to keep (optional).
        col: Columns to return; ``None`` returns every field except ``_id``.
        return_sdi: When ``col`` is given, also return ``skey``, ``date``
            and ``index_id``.

    Returns:
        The matching rows sorted by (date, index_id, skey), or an empty
        DataFrame when nothing matches.
    """
    sort_keys = ['date', 'index_id', 'skey']
    collection = db[name]

    # Build projection
    prj = {'_id': 0}
    helper_cols = []
    if col is not None:
        col = list(col)
        if return_sdi:
            col = ['skey', 'date', 'index_id'] + col
        else:
            # The sort keys are still needed to order the rows; fetch them
            # and drop them again before returning.
            helper_cols = [key for key in sort_keys if key not in col]
        for col_name in col + helper_cols:
            prj[col_name] = 1

    # Build query
    query = {}
    if skey is not None:
        query['skey'] = {'$in': skey}
    if index_id is not None:
        query['index_id'] = {'$in': index_id}
    if interval is not None:
        query['interval'] = {'$in': interval}
    date_range = {}
    if start_date is not None:
        date_range['$gte'] = start_date
    if end_date is not None:
        date_range['$lte'] = end_date
    if date_range:
        query['date'] = date_range

    # Load data
    df = pd.DataFrame.from_records(collection.find(query, prj))
    if df.empty:
        return pd.DataFrame()
    df = df.sort_values(by=sort_keys)
    if helper_cols:
        df = df.drop(columns=helper_cols)
    return df



import pandas as pd
import random
import numpy as np
import glob
import pickle
import os
import datetime
import time
# Use the fully qualified option name: the bare "max_columns" pattern is
# ambiguous on pandas versions that also define styler.render.max_columns.
pd.set_option("display.max_columns", 200)

year = "2020"
startDate = '20200215'
endDate = '20200731'
database_name = 'com_md_eq_cn'
# NOTE(review): credentials are hard-coded in the notebook; consider moving
# them to an environment variable or a local config file.
user = "zhenyuy"
password = "bnONBrzSMGoE"

startTm = datetime.datetime.now()
# db1 reads/writes the chunked DataFrame tables, db2 is the raw pymongo
# database used by the read_*_daily helpers.
db1 = DB("192.168.10.178", database_name, user, password)
db2 = dailyDB("192.168.10.178", database_name, user, password)
# (date, secid) pairs for which no md_snapshot_mbd data was found.
save = {}
save['date'] = []
save['secid'] = []
# The dates to process are the dates present in the order log of 2000001.
mdOrderLog = db1.read('md_order', start_date=startDate, end_date=endDate, symbol=[2000001])
datelist = mdOrderLog['date'].unique()
# Listing table; '证券代码' is the security code and '上市日期' the listing date.
ss = pd.read_csv('/mnt/ShareWithServer/result/shangshi.csv')
# Codes ending in 'SZ' map to 2000000 + code, all others to 1000000 + code.
ss['skey'] = np.where(ss['证券代码'].str[-2:] == 'SZ', ss['证券代码'].str[:6].astype(int) + 2000000, ss['证券代码'].str[:6].astype(int) + 1000000)
# Characters 0-3, 5-6 and 8-9 of the listing date form the YYYYMMDD integer.
ss['date'] = (ss['上市日期'].str[:4] + ss['上市日期'].str[5:7] + ss['上市日期'].str[8:10]).astype(int)
print(datetime.datetime.now() - startTm)

startTm = datetime.datetime.now()

# Columns that must agree between an L2 snapshot row and an MBD snapshot row
# for the two to be considered the same book state.
cols = ['skey', 'date', 'cum_volume', 'prev_close', 'open', 'close', 'cum_trades_cnt', 'bid10p', 'bid9p',
        'bid8p', 'bid7p', 'bid6p', 'bid5p', 'bid4p', 'bid3p', 'bid2p', 'bid1p', 'ask1p', 'ask2p',
        'ask3p', 'ask4p', 'ask5p', 'ask6p', 'ask7p', 'ask8p', 'ask9p', 'ask10p', 'bid10q', 'bid9q',
        'bid8q', 'bid7q', 'bid6q', 'bid5q', 'bid4q', 'bid3q', 'bid2q', 'bid1q', 'ask1q', 'ask2q', 'ask3q',
        'ask4q', 'ask5q', 'ask6q', 'ask7q', 'ask8q', 'ask9q', 'ask10q', 'bid10n', 'bid9n', 'bid8n',
        'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 'ask1n', 'ask2n', 'ask3n',
        'ask4n', 'ask5n', 'ask6n', 'ask7n', 'ask8n', 'ask9n', 'ask10n', 'total_bid_quantity', 'total_ask_quantity']
# Columns printed for L2 rows that found no MBD counterpart.
report_cols = ['skey', 'date', 'cum_volume', 'close', 'bid1p', 'bid2p', 'bid1q', 'bid2q', 'ask1p', 'ask2p', 'ask1q', 'ask2q']

# The membership query does not depend on the date, so run it once instead
# of once per date.
index_members = read_memb_daily(db2, 'index_memb', index_id=[1000852], start_date=20170901, end_date=20201203)['skey'].unique()
index_members = index_members[index_members > 2000000]

for d in datelist:
    print(d)
    data1 = db1.read('md_snapshot_l2', start_date=str(d), end_date=str(d), symbol=list(index_members))
    if data1 is None:
        # read() returns None when nothing matches; there is nothing to patch.
        print('no md_snapshot_l2 data')
        continue
    sl1 = data1['skey'].unique()
    op = read_stock_daily(db2, 'mdbar1d_tr', start_date=int(d), end_date=int(d))
    for s in sl1:
        mbd = db1.read('md_snapshot_mbd', start_date=str(d), end_date=str(d), symbol=s)
        if mbd is None:
            # Missing MBD data is only recorded when d is not the symbol's
            # listing date.
            if ss[ss['skey'] == s]['date'].iloc[0] != d:
                save['date'].append(d)
                save['secid'].append(s)
                print(s)
            continue
        if mbd.shape[1] != 83:
            print('mdb data column unupdated')
            print(s)
        op1 = op[op['skey'] == s]['open'].iloc[0]
        l2 = data1[data1['skey'] == s]
        # The first traded MBD row must carry the official daily open.
        assert(mbd[mbd['cum_volume'] > 0]['open'].iloc[0] == op1)
        mbd = mbd.drop_duplicates(cols, keep='first')
        mbd = mbd[cols + ['ApplSeqNum']]
        # 192 columns presumably means ApplSeqNum was already appended by an
        # earlier run; drop that last column before merging again -- confirm.
        if l2.shape[1] == 192:
            l2 = l2[l2.columns[:-1]]
        rl2 = pd.merge(l2, mbd, on=cols, how='left')
        # Report traded L2 rows up to time 145655000000 without an MBD match.
        unmatched = rl2[(rl2['ApplSeqNum'].isnull()) & (rl2['cum_volume'] > 0) & (rl2['time'] <= 145655000000)]
        if unmatched.shape[0] != 0:
            print(unmatched[report_cols])
        # Rows without a match get the sentinel -1.
        rl2.loc[rl2['ApplSeqNum'].isnull(), 'ApplSeqNum'] = -1
        rl2['ApplSeqNum'] = rl2['ApplSeqNum'].astype('int32')
        # The left merge must not have duplicated any L2 row.
        assert(rl2.shape[0] == l2.shape[0])
        db1.write('md_snapshot_l2', rl2)
print(datetime.datetime.now() - startTm)

0:00:11.537069
20200217
         skey      date  cum_volume  close  bid1p  bid2p  bid1q   bid2q  \
2874  2002215  20200217    99889446   7.69    7.7   7.69   1000  268847   

      ask1p  ask2p  ask1q  ask2q  
2874    0.0    0.0      0      0  
         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
4535  2300502  20200217    12900897   65.0  65.01   65.0    500    200    0.0   
4580  2300502  20200217    13008297   65.0  65.01   65.0    600   7500    0.0   

      ask2p  ask1q  ask2q  
4535    0.0      0      0  
4580    0.0      0      0  
         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
4079  2300725  20200217     5308791  84.54  84.59  84.54    100    168    0.0   

      ask2p  ask1q  ask2q  
4079    0.0      0      0  
20200218
         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
3623  2300724  20200218     7816217  78.54  78.54  78.53    100  16500    0.0   
3625  2300724  20200218     7816517  

         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
4682  2300081  20200228    39525184  13.65    0.0    0.0      0      0  13.64   

      ask2p  ask1q  ask2q  
4682  13.65  14100  18301  
         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
4319  2300276  20200228    14798013   8.45    0.0    0.0      0      0   8.44   

      ask2p  ask1q  ask2q  
4319   8.45   1100  71000  
         skey      date  cum_volume   close  bid1p  bid2p  bid1q  bid2q  \
989   2300661  20200228     1174808  321.80    0.0    0.0      0      0   
990   2300661  20200228     1175008  321.80    0.0    0.0      0      0   
1359  2300661  20200228     1278808  321.85    0.0    0.0      0      0   
1360  2300661  20200228     1279308  321.85    0.0    0.0      0      0   
1608  2300661  20200228     1378408  321.81    0.0    0.0      0      0   
1612  2300661  20200228     1380008  321.89    0.0    0.0      0      0   
1616  2300661  20200228     1380308  

        skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
693  2300773  20200413     2294400  73.43    0.0    0.0      0      0  73.42   

     ask2p  ask1q  ask2q  
693  73.43    200  55391  
20200414
         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
1977  2300660  20200414     1227552  15.02  15.02  15.01    100   3400  15.03   

      ask2p  ask1q  ask2q  
1977  15.04   7620   8500  
20200415
         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
3206  2002928  20200415    26558081  13.16  13.17  13.16   4500   4200    0.0   

      ask2p  ask1q  ask2q  
3206    0.0      0      0  
         skey      date  cum_volume  close  bid1p  bid2p  bid1q   bid2q  \
1130  2300463  20200415     9409105  32.01  32.02  32.01    300  273419   
3587  2300463  20200415    14723751  32.00  32.00  31.94   1000     600   
3592  2300463  20200415    14730351  32.02  32.02  32.00    600   26700   

      ask1p  ask2p  ask1q  a

20200529
         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
3963  2002409  20200529    29427150  47.75  47.75  47.74    100  35400    0.0   
3965  2002409  20200529    29429150  47.75  47.75  47.74    100  34600    0.0   
4271  2002409  20200529    30048700  47.75  47.75  47.74    100  15750    0.0   

      ask2p  ask1q  ask2q  
3963    0.0      0      0  
3965    0.0      0      0  
4271    0.0      0      0  
20200601
         skey      date  cum_volume   close   bid1p   bid2p  bid1q  bid2q  \
2889  2300751  20200601     1035757  250.23  250.23  250.22   1000    200   
2970  2300751  20200601     1049857  250.22  250.23  250.22    100    200   

      ask1p  ask2p  ask1q  ask2q  
2889    0.0    0.0      0      0  
2970    0.0    0.0      0      0  
20200602
20200603
        skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
538  2002344  20200603    87088498   4.93   4.94   4.93  21200  13000    0.0   

     ask2p  ask1q  ask2q  
53

         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
4480  2002726  20200713    45549513  11.13  11.14  11.13    100   1858    0.0   
4495  2002726  20200713    45658279  11.12  11.14  11.13   2600   3700    0.0   

      ask2p  ask1q  ask2q  
4480    0.0      0      0  
4495    0.0      0      0  
        skey      date  cum_volume   close   bid1p   bid2p  bid1q  bid2q  \
918  2002791  20200713     2084103  108.17  108.17  108.15    100    100   

     ask1p  ask2p  ask1q  ask2q  
918    0.0    0.0      0      0  
         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
1724  2002960  20200713     8904116  38.81  38.82  38.81    100    511    0.0   
2911  2002960  20200713    10160099  38.80  38.82  38.80    200  15111    0.0   

      ask2p  ask1q  ask2q  
1724    0.0      0      0  
2911    0.0      0      0  
         skey      date  cum_volume  close  bid1p  bid2p  bid1q  bid2q  ask1p  \
1454  2300531  20200713    12012829  23.56

IndexError: single positional indexer is out-of-bounds

In [216]:
# Display the current symbol list.
sl1

array([], dtype=int32)

In [140]:
# Display the symbol the loop was processing last.
s

2300028

In [134]:
import pandas as pd
d1 = pd.read_pickle('/mnt/ShareWithServer/2002192.pkl')
d = 20200113
# d2: L2 snapshots, d3: MBD snapshots of symbol 2002684 on date d.
d2 = db1.read('md_snapshot_l2', start_date=str(d), end_date=str(d), symbol=2002684)
d3 = db1.read('md_snapshot_mbd', start_date=str(d), end_date=str(d), symbol=2002684)
d4 = pd.read_pickle('/mnt/ShareWithServer/2002192_1.pkl')
# Columns that must agree between an L2 row and an MBD row.
cols = ['skey', 'date', 'cum_volume', 'prev_close', 'open', 'close', 'bid10p', 'bid9p', 'cum_trades_cnt',
        'bid8p', 'bid7p', 'bid6p', 'bid5p', 'bid4p', 'bid3p', 'bid2p', 'bid1p', 'ask1p', 'ask2p',
        'ask3p', 'ask4p', 'ask5p', 'ask6p', 'ask7p', 'ask8p', 'ask9p', 'ask10p', 'bid10q', 'bid9q',
        'bid8q', 'bid7q', 'bid6q', 'bid5q', 'bid4q', 'bid3q', 'bid2q', 'bid1q', 'ask1q', 'ask2q', 'ask3q',
        'ask4q', 'ask5q', 'ask6q', 'ask7q', 'ask8q', 'ask9q', 'ask10q', 'bid10n', 'bid9n', 'bid8n',
        'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 'ask1n', 'ask2n', 'ask3n',
        'ask4n', 'ask5n', 'ask6n', 'ask7n', 'ask8n', 'ask9n', 'ask10n', 'total_bid_quantity', 'total_ask_quantity']
# 192 columns presumably means ApplSeqNum was already appended by an earlier
# run; drop that last column before merging again -- confirm.
if d2.shape[1] == 192:
    d2 = d2[d2.columns[:-1]]
d3 = d3.drop_duplicates(cols, keep='first')
# NOTE: `re` shadows the stdlib module name, but later cells read this
# variable, so the name is kept.
re = pd.merge(d2, d3[cols+['ApplSeqNum']], on=cols, how='left')
# Report traded L2 rows up to time 145655000000 without an MBD match.
unmatched = re[(re['ApplSeqNum'].isnull()) & (re['cum_volume'] > 0) & (re['time'] <= 145655000000)]
if unmatched.shape[0] != 0:
    print(unmatched[['skey', 'date', 'cum_volume', 'close', 'bid1p', 'bid2p','bid1q', 'bid2q', 'ask1p', 'ask2p', 'ask1q', 'ask2q']])
# Rows without a match get the sentinel -1.
re.loc[re['ApplSeqNum'].isnull(), 'ApplSeqNum'] = -1
re['ApplSeqNum'] = re['ApplSeqNum'].astype('int32')
# The left merge must not have duplicated any L2 row.
assert(re.shape[0] == d2.shape[0])
db1.write('md_snapshot_l2', re)
print(re['skey'].iloc[0])

2002684


In [None]:
# Display the DataFrame loaded into d1.
d1

In [115]:
# MBD rows at cum_volume 13444438, showing ApplSeqNum, time and the match columns.
d3[(d3['cum_volume'] == 13444438)][['ApplSeqNum', 'time'] + cols]

Unnamed: 0,ApplSeqNum,time,skey,date,cum_volume,prev_close,open,close,bid10p,bid9p,cum_trades_cnt,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,total_bid_quantity,total_ask_quantity
22174,7899128,104505240000,2300397,20200103,13444438,30.35,30.43,33.38,33.28,33.29,13202,33.3,33.31,33.32,33.33,33.35,33.36,33.37,33.38,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,300,200,22000,24500,2200,3035,3000,200,11800,638,0,0,0,0,0,0,0,0,0,0,1,1,8,2,3,5,2,1,9,3,0,0,0,0,0,0,0,0,0,0,889382,0
22175,7899645,104505950000,2300397,20200103,13444438,30.35,30.43,33.39,33.29,33.3,13202,33.31,33.32,33.33,33.35,33.36,33.37,33.38,33.39,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,200,22000,24500,2200,3035,3000,200,11800,638,2100,0,0,0,0,0,0,0,0,0,0,1,8,2,3,5,2,1,9,3,1,0,0,0,0,0,0,0,0,0,0,891482,0


In [118]:
# L2 rows at the same cum_volume, for side-by-side comparison with the MBD rows.
d2[d2['cum_volume'] == 13444438][['time'] + cols]

Unnamed: 0,time,skey,date,cum_volume,prev_close,open,close,bid10p,bid9p,cum_trades_cnt,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,total_bid_quantity,total_ask_quantity
1560,104509000000,2300397,20200103,13444438,30.35,30.43,33.38,33.29,33.3,13202,33.31,33.32,33.33,33.35,33.36,33.37,33.38,33.39,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,200,22000,24500,2200,3035,3000,200,11800,638,2100,0,0,0,0,0,0,0,0,0,0,1,8,2,3,5,2,1,9,3,1,0,0,0,0,0,0,0,0,0,0,891482,0


In [119]:
# NOTE: despite its name, `trade` holds the *order* log (md_order) here.
trade = db1.read('md_order', 20200103, 20200103, symbol=2300397)
# First 12 orders with ApplSeqNum >= 7899128.
trade[trade['ApplSeqNum'] >= 7899128].head(12)

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,order_side,order_type,order_price,order_qty
18308,2300397,20200103,104505240000,1578019505240000,2020-01-03 10:45:05.240,7899128,2,2,33.38,5000
18309,2300397,20200103,104505950000,1578019505950000,2020-01-03 10:45:05.950,7899645,1,2,33.39,2100
18310,2300397,20200103,104507360000,1578019507360000,2020-01-03 10:45:07.360,7901042,2,2,33.38,1200
18311,2300397,20200103,104507510000,1578019507510000,2020-01-03 10:45:07.510,7901213,1,2,33.39,500
18312,2300397,20200103,104508100000,1578019508100000,2020-01-03 10:45:08.100,7901715,2,2,33.38,300
18313,2300397,20200103,104508460000,1578019508460000,2020-01-03 10:45:08.460,7902006,1,2,33.39,2000
18314,2300397,20200103,104509950000,1578019509950000,2020-01-03 10:45:09.950,7903391,1,2,33.39,1000
18315,2300397,20200103,104510050000,1578019510050000,2020-01-03 10:45:10.050,7903509,2,2,33.39,5000
18316,2300397,20200103,104510320000,1578019510320000,2020-01-03 10:45:10.320,7903820,2,2,33.38,200
18317,2300397,20200103,104510770000,1578019510770000,2020-01-03 10:45:10.770,7904199,2,2,33.39,100


In [120]:
# Trade log (md_trade) of symbol 2300397 on 2020-01-03.
trade = db1.read('md_trade', 20200103, 20200103, symbol=2300397)
# First 12 trades with ApplSeqNum >= 7899645.
trade[trade['ApplSeqNum'] >= 7899645].head(12)

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
17317,2300397,20200103,104507360000,1578019507360000,2020-01-03 10:45:07.360,7901043,1,0,33.39,1200,7899645,7901042
17318,2300397,20200103,104508100000,1578019508100000,2020-01-03 10:45:08.100,7901716,1,0,33.39,300,7899645,7901715
17319,2300397,20200103,104510050000,1578019510050000,2020-01-03 10:45:10.050,7903510,1,0,33.39,600,7899645,7903509
17320,2300397,20200103,104510050000,1578019510050000,2020-01-03 10:45:10.050,7903511,1,0,33.39,500,7901213,7903509
17321,2300397,20200103,104510050000,1578019510050000,2020-01-03 10:45:10.050,7903512,1,0,33.39,2000,7902006,7903509
17322,2300397,20200103,104510050000,1578019510050000,2020-01-03 10:45:10.050,7903513,1,0,33.39,1000,7903391,7903509
17323,2300397,20200103,104510320000,1578019510320000,2020-01-03 10:45:10.320,7903821,1,0,33.38,200,7890983,7903820
17324,2300397,20200103,104511630000,1578019511630000,2020-01-03 10:45:11.630,7904880,1,0,33.39,100,7904879,7903509
17325,2300397,20200103,104512160000,1578019512160000,2020-01-03 10:45:12.160,7905399,1,0,33.39,800,7905398,7903509
17326,2300397,20200103,104512160000,1578019512160000,2020-01-03 10:45:12.160,7905400,1,0,33.39,100,7905398,7904199


In [12]:
# Number of rows in d4.
d4.shape[0]

199357

In [34]:
# Join d1 and d4 on every d1 column except the two VWAP columns and
# 'ordering'; those three come out suffixed with _x (d1) and _y (d4).
cols = list(d1.columns)
cols.remove('total_bid_vwap')
cols.remove('total_ask_vwap')
cols.remove('ordering')
# The outer join keeps rows that exist on only one side.
re = pd.merge(d1, d4, on=cols, how='outer')

In [39]:
# Rows with no d4-side VWAP; presumably d1 rows that found no exact match
# in d4 in the outer merge -- confirm `re` is still that merge result.
re[re['total_bid_vwap_y'].isnull()]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering_x,ApplSeqNum,bbo_improve,pass_filter,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,total_bid_quantity,total_ask_quantity,total_bid_vwap_x,total_ask_vwap_x,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels,ordering_y,total_bid_vwap_y,total_ask_vwap_y
4,2002351,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,5.0,269630,1,1,321,348304,7.847312e+06,22.5,22.53,22.54,22.36,22.37,22.39,22.40,22.45,22.48,22.49,22.50,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,15500,9100,68100,23600,700,7600,200,25000,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,23,4,3,1,289700,2122400,21.641909,23.915453,351,1276,110,175,,,
5,2002351,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,6.0,269891,1,0,322,349304,7.869842e+06,22.5,22.53,22.53,22.36,22.37,22.39,22.40,22.45,22.48,22.49,22.50,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,9100,68100,23600,700,7600,200,25000,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,23,4,3,1,288700,2122400,21.638833,23.915453,351,1276,110,175,,,
9,2002351,20200102,93000020000,1577928600020000,2020-01-02 09:30:00.020,10.0,270859,1,0,323,350304,7.892382e+06,22.5,22.53,22.54,22.36,22.37,22.39,22.40,22.45,22.48,22.49,22.50,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,8100,68100,23600,700,7600,200,25000,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,23,4,3,1,288900,2121800,21.639221,23.916214,352,1278,110,175,,,
11,2002351,20200102,93000020000,1577928600020000,2020-01-02 09:30:00.020,12.0,272360,1,0,324,350504,7.896890e+06,22.5,22.53,22.54,22.36,22.37,22.39,22.40,22.45,22.48,22.49,22.50,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,7900,68100,23600,700,7600,200,25100,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,24,4,3,1,288900,2121700,21.639221,23.916281,352,1279,110,175,,,
23,2002351,20200102,93000040000,1577928600040000,2020-01-02 09:30:00.040,24.0,273812,1,0,325,351904,7.928446e+06,22.5,22.53,22.54,22.36,22.37,22.39,22.40,22.45,22.48,22.49,22.50,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,15500,6500,43200,23600,700,7600,200,25100,7800,2100,200,1,1,1,4,3,2,2,39,2,13,4,22,14,2,14,1,24,4,3,1,289900,2071100,21.642294,23.948294,353,1273,110,175,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199324,2002351,20200102,145646910000,1577948206910000,2020-01-02 14:56:46.910,199325.0,16172802,1,0,105334,81823932,1.952135e+09,22.5,22.53,24.75,24.66,24.67,24.68,24.69,24.70,24.71,24.72,24.73,24.74,24.75,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,3500,900,11100,5100,19100,14800,10600,18600,26900,2477600,0,0,0,0,0,0,0,0,0,0,9,3,16,10,50,13,15,22,31,1156,0,0,0,0,0,0,0,0,0,0,4809300,0,23.981446,0.000000,4192,0,335,0,,,
199329,2002351,20200102,145648250000,1577948208250000,2020-01-02 14:56:48.250,199330.0,16174725,1,0,105335,81824032,1.952137e+09,22.5,22.53,24.75,24.66,24.67,24.68,24.69,24.70,24.71,24.72,24.73,24.74,24.75,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,3500,900,11100,5100,19100,14800,10600,18600,26900,2314900,0,0,0,0,0,0,0,0,0,0,9,3,16,10,50,13,15,22,31,1156,0,0,0,0,0,0,0,0,0,0,4646600,0,23.954535,0.000000,4192,0,335,0,,,
199330,2002351,20200102,145648310000,1577948208310000,2020-01-02 14:56:48.310,199331.0,16174824,1,0,105336,81824832,1.952157e+09,22.5,22.53,24.75,24.66,24.67,24.68,24.69,24.70,24.71,24.72,24.73,24.74,24.75,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,3500,900,11100,5100,19100,14800,10600,18600,26900,2314100,0,0,0,0,0,0,0,0,0,0,9,3,16,10,50,13,15,22,31,1156,0,0,0,0,0,0,0,0,0,0,4645800,0,23.954398,0.000000,4192,0,335,0,,,
199331,2002351,20200102,145648320000,1577948208320000,2020-01-02 14:56:48.320,199332.0,16174837,1,0,105337,81826832,1.952206e+09,22.5,22.53,24.75,24.66,24.67,24.68,24.69,24.70,24.71,24.72,24.73,24.74,24.75,0.00,0.00,0.00,0.00,0.00,0.00,0.0,0.00,0.00,0.00,3500,900,11100,5100,19100,14800,10600,18600,26900,2312100,0,0,0,0,0,0,0,0,0,0,9,3,16,10,50,13,15,22,31,1156,0,0,0,0,0,0,0,0,0,0,4643800,0,23.954055,0.000000,4192,0,335,0,,,


In [49]:
# First 20 rows of d1 with ApplSeqNum >= 269291.
d1[d1['ApplSeqNum'] >= 269291].head(20)

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,ApplSeqNum,bbo_improve,pass_filter,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels
3,2002351,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,4,269291,0,-1,319,346004,7795470.12,22.5,22.53,22.53,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,15500,11400,68100,23600,700,7600,200,25000,7800,2100,200,1,1,1,4,3,2,2,39,2,12,5,26,14,2,14,1,23,4,3,1,289700,2124700,21.641909,23.913964,351,1277,110,175
4,2002351,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,5,269630,1,1,321,348304,7847312.12,22.5,22.53,22.54,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,15500,9100,68100,23600,700,7600,200,25000,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,23,4,3,1,289700,2122400,21.641909,23.915453,351,1276,110,175
5,2002351,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,6,269891,1,0,322,349304,7869842.12,22.5,22.53,22.53,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,9100,68100,23600,700,7600,200,25000,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,23,4,3,1,288700,2122400,21.638833,23.915453,351,1276,110,175
6,2002351,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,7,270147,0,-1,322,349304,7869842.12,22.5,22.53,22.53,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,9100,68100,23600,700,7600,200,25000,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,23,4,3,1,288700,2122500,21.638833,23.915447,351,1277,110,175
7,2002351,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,8,270162,0,-1,322,349304,7869842.12,22.5,22.53,22.53,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,9100,68100,23600,700,7600,200,25000,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,23,4,3,1,288900,2122500,21.639221,23.915447,352,1277,110,175
8,2002351,20200102,93000020000,1577928600020000,2020-01-02 09:30:00.020,9,270784,0,-1,322,349304,7869842.12,22.5,22.53,22.53,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,9100,68100,23600,700,7600,200,25000,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,23,4,3,1,288900,2122800,21.639221,23.915565,352,1278,110,175
9,2002351,20200102,93000020000,1577928600020000,2020-01-02 09:30:00.020,10,270859,1,0,323,350304,7892382.12,22.5,22.53,22.54,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,8100,68100,23600,700,7600,200,25000,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,23,4,3,1,288900,2121800,21.639221,23.916214,352,1278,110,175
10,2002351,20200102,93000020000,1577928600020000,2020-01-02 09:30:00.020,11,272342,0,-1,323,350304,7892382.12,22.5,22.53,22.54,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,8100,68100,23600,700,7600,200,25100,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,24,4,3,1,288900,2121900,21.639221,23.916152,352,1279,110,175
11,2002351,20200102,93000020000,1577928600020000,2020-01-02 09:30:00.020,12,272360,1,0,324,350504,7896890.12,22.5,22.53,22.54,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,7900,68100,23600,700,7600,200,25100,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,24,4,3,1,288900,2121700,21.639221,23.916281,352,1279,110,175
12,2002351,20200102,93000040000,1577928600040000,2020-01-02 09:30:00.040,13,273440,0,-1,324,350504,7896890.12,22.5,22.53,22.54,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,7900,68100,23600,700,7600,200,25100,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,24,4,3,1,288900,2121800,21.639221,23.916321,352,1280,110,175


In [48]:
# Display the first 20 rows of d4 whose ApplSeqNum is at least 269291.
d4.loc[d4['ApplSeqNum'].ge(269291)].head(20)

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ordering,ApplSeqNum,bbo_improve,pass_filter,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels
3,2002351,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,4,269291,0,-1,319,346004,7795470.12,22.5,22.53,22.53,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,15500,11400,68100,23600,700,7600,200,25000,7800,2100,200,1,1,1,4,3,2,2,39,2,12,5,26,14,2,14,1,23,4,3,1,289700,2124700,21.641909,23.913964,351,1277,110,175
4,2002351,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,5,269630,1,1,322,348304,7847312.12,22.5,22.53,22.54,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,15500,9100,68100,23600,700,7600,200,25000,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,23,4,3,1,289700,2122400,21.641909,23.915453,351,1276,110,175
5,2002351,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,6,269891,1,0,323,349304,7869842.12,22.5,22.53,22.53,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,9100,68100,23600,700,7600,200,25000,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,23,4,3,1,288700,2122400,21.638833,23.915453,351,1276,110,175
6,2002351,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,7,270147,0,-1,322,349304,7869842.12,22.5,22.53,22.53,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,9100,68100,23600,700,7600,200,25000,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,23,4,3,1,288700,2122500,21.638833,23.915447,351,1277,110,175
7,2002351,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,8,270162,0,-1,322,349304,7869842.12,22.5,22.53,22.53,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,9100,68100,23600,700,7600,200,25000,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,23,4,3,1,288900,2122500,21.639221,23.915447,352,1277,110,175
8,2002351,20200102,93000020000,1577928600020000,2020-01-02 09:30:00.020,9,270784,0,-1,322,349304,7869842.12,22.5,22.53,22.53,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,9100,68100,23600,700,7600,200,25000,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,23,4,3,1,288900,2122800,21.639221,23.915565,352,1278,110,175
9,2002351,20200102,93000020000,1577928600020000,2020-01-02 09:30:00.020,10,270859,1,0,324,350304,7892382.12,22.5,22.53,22.54,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,8100,68100,23600,700,7600,200,25000,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,23,4,3,1,288900,2121800,21.639221,23.916214,352,1278,110,175
10,2002351,20200102,93000020000,1577928600020000,2020-01-02 09:30:00.020,11,272342,0,-1,323,350304,7892382.12,22.5,22.53,22.54,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,8100,68100,23600,700,7600,200,25100,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,24,4,3,1,288900,2121900,21.639221,23.916152,352,1279,110,175
11,2002351,20200102,93000020000,1577928600020000,2020-01-02 09:30:00.020,12,272360,1,0,325,350504,7896890.12,22.5,22.53,22.54,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,7900,68100,23600,700,7600,200,25100,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,24,4,3,1,288900,2121700,21.639221,23.916281,352,1279,110,175
12,2002351,20200102,93000040000,1577928600040000,2020-01-02 09:30:00.040,13,273440,0,-1,324,350504,7896890.12,22.5,22.53,22.54,22.36,22.37,22.39,22.4,22.45,22.48,22.49,22.5,22.51,22.53,22.54,22.55,22.56,22.57,22.58,22.59,22.6,22.61,22.62,22.64,2000,1200,200,800,3700,200,700,13500,9700,14500,7900,68100,23600,700,7600,200,25100,7800,2100,200,1,1,1,4,3,2,2,39,2,12,4,26,14,2,14,1,24,4,3,1,288900,2121800,21.639221,23.916321,352,1280,110,175


In [53]:
# Read the 'md_order' table for symbol 2002351 (both positional arguments are 20200102).
order = db1.read('md_order', 20200102, 20200102, symbol=2002351)
# Display the orders whose ApplSeqNum lies in the half-open window (269291, 269630].
order.loc[order['ApplSeqNum'].gt(269291) & order['ApplSeqNum'].le(269630)]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,order_side,order_type,order_price,order_qty
2018,2002351,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,269630,1,2,24.75,2300


In [57]:
# Read the 'md_trade' table for symbol 2002351 (both positional arguments are 20200102).
trade = db1.read('md_trade', 20200102, 20200102, symbol=2002351)
# Display every trade whose ApplSeqNum is strictly greater than 269630.
trade.loc[trade['ApplSeqNum'].gt(269630)]

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
388,2002351,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,269631,1,0,22.54,1100,269630,210677
389,2002351,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,269632,1,0,22.54,1200,269630,216591
390,2002351,20200102,93000010000,1577928600010000,2020-01-02 09:30:00.010,269892,1,0,22.53,1000,231823,269891
391,2002351,20200102,93000020000,1577928600020000,2020-01-02 09:30:00.020,270860,1,0,22.54,1000,270859,216591
392,2002351,20200102,93000020000,1577928600020000,2020-01-02 09:30:00.020,272361,1,0,22.54,200,272360,216591
...,...,...,...,...,...,...,...,...,...,...,...,...
149473,2002351,20200102,150000000000,1577948400000000,2020-01-02 15:00:00.000,16288124,1,0,24.75,1000,12758236,16260836
149474,2002351,20200102,150000000000,1577948400000000,2020-01-02 15:00:00.000,16288125,1,0,24.75,1000,12758236,16262020
149475,2002351,20200102,150000000000,1577948400000000,2020-01-02 15:00:00.000,16288126,1,0,24.75,1000,12758236,16264225
149476,2002351,20200102,150000000000,1577948400000000,2020-01-02 15:00:00.000,16288127,1,0,24.75,300,12758236,16264427
