In [1]:
import pymongo 
import io 
import pandas as pd 
import pickle 
import datetime 
import time 
import gzip 
import lzma 
import pytz 
import pyarrow as pa 
import pyarrow.parquet as pq 
import numpy as np 
import re

def DB(host, db_name, user, passwd):
    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)
    return DBObj(uri, db_name=db_name)

class DBObj(object):
    def __init__(self, uri, symbol_column='skey', db_name='white_db', version=3): 
        self.db_name = db_name 
        self.uri = uri 
        self.client = pymongo.MongoClient(self.uri) 
        self.db = self.client[self.db_name] 
        self.chunk_size = 20000 
        self.symbol_column = symbol_column 
        self.date_column = 'date' 
        self.version = version

    def parse_uri(self, uri): 
        # mongodb://user:password@example.com 
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}
        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("date must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid date type: " + str(type(x)))
        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)
        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)
        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)
        return query

    def read_tick(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name] 
        query = self.build_query(start_date, end_date, symbol) 
        if not query: 
            print('cannot read the whole table') 
            return None  
        segs = [] 
        for x in collection.find(query): 
            x['data'] = self.deser(x['data'], x['ver']) 
            segs.append(x) 
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start'])) 
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def read_daily(self, table_name, start_date=None, end_date=None, skey=None, index_id=None, interval=None, index_name=None, col=None, return_sdi=True): 
        collection = self.db[table_name]
        # Build projection 
        prj = {'_id': 0} 
        if col is not None: 
            if return_sdi: 
                col = ['skey', 'date', 'index_id'] + col 
            for col_name in col: 
                prj[col_name] = 1 
        # Build query 
        query = {} 
        if skey is not None: 
            query['skey'] = {'$in': skey} 
        if interval is not None: 
            query['interval'] = {'$in': interval} 
        if index_id is not None: 
            query['index_id'] = {'$in': index_id}    
        if index_name is not None:
            n = '' 
            for name in index_name: 
                try: 
                    name = re.compile('[\u4e00-\u9fff]+').findall(name)[0] 
                    if len(n) == 0: 
                        n = n = "|".join(name) 
                    else: 
                        n = n + '|' + "|".join(name) 
                except: 
                    if len(n) == 0: 
                        n = name 
                    else: 
                        n = n + '|' + name 
            query['index_name'] = {'$regex': n}
        if start_date is not None: 
            if end_date is not None: 
                query['date'] = {'$gte': start_date, '$lte': end_date} 
            else: 
                query['date'] = {'$gte': start_date} 
        elif end_date is not None: 
            query['date'] = {'$lte': end_date} 
        # Load data 
        cur = collection.find(query, prj) 
        df = pd.DataFrame.from_records(cur) 
        if df.empty: 
            df = pd.DataFrame() 
        else:
            if 'index_id' in df.columns:
                df = df.sort_values(by=['date', 'index_id', 'skey']).reset_index(drop=True)
            else:
                df = df.sort_values(by=['date','skey']).reset_index(drop=True)
        return df 
 

    def write(self, table_name, df):
        if len(df) == 0: return

        multi_date = False

        if self.date_column in df.columns:
            date = str(df.head(1)[self.date_column].iloc[0])
            multi_date = len(df[self.date_column].unique()) > 1
        else:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            for symbol, sub_df in df.groupby([self.symbol_column]):
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = self.version
            ser_data = self.ser(df_seg, version)
            seg = {'ver': version, 'data': ser_data, 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}

        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)

        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None
        collection.delete_many(query)

    def list_tables(self):
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        elif version == 3:
            # 32-bit number needs more space than 64-bit for parquet
            for col_name in s.columns:
                col = s[col_name]
                if col.dtype == np.int32:
                    s[col_name] = s[col_name].astype(np.int64)
                elif col.dtype == np.uint32:
                    s[col_name] = s[col_name].astype(np.uint64)
            tbl = pa.Table.from_pandas(s)
            f = io.BytesIO()
            pq.write_table(tbl, f, use_dictionary=False, compression='ZSTD', compression_level=0)
            f.seek(0)
            data = f.read()
            return data
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        print(version)
        def unpickle(s):
            return pickle.loads(s)
        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        elif version == 3:
            f = io.BytesIO()
            f.write(s)
            f.seek(0)
            return pq.read_table(f, use_threads=False).to_pandas()
        else:
            raise Exception('unknown version')

def patch_pandas_pickle():
    if pd.__version__ < '0.24':
        import sys
        from types import ModuleType
        from pandas.core.internals import BlockManager
        pkg_name = 'pandas.core.internals.managers'
        if pkg_name not in sys.modules:
            m = ModuleType(pkg_name)
            m.BlockManager = BlockManager
            sys.modules[pkg_name] = m
patch_pandas_pickle()


In [17]:
database_name = 'com_md_eq_cn'
user = 'zhenyuy'
password = 'bnONBrzSMGoE'

start = 20200903
end = 20200903
symbol = 2000001

import sys

pd.set_option('max_columns', 200)
db1 = DB("192.168.10.178", database_name, user, password)

order = db1.read_tick('md_order', start_date=start, end_date=end, symbol=symbol)
trade = db1.read_tick('md_trade', start_date=start, end_date=end, symbol=symbol)
print(order)
print(trade)

3
3
3
3
3
3
3
3
3
3
3
           skey      date          time    clockAtArrival  ApplSeqNum  \
0       2000001  20200903   91500000000  1599095700000000         474   
1       2000001  20200903   91500000000  1599095700000000        1010   
2       2000001  20200903   91500000000  1599095700000000        1051   
3       2000001  20200903   91500000000  1599095700000000        1131   
4       2000001  20200903   91500000000  1599095700000000        1474   
...         ...       ...           ...               ...         ...   
107562  2000001  20200903  145957440000  1599116397440000    25633303   
107563  2000001  20200903  145957490000  1599116397490000    25633326   
107564  2000001  20200903  145957860000  1599116397860000    25633489   
107565  2000001  20200903  145958270000  1599116398270000    25633705   
107566  2000001  20200903  145959260000  1599116399260000    25634282   

        order_side  order_type  order_price  order_qty  
0                1           2        13.79 

In [10]:
order[order['order_price'] == 999990000]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,order_side,order_type,order_price,order_qty


In [19]:
table = pa.Table.from_pandas(order)
pq.write_table(table, r'E:\work\MBDzoom-demo\order.parquet', compression='zstd', compression_level=0)

In [20]:
pd.read_csv(r'E:\work\MBDzoom-demo\output.csv')

Unnamed: 0,skey,date,ApplSeqNum,tradeVol,cancelVol,unfrozenOrderNum
0,2000001,20200903,444237,473762.0,0.0,0.0
1,2000001,20200903,444927,0.0,0.0,0.0
2,2000001,20200903,445955,0.0,0.0,0.0
3,2000001,20200903,447223,0.0,0.0,0.0
4,2000001,20200903,447411,0.0,0.0,0.0
...,...,...,...,...,...,...
120840,2000001,20200903,25525711,0.0,1000.0,0.0
120841,2000001,20200903,25525854,900.0,0.0,0.0
120842,2000001,20200903,25526415,0.0,0.0,0.0
120843,2000001,20200903,25526825,500.0,0.0,0.0


In [23]:
test.head(1)

Unnamed: 0,skey,date,time,clockAtArrival,ordering,ApplSeqNum,bbo_improve,pass_filter,cum_trades_cnt,cum_volume,cum_amount,prev_close,open,close,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_orders,total_ask_orders,total_bid_levels,total_ask_levels
0,2000001,20200903,92500000000,1599096300000000,1,392343,1,2,287,473162,7248841.84,15.32,15.32,15.32,15.23,15.24,15.25,15.26,15.27,15.28,15.29,15.3,15.31,15.32,15.33,15.34,15.35,15.36,15.37,15.38,15.39,15.4,15.41,15.42,20100,14300,38800,18800,4700,28700,21100,23800,4100,29338,1300,1500,7900,1500,900,13300,42400,18438,600,6000,9,5,21,7,3,9,3,13,1,17,4,1,6,1,1,6,7,7,1,4,2681338,5853402,14.813579,15.923983,1335,2492,139,151


In [177]:
order[(order['time'] < 92500000000) & (order['order_price'] > 71.71 * 9) & (order['order_side'] == 2)]['order_qty'].sum()

7482

In [179]:
order[(order['time'] < 92500000000) & (order['order_price'] > 71.71 * 9) & (order['order_side'] == 1)]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,order_side,order_type,order_price,order_qty
679,2300864,20200824,91957820000,1598231997820000,232188,1,2,1010.0,100
1976,2300864,20200824,92445910000,1598232285910000,331896,1,2,2000.0,1000


In [178]:
trade[trade['OfferApplSeqNum'].isin(order[(order['time'] < 92500000000) & (order['order_price'] > 71.71 * 9) & (order['order_side'] == 2)]['ApplSeqNum'].unique())]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
2,2300864,20200824,91504570000,1598231704570000,157819,4,0,0.0,500,0,157816
3802,2300864,20200824,93124540000,1598232684540000,1254642,4,0,0.0,500,0,152833
3831,2300864,20200824,93125340000,1598232685340000,1261659,4,0,0.0,500,0,101003
4858,2300864,20200824,93203150000,1598232723150000,1493150,4,0,0.0,200,0,251402
11713,2300864,20200824,94204670000,1598233324670000,4187610,4,0,0.0,500,0,304094
12369,2300864,20200824,94356190000,1598233436190000,4560435,4,0,0.0,500,0,228358
17533,2300864,20200824,95853010000,1598234333010000,7118000,4,0,0.0,300,0,78372
32672,2300864,20200824,111310820000,1598238790820000,14076239,4,0,0.0,500,0,287947
37267,2300864,20200824,132850560000,1598246930560000,17762943,4,0,0.0,400,0,110955
37272,2300864,20200824,132852290000,1598246932290000,17765105,4,0,0.0,100,0,123287


In [183]:
trade[trade['BidApplSeqNum'].isin(order[(order['time'] < 92500000000) & (order['order_price'] > 71.71 * 9) & (order['order_side'] == 1)]['ApplSeqNum'].unique())]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
1209,2300864,20200824,93000750000,1598232600750000,477085,4,0,0.0,1000,331896,0
1324,2300864,20200824,93002310000,1598232602310000,563153,4,0,0.0,100,232188,0


In [170]:
db1.read_daily('mdbar1d_tr', start_date=20200824, end_date=20200824, skey=[2300864])

Unnamed: 0,skey,date,time,name,trade_status,listed_days,open,high,low,close,closeL1,yclose,ztClose,dtClose,dayReturn,volume,amount,buy_volume,sell_volume,TORate,allZT,hasZT,isZT,allDT,hasDT,isDT,isST,VWAP,TWAP,marketValue,marketShares,totalShares,SW1_name,SW1_code,SW2_code,SW3_code,index_name,index_weight,exchange
0,2300864,20200824,180000000000,南大环境,1,1,133.0,206.0,132.0,166.02,0.0,71.71,0.0,0.0,1.315158,7001276,1038723000.0,3424687.0,3576589.0,0.615256,0,0,0,0,0,0,0,148.362013,158.609141,1889215000.0,11379445.0,48000000.0,申万公用事业,SW801160,SW801162,SW851641,,0.0,SZSE


In [243]:
order[order['ApplSeqNum'] == 21145147]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,order_side,order_type,order_price,order_qty
150624,2300869,20200824,145102000000,1598251862000000,21145147,1,2,88.0,100


In [254]:
database_name = 'com_md_eq_cn'
user = 'zhenyuy'
password = 'bnONBrzSMGoE'

import sys

pd.set_option('max_columns', 200)
db1 = DB("192.168.10.178", database_name, user, password)

pd.set_option('max_rows', 300)
test1 = db1.read_tick('md_snapshot_l2', start_date=start, end_date=end, symbol=symbol)
test1[(test1['ApplSeqNum'] == -1) & (test1['cum_volume'] > 0) & (test1['time'] <= 145655000000)][['ApplSeqNum', 'time', 'skey', 'date', 'cum_volume', 'prev_close', 'open', 'close', 'cum_trades_cnt', 'bid10p', 'bid9p',
                   'bid8p', 'bid7p', 'bid6p', 'bid5p', 'bid4p', 'bid3p', 'bid2p', 'bid1p', 'ask1p', 'ask2p',
                   'ask3p', 'ask4p', 'ask5p', 'ask6p', 'ask7p', 'ask8p', 'ask9p', 'ask10p', 'bid10q', 'bid9q', 
                   'bid8q', 'bid7q', 'bid6q', 'bid5q', 'bid4q', 'bid3q', 'bid2q', 'bid1q', 'ask1q', 'ask2q', 'ask3q', 
                   'ask4q', 'ask5q', 'ask6q','ask7q', 'ask8q', 'ask9q', 'ask10q', 'bid10n', 'bid9n', 'bid8n',
                   'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 'ask1n', 'ask2n', 'ask3n', 
                   'ask4n', 'ask5n', 'ask6n', 'ask7n', 'ask8n', 'ask9n', 'ask10n', 'total_bid_quantity', 'total_ask_quantity']]

3


Unnamed: 0,ApplSeqNum,time,skey,date,cum_volume,prev_close,open,close,cum_trades_cnt,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,total_bid_quantity,total_ask_quantity
34,-1,93003000000,2300489,20201112,163400,40.37,43.5,42.0,53,40.39,40.4,40.5,40.53,40.58,40.68,40.7,40.71,40.81,41.0,41.17,42.0,42.88,42.98,43.0,43.02,43.2,43.24,43.28,43.3,2000,34100,400,3000,1000,300,300,8200,300,1500,100,1400,10000,4800,20000,2800,12200,3500,400,36200,1,1,2,1,1,2,2,1,1,1,1,1,1,1,1,1,2,1,1,12,126000,446214
35,-1,93006000000,2300489,20201112,164400,40.37,43.5,41.01,61,40.58,40.6,40.68,40.7,40.71,40.76,40.8,40.81,41.0,41.01,41.17,41.98,41.99,42.0,42.88,42.98,43.0,43.02,43.05,43.2,1000,10000,300,100,200,2000,800,300,2300,5100,1200,7400,1200,1400,10000,4800,42200,2800,5000,12200,1,2,2,1,1,1,1,1,4,3,4,4,8,1,1,1,5,1,1,2,140800,723514
36,-1,93009000000,2300489,20201112,169200,40.37,43.5,41.02,87,40.6,40.68,40.7,40.71,40.76,40.8,40.81,41.0,41.01,41.02,41.17,41.98,41.99,42.0,42.88,42.98,43.0,43.02,43.05,43.2,10000,300,100,200,2000,800,300,2300,3900,5700,800,2800,600,1400,10000,5600,48200,2800,5000,12200,2,2,1,1,1,1,1,4,8,7,3,1,4,1,1,2,6,1,1,2,170500,758714
37,-1,93012000000,2300489,20201112,179600,40.37,43.5,41.37,111,40.8,40.81,41.0,41.01,41.02,41.17,41.18,41.19,41.2,41.37,41.91,41.99,42.0,42.88,42.98,43.0,43.02,43.05,43.2,43.25,800,300,2300,4000,10600,2000,3100,3400,2700,100,7000,600,1400,10000,5200,48200,2800,5000,12200,2000,1,1,4,8,11,4,6,1,5,1,3,4,1,1,2,6,1,1,2,1,186800,758514
38,-1,93015000000,2300489,20201112,183800,40.37,43.5,41.41,119,40.8,40.81,41.0,41.01,41.02,41.03,41.17,41.37,41.39,41.4,41.41,41.5,41.56,41.87,41.88,41.99,42.0,42.88,42.98,43.0,800,300,2500,4100,6400,100,2000,800,3200,2300,2000,100,500,3800,1600,10400,1400,10000,5200,48200,1,1,5,9,8,1,4,4,3,1,4,1,1,1,2,5,1,1,2,6,178000,766614
39,-1,93018000000,2300489,20201112,186200,40.37,43.5,41.39,132,40.7,40.71,40.8,40.81,41.0,41.01,41.02,41.1,41.17,41.18,41.39,41.4,41.41,41.91,41.99,42.0,42.88,42.98,43.0,43.02,100,200,800,300,2500,4100,1400,5000,3300,100,2700,2300,8000,500,10400,1400,10000,5200,48200,2800,1,1,1,1,5,9,7,1,5,1,3,1,11,1,5,1,1,2,6,1,173000,768114
40,-1,93021000000,2300489,20201112,190200,40.37,43.5,41.27,148,40.75,40.8,40.81,41.0,41.01,41.02,41.1,41.17,41.27,41.29,41.3,41.39,41.4,41.41,41.91,41.99,42.0,42.88,42.98,43.0,2700,800,300,2500,4300,1400,5000,2900,4800,3000,100,1600,100,8000,500,10400,1400,10000,5200,48200,1,1,1,5,10,7,1,5,1,1,1,4,1,11,1,5,1,1,2,6,182600,751614
41,-1,93024000000,2300489,20201112,194300,40.37,43.5,41.41,162,40.81,41.0,41.01,41.02,41.1,41.17,41.28,41.37,41.38,41.39,41.41,41.52,41.91,41.99,42.0,42.88,42.98,43.0,43.02,43.2,300,2500,1700,1400,5000,2500,200,400,2900,100,5800,100,500,10400,1400,10000,5200,48200,2800,12200,1,5,9,7,1,3,2,2,1,1,7,1,1,5,1,1,2,6,1,2,169600,741414
42,-1,93027000000,2300489,20201112,208300,40.37,43.5,41.43,193,41.1,41.17,41.28,41.34,41.37,41.4,41.41,41.42,41.43,41.44,41.87,41.88,41.9,41.91,41.99,42.0,42.88,42.98,43.0,43.02,5000,2500,200,500,100,300,100,200,1800,3700,31338,23700,3600,7000,10700,1400,10000,5200,48200,2800,1,3,2,1,1,3,1,1,1,3,14,9,1,14,6,1,1,2,6,1,172000,798552
43,-1,93030000000,2300489,20201112,209400,40.37,43.5,41.47,202,41.17,41.28,41.3,41.34,41.37,41.39,41.4,41.41,41.44,41.45,41.54,41.84,41.85,41.86,41.91,41.99,42.0,42.88,42.98,43.0,2500,200,200,500,700,100,200,100,700,100,600,3600,200,3500,6500,900,1400,10000,5200,48200,3,2,1,1,3,1,2,1,2,1,2,1,2,1,13,5,1,1,2,6,167900,720214


In [274]:
test1[test1['time'] <= 93000000000].tail(1)[['ApplSeqNum', 'time', 'skey', 'date', 'cum_volume', 'prev_close', 'open', 'close', 'cum_trades_cnt', 'bid10p', 'bid9p',
                   'bid8p', 'bid7p', 'bid6p', 'bid5p', 'bid4p', 'bid3p', 'bid2p', 'bid1p', 'ask1p', 'ask2p',
                   'ask3p', 'ask4p', 'ask5p', 'ask6p', 'ask7p', 'ask8p', 'ask9p', 'ask10p', 'bid10q', 'bid9q', 
                   'bid8q', 'bid7q', 'bid6q', 'bid5q', 'bid4q', 'bid3q', 'bid2q', 'bid1q', 'ask1q', 'ask2q', 'ask3q', 
                   'ask4q', 'ask5q', 'ask6q','ask7q', 'ask8q', 'ask9q', 'ask10q', 'bid10n', 'bid9n', 'bid8n',
                   'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 'ask1n', 'ask2n', 'ask3n', 
                   'ask4n', 'ask5n', 'ask6n', 'ask7n', 'ask8n', 'ask9n', 'ask10n', 'total_bid_quantity', 'total_ask_quantity']]

Unnamed: 0,ApplSeqNum,time,skey,date,cum_volume,prev_close,open,close,cum_trades_cnt,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,total_bid_quantity,total_ask_quantity
33,284407,92903000000,2300489,20201112,112000,40.37,43.5,43.5,33,39.5,39.75,40.0,40.01,40.11,40.37,40.5,40.68,40.7,43.5,43.84,43.92,44.17,44.39,44.55,44.94,44.98,45.0,45.22,45.33,5200,500,200,300,500,600,100,200,200,73000,200,100,200,10000,1000,300,100,1300,600,300,2,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,2,1,1,109800,19500


In [279]:
test.head(200)[['ApplSeqNum', 'time', 'skey', 'date', 'cum_volume', 'prev_close', 'open', 'close', 'cum_trades_cnt', 'bid10p', 'bid9p',
                   'bid8p', 'bid7p', 'bid6p', 'bid5p', 'bid4p', 'bid3p', 'bid2p', 'bid1p', 'ask1p', 'ask2p',
                   'ask3p', 'ask4p', 'ask5p', 'ask6p', 'ask7p', 'ask8p', 'ask9p', 'ask10p', 'bid10q', 'bid9q', 
                   'bid8q', 'bid7q', 'bid6q', 'bid5q', 'bid4q', 'bid3q', 'bid2q', 'bid1q', 'ask1q', 'ask2q', 'ask3q', 
                   'ask4q', 'ask5q', 'ask6q','ask7q', 'ask8q', 'ask9q', 'ask10q', 'bid10n', 'bid9n', 'bid8n',
                   'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 'ask1n', 'ask2n', 'ask3n', 
                   'ask4n', 'ask5n', 'ask6n', 'ask7n', 'ask8n', 'ask9n', 'ask10n', 'total_bid_quantity', 'total_ask_quantity']]

Unnamed: 0,ApplSeqNum,time,skey,date,cum_volume,prev_close,open,close,cum_trades_cnt,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,total_bid_quantity,total_ask_quantity
0,284407,92500000000,2300489,20201112,112000,40.37,43.5,43.5,33,39.5,39.75,40.0,40.01,40.11,40.37,40.5,40.68,40.7,43.5,43.84,43.92,44.17,44.39,44.55,44.94,44.98,45.0,45.22,45.33,5200,500,200,300,500,600,100,200,200,73000,200,100,200,10000,1000,300,100,1300,600,300,2,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,2,1,1,109800,19500
1,288975,93000010000,2300489,20201112,117000,40.37,43.5,43.5,34,39.5,39.75,40.0,40.01,40.11,40.37,40.5,40.68,40.7,43.5,43.84,43.92,44.17,44.39,44.55,44.94,44.98,45.0,45.22,45.33,5200,500,200,300,500,600,100,200,200,68000,200,100,200,10000,1000,300,100,1300,600,300,2,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,2,1,1,104800,19500
2,289494,93000010000,2300489,20201112,117000,40.37,43.5,43.5,34,39.5,39.75,40.0,40.01,40.11,40.37,40.5,40.68,40.7,43.5,43.83,43.84,43.92,44.17,44.39,44.55,44.94,44.98,45.0,45.22,5200,500,200,300,500,600,100,200,200,68000,6700,200,100,200,10000,1000,300,100,1300,600,2,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,104800,26200
3,289810,93000020000,2300489,20201112,117000,40.37,43.5,43.5,34,39.5,39.75,40.0,40.01,40.11,40.37,40.5,40.68,40.7,43.5,43.83,43.84,43.92,44.17,44.39,44.55,44.94,44.98,45.0,45.22,5200,500,200,300,500,600,100,200,200,68000,9300,200,100,200,10000,1000,300,100,1300,600,2,1,1,2,1,2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,104800,28800
4,290783,93000020000,2300489,20201112,117000,40.37,43.5,43.5,34,39.75,40.0,40.01,40.11,40.37,40.5,40.53,40.68,40.7,43.5,43.83,43.84,43.92,44.17,44.39,44.55,44.94,44.98,45.0,45.22,500,200,300,500,600,100,3000,200,200,68000,9300,200,100,200,10000,1000,300,100,1300,600,1,1,2,1,2,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,107800,28800
5,290823,93000020000,2300489,20201112,117000,40.37,43.5,43.5,34,40.0,40.01,40.11,40.37,40.39,40.5,40.53,40.68,40.7,43.5,43.83,43.84,43.92,44.17,44.39,44.55,44.94,44.98,45.0,45.22,200,300,500,600,2000,100,3000,200,200,68000,9300,200,100,200,10000,1000,300,100,1300,600,1,2,1,2,1,1,1,1,1,1,2,1,1,1,1,1,1,1,2,1,109800,28800
6,291142,93000020000,2300489,20201112,117000,40.37,43.5,43.5,34,40.0,40.01,40.11,40.37,40.39,40.5,40.53,40.68,40.7,43.5,43.83,43.84,43.92,44.17,44.39,44.55,44.94,44.98,45.0,45.22,200,300,500,600,2000,100,3000,200,200,68000,16000,200,100,200,10000,1000,300,100,1300,600,1,2,1,2,1,1,1,1,1,1,3,1,1,1,1,1,1,1,2,1,109800,35500
7,291306,93000020000,2300489,20201112,117000,40.37,43.5,43.5,34,40.0,40.01,40.11,40.37,40.39,40.5,40.53,40.68,40.7,43.5,43.83,43.84,43.92,44.17,44.39,44.55,44.94,44.98,45.0,45.22,200,300,500,600,2000,100,3000,200,200,68000,22700,200,100,200,10000,1000,300,100,1300,600,1,2,1,2,1,1,1,1,1,1,4,1,1,1,1,1,1,1,2,1,109800,42200
8,291742,93000020000,2300489,20201112,117000,40.37,43.5,43.5,34,40.0,40.01,40.11,40.37,40.39,40.5,40.53,40.68,40.7,43.5,43.83,43.84,43.92,44.17,44.39,44.55,44.94,44.98,45.0,45.22,200,300,500,600,2000,100,3000,200,200,68000,22700,200,100,1900,10000,1000,300,100,1300,600,1,2,1,2,1,1,1,1,1,1,4,1,1,2,1,1,1,1,2,1,109800,43900
9,291821,93000020000,2300489,20201112,117000,40.37,43.5,43.5,34,40.0,40.01,40.11,40.37,40.39,40.5,40.53,40.68,40.7,43.5,43.83,43.84,43.92,44.0,44.17,44.39,44.55,44.94,44.98,45.0,200,300,500,600,2000,100,3000,200,200,68000,22700,200,100,1000,1900,10000,1000,300,100,1300,1,2,1,2,1,1,1,1,1,1,4,1,1,1,2,1,1,1,1,2,109800,44900


In [284]:
# min order_side 2
40.81 * 0.98

39.9938

In [283]:
# max order_side 1
43.3 * 1.02

44.166

In [13]:
order1 = pd.read_csv(r'\\192.168.10.34\random_backup\Kevin_zhenyu\KR_daily_data\20201112\SZ\order\300489.csv')
trade1 = pd.read_csv(r'\\192.168.10.34\random_backup\Kevin_zhenyu\KR_daily_data\20201112\SZ\tick\300489.csv')
pd.set_option('max_rows', 200)

In [8]:
mbd = pd.concat([order, trade]).sort_values(by='ApplSeqNum')
mbd[mbd['TransactTime'] > 20201112093000000].head(200)

Unnamed: 0,OrderQty,OrdType,TransactTime,ExpirationDays,Side,ApplSeqNum,Contactor,SendingTime,Price,ChannelNo,ExpirationType,ContactInfo,ConfirmID,BidApplSeqNum,Qty,OfferApplSeqNum,Amt,ExecType
96,5000.0,2.0,20201112093000010,0.0,2.0,288975,,20201112093000000,43.5,2011,0.0,,,,,,,
35,,,20201112093000010,,,288976,,20201112093000000,43.5,2011,,,,253880.0,5000.0,288975.0,0.0,F
97,6700.0,2.0,20201112093000010,0.0,2.0,289494,,20201112093000000,43.83,2011,0.0,,,,,,,
98,2600.0,2.0,20201112093000020,0.0,2.0,289810,,20201112093000000,43.83,2011,0.0,,,,,,,
99,3000.0,2.0,20201112093000020,0.0,1.0,290783,,20201112093000000,40.53,2011,0.0,,,,,,,
100,2000.0,2.0,20201112093000020,0.0,1.0,290823,,20201112093000000,40.39,2011,0.0,,,,,,,
101,6700.0,2.0,20201112093000020,0.0,2.0,291142,,20201112093000000,43.83,2011,0.0,,,,,,,
102,6700.0,2.0,20201112093000020,0.0,2.0,291306,,20201112093000000,43.83,2011,0.0,,,,,,,
103,1700.0,2.0,20201112093000020,0.0,2.0,291742,,20201112093000000,44.17,2011,0.0,,,,,,,
104,1000.0,2.0,20201112093000020,0.0,2.0,291821,,20201112093000000,44.0,2011,0.0,,,,,,,


In [17]:
mbd = pd.concat([order, trade]).sort_values(by='ApplSeqNum')
mbd[mbd['time'] >= 93000000000].head(200)

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,order_side,order_type,order_price,order_qty,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
96,2300489,20201112,93000010000,1605144600010000,288975,2.0,2.0,43.5,5000.0,,,,,,
35,2300489,20201112,93000010000,1605144600010000,288976,,,,,1.0,0.0,43.5,5000.0,253880.0,288975.0
97,2300489,20201112,93000010000,1605144600010000,289494,2.0,2.0,43.83,6700.0,,,,,,
98,2300489,20201112,93000020000,1605144600020000,289810,2.0,2.0,43.83,2600.0,,,,,,
99,2300489,20201112,93000020000,1605144600020000,290783,1.0,2.0,40.53,3000.0,,,,,,
100,2300489,20201112,93000020000,1605144600020000,290823,1.0,2.0,40.39,2000.0,,,,,,
101,2300489,20201112,93000020000,1605144600020000,291142,2.0,2.0,43.83,6700.0,,,,,,
102,2300489,20201112,93000020000,1605144600020000,291306,2.0,2.0,43.83,6700.0,,,,,,
103,2300489,20201112,93000020000,1605144600020000,291742,2.0,2.0,44.17,1700.0,,,,,,
104,2300489,20201112,93000020000,1605144600020000,291821,2.0,2.0,44.0,1000.0,,,,,,


In [298]:
trade[trade['BidApplSeqNum'] == 317622]

Unnamed: 0,ApplSeqNum,BidApplSeqNum,SendingTime,Price,ChannelNo,Qty,OfferApplSeqNum,Amt,ExecType,TransactTime
1689,1099834,317622,20201112093153000,0.0,2011,900,0,0.0,4,20201112093153190


In [21]:
test = db1.read_tick('md_snapshot_mbd', start_date=start, end_date=end, symbol=symbol)
# test[(test['ApplSeqNum'] >= 317705) & (test['ApplSeqNum'] <= 319593)][['ApplSeqNum', 'time', 'skey', 'date', 'cum_volume', 'prev_close', 'open', 'close', 'cum_trades_cnt', 'bid10p', 'bid9p',
#                    'bid8p', 'bid7p', 'bid6p', 'bid5p', 'bid4p', 'bid3p', 'bid2p', 'bid1p', 'ask1p', 'ask2p',
#                    'ask3p', 'ask4p', 'ask5p', 'ask6p', 'ask7p', 'ask8p', 'ask9p', 'ask10p', 'bid10q', 'bid9q', 
#                    'bid8q', 'bid7q', 'bid6q', 'bid5q', 'bid4q', 'bid3q', 'bid2q', 'bid1q', 'ask1q', 'ask2q', 'ask3q', 
#                    'ask4q', 'ask5q', 'ask6q','ask7q', 'ask8q', 'ask9q', 'ask10q', 'bid10n', 'bid9n', 'bid8n',
#                    'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 'ask1n', 'ask2n', 'ask3n', 
#                    'ask4n', 'ask5n', 'ask6n', 'ask7n', 'ask8n', 'ask9n', 'ask10n', 'total_bid_quantity', 'total_ask_quantity']].head()
test[test['cum_volume'] == 163400][['ApplSeqNum', 'time', 'skey', 'date', 'cum_volume', 'prev_close', 'open', 'close', 'cum_trades_cnt', 'bid10p', 'bid9p',
                   'bid8p', 'bid7p', 'bid6p', 'bid5p', 'bid4p', 'bid3p', 'bid2p', 'bid1p', 'ask1p', 'ask2p',
                   'ask3p', 'ask4p', 'ask5p', 'ask6p', 'ask7p', 'ask8p', 'ask9p', 'ask10p', 'bid10q', 'bid9q', 
                   'bid8q', 'bid7q', 'bid6q', 'bid5q', 'bid4q', 'bid3q', 'bid2q', 'bid1q', 'ask1q', 'ask2q', 'ask3q', 
                   'ask4q', 'ask5q', 'ask6q','ask7q', 'ask8q', 'ask9q', 'ask10q', 'bid10n', 'bid9n', 'bid8n',
                   'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 'ask1n', 'ask2n', 'ask3n', 
                   'ask4n', 'ask5n', 'ask6n', 'ask7n', 'ask8n', 'ask9n', 'ask10n', 'total_bid_quantity', 'total_ask_quantity']]

3
3
3
3
3
3
3


Unnamed: 0,ApplSeqNum,time,skey,date,cum_volume,prev_close,open,close,cum_trades_cnt,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,total_bid_quantity,total_ask_quantity


In [240]:
trade[(trade['ApplSeqNum'] >= 13743257) & (trade['ApplSeqNum'] <= 13744715) & (trade['trade_type'] == 4)]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
44730,2300863,20200825,110938160000,1598324978160000,13744715,4,0,0.0,500,0,7034063


In [239]:
order[(order['ApplSeqNum'] >= 13743257) & (order['ApplSeqNum'] <= 13744715)]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,order_side,order_type,order_price,order_qty
50203,2300863,20200825,110937000000,1598324977000000,13743257,1,2,153.6,1000


In [168]:
cols = ['skey', 'date', 'cum_volume', 'prev_close', 'open', 'close', 'cum_trades_cnt', 'bid10p', 'bid9p',
       'bid8p', 'bid7p', 'bid6p', 'bid5p', 'bid4p', 'bid3p', 'bid2p', 'bid1p', 'ask1p', 'ask2p',
       'ask3p', 'ask4p', 'ask5p', 'ask6p', 'ask7p', 'ask8p', 'ask9p', 'ask10p', 'bid10q', 'bid9q', 
       'bid8q', 'bid7q', 'bid6q', 'bid5q', 'bid4q', 'bid3q', 'bid2q', 'bid1q', 'ask1q', 'ask2q', 'ask3q', 
       'ask4q', 'ask5q', 'ask6q','ask7q', 'ask8q', 'ask9q', 'ask10q', 'bid10n', 'bid9n', 'bid8n',
       'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 'ask1n', 'ask2n', 'ask3n', 
       'ask4n', 'ask5n', 'ask6n', 'ask7n', 'ask8n', 'ask9n', 'ask10n', 'total_bid_quantity', 'total_ask_quantity']
test = test.drop_duplicates(cols, keep='first')
test = test[cols+['ApplSeqNum']]
if 'ApplSeqNum' in test1.columns:
    test1 = test1[list(test1.columns[test1.columns != 'ApplSeqNum'])]
rl2 = pd.merge(test1, test, on=cols, how='left')
try:
    assert(rl2[(rl2['ApplSeqNum'].isnull()) & (rl2['cum_volume'] > 0) & (rl2['time'] <= 145655000000)].shape[0] == 0)
except:
    print(rl2[(rl2['ApplSeqNum'].isnull()) & (rl2['cum_volume'] > 0) & (rl2['time'] <= 145655000000)][['skey', 'date', 'time', 'cum_volume', 'close', 'bid1p', 'bid2p','bid1q', 'bid2q', 'ask1p', 'ask2p', 'ask1q', 'ask2q']])

In [108]:
85.4 * 1.02

87.108

In [101]:
trade[trade['ApplSeqNum'] >= 449323].head()

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
2227,2300919,20201224,93001410000,1608773401410000,449443,1,0,85.0,100,449442,439470
2228,2300919,20201224,93001420000,1608773401420000,450030,4,0,0.0,400,0,255763
2229,2300919,20201224,93001420000,1608773401420000,450146,4,0,0.0,500,0,201100
2230,2300919,20201224,93001420000,1608773401420000,450210,1,0,85.0,300,450209,439470
2231,2300919,20201224,93001420000,1608773401420000,450240,4,0,0.0,500,0,153152


In [103]:
order[order['ApplSeqNum'] == 439470]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,order_side,order_type,order_price,order_qty
3985,2300919,20201224,93001240000,1608773401240000,439470,2,2,85.0,10000


In [94]:
order[(order['order_price'] == 87.2) & (order['order_side'] == 1) & (order['ApplSeqNum'] <= 454301)]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,order_side,order_type,order_price,order_qty
1429,2300919,20201224,92258340000,1608772978340000,239412,1,2,87.2,400
1444,2300919,20201224,92300440000,1608772980440000,239967,1,2,87.2,300
1498,2300919,20201224,92312720000,1608772992720000,243311,1,2,87.2,200
1553,2300919,20201224,92325010000,1608773005010000,246840,1,2,87.2,500
1885,2300919,20201224,92407490000,1608773047490000,260469,1,2,87.2,100
2462,2300919,20201224,92443730000,1608773083730000,278866,1,2,87.2,100
2528,2300919,20201224,92447360000,1608773087360000,280833,1,2,87.2,1200
4094,2300919,20201224,93001410000,1608773401410000,449323,1,2,87.2,300


In [106]:
order[(order['order_price'] == 85.39) & (order['order_side'] == 2) & (order['ApplSeqNum'] <= 454301)]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,order_side,order_type,order_price,order_qty
3336,2300919,20201224,93000310000,1608773400310000,376929,2,2,85.39,100
3361,2300919,20201224,93000330000,1608773400330000,378610,2,2,85.39,7500
3484,2300919,20201224,93000480000,1608773400480000,390020,2,2,85.39,50000
3750,2300919,20201224,93000870000,1608773400870000,416605,2,2,85.39,13100


In [98]:
trade[(trade['trade_type'] == 1) & (trade['time'] > 93001400000)].head(50)

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
2227,2300919,20201224,93001410000,1608773401410000,449443,1,0,85.0,100,449442,439470
2230,2300919,20201224,93001420000,1608773401420000,450210,1,0,85.0,300,450209,439470
2232,2300919,20201224,93001420000,1608773401420000,450296,1,0,85.0,600,450295,439470
2233,2300919,20201224,93001430000,1608773401430000,450523,1,0,85.0,300,450522,439470
2235,2300919,20201224,93001460000,1608773401460000,452170,1,0,85.0,100,452169,439470
2236,2300919,20201224,93001460000,1608773401460000,452213,1,0,85.0,100,452212,439470
2237,2300919,20201224,93001480000,1608773401480000,453493,1,0,85.0,100,453492,439470
2238,2300919,20201224,93001490000,1608773401490000,453789,1,0,85.0,100,453788,439470
2239,2300919,20201224,93001490000,1608773401490000,454255,1,0,85.0,100,454254,439470
2240,2300919,20201224,93001490000,1608773401490000,454256,1,0,85.0,800,454254,447073


In [95]:
trade[trade['BidApplSeqNum'] == 449323]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
2258,2300919,20201224,93001540000,1608773401540000,455893,1,0,85.5,300,449323,378356


In [76]:
order[order['ApplSeqNum'] == 17246327]

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
19998,2300085,20201209,141716740000,1607494636740000,17246401,1,0,20.43,500,17246400,17241637
19999,2300085,20201209,141716900000,1607494636900000,17246568,1,0,20.43,800,17246567,17241637


In [57]:
trade = pd.read_csv(r'\\192.168.10.34\trading\dailyRawData\20201209\logs_20201209_zs_96_03_day_pcap\mdTradePcap_SZ_20201209_0900.csv')
y = 20201209
trade = trade[trade['ID'] == 2300085]
trade["SecurityID"] = trade["ID"] - 2000000
trade = trade.rename(columns={"time":'TransactTime'})
trade['skey'] = trade['SecurityID'] + 2000000
trade["TradeBSFlag"] = 'N'
trade['date'] = int(y)
trade['time1'] = int(y) * 1000000000 + trade['TransactTime']
trade["TransactTime"] = trade['TransactTime'].astype('int64') * 1000
trade["clockAtArrival"] = trade["time1"].astype(str).apply(
    lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp() * 1e6))
trade.drop("time1", axis=1, inplace=True)
trade['datetime'] = trade["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x / 1e6))
trade['TradePrice'] = (trade['TradePrice'] / 10000).round(2)
trade = trade.rename(columns={"TradeQty":"trade_qty", "TradePrice":"trade_price", "ExecType":"trade_type", 'TransactTime':'time'})
trade["trade_flag"] = 0
trade["trade_type"] = np.where(trade["trade_type"] == 'F', 1, trade["trade_type"])
for col in ["skey", "date", "ApplSeqNum", "BidApplSeqNum", "OfferApplSeqNum", "trade_qty", "trade_type", "trade_flag"]:
    trade[col] = trade[col].astype('int32')
trade = trade.sort_values(by=['skey', 'ApplSeqNum']).reset_index(drop=True)
trade = trade[["skey", "date", "time", "clockAtArrival", "ApplSeqNum", "trade_type", "trade_flag",
                                             "trade_price", "trade_qty", "BidApplSeqNum", "OfferApplSeqNum"]]
print(trade['date'].iloc[0])
print("trade finished")
db1.write('md_trade', trade)

212

In [254]:
order = pd.read_csv(r'\\192.168.10.34\random_backup\Kevin_zhenyu\KR_daily_data\20200728\SZ\order\000750.csv')
order['SecurityID'] = 750

order = order.rename(columns={"OrdType": "OrderType"})
order["date"] = order["TransactTime"].iloc[0]//1000000000
order["OrderType"] = np.where(order["OrderType"] == 'U', 3, order["OrderType"])
order["skey"] = order["SecurityID"] + 2000000
order["clockAtArrival"] = order["TransactTime"].astype(str).apply(lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp()*1e6))
order['datetime'] = order["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
order["time"] = (order['TransactTime'] - int(order['TransactTime'].iloc[0]//1000000000*1000000000)).astype(np.int64)*1000
order = order[order['ChannelNo'] != 4001]

for col in ["skey", "date", "ApplSeqNum", "OrderQty", "Side", "OrderType"]:
    order[col] = order[col].astype('int32')
#     for cols in ["Price"]:
#         print(cols)
#         print(order[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())

assert(order[((order["Side"] != 1) & (order["Side"] != 2)) | (order["OrderType"].isnull())].shape[0] == 0)

order = order.rename(columns={"Side":"order_side", "OrderType":"order_type", "Price":"order_price", "OrderQty":"order_qty"})
order = order[["skey", "date", "time", "clockAtArrival", "ApplSeqNum", "order_side", "order_type", "order_price",
                                             "order_qty"]]

print(order["date"].iloc[0])
print("order finished")

database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"

db1 = DB("192.168.10.178", database_name, user, password)
db1.write('md_order', order)

20200728
order finished


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [266]:
kk = pd.read_csv(r'L:\no_order.csv')

In [267]:
for i in range(0, kk.shape[0]):
    print(kk.iloc[i, 1:])
    data = db1.read_tick('md_order', start_date=int(kk.iloc[i, 1]), end_date=int(kk.iloc[i, 1]), symbol=int(kk.iloc[i, 2]))
    print(data['time'].max())

date    20171226
skey     2300631
Name: 0, dtype: int64
3
145959630000
date    20200727
skey     2000001
Name: 1, dtype: int64
3
3
3
3
3
3
3
145959790000
date    20200728
skey     2000750
Name: 2, dtype: int64
3
3
3
3
3
145958360000


In [242]:
trade = pd.read_csv(r'\\192.168.10.34\random_backup\Kevin_zhenyu\KR_daily_data\20201021\SZ\tick\300999.csv')
trade['SecurityID'] = 300171

trade = trade[trade["ChannelNo"] != 4001]
trade["date"] = trade["TransactTime"].iloc[0]//1000000000
trade = trade.rename(columns={"Qty":"trade_qty", "Price":"trade_price", "ExecType":"trade_type"})
trade["trade_money"] = trade["trade_price"] * trade["trade_qty"]
trade["trade_flag"] = 0
trade["skey"] = trade["SecurityID"] + 2000000
trade["clockAtArrival"] = trade["TransactTime"].astype(str).apply(lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp()*1e6))
trade['datetime'] = trade["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
trade["time"] = (trade['TransactTime'] - int(trade['TransactTime'].iloc[0]//1000000000*1000000000)).astype(np.int64)*1000
trade["trade_type"] = np.where(trade["trade_type"] == 'F', 1, trade["trade_type"])
for col in ["skey", "date", "ApplSeqNum", "BidApplSeqNum", "OfferApplSeqNum", "trade_qty", "trade_type", "trade_flag"]:
    trade[col] = trade[col].astype('int32')

for cols in ["trade_money"]:
    trade[cols] = trade[cols].round(2)

trade = trade[["skey", "date", "time", "clockAtArrival", "ApplSeqNum", "trade_type", "trade_flag",
                                             "trade_price", "trade_qty", "BidApplSeqNum", "OfferApplSeqNum"]]
print("trade finished")


database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"

db1 = DB("192.168.10.178", database_name, user, password)
db1.write('md_trade', trade)   

trade finished


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [205]:
trade

Unnamed: 0,skey,date,time,clockAtArrival,ApplSeqNum,trade_type,trade_flag,trade_price,trade_qty,BidApplSeqNum,OfferApplSeqNum
0,2000722,20170714,91505380000,1499994905380000,85122,4,0,0.0,400,0,82011
1,2000722,20170714,92500000000,1499995500000000,177277,1,0,12.85,400,148466,86300
2,2000722,20170714,92500000000,1499995500000000,177278,1,0,12.85,200,153554,86300
3,2000722,20170714,92500000000,1499995500000000,177279,1,0,12.85,600,153554,169061
4,2000722,20170714,92500000000,1499995500000000,177280,1,0,12.85,1000,153554,130318
5,2000722,20170714,92500000000,1499995500000000,177281,1,0,12.85,400,153554,157131
6,2000722,20170714,92500000000,1499995500000000,177282,1,0,12.85,1600,153554,106096
7,2000722,20170714,92500000000,1499995500000000,177283,1,0,12.85,700,153554,135926
8,2000722,20170714,92500000000,1499995500000000,177284,1,0,12.85,500,153554,174426
9,2000722,20170714,92500000000,1499995500000000,177285,1,0,12.85,800,54317,174426
