In [2]:
# 2017-2019 version
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        # mongodb://user:password@example.com
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        if len(df) == 0: return

        multi_date = False

        if self.date_column in df.columns:
            date = str(df.head(1)[self.date_column].iloc[0])
            multi_date = len(df[self.date_column].unique()) > 1
        else:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            for symbol, sub_df in df.groupby([self.symbol_column]):
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = 1
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}

        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)

        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for x in collection.find(query):
            x['data'] = self.deser(x['data'], x['ver'])
            segs.append(x)
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start']))
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def list_tables(self):
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        def unpickle(s):
            return pickle.loads(s)

        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    if pd.__version__ < '0.24':
        import sys
        from types import ModuleType
        from pandas.core.internals import BlockManager
        pkg_name = 'pandas.core.internals.managers'
        if pkg_name not in sys.modules:
            m = ModuleType(pkg_name)
            m.BlockManager = BlockManager
            sys.modules[pkg_name] = m
patch_pandas_pickle()

import pandas as pd
import random
import numpy as np
import glob
import os
from unrar import rarfile
import py7zr
import pickle
import datetime

columns1 = ["Date","OrigTime","SendTime","ercvtime","dbtime","ChannelNo","SecurityID","SecurityIDsource", "MDStreamID","PreClosePx",
                   "PxChnage1","PXChange2","openPrice","HighPx","LowPx","close","NumTrades","cum_volume","cum_amount","PE1","PE2","TradingPhase",
                   "totalofferqty", "wa_offerPrice", "totalbidqty", "wa_bidPrice", "PreNAV", "RealTimeNAV", "WarrantPremiumRate", "UpLimitPx",
                   "DownLimitPx", "TotalLongPosition", "unknown1", "unknown2", "unknown3"]
columns2 = ['Date',"OrigTime","SendTime","ercvtime","dbtime","ChannelNo","SecurityID","SecurityIDsource", "MDStreamID",'ask1p','bid1p',
                   "ask1q","bid1q", 'ask2p','bid2p',"ask2q","bid2q",'ask3p','bid3p',"ask3q","bid3q",'ask4p','bid4p',"ask4q","bid4q",'ask5p',
                    'bid5p',"ask5q","bid5q",'ask6p','bid6p',"ask6q","bid6q",'ask7p','bid7p',"ask7q","bid7q",'ask8p','bid8p',"ask8q","bid8q",
                   'ask9p','bid9p',"ask9q","bid9q",'ask10p','bid10p',"ask10q","bid10q","NUMORDERS_B1","NOORDERS_B1","ORDERQTY_B1",
                    "NUMORDERS_S1","NOORDERS_S1","ORDERQTY_S1"]
columns3 =  ["Date","OrigTime","SendTime","ercvtime","dbtime","ChannelNo","SecurityID","SecurityIDsource", "MDStreamID","PreClosePx",
                   "PxChnage1","PXChange2","openPrice","HighPx","LowPx","close","NumTrades","cum_volume","cum_amount","PE1","PE2","TradingPhase",
                   "totalofferqty", "wa_offerPrice", "totalbidqty", "wa_bidPrice", "PreNAV", "RealTimeNAV", "WarrantPremiumRate", "UpLimitPx",
                   "DownLimitPx", "TotalLongPosition"]

startTm = datetime.datetime.now()
readPath = r'\\192.168.10.30\Kevin_zhenyu\day_stock\***'
dataPathLs = np.array(glob.glob(readPath))
dataPathLs = dataPathLs[[np.array([os.path.basename(i).split('.')[0][:2] == 'SZ' for i in dataPathLs])]]
db = pd.DataFrame()
for p in dataPathLs:
    dayData = pd.read_csv(p, compression='gzip')
    db = pd.concat([db, dayData])
print(datetime.datetime.now() - startTm)

startTm = datetime.datetime.now()
year = "2017"
startDate = "1130"
endDate = "1130"
df = []
bad = []
# readPath = 'J:\\LEVEL2_shenzhen\\' + year + '\\***'
# dataPathLs = np.array(glob.glob(readPath))
# dateLs = np.array([os.path.basename(i) for i in dataPathLs])
# dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]

# for data in dataPathLs:
#     if len(np.array(glob.glob(data +'\\***'))) == 0:
#         continue
    
#     if len(np.array(glob.glob(data +'\\pm_hq_order_spot.7z'))) == 1:
#         date = os.path.basename(data)
#         path = 'L:\\backup_data\\' + year 
#         os.chdir(data)
#         try:
#             a = py7zr.SevenZipFile(data + '\\am_hq_order_spot.7z','r',filters=None)
#         except:
#             print("Bad unzip here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
#             print(data + '\\am_hq_order_spot.7z')
#             bad.append(data + '\\am_hq_order_spot.7z')
#             continue
#         path1 = path + '\\' + date
#         a.extractall(path = path1)
#         a.close()
#         try:
#             a = py7zr.SevenZipFile(data + '\\pm_hq_order_spot.7z','r',filters=None)
#         except:
#             print("Bad unzip here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
#             print(data + '\\pm_hq_order_spot.7z')
#             bad.append(data + '\\pm_hq_order_spot.7z')
#             continue
#         a.extractall(path = path1)
#         a.close()
        
#         am_order = pd.read_table(path1 + '\\am_hq_order_spot.txt',header=None)
#         am_order.columns = ["date","OrigTime","SendTime","recvtime","dbtime","ChannelNo","MDStreamID","ApplSeqNum", "SecurityID","SecurityIDSource", "order_price",
#                    "order_qty","TransactTime","order_side","order_type","ConfirmID","Contactor","ContactInfo","ExpirationDays","ExpirationType"]
#         pm_order = pd.read_table(path1 + '\\pm_hq_order_spot.txt',header=None)
#         pm_order.columns = ["date","OrigTime","SendTime","recvtime","dbtime","ChannelNo","MDStreamID","ApplSeqNum", "SecurityID","SecurityIDSource", "order_price",
#                    "order_qty","TransactTime","order_side","order_type","ConfirmID","Contactor","ContactInfo","ExpirationDays","ExpirationType"]
#         OrderLog1 = pd.concat([am_order, pm_order])
#         del am_order
#         del pm_order
  
    
#     elif len(np.array(glob.glob(data +'\\pm_hq_order_spot.7z.001'))) == 1:
#         date = os.path.basename(data)
#         path = 'L:\\backup_data\\' + year 
#         os.chdir(data)
#         os.system("copy /b am_hq_order_spot.7z.* am_hq_order_spot.7z")
#         try:
#             a = py7zr.SevenZipFile(data + '\\am_hq_order_spot.7z','r',filters=None)
#         except:
#             print("Bad unzip here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
#             print(data + '\\am_hq_order_spot.7z')
#             bad.append(data + '\\am_hq_order_spot.7z')
#             continue
#         path1 = path + '\\' + date
#         a.extractall(path = path1)
#         a.close()
#         os.system("copy /b pm_hq_order_spot.7z.* pm_hq_order_spot.7z")
#         try:
#             a = py7zr.SevenZipFile(data + '\\pm_hq_order_spot.7z','r',filters=None)
#         except:
#             print("Bad unzip here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
#             print(data + '\\pm_hq_order_spot.7z')
#             bad.append(data + '\\pm_hq_order_spot.7z')
#             continue
#         a.extractall(path = path1)
#         a.close()
        
#         am_order = pd.read_table(path1 + '\\am_hq_order_spot.txt',header=None)
#         am_order.columns = ["date","OrigTime","SendTime","recvtime","dbtime","ChannelNo","MDStreamID","ApplSeqNum", "SecurityID","SecurityIDSource", "order_price",
#                    "order_qty","TransactTime","order_side","order_type","ConfirmID","Contactor","ContactInfo","ExpirationDays","ExpirationType"]
#         pm_order = pd.read_table(path1 + '\\pm_hq_order_spot.txt',header=None)
#         pm_order.columns = ["date","OrigTime","SendTime","recvtime","dbtime","ChannelNo","MDStreamID","ApplSeqNum", "SecurityID","SecurityIDSource", "order_price",
#                    "order_qty","TransactTime","order_side","order_type","ConfirmID","Contactor","ContactInfo","ExpirationDays","ExpirationType"]
#         OrderLog1 = pd.concat([am_order, pm_order])
#         del am_order
#         del pm_order

#     elif len(np.array(glob.glob(data +'\\hq_order.7z'))) == 1:
#         date = os.path.basename(data)
#         path = 'L:\\backup_data\\' + year 
#         os.chdir(data)
#         try:
#             a = py7zr.SevenZipFile(data + '\\hq_order.7z','r',filters=None)
#         except:
#             print("Bad unzip here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
#             print(data + '\\hq_order.7z')
#             bad.append(data + '\\hq_order.7z')
#             continue
#         path1 = path + '\\' + date
#         a.extractall(path = path1)
#         a.close()
#         OrderLog1 = pd.read_table(path1 + '\\hq_order.txt',header=None)
#         OrderLog1.columns = ["date","OrigTime","SendTime","recvtime","dbtime","ChannelNo","MDStreamID","ApplSeqNum", "SecurityID","SecurityIDSource", "order_price",
#                    "order_qty","TransactTime","order_side","order_type","ConfirmID","Contactor","ContactInfo","ExpirationDays","ExpirationType"]
    
    am_order = pd.read_table(path1 + '\\am_hq_order_spot.txt',header=None)
    am_order.columns = ["date","OrigTime","SendTime","recvtime","dbtime","ChannelNo","MDStreamID","ApplSeqNum", "SecurityID","SecurityIDSource", "order_price",
               "order_qty","TransactTime","order_side","order_type","ConfirmID","Contactor","ContactInfo","ExpirationDays","ExpirationType"]
    pm_order = pd.read_table(path1 + '\\pm_hq_order_spot.txt',header=None)
    pm_order.columns = ["date","OrigTime","SendTime","recvtime","dbtime","ChannelNo","MDStreamID","ApplSeqNum", "SecurityID","SecurityIDSource", "order_price",
               "order_qty","TransactTime","order_side","order_type","ConfirmID","Contactor","ContactInfo","ExpirationDays","ExpirationType"]
    OrderLog1 = pd.concat([am_order, pm_order])
    del am_order
    del pm_order    
    
    OrderLog1 = OrderLog1[(OrderLog1["SecurityID"] < 4000) | (OrderLog1["SecurityID"] > 300000)]
    OrderLog1["skey"] = OrderLog1["SecurityID"] + 2000000
    OrderLog1["clockAtArrival"] = OrderLog1["TransactTime"].astype(str).apply(lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp()*1e6))
    OrderLog1['datetime'] = OrderLog1["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
    OrderLog1["time"] = (OrderLog1['TransactTime'] - int(OrderLog1['TransactTime'].iloc[0]//1000000000*1000000000)).astype(np.int64)*1000
    OrderLog1["order_type"] =np.where(OrderLog1["order_type"] == 'U', 3, OrderLog1["order_type"])
    for col in ["skey", "date", "ApplSeqNum", "order_qty", "order_side", "order_type"]:
        OrderLog1[col] = OrderLog1[col].astype('int32')
    display(OrderLog1["order_price"].astype(str).apply(lambda x: len(x.split('.')[1])).unique())
    
    assert(OrderLog1[((OrderLog1["order_side"] != 1) & (OrderLog1["order_side"] != 2)) | (OrderLog1["order_type"].isnull())].shape[0] == 0)
    da_te = str(OrderLog1["date"].iloc[0]) 
    da_te = da_te[:4] + '-' + da_te[4:6] + '-' + da_te[6:8]
    db1 = db[db["date"] == da_te]
    sl = (db1["ID"].str[2:].astype(int) + 2000000).unique()
    del db1
    try:
        assert(len(set(sl) - set(OrderLog1["skey"].unique())) == 0)
    except:
        print(set(sl) - set(OrderLog1["skey"].unique()))
    
    OrderLog1 = OrderLog1[["skey", "date", "time", "clockAtArrival", "datetime", "ApplSeqNum", "order_side", "order_type", "order_price",
                                                 "order_qty"]]
    
    print(OrderLog1.dtypes)
    print(OrderLog1["date"].iloc[0])
    print("order finished")
    
    print(datetime.datetime.now() - startTm)

    
startDate = 20171130
endDate = 20171130
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"
db1 = DB("192.168.10.178", database_name, user, password)
trade = db1.read('md_trade', start_date=startDate, end_date=endDate)
trade = trade[trade['skey'] > 2000000]
t1 = trade.groupby('skey')['BidApplSeqNum'].unique().reset_index()
t2 = trade.groupby('skey')['OfferApplSeqNum'].unique().reset_index()
t3 = OrderLog1.groupby('skey')['ApplSeqNum'].unique().reset_index()
t = pd.merge(t1, t2, on='skey')
t['union'] = [list(set(a) | set(b)) for a, b in zip(t.BidApplSeqNum, t.OfferApplSeqNum)]
t = pd.merge(t, t3, on='skey')
t['less'] = [len(set(a) - set(b)) for a, b in zip(t.union, t.ApplSeqNum)]
t['less1'] = [list(set(a) - set(b))[0] for a, b in zip(t.union, t.ApplSeqNum)]
display(t['less1'].unique())
t[t['less'] > 1]

  if (await self.run_code(code, result,  async_=asy)):


array([2, 1], dtype=int64)

NameError: name 'db' is not defined

In [8]:
import pymongo 
import io 
import pandas as pd 
import pickle 
import datetime 
import time 
import gzip 
import lzma 
import pytz 
import pyarrow as pa 
import pyarrow.parquet as pq 
import numpy as np 
import re

def DB(host, db_name, user, passwd):
    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)
    return DBObj(uri, db_name=db_name)

class DBObj(object):
    def __init__(self, uri, symbol_column='skey', db_name='white_db', version=3): 
        self.db_name = db_name 
        self.uri = uri 
        self.client = pymongo.MongoClient(self.uri) 
        self.db = self.client[self.db_name] 
        self.chunk_size = 20000 
        self.symbol_column = symbol_column 
        self.date_column = 'date' 
        self.version = version

    def parse_uri(self, uri): 
        # mongodb://user:password@example.com 
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}
        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("date must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid date type: " + str(type(x)))
        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)
        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)
        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)
        return query

    def read_tick(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name] 
        query = self.build_query(start_date, end_date, symbol) 
        if not query: 
            print('cannot read the whole table') 
            return None  
        segs = [] 
        for x in collection.find(query): 
            x['data'] = self.deser(x['data'], x['ver']) 
            segs.append(x) 
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start'])) 
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def read_daily(self, table_name, start_date=None, end_date=None, skey=None, index_id=None, interval=None, index_name=None, col=None, return_sdi=True): 
        collection = self.db[table_name]
        # Build projection 
        prj = {'_id': 0} 
        if col is not None: 
            if return_sdi: 
                col = ['skey', 'date', 'index_id'] + col 
            for col_name in col: 
                prj[col_name] = 1 
        # Build query 
        query = {} 
        if skey is not None: 
            query['skey'] = {'$in': skey} 
        if interval is not None: 
            query['interval'] = {'$in': interval} 
        if index_id is not None: 
            query['index_id'] = {'$in': index_id}    
        if index_name is not None:
            n = '' 
            for name in index_name: 
                try: 
                    name = re.compile('[\u4e00-\u9fff]+').findall(name)[0] 
                    if len(n) == 0: 
                        n = n = "|".join(name) 
                    else: 
                        n = n + '|' + "|".join(name) 
                except: 
                    if len(n) == 0: 
                        n = name 
                    else: 
                        n = n + '|' + name 
            query['index_name'] = {'$regex': n}
        if start_date is not None: 
            if end_date is not None: 
                query['date'] = {'$gte': start_date, '$lte': end_date} 
            else: 
                query['date'] = {'$gte': start_date} 
        elif end_date is not None: 
            query['date'] = {'$lte': end_date} 
        # Load data 
        cur = collection.find(query, prj) 
        df = pd.DataFrame.from_records(cur) 
        if df.empty: 
            df = pd.DataFrame() 
        else:
            if 'index_id' in df.columns:
                df = df.sort_values(by=['date', 'index_id', 'skey']).reset_index(drop=True)
            else:
                df = df.sort_values(by=['date','skey']).reset_index(drop=True)
        return df 
 

    def write(self, table_name, df):
        if len(df) == 0: return

        multi_date = False

        if self.date_column in df.columns:
            date = str(df.head(1)[self.date_column].iloc[0])
            multi_date = len(df[self.date_column].unique()) > 1
        else:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            for symbol, sub_df in df.groupby([self.symbol_column]):
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = self.version
            ser_data = self.ser(df_seg, version)
            seg = {'ver': version, 'data': ser_data, 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}

        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)

        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None
        collection.delete_many(query)

    def list_tables(self):
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        elif version == 3:
            # 32-bit number needs more space than 64-bit for parquet
            for col_name in s.columns:
                col = s[col_name]
                if col.dtype == np.int32:
                    s[col_name] = s[col_name].astype(np.int64)
                elif col.dtype == np.uint32:
                    s[col_name] = s[col_name].astype(np.uint64)
            tbl = pa.Table.from_pandas(s)
            f = io.BytesIO()
            pq.write_table(tbl, f, use_dictionary=False, compression='ZSTD', compression_level=0)
            f.seek(0)
            data = f.read()
            return data
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        print(version)
        def unpickle(s):
            return pickle.loads(s)
        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        elif version == 3:
            f = io.BytesIO()
            f.write(s)
            f.seek(0)
            return pq.read_table(f, use_threads=False).to_pandas()
        else:
            raise Exception('unknown version')

def patch_pandas_pickle():
    if pd.__version__ < '0.24':
        import sys
        from types import ModuleType
        from pandas.core.internals import BlockManager
        pkg_name = 'pandas.core.internals.managers'
        if pkg_name not in sys.modules:
            m = ModuleType(pkg_name)
            m.BlockManager = BlockManager
            sys.modules[pkg_name] = m
patch_pandas_pickle()

database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"

db1 = DB("192.168.10.178", database_name, user, password)
db1.write('md_order', OrderLog1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [14]:
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"

db1 = DB("192.168.10.178", database_name, user, password)
db1.write('md_order', OrderLog1)

In [14]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        # mongodb://user:password@example.com
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        if len(df) == 0: return

        multi_date = False

        if self.date_column in df.columns:
            date = str(df.head(1)[self.date_column].iloc[0])
            multi_date = len(df[self.date_column].unique()) > 1
        else:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            for symbol, sub_df in df.groupby([self.symbol_column]):
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = 1
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}

        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)

        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for x in collection.find(query):
            x['data'] = self.deser(x['data'], x['ver'])
            segs.append(x)
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start']))
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def list_tables(self):
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        def unpickle(s):
            return pickle.loads(s)

        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    if pd.__version__ < '0.24':
        import sys
        from types import ModuleType
        from pandas.core.internals import BlockManager
        pkg_name = 'pandas.core.internals.managers'
        if pkg_name not in sys.modules:
            m = ModuleType(pkg_name)
            m.BlockManager = BlockManager
            sys.modules[pkg_name] = m
patch_pandas_pickle()

import pandas as pd
import random
import numpy as np
import glob
import os
from unrar import rarfile
import py7zr
import pickle
import datetime

columns1 = ["Date","OrigTime","SendTime","ercvtime","dbtime","ChannelNo","SecurityID","SecurityIDsource", "MDStreamID","PreClosePx",
                   "PxChnage1","PXChange2","openPrice","HighPx","LowPx","close","NumTrades","cum_volume","cum_amount","PE1","PE2","TradingPhase",
                   "totalofferqty", "wa_offerPrice", "totalbidqty", "wa_bidPrice", "PreNAV", "RealTimeNAV", "WarrantPremiumRate", "UpLimitPx",
                   "DownLimitPx", "TotalLongPosition", "unknown1", "unknown2", "unknown3"]
columns2 = ['Date',"OrigTime","SendTime","ercvtime","dbtime","ChannelNo","SecurityID","SecurityIDsource", "MDStreamID",'ask1p','bid1p',
                   "ask1q","bid1q", 'ask2p','bid2p',"ask2q","bid2q",'ask3p','bid3p',"ask3q","bid3q",'ask4p','bid4p',"ask4q","bid4q",'ask5p',
                    'bid5p',"ask5q","bid5q",'ask6p','bid6p',"ask6q","bid6q",'ask7p','bid7p',"ask7q","bid7q",'ask8p','bid8p',"ask8q","bid8q",
                   'ask9p','bid9p',"ask9q","bid9q",'ask10p','bid10p',"ask10q","bid10q","NUMORDERS_B1","NOORDERS_B1","ORDERQTY_B1",
                    "NUMORDERS_S1","NOORDERS_S1","ORDERQTY_S1"]
columns3 =  ["Date","OrigTime","SendTime","ercvtime","dbtime","ChannelNo","SecurityID","SecurityIDsource", "MDStreamID","PreClosePx",
                   "PxChnage1","PXChange2","openPrice","HighPx","LowPx","close","NumTrades","cum_volume","cum_amount","PE1","PE2","TradingPhase",
                   "totalofferqty", "wa_offerPrice", "totalbidqty", "wa_bidPrice", "PreNAV", "RealTimeNAV", "WarrantPremiumRate", "UpLimitPx",
                   "DownLimitPx", "TotalLongPosition"]

# startTm = datetime.datetime.now()
# readPath = r'\\192.168.10.30\Kevin_zhenyu\day_stock\***'
# dataPathLs = np.array(glob.glob(readPath))
# dataPathLs = dataPathLs[[np.array([os.path.basename(i).split('.')[0][:2] == 'SZ' for i in dataPathLs])]]
# db = pd.DataFrame()
# for p in dataPathLs:
#     dayData = pd.read_csv(p, compression='gzip')
#     db = pd.concat([db, dayData])
# print(datetime.datetime.now() - startTm)

startTm = datetime.datetime.now()
year = "2019"
startDate = "1025"
endDate = "1025"
df = []
bad = []
readPath = 'L:\\backup_data\\' + year + '\\***'
dataPathLs = np.array(glob.glob(readPath))
dateLs = np.array([os.path.basename(i) for i in dataPathLs])
dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]

for data in dataPathLs:
    am_order = pd.read_table(data + '\\am_hq_order_spot.txt',header=None)
    am_order.columns = ["date","OrigTime","SendTime","recvtime","dbtime","ChannelNo","MDStreamID","ApplSeqNum", "SecurityID","SecurityIDSource", "order_price",
                   "order_qty","TransactTime","order_side","order_type","ConfirmID","Contactor","ContactInfo","ExpirationDays","ExpirationType"]
    pm_order = pd.read_table(data + '\\pm_hq_order_spot.txt',header=None)
    pm_order.columns = ["date","OrigTime","SendTime","recvtime","dbtime","ChannelNo","MDStreamID","ApplSeqNum", "SecurityID","SecurityIDSource", "order_price",
                   "order_qty","TransactTime","order_side","order_type","ConfirmID","Contactor","ContactInfo","ExpirationDays","ExpirationType"]
    OrderLog1 = pd.concat([am_order, pm_order])
    del am_order
    del pm_order

    OrderLog1 = OrderLog1[(OrderLog1["SecurityID"] < 4000) | (OrderLog1["SecurityID"] > 300000)]
    OrderLog1["skey"] = OrderLog1["SecurityID"] + 2000000
    OrderLog1["clockAtArrival"] = OrderLog1["TransactTime"].astype(str).apply(lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp()*1e6))
    OrderLog1['datetime'] = OrderLog1["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
    OrderLog1["time"] = (OrderLog1['TransactTime'] - int(OrderLog1['TransactTime'].iloc[0]//1000000000*1000000000)).astype(np.int64)*1000
    OrderLog1["order_type"] =np.where(OrderLog1["order_type"] == 'U', 3, OrderLog1["order_type"])
    for col in ["skey", "date", "ApplSeqNum", "order_qty", "order_side", "order_type"]:
        OrderLog1[col] = OrderLog1[col].astype('int32')
    display(OrderLog1["order_price"].astype(str).apply(lambda x: len(x.split('.')[1])).unique())
    
    assert(OrderLog1[((OrderLog1["order_side"] != 1) & (OrderLog1["order_side"] != 2)) | (OrderLog1["order_type"].isnull())].shape[0] == 0)
    da_te = str(OrderLog1["date"].iloc[0]) 
    da_te = da_te[:4] + '-' + da_te[4:6] + '-' + da_te[6:8]
    db1 = db[db["date"] == da_te]
    sl = (db1["ID"].str[2:].astype(int) + 2000000).unique()
    del db1
    try:
        assert(len(set(sl) - set(OrderLog1["skey"].unique())) == 0)
    except:
        print(set(sl) - set(OrderLog1["skey"].unique()))
    
    OrderLog1 = OrderLog1[["skey", "date", "time", "clockAtArrival", "datetime", "ApplSeqNum", "order_side", "order_type", "order_price",
                                                 "order_qty"]]
    
    print(OrderLog1.dtypes)
    print(OrderLog1["date"].iloc[0])
    print("order finished")
    
    print(datetime.datetime.now() - startTm)

    
startDate = 20191025
endDate = 20191025
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"
db1 = DB("192.168.10.178", database_name, user, password)
trade = db1.read('md_trade', start_date=startDate, end_date=endDate)
trade = trade[trade['skey'] > 2000000]
t1 = trade.groupby('skey')['BidApplSeqNum'].unique().reset_index()
t2 = trade.groupby('skey')['OfferApplSeqNum'].unique().reset_index()
t3 = OrderLog1.groupby('skey')['ApplSeqNum'].unique().reset_index()
t = pd.merge(t1, t2, on='skey')
t['union'] = [list(set(a) | set(b)) for a, b in zip(t.BidApplSeqNum, t.OfferApplSeqNum)]
t = pd.merge(t, t3, on='skey')
t['less'] = [len(set(a) - set(b)) for a, b in zip(t.union, t.ApplSeqNum)]
t[t['less'] > 1]

array([2, 1], dtype=int64)

{2001914}
skey                       int32
date                       int32
time                       int64
clockAtArrival             int64
datetime          datetime64[ns]
ApplSeqNum                 int32
order_side                 int32
order_type                 int32
order_price              float64
order_qty                  int32
dtype: object
20191025
order finished
0:08:05.115100


Unnamed: 0,skey,BidApplSeqNum,OfferApplSeqNum,union,ApplSeqNum,less


In [15]:
OrderLog1.head(5)

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,order_side,order_type,order_price,order_qty
0,2300731,20191025,91500000000,1571966100000000,2019-10-25 09:15:00,1,1,2,27.39,95600
1,2300551,20191025,91500000000,1571966100000000,2019-10-25 09:15:00,2,1,2,22.58,188000
2,2300731,20191025,91500000000,1571966100000000,2019-10-25 09:15:00,3,1,2,27.39,36900
3,2300731,20191025,91500000000,1571966100000000,2019-10-25 09:15:00,4,1,2,27.39,221000
4,2300551,20191025,91500000000,1571966100000000,2019-10-25 09:15:00,5,1,2,22.58,421500


In [16]:
# database_name = 'com_md_eq_cn'
# user = "zhenyuy"
# password = "bnONBrzSMGoE"

# db1 = DB("192.168.10.178", database_name, user, password)
# db1.write('md_order', OrderLog1)

In [6]:
# 2020 ftp data version
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        # mongodb://user:password@example.com
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        if len(df) == 0: return

        multi_date = False

        if self.date_column in df.columns:
            date = str(df.head(1)[self.date_column].iloc[0])
            multi_date = len(df[self.date_column].unique()) > 1
        else:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            for symbol, sub_df in df.groupby([self.symbol_column]):
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = 1
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}

        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)

        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for x in collection.find(query):
            x['data'] = self.deser(x['data'], x['ver'])
            segs.append(x)
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start']))
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def list_tables(self):
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        def unpickle(s):
            return pickle.loads(s)

        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    if pd.__version__ < '0.24':
        import sys
        from types import ModuleType
        from pandas.core.internals import BlockManager
        pkg_name = 'pandas.core.internals.managers'
        if pkg_name not in sys.modules:
            m = ModuleType(pkg_name)
            m.BlockManager = BlockManager
            sys.modules[pkg_name] = m
patch_pandas_pickle()

import pandas as pd
import random
import numpy as np
import glob
import os
from unrar import rarfile
import py7zr
import pickle
import datetime

columns1 = ["Date","OrigTime","SendTime","ercvtime","dbtime","ChannelNo","SecurityID","SecurityIDsource", "MDStreamID","PreClosePx",
                   "PxChnage1","PXChange2","openPrice","HighPx","LowPx","close","NumTrades","cum_volume","cum_amount","PE1","PE2","TradingPhase",
                   "totalofferqty", "wa_offerPrice", "totalbidqty", "wa_bidPrice", "PreNAV", "RealTimeNAV", "WarrantPremiumRate", "UpLimitPx",
                   "DownLimitPx", "TotalLongPosition", "unknown1", "unknown2", "unknown3"]
columns2 = ['Date',"OrigTime","SendTime","ercvtime","dbtime","ChannelNo","SecurityID","SecurityIDsource", "MDStreamID",'ask1p','bid1p',
                   "ask1q","bid1q", 'ask2p','bid2p',"ask2q","bid2q",'ask3p','bid3p',"ask3q","bid3q",'ask4p','bid4p',"ask4q","bid4q",'ask5p',
                    'bid5p',"ask5q","bid5q",'ask6p','bid6p',"ask6q","bid6q",'ask7p','bid7p',"ask7q","bid7q",'ask8p','bid8p',"ask8q","bid8q",
                   'ask9p','bid9p',"ask9q","bid9q",'ask10p','bid10p',"ask10q","bid10q","NUMORDERS_B1","NOORDERS_B1","ORDERQTY_B1",
                    "NUMORDERS_S1","NOORDERS_S1","ORDERQTY_S1"]
columns3 =  ["Date","OrigTime","SendTime","ercvtime","dbtime","ChannelNo","SecurityID","SecurityIDsource", "MDStreamID","PreClosePx",
                   "PxChnage1","PXChange2","openPrice","HighPx","LowPx","close","NumTrades","cum_volume","cum_amount","PE1","PE2","TradingPhase",
                   "totalofferqty", "wa_offerPrice", "totalbidqty", "wa_bidPrice", "PreNAV", "RealTimeNAV", "WarrantPremiumRate", "UpLimitPx",
                   "DownLimitPx", "TotalLongPosition"]

# startTm = datetime.datetime.now()
# readPath = r'\\192.168.10.30\Kevin_zhenyu\day_stock\***'
# dataPathLs = np.array(glob.glob(readPath))
# dataPathLs = dataPathLs[[np.array([os.path.basename(i).split('.')[0][:2] == 'SZ' for i in dataPathLs])]]
# db = pd.DataFrame()
# for p in dataPathLs:
#     dayData = pd.read_csv(p, compression='gzip')
#     db = pd.concat([db, dayData])
# print(datetime.datetime.now() - startTm)

startTm = datetime.datetime.now()
year = "2020"
startDate = "0221"
endDate = "0221"
df = []
bad = []
readPath = 'L:\\backup_data\\' + year + '\\***'
dataPathLs = np.array(glob.glob(readPath))
dateLs = np.array([os.path.basename(i) for i in dataPathLs])
dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]

for data in dataPathLs:
    if len(np.array(glob.glob(data +'\\***'))) == 0:
        continue
    
    if len(np.array(glob.glob(data +'\\am_hq_order_spot.7z'))) == 1:
        date = os.path.basename(data)
        path = 'L:\\backup_data\\' + year 
        os.chdir(data)
        try:
            a = py7zr.SevenZipFile(data + '\\am_hq_order_spot.7z','r',filters=None)
        except:
            print("Bad unzip here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
            print(data + '\\am_hq_order_spot.7z')
            bad.append(data + '\\am_hq_order_spot.7z')
            continue
        path1 = path + '\\' + date
        a.extractall(path = path1)
        a.close()
        try:
            a = py7zr.SevenZipFile(data + '\\pm_hq_order_spot.7z','r',filters=None)
        except:
            print("Bad unzip here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
            print(data + '\\pm_hq_order_spot.7z')
            bad.append(data + '\\pm_hq_order_spot.7z')
            continue
        a.extractall(path = path1)
        a.close()
        
        am_order = pd.read_table(path1 + '\\am_hq_order_spot.txt',header=None)
        am_order.columns = ["date","OrigTime","SendTime","recvtime","dbtime","ChannelNo","MDStreamID","ApplSeqNum", "SecurityID","SecurityIDSource", "order_price",
                   "order_qty","TransactTime","order_side","order_type","ConfirmID","Contactor","ContactInfo","ExpirationDays","ExpirationType"]
        pm_order = pd.read_table(path1 + '\\pm_hq_order_spot.txt',header=None)
        pm_order.columns = ["date","OrigTime","SendTime","recvtime","dbtime","ChannelNo","MDStreamID","ApplSeqNum", "SecurityID","SecurityIDSource", "order_price",
                   "order_qty","TransactTime","order_side","order_type","ConfirmID","Contactor","ContactInfo","ExpirationDays","ExpirationType"]
        OrderLog1 = pd.concat([am_order, pm_order])
        del am_order
        del pm_order
  
    
    elif len(np.array(glob.glob(data +'\\am_hq_order_spot.7z.001'))) == 1:
        date = os.path.basename(data)
        path = 'L:\\backup_data\\' + year 
        os.chdir(data)
        os.system("copy /b am_hq_order_spot.7z.* am_hq_order_spot.7z")
        try:
            a = py7zr.SevenZipFile(data + '\\am_hq_order_spot.7z','r',filters=None)
        except:
            print("Bad unzip here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
            print(data + '\\am_hq_order_spot.7z')
            bad.append(data + '\\am_hq_order_spot.7z')
            continue
        path1 = path + '\\' + date
        a.extractall(path = path1)
        a.close()
        os.system("copy /b pm_hq_order_spot.7z.* pm_hq_order_spot.7z")
        try:
            a = py7zr.SevenZipFile(data + '\\pm_hq_order_spot.7z','r',filters=None)
        except:
            print("Bad unzip here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
            print(data + '\\pm_hq_order_spot.7z')
            bad.append(data + '\\pm_hq_order_spot.7z')
            continue
        a.extractall(path = path1)
        a.close()
        
        am_order = pd.read_table(path1 + '\\am_hq_order_spot.txt',header=None)
        am_order.columns = ["date","OrigTime","SendTime","recvtime","dbtime","ChannelNo","MDStreamID","ApplSeqNum", "SecurityID","SecurityIDSource", "order_price",
                   "order_qty","TransactTime","order_side","order_type","ConfirmID","Contactor","ContactInfo","ExpirationDays","ExpirationType"]
        pm_order = pd.read_table(path1 + '\\pm_hq_order_spot.txt',header=None)
        pm_order.columns = ["date","OrigTime","SendTime","recvtime","dbtime","ChannelNo","MDStreamID","ApplSeqNum", "SecurityID","SecurityIDSource", "order_price",
                   "order_qty","TransactTime","order_side","order_type","ConfirmID","Contactor","ContactInfo","ExpirationDays","ExpirationType"]
        OrderLog1 = pd.concat([am_order, pm_order])
        del am_order
        del pm_order

    elif len(np.array(glob.glob(data +'\\hq_order.7z'))) == 1:
        date = os.path.basename(data)
        path = 'L:\\backup_data\\' + year 
        os.chdir(data)
        try:
            a = py7zr.SevenZipFile(data + '\\hq_order.7z','r',filters=None)
        except:
            print("Bad unzip here!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
            print(data + '\\hq_order.7z')
            bad.append(data + '\\hq_order.7z')
            continue
        path1 = path + '\\' + date
        a.extractall(path = path1)
        a.close()
        OrderLog1 = pd.read_table(path1 + '\\hq_order.txt',header=None)
        OrderLog1.columns = ["date","OrigTime","SendTime","recvtime","dbtime","ChannelNo","MDStreamID","ApplSeqNum", "SecurityID","SecurityIDSource", "order_price",
                   "order_qty","TransactTime","order_side","order_type","ConfirmID","Contactor","ContactInfo","ExpirationDays","ExpirationType"]
    
    
    OrderLog1 = OrderLog1[(OrderLog1["SecurityID"] < 4000) | (OrderLog1["SecurityID"] > 300000)]
    OrderLog1["skey"] = OrderLog1["SecurityID"] + 2000000
    OrderLog1["clockAtArrival"] = OrderLog1["TransactTime"].astype(str).apply(lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp()*1e6))
    OrderLog1['datetime'] = OrderLog1["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
    OrderLog1["time"] = (OrderLog1['TransactTime'] - int(OrderLog1['TransactTime'].iloc[0]//1000000000*1000000000)).astype(np.int64)*1000
    OrderLog1["order_type"] =np.where(OrderLog1["order_type"] == 'U', 3, OrderLog1["order_type"])
    for col in ["skey", "date", "ApplSeqNum", "order_qty", "order_side", "order_type"]:
        OrderLog1[col] = OrderLog1[col].astype('int32')
    display(OrderLog1["order_price"].astype(str).apply(lambda x: len(x.split('.')[1])).unique())
    
    assert(OrderLog1[((OrderLog1["order_side"] != 1) & (OrderLog1["order_side"] != 2)) | (OrderLog1["order_type"].isnull())].shape[0] == 0)
    da_te = str(OrderLog1["date"].iloc[0]) 
    da_te = da_te[:4] + '-' + da_te[4:6] + '-' + da_te[6:8]
    db1 = db[db["date"] == da_te]
    sl = (db1["ID"].str[2:].astype(int) + 2000000).unique()
    del db1
    try:
        assert(len(set(sl) - set(OrderLog1["skey"].unique())) == 0)
    except:
        print(set(sl) - set(OrderLog1["skey"].unique()))
    
    OrderLog1 = OrderLog1[["skey", "date", "time", "clockAtArrival", "datetime", "ApplSeqNum", "order_side", "order_type", "order_price",
                                                 "order_qty"]]
    
    print(OrderLog1.dtypes)
    print(OrderLog1["date"].iloc[0])
    print("order finished")
    
    print(datetime.datetime.now() - startTm)

array([2, 1], dtype=int64)

skey                       int32
date                       int32
time                       int64
clockAtArrival             int64
datetime          datetime64[ns]
ApplSeqNum                 int32
order_side                 int32
order_type                 int32
order_price              float64
order_qty                  int32
dtype: object
20200221
order finished
0:40:32.394440


ServerSelectionTimeoutError: 192.168.10.223:27017: [WinError 10061] 由于目标计算机积极拒绝，无法连接。

In [8]:
year = "2020"
startDate = '20200221'
endDate = '20200221'
readPath = 'K:\\data\\' + year + '\\***\\***'
dataPathLs = np.array(glob.glob(readPath))
dateLs = np.array([os.path.basename(i) for i in dataPathLs])
dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]

for data in dataPathLs:
    
    startTm = datetime.datetime.now()
    
    readPath = data + '\\SZ\\tick\\***'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([int(os.path.basename(i).split('.')[0]) for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs < 4000) | ((dateLs > 300000) & (dateLs < 310000))]
    trade = []
    ll = []
    
    for i in dataPathLs:
        try:
            df = pd.read_csv(i)
        except:
            print("empty data")
            print(i)
            ll.append(int(os.path.basename(i).split('.')[0]))
            continue
        df["SecurityID"] = int(os.path.basename(i).split('.')[0])
        trade += [df]
    trade = pd.concat(trade).reset_index(drop=True)
    trade = trade[trade["ChannelNo"] != 4001]

    trade["skey"] = trade["SecurityID"] + 2000000

t1 = trade.groupby('skey')['BidApplSeqNum'].unique().reset_index()
t2 = trade.groupby('skey')['OfferApplSeqNum'].unique().reset_index()
t3 = OrderLog1.groupby('skey')['ApplSeqNum'].unique().reset_index()
t = pd.merge(t1, t2, on='skey', how='outer')
display(t[(t['BidApplSeqNum'].isnull()) | (t['OfferApplSeqNum'].isnull())])
t['union'] = [list(set(a) | set(b)) for a, b in zip(t.BidApplSeqNum, t.OfferApplSeqNum)]
t = pd.merge(t, t3, on='skey', how='outer')
display(t[(t['BidApplSeqNum'].isnull()) | (t['OfferApplSeqNum'].isnull()) | (t['ApplSeqNum'].isnull())])
t['less'] = [len(set(a) - set(b)) for a, b in zip(t.union, t.ApplSeqNum)]
t['less1'] = [list(set(a) - set(b))[0] for a, b in zip(t.union, t.ApplSeqNum)]
display(t['less1'].unique())
t[t['less'] > 1]

Unnamed: 0,skey,BidApplSeqNum,OfferApplSeqNum


Unnamed: 0,skey,BidApplSeqNum,OfferApplSeqNum,union,ApplSeqNum


array([0], dtype=int64)

Unnamed: 0,skey,BidApplSeqNum,OfferApplSeqNum,union,ApplSeqNum,less,less1


In [22]:
OrderLog1.head(5)

Unnamed: 0,skey,date,time,clockAtArrival,datetime,ApplSeqNum,order_side,order_type,order_price,order_qty
0,2300051,20200123,91500000000,1579742100000000,2020-01-23 09:15:00,1,1,2,7.69,282600
1,2002950,20200123,91500000000,1579742100000000,2020-01-23 09:15:00,2,1,2,34.32,1000
2,2002280,20200123,91500000000,1579742100000000,2020-01-23 09:15:00,3,2,2,3.58,9300
3,2002950,20200123,91500000000,1579742100000000,2020-01-23 09:15:00,4,1,2,34.32,26300
4,2300051,20200123,91500000000,1579742100000000,2020-01-23 09:15:00,5,1,2,7.69,622200


In [23]:
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"

db1 = DB("192.168.10.178", database_name, user, password)
db1.write('md_order', OrderLog1)

In [20]:
# 2020 version

import pandas as pd
import random
import numpy as np
import glob
import os
from unrar import rarfile
import py7zr
import pickle
import datetime

columns1 = ["Date","OrigTime","SendTime","ercvtime","dbtime","ChannelNo","SecurityID","SecurityIDsource", "MDStreamID","PreClosePx",
                   "PxChnage1","PXChange2","openPrice","HighPx","LowPx","close","NumTrades","cum_volume","cum_amount","PE1","PE2","TradingPhase",
                   "totalofferqty", "wa_offerPrice", "totalbidqty", "wa_bidPrice", "PreNAV", "RealTimeNAV", "WarrantPremiumRate", "UpLimitPx",
                   "DownLimitPx", "TotalLongPosition", "unknown1", "unknown2", "unknown3"]
columns2 = ['Date',"OrigTime","SendTime","ercvtime","dbtime","ChannelNo","SecurityID","SecurityIDsource", "MDStreamID",'ask1p','bid1p',
                   "ask1q","bid1q", 'ask2p','bid2p',"ask2q","bid2q",'ask3p','bid3p',"ask3q","bid3q",'ask4p','bid4p',"ask4q","bid4q",'ask5p',
                    'bid5p',"ask5q","bid5q",'ask6p','bid6p',"ask6q","bid6q",'ask7p','bid7p',"ask7q","bid7q",'ask8p','bid8p',"ask8q","bid8q",
                   'ask9p','bid9p',"ask9q","bid9q",'ask10p','bid10p',"ask10q","bid10q","NUMORDERS_B1","NOORDERS_B1","ORDERQTY_B1",
                    "NUMORDERS_S1","NOORDERS_S1","ORDERQTY_S1"]
columns3 =  ["Date","OrigTime","SendTime","ercvtime","dbtime","ChannelNo","SecurityID","SecurityIDsource", "MDStreamID","PreClosePx",
                   "PxChnage1","PXChange2","openPrice","HighPx","LowPx","close","NumTrades","cum_volume","cum_amount","PE1","PE2","TradingPhase",
                   "totalofferqty", "wa_offerPrice", "totalbidqty", "wa_bidPrice", "PreNAV", "RealTimeNAV", "WarrantPremiumRate", "UpLimitPx",
                   "DownLimitPx", "TotalLongPosition"]

# startTm = datetime.datetime.now()
# readPath = r'\\192.168.10.30\Kevin_zhenyu\day_stock\***'
# dataPathLs = np.array(glob.glob(readPath))
# dataPathLs = dataPathLs[[np.array([os.path.basename(i).split('.')[0][:2] == 'SZ' for i in dataPathLs])]]
# db = pd.DataFrame()
# for p in dataPathLs:
#     dayData = pd.read_csv(p, compression='gzip')
#     db = pd.concat([db, dayData])
# print(datetime.datetime.now() - startTm)

startTm = datetime.datetime.now()
startDate = "20200123"
endDate = "20200123"
df = []
bad = []
readPath = 'A:\\rawData\\logs_***_zs_92_01_day_data'
dataPathLs = np.array(glob.glob(readPath))
dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]

# for data in dataPathLs:
#     readPath = data + '\\mdOrderLog***'
#     dataPathLs = np.array(glob.glob(readPath))
#     OrderLog1 = pd.read_csv(dataPathLs[0], encoding="utf-8").loc[:, ["clockAtArrival", "sequenceNo", "exchId", "TransactTime",
#                                                  "ApplSeqNum", "SecurityID", "Side", "OrderType", "Price",
#                                                  "OrderQty"]]
#     OrderLog1 = OrderLog1[(OrderLog1["SecurityID"] < 4000) | (OrderLog1["SecurityID"] > 300000)]
#     OrderLog1 = OrderLog1.rename(columns={"Side":"order_side", "OrderType":"order_type", "Price":"order_price",
#                                              "OrderQty":'order_qty'})
#     OrderLog1['date'] = int(os.path.basename(dataPathLs[0]).split('_')[1])
#     OrderLog1["skey"] = OrderLog1["SecurityID"] + 2000000
#     OrderLog1["time"] = OrderLog1['TransactTime'].astype(np.int64)*1000
#     OrderLog1['TransactTime'] = OrderLog1['date'] * 1000000000 + OrderLog1['TransactTime']
#     OrderLog1["clockAtArrival"] = OrderLog1["TransactTime"].astype(str).apply(lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp()*1e6))
#     OrderLog1['datetime'] = OrderLog1["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
#     OrderLog1["order_type"] =np.where(OrderLog1["order_type"] == 'U', 3, OrderLog1["order_type"])
#     for col in ["skey", "date", "ApplSeqNum", "order_qty", "order_side", "order_type"]:
#         OrderLog1[col] = OrderLog1[col].astype('int32')
#     OrderLog1['order_price'] = OrderLog1['order_price']/10000
#     display(OrderLog1["order_price"].astype(str).apply(lambda x: len(x.split('.')[1])).unique())
    
#     assert(OrderLog1[((OrderLog1["order_side"] != 1) & (OrderLog1["order_side"] != 2)) | (OrderLog1["order_type"].isnull())].shape[0] == 0)
#     da_te = str(OrderLog1["date"].iloc[0]) 
#     da_te = da_te[:4] + '-' + da_te[4:6] + '-' + da_te[6:8]
#     db1 = db[db["date"] == da_te]
#     sl = (db1["ID"].str[2:].astype(int) + 2000000).unique()
#     del db1
#     try:
#         assert(len(set(sl) - set(OrderLog1["skey"].unique())) == 0)
#     except:
#         print(set(sl) - set(OrderLog1["skey"].unique()))
    
#     OrderLog1 = OrderLog1[["skey", "date", "time", "clockAtArrival", "datetime", "ApplSeqNum", "order_side", "order_type", "order_price",
#                                                  "order_qty"]]
    
#     print(OrderLog1["date"].iloc[0])
#     print("order finished")
    
#     print(datetime.datetime.now() - startTm)

    
startDate = 20200123
endDate = 20200123
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"
db1 = DB("192.168.10.178", database_name, user, password)
trade = db1.read('md_trade', start_date=startDate, end_date=endDate)
trade = trade[trade['skey'] > 2000000]
t1 = trade.groupby('skey')['BidApplSeqNum'].unique().reset_index()
t2 = trade.groupby('skey')['OfferApplSeqNum'].unique().reset_index()
t3 = OrderLog1.groupby('skey')['ApplSeqNum'].unique().reset_index()
t = pd.merge(t1, t2, on='skey')
t['union'] = [list(set(a) | set(b)) for a, b in zip(t.BidApplSeqNum, t.OfferApplSeqNum)]
t = pd.merge(t, t3, on='skey')
t['less'] = [len(set(a) - set(b)) for a, b in zip(t.union, t.ApplSeqNum)]
t['less1'] = [list(set(a) - set(b))[0] for a, b in zip(t.union, t.ApplSeqNum)]
t[t['less'] > 1]

Unnamed: 0,skey,BidApplSeqNum,OfferApplSeqNum,union,ApplSeqNum,less,less1


In [3]:
import pandas as pd
pd.set_option('max_columns', 300)
pd.read_pickle(r'A:\temp\2002290.pkl')

Unnamed: 0,time,clockAtArrival,sequenceNo,StockID,cum_volume,cum_amount,close,bid30p,bid29p,bid28p,bid27p,bid26p,bid25p,bid24p,bid23p,bid22p,bid21p,bid20p,bid19p,bid18p,bid17p,bid16p,bid15p,bid14p,bid13p,bid12p,bid11p,bid10p,bid9p,bid8p,bid7p,bid6p,bid5p,bid4p,bid3p,bid2p,bid1p,ask1p,ask2p,ask3p,ask4p,ask5p,ask6p,ask7p,ask8p,ask9p,ask10p,ask11p,ask12p,ask13p,ask14p,ask15p,ask16p,ask17p,ask18p,ask19p,ask20p,ask21p,ask22p,ask23p,ask24p,ask25p,ask26p,ask27p,ask28p,ask29p,ask30p,bid30q,bid29q,bid28q,bid27q,bid26q,bid25q,bid24q,bid23q,bid22q,bid21q,bid20q,bid19q,bid18q,bid17q,bid16q,bid15q,bid14q,bid13q,bid12q,bid11q,bid10q,bid9q,bid8q,bid7q,bid6q,bid5q,bid4q,bid3q,bid2q,bid1q,ask1q,ask2q,ask3q,ask4q,ask5q,ask6q,ask7q,ask8q,ask9q,ask10q,ask11q,ask12q,ask13q,ask14q,ask15q,ask16q,ask17q,ask18q,ask19q,ask20q,ask21q,ask22q,ask23q,ask24q,ask25q,ask26q,ask27q,ask28q,ask29q,ask30q,bid30n,bid29n,bid28n,bid27n,bid26n,bid25n,bid24n,bid23n,bid22n,bid21n,bid20n,bid19n,bid18n,bid17n,bid16n,bid15n,bid14n,bid13n,bid12n,bid11n,bid10n,bid9n,bid8n,bid7n,bid6n,bid5n,bid4n,bid3n,bid2n,bid1n,ask1n,ask2n,ask3n,ask4n,ask5n,ask6n,ask7n,ask8n,ask9n,ask10n,ask11n,ask12n,ask13n,ask14n,ask15n,ask16n,ask17n,ask18n,ask19n,ask20n,ask21n,ask22n,ask23n,ask24n,ask25n,ask26n,ask27n,ask28n,ask29n,ask30n,bid1Top50q,bid1Top49q,bid1Top48q,bid1Top47q,bid1Top46q,bid1Top45q,bid1Top44q,bid1Top43q,bid1Top42q,bid1Top41q,bid1Top40q,bid1Top39q,bid1Top38q,bid1Top37q,bid1Top36q,bid1Top35q,bid1Top34q,bid1Top33q,bid1Top32q,bid1Top31q,bid1Top30q,bid1Top29q,bid1Top28q,bid1Top27q,bid1Top26q,bid1Top25q,bid1Top24q,bid1Top23q,bid1Top22q,bid1Top21q,bid1Top20q,bid1Top19q,bid1Top18q,bid1Top17q,bid1Top16q,bid1Top15q,bid1Top14q,bid1Top13q,bid1Top12q,bid1Top11q,bid1Top10q,bid1Top9q,bid1Top8q,bid1Top7q,bid1Top6q,bid1Top5q,bid1Top4q,bid1Top3q,bid1Top2q,bid1Top1q,ask1Top1q,ask1Top2q,ask1Top3q,ask1Top4q,ask1Top5q,ask1Top6q,ask1Top7q,ask1Top8q,ask1Top9q,ask1Top10q,ask1Top11q,ask1Top12q,ask1Top13q,ask1Top14q,ask1Top15q,ask1Top16q,ask1Top17q,ask1Top18q,ask1Top19q,ask1Top20q,ask1Top21q,ask1Top22q,ask1Top23q,ask1Top24q,ask1Top25q,ask1Top26q,ask1Top27q,ask1Top28q,ask1Top29q,ask1Top30q,ask1Top31q,ask1Top32q,ask1Top33q,ask1Top34q,ask1Top35q,ask1Top36q,ask1Top37q,ask1Top38q,ask1Top39q,ask1Top40q,ask1Top41q,ask1Top42q,ask1Top43q,ask1Top44q,ask1Top45q,ask1Top46q,ask1Top47q,ask1Top48q,ask1Top49q,ask1Top50q,total_bid_quantity,total_ask_quantity,total_bid_vwap,total_ask_vwap,total_bid_levels,total_ask_levels,total_bid_orders,total_ask_orders
0,92500000,1577928300000000,246463,2002290,2400,11976.0,499,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,474,480,481,483,490,491,492,494,496,497,498,499,508,516,524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1300,5000,1100,2000,5000,15800,5000,1200,200,3000,1000,500,1800,2000,9300,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,1,1,1,1,5,1,1,1,2,1,1,1,1,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,500,1800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,41100,13100,4.892847,5.205802,13,5,20,9
1,93000010,1577928600010000,268979,2002290,2400,11976.0,499,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,474,480,481,482,483,490,491,492,494,496,497,498,499,508,516,524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1300,5000,1100,1000,2000,5000,15800,5000,1200,200,3000,1000,500,1800,2000,9300,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,1,1,1,1,1,5,1,1,1,2,1,1,1,1,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,500,1800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,42100,13100,4.891116,5.205802,14,5,21,9
2,93000010,1577928600010000,270239,2002290,2400,11976.0,499,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,474,480,481,482,483,490,491,492,494,495,496,497,498,499,508,516,524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1300,5000,1100,1000,2000,5000,15800,5000,1200,3000,200,3000,1000,500,1800,2000,9300,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,1,1,1,1,1,5,1,1,1,1,2,1,1,1,1,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,500,1800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,45100,13100,4.895033,5.205802,15,5,22,9
3,93000320,1577928600320000,296403,2002290,2400,11976.0,499,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,474,480,481,482,483,490,491,492,494,495,496,497,498,499,508,516,524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1300,5000,1100,1000,2000,5000,15800,5000,1200,3000,200,3000,2600,500,1800,2000,9300,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,1,1,1,1,1,5,1,1,1,1,2,2,1,1,1,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,500,1800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,46700,13100,4.897944,5.205802,15,5,23,9
4,93000460,1577928600460000,304485,2002290,2400,11976.0,499,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,474,480,481,482,483,490,491,492,494,495,496,497,498,499,508,515,516,524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1300,5000,1100,1000,2000,5000,15800,5000,1200,3000,200,3000,2600,500,1800,1200,2000,9300,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,1,1,1,1,1,5,1,1,1,1,2,2,1,1,1,1,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,500,1800,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,46700,14300,4.897944,5.201119,15,6,23,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637,145502350,1577948102350000,16000245,2002290,552700,2795855.0,504,0,0,0,0,0,0,0,0,0,0,0,0,0,474,475,480,482,484,488,491,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,512,513,514,515,516,517,518,519,520,521,523,524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5900,300,2500,1000,5900,5000,3800,1200,500,700,1400,8500,7000,11600,21000,15700,20400,1900,23000,46700,52501,28400,10000,16600,400,4500,36000,12500,12000,13000,11000,700,4000,30000,6500,42500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,1,2,1,1,1,5,1,1,2,2,4,4,9,4,5,3,3,4,7,3,4,1,8,1,5,4,4,2,1,2,1,2,1,3,17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1000,18300,1100,500,400,1000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,112400,352201,4.969342,5.128112,26,38,53,73
638,145544870,1577948144870000,16072797,2002290,552700,2795855.0,504,0,0,0,0,0,0,0,0,0,0,0,0,0,474,475,480,482,484,488,491,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,512,513,514,515,516,517,518,519,520,521,523,524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5900,300,2500,1000,5900,5000,3800,1200,500,700,1400,8500,12000,11600,21000,15700,20400,1900,23000,46700,52501,28400,10000,16600,400,4500,36000,12500,12000,13000,11000,700,4000,30000,6500,42500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,1,2,1,1,1,5,1,1,2,2,4,5,9,4,5,3,3,4,7,3,4,1,8,1,5,4,4,2,1,2,1,2,1,3,17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1000,18300,1100,500,400,1000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,117400,352201,4.970221,5.128112,26,38,54,73
639,145558850,1577948158850000,16093147,2002290,553500,2799879.0,503,0,0,0,0,0,0,0,0,0,0,0,0,0,474,475,480,482,484,488,491,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,512,513,514,515,516,517,518,519,520,521,523,524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5900,300,2500,1000,5900,5000,3800,1200,500,700,1400,8500,12000,11600,21000,15700,19600,1900,23000,46700,52501,28400,10000,16600,400,4500,36000,12500,12000,13000,11000,700,4000,30000,6500,42500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,1,2,1,1,1,5,1,1,2,2,4,5,9,4,5,3,3,4,7,3,4,1,8,1,5,4,4,2,1,2,1,2,1,3,17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1000,18300,300,500,400,1000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,116600,352201,4.969811,5.128112,26,38,54,73
640,145608000,1577948168000000,16108649,2002290,553500,2799879.0,503,0,0,0,0,0,0,0,0,0,0,0,0,0,474,475,480,482,484,488,491,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,512,513,514,515,516,517,518,519,520,521,523,524,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5900,300,2500,1000,5900,5000,3800,1200,500,700,1400,7500,12000,11600,21000,15700,19600,1900,23000,46700,52501,28400,10000,16600,400,4500,36000,12500,12000,13000,11000,700,4000,30000,6500,42500,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,7,1,2,1,1,1,5,1,1,2,2,3,5,9,4,5,3,3,4,7,3,4,1,8,1,5,4,4,2,1,2,1,2,1,3,17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1000,18300,300,500,400,1000,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,115600,352201,4.969723,5.128112,26,38,53,73
