# In [1]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    """Build a ``DBObj`` for database ``db_name`` on ``host``.

    Args:
        host: MongoDB host, inserted into the URI exactly as given.
        db_name: Database to open. It is also used as the ``authSource``
            unless ``user`` is ``admin`` or ``root``.
        user: Login name. ``admin`` and ``root`` authenticate against the
            ``admin`` database.
        passwd: Login password.

    Returns:
        A ``DBObj`` created from the assembled ``mongodb://`` URI.
    """
    # Imported locally so the helper does not depend on the file's import block.
    from urllib.parse import quote_plus

    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    # Credentials must be percent-escaped: a raw '@', ':', '/' or '%' in the
    # user name or password would otherwise corrupt the URI.
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (
        quote_plus(str(user)), quote_plus(str(passwd)), host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    """Store pandas DataFrames in MongoDB as compressed, pickled segments.

    Every stored document carries the keys ``ver`` (serialisation version),
    ``data`` (compressed pickle of a DataFrame slice), ``date``, ``symbol``
    and ``start`` (row offset of the slice inside its (date, symbol) group).
    """

    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        """Open a client for ``uri`` and select ``db_name``.

        Args:
            uri: MongoDB connection URI.
            symbol_column: DataFrame column that holds the symbol id.
            db_name: Name of the database to operate on.
        """
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000  # maximum number of rows per stored segment
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        """Split ``mongodb://user:password@example.com`` into its parts.

        Returns:
            The list of tokens left after removing the scheme and treating
            ``:`` and ``@`` as separators, e.g.
            ``['user', 'password', 'example.com']``.
        """
        stripped = uri.strip().replace('mongodb://', '').strip('/')
        return stripped.replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        """Drop the collection ``table_name``."""
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        """Rename collection ``old_table`` to ``new_table``."""
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        """Replace the stored segments of every (date, symbol) group in ``df``.

        For each group the existing documents with that ``date`` and
        ``symbol`` are deleted and the group's rows are written again in
        chunks. An empty ``df`` is a no-op.

        Args:
            table_name: Target collection name.
            df: DataFrame containing the date column and the symbol column.

        Raises:
            ValueError: If ``df`` has no date column.
        """
        if len(df) == 0:
            return

        if self.date_column not in df.columns:
            raise ValueError('DataFrame should contain date column')

        dates = df[self.date_column]
        multi_date = len(dates.unique()) > 1

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            groups = (
                (str(date), int(symbol), sub_df)
                for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column])
            )
        else:
            only_date = str(dates.iloc[0])
            # Group by the bare column name: with a one-element list newer
            # pandas yields 1-tuples as keys. The symbol is converted to a
            # plain int exactly like the multi-date branch so both branches
            # store and delete with the same key type.
            groups = (
                (only_date, int(symbol), sub_df)
                for symbol, sub_df in df.groupby(self.symbol_column)
            )

        for date, symbol, sub_df in groups:
            collection.delete_many({'date': date, 'symbol': symbol})
            self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        """Insert ``df`` into ``collection`` in slices of ``self.chunk_size`` rows.

        Each slice becomes one document tagged with ``date``, ``symbol`` and
        the row offset ``start`` at which the slice begins.
        """
        version = 1
        total = len(df)
        for start in range(0, total, self.chunk_size):
            end = min(start + self.chunk_size, total)
            seg = {
                'ver': version,
                'data': self.ser(df[start:end], version),
                'date': date,
                'symbol': symbol,
                'start': start,
            }
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        """Translate date bounds and symbol filter into a MongoDB query dict.

        Args:
            start_date: Inclusive lower bound; a ``YYYYMMDD`` string, an int,
                or a ``datetime.date``/``datetime.datetime`` (subclasses
                included). ``None`` means unbounded.
            end_date: Inclusive upper bound, same accepted types.
            symbol: A single symbol or a list/tuple of symbols; each value is
                converted with ``int``. Falsy values add no symbol filter.

        Returns:
            A dict with an optional ``date`` range and an optional ``symbol``
            condition; empty when no filter was given.

        Raises:
            ValueError: If a date string is not exactly 8 characters long.
            TypeError: If a date has an unsupported type.
        """
        def parse_date(x):
            if isinstance(x, str):
                if len(x) != 8:
                    raise ValueError("`date` must be YYYYMMDD format")
                return x
            # datetime.datetime is a subclass of datetime.date.
            if isinstance(x, datetime.date):
                return x.strftime("%Y%m%d")
            if isinstance(x, int):
                return parse_date(str(x))
            raise TypeError("invalid `date` type: " + str(type(x)))

        query = {}

        date_range = {}
        if start_date is not None:
            date_range['$gte'] = parse_date(start_date)
        if end_date is not None:
            date_range['$lte'] = parse_date(end_date)
        if date_range:
            query['date'] = date_range

        if symbol:
            if isinstance(symbol, (list, tuple)):
                query['symbol'] = {'$in': [int(x) for x in symbol]}
            else:
                query['symbol'] = int(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        """Delete the documents of ``table_name`` matching the given filters.

        Refuses to run (prints a message and returns ``None``) when no filter
        is given, so the whole table cannot be wiped by accident.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        """Load and concatenate the segments matching the given filters.

        Returns:
            A DataFrame with the segments ordered by (symbol, date, start) and
            a fresh index, or ``None`` when no filter was given or nothing
            matched.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for doc in collection.find(query):
            doc['data'] = self.deser(doc['data'], doc['ver'])
            segs.append(doc)
        if not segs:
            return None
        segs.sort(key=lambda doc: (doc['symbol'], doc['date'], doc['start']))
        return pd.concat([doc['data'] for doc in segs], ignore_index=True)

    def list_tables(self):
        """Return the names of the collections in the database."""
        # ``collection_names`` was removed in PyMongo 4. The check is made on
        # the class because attribute access on a PyMongo ``Database``
        # instance returns a collection for unknown names.
        if hasattr(type(self.db), 'list_collection_names'):
            return self.db.list_collection_names()
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        """Return the sorted distinct ``date`` values stored in ``table_name``.

        Missing bounds default to ``'00000000'`` and ``'99999999'``.
        """
        collection = self.db[table_name]
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        query = self.build_query(start_date, end_date, symbol)
        dates = {doc['date'] for doc in collection.find(query, {"date": 1, '_id': 0})}
        return sorted(dates)

    def ser(self, s, version):
        """Pickle ``s`` and compress it according to ``version``.

        Version 1 uses gzip, version 2 uses lzma.

        Raises:
            ValueError: If ``version`` is not 1 or 2.
        """
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        if version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        raise ValueError('unknown version')

    def deser(self, s, version):
        """Decompress and unpickle a payload produced by :meth:`ser`.

        NOTE: ``pickle.loads`` can execute arbitrary code, so this must only
        be used on data from a trusted database.

        Raises:
            ValueError: If ``version`` is not 1 or 2.
        """
        if version == 1:
            raw = gzip.decompress(s)
        elif version == 2:
            raw = lzma.decompress(s)
        else:
            raise ValueError('unknown version')
        return pickle.loads(raw)


def _leading_version_numbers(version, parts=2):
    """Return the first ``parts`` dot-separated components of ``version`` as ints.

    Only the leading digits of each component are used, so suffixes such as
    ``rc1`` are ignored; a component without leading digits counts as 0.
    """
    numbers = []
    for piece in str(version).split('.')[:parts]:
        digits = ''
        for ch in piece:
            if not ch.isdigit():
                break
            digits += ch
        numbers.append(int(digits) if digits else 0)
    return tuple(numbers)


def patch_pandas_pickle():
    """Register a stand-in ``pandas.core.internals.managers`` module on pandas < 0.24.

    The stand-in exposes ``BlockManager`` under that module path when the
    module is not already loaded. Presumably this lets old pandas unpickle
    frames written by newer pandas - confirm against the data producers.
    """
    # Compare numerically: as strings '0.9' sorts after '0.24', which would
    # skip the patch on versions that need it.
    if _leading_version_numbers(pd.__version__) < (0, 24):
        import sys
        from types import ModuleType
        from pandas.core.internals import BlockManager
        pkg_name = 'pandas.core.internals.managers'
        if pkg_name not in sys.modules:
            m = ModuleType(pkg_name)
            m.BlockManager = BlockManager
            sys.modules[pkg_name] = m
patch_pandas_pickle()


import pandas as pd
import random
import numpy as np
import glob
import pickle
import os
import datetime
import time
# Use the fully qualified option name: the bare pattern "max_columns" matches
# more than one option on newer pandas and is rejected as ambiguous.
pd.set_option("display.max_columns", 200)


# Inclusive date range (YYYYMMDD) to process.
startDate = 20200102
endDate = 20200529
database_name = 'com_md_eq_cn'
# NOTE(review): credentials are hard-coded in source; consider rotating them
# and loading them from the environment or a secrets store instead.
user = "zhenyuy"
password = "bnONBrzSMGoE"

db = DB("192.168.10.178", database_name, user, password)
# Read md_index for symbol 1000300 over the range only to learn which dates
# are present; the frame itself is discarded afterwards.
test = db.read('md_index', start_date=startDate, end_date=endDate, symbol=[1000300])
if test is None:
    # db.read returns None when nothing matches; fail with a clear message
    # instead of an opaque "'NoneType' object is not subscriptable".
    raise RuntimeError('no md_index data for symbol 1000300 between %s and %s'
                       % (startDate, endDate))
date_list = test['date'].unique()
del test

# Accumulators that the per-date processing appends to.
new_trade_data = []
new_order_data = []

for i in date_list:
    print('--------------------------------------------------------------------------------------------')
    print(i)
    print('SH lv2')
    startDate = str(i)
    endDate = str(i)
    db = DB("192.168.10.178", database_name, user, password)
    SH = db.read('md_snapshot_l2', start_date=startDate, end_date=endDate)
    SZ = SH[SH['skey'] > 2000000]
    SH = SH[SH['skey'] < 2000000]
    SH['num'] = SH['skey'] * 10000 + SH['ordering']
    SZ['num'] = SZ['skey'] * 10000 + SZ['ordering']
    
    SH = SH[['date', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open", 'num']]
    SZ = SZ[['date', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open", 'num']]

    startDate = str(i)
    endDate = str(i)

    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]


    for n in range(len(dataPathLs)):
        path1 = np.array(glob.glob(dataPathLs[n] + '/mdLog_SH_***'))
        SH1 = pd.read_csv(path1[0])
        index1 = SH1[SH1['StockID'].isin([16, 300, 852, 905])]
        SH1 = SH1[SH1['source'] == 4]

        SH1['skey'] = SH1['StockID'] + 1000000
        SH1 = SH1.rename(columns={"openPrice":"open"})
        SH1["open"] = np.where(SH1["cum_volume"] > 0, SH1.groupby("skey")["open"].transform("max"), SH1["open"])
        SH1["time"] = SH1["time"].apply(lambda x: int((x.replace(':', "")).replace(".", "")) * 1000)

    SH1 = SH1[['clockAtArrival', 'sequenceNo', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]]
    for cols in ['cum_amount', "close", 'open']:
        SH1[cols] = SH1[cols].round(2)
    cols = ['skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]
    SH1 = SH1[SH1['skey'].isin(SH['skey'].unique())]
    re = pd.merge(SH, SH1, on=cols, how='outer')

    p21 = re[(re['date'].isnull())][['clockAtArrival', 'sequenceNo', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]]
    p22 = re[(re['sequenceNo'].isnull())][["skey", "date", "time", 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open", 'num']]

    p11 = re[(~re['sequenceNo'].isnull()) & (~re['date'].isnull())][re[(~re['sequenceNo'].isnull()) 
                                                        & (~re['date'].isnull())]['num'].duplicated(keep=False)]
    p12 = re[(~re['sequenceNo'].isnull()) & (~re['date'].isnull())].drop_duplicates(['num'], keep=False)
    p11 = p11.sort_values(by=['num', 'sequenceNo'])
    display(p11)
    p11["order1"] = p11.groupby(["num"]).cumcount()
    p11["order2"] = p11.groupby(["sequenceNo"]).cumcount()
    p11 = p11[p11['order1'] == p11['order2']]

    p11_1 = re[(~re['sequenceNo'].isnull()) & (~re['date'].isnull())][re[(~re['sequenceNo'].isnull()) 
                                                        & (~re['date'].isnull())]['num'].duplicated(keep=False)].drop_duplicates('num')
    p11_1 = pd.merge(p11_1, p11[['num', 'order1']], on='num', how='left')
    p11_1 = p11_1[p11_1['order1'].isnull()]
    p11_1['sequenceNo'] = np.nan
    p11_1['clockAtArrival'] = np.nan

    p11.drop(['order1', 'order2'],axis=1,inplace=True)
    p11_1.drop(['order1'],axis=1,inplace=True)
    p11 = pd.concat([p11, p11_1])
    
    p1 = pd.concat([p11, p12])
    p2 = pd.merge(p22, p21[['skey', 'time', 'clockAtArrival', 'sequenceNo']], on=['skey', 'time'], how='left')
    re1 = pd.concat([p1, p2])
    re1 = re1.sort_values(by='num')
    re1['seq1'] = re1.groupby('skey')['sequenceNo'].ffill().bfill()
    sl = list(set(SH['skey'].unique()) - set(SH1['skey'].unique()))
    re1.loc[re1['skey'].isin(sl), 'seq1'] = np.nan
    re1['count1'] = re1.groupby(['seq1']).cumcount()
    re1['count2'] = re1.groupby(['seq1'])['count1'].transform('nunique')
    re1['min_seq'] = re1.groupby('skey')['sequenceNo'].transform('min')
    re1['count'] = np.where(re1['seq1'] != re1['min_seq'], re1['count1'], re1['count1']+1-re1['count2'])
    re1.drop(["min_seq"],axis=1,inplace=True)
    re1.drop(["count1"],axis=1,inplace=True)
    re1.drop(["count2"],axis=1,inplace=True)
    re1['dup'] = np.where(~re1["sequenceNo"].isnull(), re1.groupby(['sequenceNo']).cumcount(), 0)
    re1['dup1'] = np.where(~re1["sequenceNo"].isnull(), re1.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
    re1['nan'] = np.where((re1['sequenceNo'].isnull()) | (re1['dup'] != 0), 1, 0)
    re1.loc[(re1['dup1'] > 1) & (re1['dup'] > 0), 'sequenceNo'] = np.nan
    assert((len(set(sl) - set(re1[re1['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re1[re1['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
    assert(re1.shape[0] == SH.shape[0])

    display('%.2f%%' % (re1[re1['sequenceNo'].isnull()].shape[0]/re1.shape[0] * 100))
    
    
    
    
    
    
    
    print('-------------------------------------------------------------------------------------------')
    print('SZ lv2')
    startDate = str(i)
    endDate = str(i)

    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]


    for n in range(len(dataPathLs)):
        path1 = np.array(glob.glob(dataPathLs[n] + '/mdLog_SZ_***'))
        SZ1 = pd.read_csv(path1[0])
        SZ1 = SZ1[SZ1['source'] == 4]

        SZ1['skey'] = SZ1['StockID'] + 2000000
        SZ1 = SZ1.rename(columns={"openPrice":"open"})
        SZ1["open"] = np.where(SZ1["cum_volume"] > 0, SZ1.groupby("skey")["open"].transform("max"), SZ1["open"])
        SZ1["time"] = SZ1["time"].apply(lambda x: int((x.replace(':', "")).replace(".", "")) * 1000)

    SZ1 = SZ1[['clockAtArrival', 'sequenceNo', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]]
    for cols in ['cum_amount']:
        SZ1[cols] = SZ1[cols].round(2)
    cols = ['skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]
    SZ1 = SZ1[SZ1['skey'].isin(SZ['skey'].unique())]
    re = pd.merge(SZ, SZ1, on=cols, how='outer')

    display(re.shape[0])
    display(re[~re['sequenceNo'].isnull()].shape[0])
    display(re[~re['date'].isnull()].shape[0])
    display(SZ.shape[0])
    display(SZ1.shape[0])

    try:
        assert(re.shape[0] == re[~re['date'].isnull()].shape[0])
        print('SZ lv2 is complete')
    except:
        display('%.2f%%' % (re[~re['date'].isnull()].shape[0]/re.shape[0] * 100))
        print('92 have unique values not shared by database')
        re = pd.merge(SZ, SZ1, on=cols, how='left')

    if re[re.duplicated('num', keep=False)].shape[0] == 0:
        re2 = re.sort_values(by='num')
        re2['seq1'] = re2.groupby('skey')['sequenceNo'].ffill().bfill()
        sl = list(set(SZ['skey'].unique()) - set(SZ1['skey'].unique()))
        re2.loc[re2['skey'].isin(sl), 'seq1'] = np.nan
        re2['count1'] = re2.groupby(['seq1']).cumcount()
        re2['count2'] = re2.groupby(['seq1'])['count1'].transform('nunique')
        re2['min_seq'] = re2.groupby('skey')['sequenceNo'].transform('min')
        re2['count'] = np.where(re2['seq1'] != re2['min_seq'], re2['count1'], re2['count1']+1-re2['count2'])
        re2.drop(["min_seq"],axis=1,inplace=True)
        re2.drop(["count1"],axis=1,inplace=True)
        re2.drop(["count2"],axis=1,inplace=True)
        re2['dup'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo']).cumcount(), 0)
        re2['dup1'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
        re2['nan'] = np.where((re2['sequenceNo'].isnull()) | (re2['dup'] != 0), 1, 0)
        re2.loc[(re2['dup1'] > 1) & (re2['dup'] > 0), 'sequenceNo'] = np.nan
        assert((len(set(sl) - set(re2[re2['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re2[re2['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
        assert(re2.shape[0] == SZ.shape[0])

        display('%.2f%%' % (re2[re2['sequenceNo'].isnull()].shape[0]/re2.shape[0] * 100))


    else:
        p1 = re[re['num'].duplicated(keep=False)]
        p2 = re.drop_duplicates(['num'], keep=False)
        p1["order1"] = p1.groupby(["num"]).cumcount()
        p1["order2"] = p1.groupby(["sequenceNo"]).cumcount()
        p1 = p1[p1['order1'] == p1['order2']]
        p1.drop(['order1', 'order2'],axis=1,inplace=True)
        re = pd.concat([p1, p2])
        re2 = re.sort_values(by='num')
        re2['seq1'] = re2.groupby('skey')['sequenceNo'].ffill().bfill()
        sl = list(set(SZ['skey'].unique()) - set(SZ1['skey'].unique()))
        re2.loc[re2['skey'].isin(sl), 'seq1'] = np.nan
        re2['count1'] = re2.groupby(['seq1']).cumcount()
        re2['count2'] = re2.groupby(['seq1'])['count1'].transform('nunique')
        re2['min_seq'] = re2.groupby('skey')['sequenceNo'].transform('min')
        re2['count'] = np.where(re2['seq1'] != re2['min_seq'], re2['count1'], re2['count1']+1-re2['count2'])
        re2.drop(["min_seq"],axis=1,inplace=True)
        re2.drop(["count1"],axis=1,inplace=True)
        re2.drop(["count2"],axis=1,inplace=True)
        re2['dup'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo']).cumcount(), 0)
        re2['dup1'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
        re2['nan'] = np.where((re2['sequenceNo'].isnull()) | (re2['dup'] != 0), 1, 0)
        re2.loc[(re2['dup1'] > 1) & (re2['dup'] > 0), 'sequenceNo'] = np.nan
        assert((len(set(sl) - set(re2[re2['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re2[re2['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
        assert(re2.shape[0] == SZ.shape[0])

        display('%.2f%%' % (re2[re2['sequenceNo'].isnull()].shape[0]/re2.shape[0] * 100))
    
    print('----------------------------------------------------------------------------------------------')
    print('SH & SZ trade')
    
    startDate = str(i)
    endDate = str(i)
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db = DB("192.168.10.178", database_name, user, password)
    trade = db.read('md_trade', start_date=startDate, end_date=endDate)[['skey', 'date', 'ApplSeqNum']]

    startDate = str(i)
    endDate = str(i)

    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
    for n in range(len(dataPathLs)):
        path1 = np.array(glob.glob(dataPathLs[n] + '/mdTradeLog***'))
        trade1 = pd.read_csv(path1[0])
    trade1['skey'] = np.where(trade1['exchId'] == 2, trade1['SecurityID'] + 2000000, trade1['SecurityID'] + 1000000)
    trade1 = trade1[trade1['skey'].isin(trade['skey'].unique())]
    re = pd.merge(trade, trade1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='outer')
    try:
        assert(re.shape[0] == trade.shape[0])
        display('trade data is complete')
        k = 0
    except:
        display('%.2f%%' % (trade.shape[0]/re.shape[0] * 100))
        k = 1
        display('trade data incomplete')
        k1 = pd.merge(trade1, re[re['date'].isnull()][['skey', 'ApplSeqNum']], on=['skey', 'ApplSeqNum'], how='right')
        display(k1.shape[0])
        display(k1['ExecType'].unique())
        display(k1['TransactTime'].unique())
        k1['date'] = trade['date'].iloc[0]
        new_trade_data += [k1[['clockAtArrival', 'sequenceNo', 'TransactTime', 'ApplSeqNum', 'date', 'skey', 'ExecType', 'TradeBSFlag', 
       'TradePrice', 'TradeQty', 'BidApplSeqNum', 'OfferApplSeqNum']]]
        re = pd.merge(trade, trade1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='left')
        assert(re.shape[0] == trade.shape[0])

    re3 = re.sort_values(by=['skey', 'ApplSeqNum'])
    re3['seq1'] = re3.groupby('skey')['sequenceNo'].ffill().bfill()
    sl = list(set(trade['skey'].unique()) - set(trade1['skey'].unique()))
    re3.loc[re3['skey'].isin(sl), 'seq1'] = np.nan
    re3['count1'] = re3.groupby(['seq1']).cumcount()
    re3['count2'] = re3.groupby(['seq1'])['count1'].transform('nunique')
    re3['min_seq'] = re3.groupby('skey')['sequenceNo'].transform('min')
    re3['count'] = np.where(re3['seq1'] != re3['min_seq'], re3['count1'], re3['count1']+1-re3['count2'])
    re3.drop(["min_seq"],axis=1,inplace=True)
    re3.drop(["count1"],axis=1,inplace=True)
    re3.drop(["count2"],axis=1,inplace=True)
    re3['dup'] = np.where(~re3["sequenceNo"].isnull(), re3.groupby(['sequenceNo']).cumcount(), 0)
    re3['dup1'] = np.where(~re3["sequenceNo"].isnull(), re3.groupby(['sequenceNo'])['ApplSeqNum'].transform('nunique'), 0)
    re3['nan'] = np.where((re3['sequenceNo'].isnull()) | (re3['dup'] != 0), 1, 0)
    re3.loc[(re3['dup1'] > 1) & (re3['dup'] > 0), 'sequenceNo'] = np.nan
    assert((len(set(sl) - set(re3[re3['seq1'].isnull()]['skey'].unique())) == 0) & 
           (len(set(re3[re3['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
    assert(re3.shape[0] == trade.shape[0])
    if k == 1:
        k1['seq1'] = k1['sequenceNo']
        k1['count'] = 0
        k1['nan'] = 0
        k1['dup1'] = 1
        re3 = pd.concat([re3, k1[['clockAtArrival', 'date', 'sequenceNo', 'skey', 'ApplSeqNum', 'seq1', 
                                  'count', 'nan', 'dup1']]])

    display('%.2f%%' % (re3[re3['sequenceNo'].isnull()].shape[0]/re3.shape[0] * 100))

    
    print('--------------------------------------------------------------------------------------------------')
    print('SZ order data')

    startDate = str(i)
    endDate = str(i)
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db = DB("192.168.10.178", database_name, user, password)
    order = db.read('md_order', start_date=startDate, end_date=endDate)[['skey', 'date', 'ApplSeqNum']]

    startDate = str(i)
    endDate = str(i)

    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
    for n in range(len(dataPathLs)):
        path1 = np.array(glob.glob(dataPathLs[n] + '/mdOrderLog***'))
        order1 = pd.read_csv(path1[0])
    order1['skey'] = order1['SecurityID'] + 2000000
    order1 = order1[order1['skey'].isin(order['skey'].unique())]
    re = pd.merge(order, order1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='outer')
    try:
        assert(re.shape[0] == order.shape[0])
        display('order data is complete')
        k = 0
    except:
        display('%.2f%%' % (order.shape[0]/re.shape[0] * 100))
        k = 1
        display('order data incomplete')
        k2 = pd.merge(order1, re[re['date'].isnull()][['skey', 'ApplSeqNum']], on=['skey', 'ApplSeqNum'], how='right')
        display(k2.shape[0])
        display(k2['SecurityID'].unique())
        display(k2['TransactTime'].unique())
        k2['date'] = order['date'].iloc[0]
        new_order_data += [k2[['clockAtArrival', 'sequenceNo', 'TransactTime', 'ApplSeqNum', 'date', 'skey', 'Side', 
       'OrderType', 'Price', 'OrderQty']]]
        re = pd.merge(order, order1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='left')
        assert(re.shape[0] == order.shape[0])

    re4 = re.sort_values(by=['skey', 'ApplSeqNum'])
    re4['seq1'] = re4.groupby('skey')['sequenceNo'].ffill().bfill()
    sl = list(set(order['skey'].unique()) - set(order1['skey'].unique()))
    re4.loc[re4['skey'].isin(sl), 'seq1'] = np.nan
    re4['count1'] = re4.groupby(['seq1']).cumcount()
    re4['count2'] = re4.groupby(['seq1'])['count1'].transform('nunique')
    re4['min_seq'] = re4.groupby('skey')['sequenceNo'].transform('min')
    re4['count'] = np.where(re4['seq1'] != re4['min_seq'], re4['count1'], re4['count1']+1-re4['count2'])
    re4.drop(["min_seq"],axis=1,inplace=True)
    re4.drop(["count1"],axis=1,inplace=True)
    re4.drop(["count2"],axis=1,inplace=True)
    re4['dup'] = np.where(~re4["sequenceNo"].isnull(), re4.groupby(['sequenceNo']).cumcount(), 0)
    re4['dup1'] = np.where(~re4["sequenceNo"].isnull(), re4.groupby(['sequenceNo'])['ApplSeqNum'].transform('nunique'), 0)
    re4['nan'] = np.where((re4['sequenceNo'].isnull()) | (re4['dup'] != 0), 1, 0)
    re4.loc[(re4['dup1'] > 1) & (re4['dup'] > 0), 'sequenceNo'] = np.nan
    assert((len(set(sl) - set(re4[re4['seq1'].isnull()]['skey'].unique())) == 0) & 
           (len(set(re4[re4['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
    assert(re4.shape[0] == order.shape[0])
    if k == 1:
        k2['seq1'] = k2['ApplSeqNum']
        k2['count'] = 0
        k2['nan'] = 0
        k2['dup1'] = 1
        re4 = pd.concat([re4, k2[['clockAtArrival', 'date', 'sequenceNo', 'skey', 'ApplSeqNum', 'seq1', 
                                  'count', 'nan', "dup1"]]])


    display('%.2f%%' % (re4[re4['sequenceNo'].isnull()].shape[0]/re4.shape[0] * 100))
    
    print('-----------------------------------------------------------------------------------------------------')
    print('SH index data')
    
    startDate = str(i)
    endDate = str(i)
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db = DB("192.168.10.178", database_name, user, password)
    index = db.read('md_index', start_date=startDate, end_date=endDate)

    index1['skey'] = index1['StockID'] + 1000000
    index1 = index1.rename(columns={"openPrice":"open"})
    index1["open"] = np.where(index1["cum_volume"] > 0, index1.groupby("skey")["open"].transform("max"), index1["open"])
    index1['close'] = np.where(index1['cum_volume'] == 0, 0, index1['close'])
    index1["time"] = index1["time"].apply(lambda x: int((x.replace(':', "")).replace(".", "")) * 1000)
    index['close'] = np.where(index['cum_volume'] == 0, 0, index['close'])
    index['num'] = index['skey'] * 10000 + index['ordering']
    index = index[['skey', 'date', 'cum_volume', 'cum_amount', "close", "open", 'num']]
    index1 = index1[['clockAtArrival', 'sequenceNo', 'skey', 'cum_volume', 'cum_amount', "close", "open", "time"]]
    for cols in ['cum_amount']:
        index1[cols] = index1[cols].round(1)
    cols = ['skey', 'cum_volume', 'cum_amount', "close", "open"]
    index1 = index1[index1['skey'].isin(index['skey'].unique())]
    re = pd.merge(index, index1, on=cols, how='outer')

    display(re.shape[0])
    display(re[~re['sequenceNo'].isnull()].shape[0])
    display(re[~re['date'].isnull()].shape[0])
    display(index.shape[0])
    display(index1.shape[0])
    
    try:
        assert(re.shape[0] == re[~re['date'].isnull()].shape[0])
        print('index data is complete')
    except:
        display('%.2f%%' % (re[~re['date'].isnull()].shape[0]/re.shape[0] * 100))
        re = pd.merge(index, index1, on=cols, how='left')
        print('92 have unique values not shared by database')

    p11 = re[re.duplicated('num', keep=False)]
    p2 = re.drop_duplicates('num', keep=False)
    p11["order1"] = p11.groupby(["num"]).cumcount()
    p11["order2"] = p11.groupby(["sequenceNo"]).cumcount()
    p11 = p11[p11['order1'] == p11['order2']]

    p12 = re[re.duplicated('num', keep=False)].drop_duplicates('num')
    p12 = pd.merge(p12, p11[['num', 'order1']], on='num', how='left')
    p12 = p12[p12['order1'].isnull()]
    p12['sequenceNo'] = np.nan
    p12['clockAtArrival'] = np.nan

    p11.drop(['order1', 'order2'],axis=1,inplace=True)
    p12.drop(['order1'],axis=1,inplace=True)
    p1 = pd.concat([p11, p12])

    re = pd.concat([p1, p2])
    assert(re[re.duplicated('num', keep=False)].shape[0] == 0)

    if re[re['sequenceNo'].isnull()].shape[0] != 0:
        re5 = re.sort_values(by='num')
        re5['seq1'] = re5.groupby('skey')['sequenceNo'].ffill().bfill()
        sl = list(set(index['skey'].unique()) - set(index1['skey'].unique()))
        re5.loc[re5['skey'].isin(sl), 'seq1'] = np.nan
        re5['count1'] = re5.groupby(['seq1']).cumcount()
        re5['count2'] = re5.groupby(['seq1'])['count1'].transform('nunique')
        re5['min_seq'] = re5.groupby('skey')['sequenceNo'].transform('min')
        re5['count'] = np.where(re5['seq1'] != re5['min_seq'], re5['count1'], re5['count1']+1-re5['count2'])
        re5.drop(["min_seq"],axis=1,inplace=True)
        re5.drop(["count1"],axis=1,inplace=True)
        re5.drop(["count2"],axis=1,inplace=True)
        re5['dup'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo']).cumcount(), 0)
        re5['dup1'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
        re5['nan'] = np.where((re5['sequenceNo'].isnull()) | (re5['dup'] != 0), 1, 0)
        re5.loc[(re5['dup1'] > 1) & (re5['dup'] > 0), 'sequenceNo'] = np.nan
        assert((len(set(sl) - set(re5[re5['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re5[re5['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
        assert(re5.shape[0] == index.shape[0])

        display('%.0f%%' % (re5[re5['sequenceNo'].isnull()].shape[0]/re5.shape[0] * 100))
    else:
        re5 = re.sort_values(by='num')
        re5['seq1'] = re5['sequenceNo']
        sl = list(set(index['skey'].unique()) - set(index1['skey'].unique()))
        re5.loc[re5['skey'].isin(sl), 'seq1'] = np.nan
        re5['count1'] = re5.groupby(['seq1']).cumcount()
        re5['count2'] = re5.groupby(['seq1'])['count1'].transform('nunique')
        re5['min_seq'] = re5.groupby('skey')['sequenceNo'].transform('min')
        re5['count'] = np.where(re5['seq1'] != re5['min_seq'], re5['count1'], re5['count1']+1-re5['count2'])
        re5.drop(["min_seq"],axis=1,inplace=True)
        re5.drop(["count1"],axis=1,inplace=True)
        re5.drop(["count2"],axis=1,inplace=True)
        re5['dup'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo']).cumcount(), 0)
        re5['dup1'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
        re5['nan'] = np.where((re5['sequenceNo'].isnull()) | (re5['dup'] != 0), 1, 0)
        re5.loc[(re5['dup1'] > 1) & (re5['dup'] > 0), 'sequenceNo'] = np.nan
        assert((len(set(sl) - set(re5[re5['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re5[re5['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
        assert(re5.shape[0] == index.shape[0])

        display('%.0f%%' % (re5[re5['sequenceNo'].isnull()].shape[0]/re5.shape[0] * 100))
    
    
    print('----------------------------------------------------------------------------------------------------')
    print('final concat')
    # Diagnostic only: for every pair of frames, look for sequenceNo values
    # that occur in both. When a pair overlaps, the offending rows of each
    # frame are displayed (first frame of the pair first); processing then
    # continues regardless. The pairs are visited in a fixed order so the
    # displayed output is stable from run to run.
    overlap_checks = [
        (SZ1, SH1),
        (SZ1, trade1),
        (SZ1, order1),
        (SZ1, index1),
        (SH1, index1),
        (SH1, trade1),
        (SH1, order1),
        (trade1, order1),
        (trade1, index1),
        (index1, order1),
    ]
    for left, right in overlap_checks:
        # Build the intersection once per pair and reuse it for both displays.
        shared = list(set(left['sequenceNo']) & set(right['sequenceNo']))
        if shared:
            display(left[left['sequenceNo'].isin(shared)])
            display(right[right['sequenceNo'].isin(shared)])
    # Drop the temporaries so they do not keep the frames alive once the
    # frames themselves are deleted below.
    del overlap_checks, left, right, shared

    # The raw inputs are finished with; unbind them before the merge.
    del SH, SH1
    del SZ, SZ1
    del trade, trade1
    del order, order1
    del index, index1

    # Label every frame with the name it will be split out by later.
    re1['tag'] = 'SH'
    re2['tag'] = 'SZ'
    re3['tag'] = 'trade'
    re4['tag'] = 'order'
    re5['tag'] = 'index'

    # Keep only the columns the merge needs. re1/re2/re5 carry `num`,
    # re3/re4 carry `ApplSeqNum` in its place.
    num_cols = ['skey', 'date', 'num', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']
    appl_cols = ['skey', 'date', 'ApplSeqNum', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']
    re1 = re1[num_cols]
    re2 = re2[num_cols]
    re3 = re3[appl_cols]
    re4 = re4[appl_cols]
    re5 = re5[num_cols]

    # Sort each frame into its own order and record the resulting row
    # position as seq2.
    re1 = re1.sort_values('num').reset_index(drop=True)
    re1['seq2'] = re1.index

    re2 = re2.sort_values('num').reset_index(drop=True)
    re2['seq2'] = re2.index

    re3 = re3.sort_values(['skey', 'ApplSeqNum']).reset_index(drop=True)
    re3['seq2'] = re3.index

    re4 = re4.sort_values(['skey', 'ApplSeqNum']).reset_index(drop=True)
    re4['seq2'] = re4.index

    re5 = re5.sort_values('num').reset_index(drop=True)
    re5['seq2'] = re5.index

    # fr1 collects the rows whose seq1 is null, fr2 the rows that have one.
    # Each source frame is deleted as soon as it has been split, and a
    # progress marker is displayed after every frame.
    fr1, fr2 = [], []

    no_seq1 = re1['seq1'].isnull()
    fr1.append(re1[no_seq1])
    fr2.append(re1[~no_seq1])
    del re1
    display('1. here~')

    no_seq1 = re2['seq1'].isnull()
    fr1.append(re2[no_seq1])
    fr2.append(re2[~no_seq1])
    del re2
    display('2. here~')

    no_seq1 = re3['seq1'].isnull()
    fr1.append(re3[no_seq1])
    fr2.append(re3[~no_seq1])
    del re3
    display('3. here~')

    no_seq1 = re4['seq1'].isnull()
    fr1.append(re4[no_seq1])
    fr2.append(re4[~no_seq1])
    del re4
    display('4. here~')

    no_seq1 = re5['seq1'].isnull()
    fr1.append(re5[no_seq1])
    fr2.append(re5[~no_seq1])
    del re5
    display('5. here~')
    del no_seq1

    # Stack the pieces; each result gets a fresh 0..n-1 index.
    fr1 = pd.concat(fr1, ignore_index=True)
    fr2 = pd.concat(fr2, ignore_index=True)
    
    # Rebuild a single, duplicate-free sequenceNo across all frames.
    #
    # fr2 holds the rows that have a seq1, fr1 the rows that do not. fr2 is
    # ordered by (seq1, seq2) and renumbered first; fr1 is then numbered
    # after the largest value used in fr2.
    startTm = datetime.datetime.now()
    fr2 = fr2.sort_values(by=['seq1', 'seq2'])
    print(datetime.datetime.now() - startTm)

    # Rows that are neither flagged in `nan` nor share their sequenceNo with
    # another `num` (dup1 == 1) get no extra offset.
    fr2.loc[(fr2['nan']==0) & (fr2['dup1']==1), 'count'] = 0
    # Shift every sequenceNo up by the number of flagged rows seen so far.
    fr2['sum_nan'] = fr2['nan'].cumsum()
    fr2['sequenceNo'] = fr2['sequenceNo'] + fr2['sum_nan']
    startTm = datetime.datetime.now()
    # NOTE(review): ffill runs per seq1 group, but the trailing bfill runs
    # over the whole column rather than per group - confirm this is intended.
    fr2['sequenceNo'] = fr2.groupby('seq1')['sequenceNo'].ffill().bfill()
    print(datetime.datetime.now() - startTm)
    fr2['sequenceNo'] = fr2['sequenceNo'] + fr2['count']

    # Rows still lacking a sequenceNo are numbered after the largest one.
    fr21 = fr2[~fr2['sequenceNo'].isnull()]
    fr22 = fr2[fr2['sequenceNo'].isnull()]
    display(fr22.shape[0])
    display(fr21.shape[0])
    display(fr2.shape[0])
    if fr22.shape[0] != 0:
        next_seq = int(fr21['sequenceNo'].max()) + 1
        # assign() returns a new frame, so no SettingWithCopyWarning is
        # raised for writing into a filtered slice of fr2.
        fr22 = fr22.assign(sequenceNo=range(next_seq, next_seq + fr22.shape[0]))
        fr2 = pd.concat([fr21, fr22])
    del fr21
    del fr22
    display(fr2.shape[0])

    # Repair step for a duplicated sequenceNo. Only the first duplicated
    # value is handled; the assert at the end of the branch fails if any
    # duplicate remains afterwards.
    dup_rows = fr2[fr2.duplicated('sequenceNo', keep=False)]
    if dup_rows.shape[0] != 0:
        display(dup_rows)
        caa = dup_rows['clockAtArrival'].max()
        seq = dup_rows['sequenceNo'].iloc[0]
        m_in = fr2[fr2['sequenceNo'] > seq]['sequenceNo'].min()
        # Everything after `seq` moves up by 1 when the next value is more
        # than one away, otherwise by 2, which leaves seq + 1 unused.
        shift = 1 if m_in > seq + 1 else 2
        later = fr2['sequenceNo'] > seq
        fr2.loc[later, 'sequenceNo'] = fr2.loc[later, 'sequenceNo'] + shift
        del later
        # Of the rows sharing `seq`, the one with the largest clockAtArrival
        # among the duplicates takes seq + 1.
        fr2.loc[(fr2['sequenceNo'] == seq) & (fr2['clockAtArrival'] == caa), 'sequenceNo'] = seq + 1
        assert(fr2[fr2.duplicated('sequenceNo', keep=False)].shape[0] == 0)
    del dup_rows

    # Rows without a seq1 are numbered after everything in fr2.
    next_seq = int(fr2['sequenceNo'].max()) + 1
    fr1['sequenceNo'] = range(next_seq, next_seq + fr1.shape[0])
    fr2 = pd.concat([fr1, fr2])
    del fr1
    # Final guarantee: every row has a distinct sequenceNo.
    assert(fr2[fr2.duplicated('sequenceNo', keep=False)].shape[0] == 0)
    
    # Write one pickle per tag into /mnt/e/result/<startDate>/.
    # makedirs(exist_ok=True) lets a date be re-run without failing on the
    # directory left behind by an earlier run, and creates missing parents.
    out_dir = '/mnt/e/result/' + startDate
    os.makedirs(out_dir, exist_ok=True)

    # (tag, key column) pairs: rows tagged 'trade' and 'order' are written
    # with ApplSeqNum, the others with num. The file is named after the tag.
    outputs = (
        ('SH', 'num'),
        ('SZ', 'num'),
        ('trade', 'ApplSeqNum'),
        ('order', 'ApplSeqNum'),
        ('index', 'num'),
    )
    for tag, key_col in outputs:
        part = fr2[fr2['tag'] == tag][["skey", "date", key_col, 'sequenceNo', "clockAtArrival"]]
        part.to_pickle(out_dir + '/' + tag + '.pkl')
        # Release each slice before building the next one.
        del part
    del fr2

    print(str(i) + 'finished')


--------------------------------------------------------------------------------------------
20200102
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.52%'

-------------------------------------------------------------------------------------------
SZ lv2


8598613

8449796

8598613

8598613

8449796

SZ lv2 is complete


'1.73%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.05%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.02%'

-----------------------------------------------------------------------------------------------------
SH index data


809335

809335

809335

14801

16382

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'6%'

----------------------------------------------------------------------------------------------------
final concat


Unnamed: 0,clockAtArrival,sequenceNo,exchId,securityType,__isRepeated,TransactTime,ChannelNo,ApplSeqNum,SecurityID,ExecType,TradeBSFlag,__origTickSeq,TradePrice,TradeQty,TradeMoney,BidApplSeqNum,OfferApplSeqNum,skey
47567254,1577948140458219,112176837,2,1,0,145533490,2012,16833947,651,F,N,172595140,679200,200,135840000,16825092,16833945,2000651


Unnamed: 0,clockAtArrival,sequenceNo,exchId,securityType,__isRepeated,TransactTime,ChannelNo,ApplSeqNum,SecurityID,Side,OrderType,__origTickSeq,Price,OrderQty,skey
35584127,1577948140458224,112176837,2,1,0,145533490,2011,16864635,300092,2,2,172595142,73500,3600,2300092


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:53.431655
0:00:57.776058


0

99604183

99604183

99604183

Unnamed: 0,skey,date,num,sequenceNo,seq1,clockAtArrival,nan,count,tag,dup1,seq2,ApplSeqNum,sum_nan
34617486,2000651,20200102.0,,112211384.0,112176837.0,1577948000000000.0,0,0.0,trade,1.0,19624752,16833947.0,34547
89951872,2300092,20200102.0,,112211384.0,112176837.0,1577948000000000.0,0,0.0,order,1.0,26529981,16864635.0,34547


20200102finished
--------------------------------------------------------------------------------------------
20200103
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.65%'

-------------------------------------------------------------------------------------------
SZ lv2


8454191

8285615

8454191

8454191

8285615

SZ lv2 is complete


'1.99%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.06%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.02%'

-----------------------------------------------------------------------------------------------------
SH index data


909046

909046

909046

14700

16365

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'7%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:42.912987
0:00:56.737903


0

97651405

97651405

97651405

20200103finished
--------------------------------------------------------------------------------------------
20200106
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'2.28%'

-------------------------------------------------------------------------------------------
SZ lv2


8807962

8583363

8807962

8807962

8583363

SZ lv2 is complete


'2.55%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.29%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.09%'

-----------------------------------------------------------------------------------------------------
SH index data


388024

388024

388024

14340

14267

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'8%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:54.600603
0:01:23.010319


0

109360813

109360813

109360813

20200106finished
--------------------------------------------------------------------------------------------
20200107
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
4657893,20200107.0,1603192,150005000000,805900,22997685.0,28.62,28.62,28.61,28.6,28.57,28.56,2900,2400,1700,400,800,28.63,28.64,28.65,28.66,28.67,5300,12000,14000,3100,2300,28.31,16031920000.0,1578380000000000.0,118692271.0
4657894,20200107.0,1603192,150005000000,805900,22997685.0,28.62,28.62,28.61,28.6,28.57,28.56,2900,2400,1700,400,800,28.63,28.64,28.65,28.66,28.67,5300,12000,14000,3100,2300,28.31,16031920000.0,1578380000000000.0,118697819.0
4657895,20200107.0,1603192,150005000000,805900,22997685.0,28.62,28.62,28.61,28.6,28.57,28.56,2900,2400,1700,400,800,28.63,28.64,28.65,28.66,28.67,5300,12000,14000,3100,2300,28.31,16031920000.0,1578380000000000.0,118692271.0
4657896,20200107.0,1603192,150005000000,805900,22997685.0,28.62,28.62,28.61,28.6,28.57,28.56,2900,2400,1700,400,800,28.63,28.64,28.65,28.66,28.67,5300,12000,14000,3100,2300,28.31,16031920000.0,1578380000000000.0,118697819.0
5523728,20200107.0,1603697,150005000000,4503540,63082305.55,14.02,14.02,14.01,14.0,13.99,13.98,6500,39237,40600,5200,16400,14.03,14.04,14.05,14.06,14.07,26000,12900,28800,7800,13900,14.11,16036970000.0,1578380000000000.0,118692274.0
5523729,20200107.0,1603697,150005000000,4503540,63082305.55,14.02,14.02,14.01,14.0,13.99,13.98,6500,39237,40600,5200,16400,14.03,14.04,14.05,14.06,14.07,26000,12900,28800,7800,13900,14.11,16036970000.0,1578380000000000.0,118697820.0
5523730,20200107.0,1603697,150005000000,4503540,63082305.55,14.02,14.02,14.01,14.0,13.99,13.98,6500,39237,40600,5200,16400,14.03,14.04,14.05,14.06,14.07,26000,12900,28800,7800,13900,14.11,16036970000.0,1578380000000000.0,118692274.0
5523731,20200107.0,1603697,150005000000,4503540,63082305.55,14.02,14.02,14.01,14.0,13.99,13.98,6500,39237,40600,5200,16400,14.03,14.04,14.05,14.06,14.07,26000,12900,28800,7800,13900,14.11,16036970000.0,1578380000000000.0,118697820.0
6388253,20200107.0,1688368,150005000000,749878,65427052.77,87.9,87.81,87.7,87.65,87.64,87.63,300,300,300,1163,1353,87.9,87.91,87.95,87.96,87.97,4383,700,300,3501,928,87.2,16883680000.0,1578380000000000.0,118692283.0
6388254,20200107.0,1688368,150005000000,749878,65427052.77,87.9,87.81,87.7,87.65,87.64,87.63,300,300,300,1163,1353,87.9,87.91,87.95,87.96,87.97,4383,700,300,3501,928,87.2,16883680000.0,1578380000000000.0,118697821.0


'1.56%'

-------------------------------------------------------------------------------------------
SZ lv2


8623392

8455169

8623392

8623392

8455169

SZ lv2 is complete


'1.95%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.07%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.04%'

-----------------------------------------------------------------------------------------------------
SH index data


827969

827969

827969

14668

16366

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'6%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:43.213032
0:01:22.174730


0

104486588

104486588

104486588

20200107finished
--------------------------------------------------------------------------------------------
20200108
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
6220064,20200108.0,1603992,150005000000,3097640,62739310.0,20.05,20.05,20.04,20.03,20.02,20.01,3500,8200,7200,9100,5900,20.06,20.07,20.08,20.09,20.1,5900,10600,11800,16200,3100,20.35,16039920000.0,1578467000000000.0,125341747.0
6220065,20200108.0,1603992,150005000000,3097640,62739310.0,20.05,20.05,20.04,20.03,20.02,20.01,3500,8200,7200,9100,5900,20.06,20.07,20.08,20.09,20.1,5900,10600,11800,16200,3100,20.35,16039920000.0,1578467000000000.0,125347918.0
6220066,20200108.0,1603992,150005000000,3097640,62739310.0,20.05,20.05,20.04,20.03,20.02,20.01,3500,8200,7200,9100,5900,20.06,20.07,20.08,20.09,20.1,5900,10600,11800,16200,3100,20.35,16039920000.0,1578467000000000.0,125341747.0
6220067,20200108.0,1603992,150005000000,3097640,62739310.0,20.05,20.05,20.04,20.03,20.02,20.01,3500,8200,7200,9100,5900,20.06,20.07,20.08,20.09,20.1,5900,10600,11800,16200,3100,20.35,16039920000.0,1578467000000000.0,125347918.0
6483991,20200108.0,1688258,150005000000,2297698,193454500.0,84.53,84.52,84.5,84.41,84.4,84.33,1782,700,12408,200,200,84.53,84.54,84.56,84.58,84.6,5028,278,1605,4010,200,85.97,16882580000.0,1578467000000000.0,125341755.0
6483992,20200108.0,1688258,150005000000,2297698,193454500.0,84.53,84.52,84.5,84.41,84.4,84.33,1782,700,12408,200,200,84.53,84.54,84.56,84.58,84.6,5028,278,1605,4010,200,85.97,16882580000.0,1578467000000000.0,125347919.0
6483993,20200108.0,1688258,150005000000,2297698,193454500.0,84.53,84.52,84.5,84.41,84.4,84.33,1782,700,12408,200,200,84.53,84.54,84.56,84.58,84.6,5028,278,1605,4010,200,85.97,16882580000.0,1578467000000000.0,125341755.0
6483994,20200108.0,1688258,150005000000,2297698,193454500.0,84.53,84.52,84.5,84.41,84.4,84.33,1782,700,12408,200,200,84.53,84.54,84.56,84.58,84.6,5028,278,1605,4010,200,85.97,16882580000.0,1578467000000000.0,125347919.0


'1.65%'

-------------------------------------------------------------------------------------------
SZ lv2


8819958

8645775

8819958

8819958

8645775

SZ lv2 is complete


'1.97%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.06%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.02%'

-----------------------------------------------------------------------------------------------------
SH index data


887592

887592

887592

14700

16049

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'8%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:56.079905
0:01:23.363778


0

110682904

110682904

110682904

20200108finished
--------------------------------------------------------------------------------------------
20200109
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
5988401,20200109.0,1688002,150013000000,3630189,149946900.0,41.0,40.99,40.98,40.97,40.96,40.95,4078,1807,1093,2340,3600,41.0,41.02,41.03,41.04,41.05,31757,6355,5300,1000,869,41.08,16880020000.0,1578553000000000.0,113406051.0
5988402,20200109.0,1688002,150013000000,3630189,149946900.0,41.0,40.99,40.98,40.97,40.96,40.95,4078,1807,1093,2340,3600,41.0,41.02,41.03,41.04,41.05,31757,6355,5300,1000,869,41.08,16880020000.0,1578553000000000.0,113411929.0
5988403,20200109.0,1688002,150013000000,3630189,149946900.0,41.0,40.99,40.98,40.97,40.96,40.95,4078,1807,1093,2340,3600,41.0,41.02,41.03,41.04,41.05,31757,6355,5300,1000,869,41.08,16880020000.0,1578553000000000.0,113406051.0
5988404,20200109.0,1688002,150013000000,3630189,149946900.0,41.0,40.99,40.98,40.97,40.96,40.95,4078,1807,1093,2340,3600,41.0,41.02,41.03,41.04,41.05,31757,6355,5300,1000,869,41.08,16880020000.0,1578553000000000.0,113411929.0


'1.69%'

-------------------------------------------------------------------------------------------
SZ lv2


8431572

8250421

8431572

8431572

8250421

SZ lv2 is complete


'2.15%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.06%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.05%'

-----------------------------------------------------------------------------------------------------
SH index data


763085

763085

763085

14704

16431

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'5%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:39.618635
0:00:59.260478


0

99556009

99556009

99556009

20200109finished
--------------------------------------------------------------------------------------------
20200110
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
3046333,20200110.0,1600933,150006000000,3755125,5.572828e+07,14.76,14.76,14.75,14.74,14.71,14.70,11000,5800,3200,7300,12800,14.77,14.78,14.80,14.81,14.83,26200,2035,1800,100,9800,14.95,1.600933e+10,1.578640e+15,109147676.0
3046334,20200110.0,1600933,150006000000,3755125,5.572828e+07,14.76,14.76,14.75,14.74,14.71,14.70,11000,5800,3200,7300,12800,14.77,14.78,14.80,14.81,14.83,26200,2035,1800,100,9800,14.95,1.600933e+10,1.578640e+15,109147842.0
3046335,20200110.0,1600933,150006000000,3755125,5.572828e+07,14.76,14.76,14.75,14.74,14.71,14.70,11000,5800,3200,7300,12800,14.77,14.78,14.80,14.81,14.83,26200,2035,1800,100,9800,14.95,1.600933e+10,1.578640e+15,109147676.0
3046336,20200110.0,1600933,150006000000,3755125,5.572828e+07,14.76,14.76,14.75,14.74,14.71,14.70,11000,5800,3200,7300,12800,14.77,14.78,14.80,14.81,14.83,26200,2035,1800,100,9800,14.95,1.600933e+10,1.578640e+15,109147842.0
3414379,20200110.0,1601163,150006000000,7868830,1.237117e+08,15.79,15.78,15.77,15.76,15.75,15.74,26400,31100,12800,22600,10900,15.79,15.80,15.81,15.82,15.83,42500,47200,45400,39300,53300,15.63,1.601163e+10,1.578640e+15,109147639.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6102091,20200110.0,1688128,150006000000,5314128,1.125940e+08,21.42,21.42,21.41,21.40,21.39,21.38,55091,6500,11520,3000,50400,21.43,21.44,21.45,21.46,21.47,17896,15674,33242,14575,7464,21.08,1.688128e+10,1.578640e+15,109147859.0
6137360,20200110.0,1688199,150006000000,2167732,1.398460e+08,64.82,64.82,64.81,64.80,64.78,64.77,969,2849,800,1523,900,64.83,64.84,64.85,64.88,64.90,3351,5458,701,3311,1000,63.56,1.688199e+10,1.578640e+15,109147656.0
6137361,20200110.0,1688199,150006000000,2167732,1.398460e+08,64.82,64.82,64.81,64.80,64.78,64.77,969,2849,800,1523,900,64.83,64.84,64.85,64.88,64.90,3351,5458,701,3311,1000,63.56,1.688199e+10,1.578640e+15,109147838.0
6137362,20200110.0,1688199,150006000000,2167732,1.398460e+08,64.82,64.82,64.81,64.80,64.78,64.77,969,2849,800,1523,900,64.83,64.84,64.85,64.88,64.90,3351,5458,701,3311,1000,63.56,1.688199e+10,1.578640e+15,109147656.0


'1.64%'

-------------------------------------------------------------------------------------------
SZ lv2


8385978

8222973

8385978

8385978

8222973

SZ lv2 is complete


'1.94%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.05%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.03%'

-----------------------------------------------------------------------------------------------------
SH index data


852906

852906

852906

14708

16738

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'5%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:40.201191
0:00:56.748499


0

95347031

95347031

95347031

20200110finished
--------------------------------------------------------------------------------------------
20200113
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.64%'

-------------------------------------------------------------------------------------------
SZ lv2


8284893

8105994

8284893

8284893

8105994

SZ lv2 is complete


'2.16%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.07%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.08%'

-----------------------------------------------------------------------------------------------------
SH index data


893715

893715

893711

15185

17231

'100.00%'

92 have unique values not shared by database


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'3%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:24.807831
0:00:56.628197


0

93900792

93900792

93900792

20200113finished
--------------------------------------------------------------------------------------------
20200114
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
644100,20200114.0,1600187,150012000000,14841063,39398590.0,2.67,2.66,2.65,2.64,2.63,2.62,389400,411100,319074,611100,461700,2.67,2.68,2.69,2.7,2.71,634327,1187115,894700,1404900,375000,2.65,16001870000.0,1578985000000000.0,111665044.0
644101,20200114.0,1600187,150012000000,14841063,39398590.0,2.67,2.66,2.65,2.64,2.63,2.62,389400,411100,319074,611100,461700,2.67,2.68,2.69,2.7,2.71,634327,1187115,894700,1404900,375000,2.65,16001870000.0,1578985000000000.0,111670678.0
644102,20200114.0,1600187,150012000000,14841063,39398590.0,2.67,2.66,2.65,2.64,2.63,2.62,389400,411100,319074,611100,461700,2.67,2.68,2.69,2.7,2.71,634327,1187115,894700,1404900,375000,2.65,16001870000.0,1578985000000000.0,111665044.0
644103,20200114.0,1600187,150012000000,14841063,39398590.0,2.67,2.66,2.65,2.64,2.63,2.62,389400,411100,319074,611100,461700,2.67,2.68,2.69,2.7,2.71,634327,1187115,894700,1404900,375000,2.65,16001870000.0,1578985000000000.0,111670678.0
1632596,20200114.0,1600503,150012000000,20831020,68676930.0,3.28,3.27,3.26,3.25,3.24,3.23,648300,299000,356700,90500,372800,3.28,3.29,3.3,3.31,3.32,443185,610500,337500,367900,383100,3.3,16005030000.0,1578985000000000.0,111665073.0
1632597,20200114.0,1600503,150012000000,20831020,68676930.0,3.28,3.27,3.26,3.25,3.24,3.23,648300,299000,356700,90500,372800,3.28,3.29,3.3,3.31,3.32,443185,610500,337500,367900,383100,3.3,16005030000.0,1578985000000000.0,111670679.0
1632598,20200114.0,1600503,150012000000,20831020,68676930.0,3.28,3.27,3.26,3.25,3.24,3.23,648300,299000,356700,90500,372800,3.28,3.29,3.3,3.31,3.32,443185,610500,337500,367900,383100,3.3,16005030000.0,1578985000000000.0,111665073.0
1632599,20200114.0,1600503,150012000000,20831020,68676930.0,3.28,3.27,3.26,3.25,3.24,3.23,648300,299000,356700,90500,372800,3.28,3.29,3.3,3.31,3.32,443185,610500,337500,367900,383100,3.3,16005030000.0,1578985000000000.0,111670679.0
2487430,20200114.0,1600751,150012000000,30327650,91476300.0,3.0,3.0,2.99,2.98,2.97,2.96,147700,613600,603600,344400,184100,3.01,3.02,3.03,3.04,3.05,533400,949097,3248300,1940300,999100,3.02,16007510000.0,1578985000000000.0,111665103.0
2487431,20200114.0,1600751,150012000000,30327650,91476300.0,3.0,3.0,2.99,2.98,2.97,2.96,147700,613600,603600,344400,184100,3.01,3.02,3.03,3.04,3.05,533400,949097,3248300,1940300,999100,3.02,16007510000.0,1578985000000000.0,111670680.0


'1.68%'

-------------------------------------------------------------------------------------------
SZ lv2


8395392

8226849

8395392

8395392

8226849

SZ lv2 is complete


'2.01%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.12%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.07%'

-----------------------------------------------------------------------------------------------------
SH index data


914102

914102

914102

14704

16060

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'7%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:29.663193
0:00:57.916859


0

97736239

97736239

97736239

20200114finished
--------------------------------------------------------------------------------------------
20200115
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.81%'

-------------------------------------------------------------------------------------------
SZ lv2


8177214

8014752

8177214

8177214

8014752

SZ lv2 is complete


'1.99%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.08%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.05%'

-----------------------------------------------------------------------------------------------------
SH index data


890783

890783

890783

14712

16642

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'6%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:21.064397
0:00:55.347570


0

92268415

92268415

92268415

20200115finished
--------------------------------------------------------------------------------------------
20200116
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.90%'

-------------------------------------------------------------------------------------------
SZ lv2


8114971

7943253

8114971

8114971

7943253

SZ lv2 is complete


'2.12%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'3.06%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.04%'

-----------------------------------------------------------------------------------------------------
SH index data


877501

877501

877501

14674

16845

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'6%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:13.219033
0:00:52.783732


0

90544939

90544939

90544939

20200116finished
--------------------------------------------------------------------------------------------
20200117
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
3397323,20200117.0,1601330,150004000000,8946800,99472750.0,11.05,11.05,11.04,11.03,11.02,11.01,176300,26500,26700,9200,44200,11.06,11.07,11.08,11.09,11.1,48300,67500,81800,43700,24800,11.15,16013300000.0,1579244000000000.0,100972707.0
3397324,20200117.0,1601330,150004000000,8946800,99472750.0,11.05,11.05,11.04,11.03,11.02,11.01,176300,26500,26700,9200,44200,11.06,11.07,11.08,11.09,11.1,48300,67500,81800,43700,24800,11.15,16013300000.0,1579244000000000.0,100979252.0
3397325,20200117.0,1601330,150004000000,8946800,99472750.0,11.05,11.05,11.04,11.03,11.02,11.01,176300,26500,26700,9200,44200,11.06,11.07,11.08,11.09,11.1,48300,67500,81800,43700,24800,11.15,16013300000.0,1579244000000000.0,100972707.0
3397326,20200117.0,1601330,150004000000,8946800,99472750.0,11.05,11.05,11.04,11.03,11.02,11.01,176300,26500,26700,9200,44200,11.06,11.07,11.08,11.09,11.1,48300,67500,81800,43700,24800,11.15,16013300000.0,1579244000000000.0,100979252.0
4463121,20200117.0,1603298,150004000000,2384530,33260400.0,13.9,13.89,13.88,13.87,13.85,13.83,2900,17200,6000,5600,4700,13.9,13.91,13.93,13.94,13.95,17800,8300,2500,11000,2700,13.89,16032980000.0,1579244000000000.0,100972662.0
4463122,20200117.0,1603298,150004000000,2384530,33260400.0,13.9,13.89,13.88,13.87,13.85,13.83,2900,17200,6000,5600,4700,13.9,13.91,13.93,13.94,13.95,17800,8300,2500,11000,2700,13.89,16032980000.0,1579244000000000.0,100979253.0
4463123,20200117.0,1603298,150004000000,2384530,33260400.0,13.9,13.89,13.88,13.87,13.85,13.83,2900,17200,6000,5600,4700,13.9,13.91,13.93,13.94,13.95,17800,8300,2500,11000,2700,13.89,16032980000.0,1579244000000000.0,100972662.0
4463124,20200117.0,1603298,150004000000,2384530,33260400.0,13.9,13.89,13.88,13.87,13.85,13.83,2900,17200,6000,5600,4700,13.9,13.91,13.93,13.94,13.95,17800,8300,2500,11000,2700,13.89,16032980000.0,1579244000000000.0,100979253.0
5823877,20200117.0,1688036,150004000000,9218811,567699600.0,61.9,61.9,61.88,61.86,61.8,61.78,48118,625,510,6650,500,61.94,61.99,62.0,62.04,62.06,275,200,200,500,28787,57.28,16880370000.0,1579244000000000.0,100973715.0
5823878,20200117.0,1688036,150004000000,9218811,567699600.0,61.9,61.9,61.88,61.86,61.8,61.78,48118,625,510,6650,500,61.94,61.99,62.0,62.04,62.06,275,200,200,500,28787,57.28,16880370000.0,1579244000000000.0,100979240.0


'2.04%'

-------------------------------------------------------------------------------------------
SZ lv2


7997139

7838537

7997139

7997139

7838537

SZ lv2 is complete


'1.98%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'1.44%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.04%'

-----------------------------------------------------------------------------------------------------
SH index data


910514

910514

910512

14667

16558

'100.00%'

92 have unique values not shared by database


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'5%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:02.738943
0:00:51.926675


0

87740636

87740636

87740636

20200117finished
--------------------------------------------------------------------------------------------
20200120
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'2.07%'

-------------------------------------------------------------------------------------------
SZ lv2


8108967

7923143

8108967

8108967

7923143

SZ lv2 is complete


'2.29%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'1.12%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.03%'

-----------------------------------------------------------------------------------------------------
SH index data


775072

775072

775067

14716

15697

'100.00%'

92 have unique values not shared by database


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'7%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:15.838419
0:00:53.770398


0

91069025

91069025

91069025

20200120finished
--------------------------------------------------------------------------------------------
20200121
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
2166479,20200121.0,1600678,144808000000,29091090,209889300.0,6.96,6.96,6.95,6.94,6.93,6.92,20500,8900,31400,39800,36300,6.97,6.98,6.99,7.0,7.01,23300,35500,17200,28100,4300,7.58,16006780000.0,1579589000000000.0,102492125.0
2166480,20200121.0,1600678,144808000000,29091090,209889300.0,6.96,6.96,6.95,6.94,6.93,6.92,20500,8900,31400,39800,36300,6.97,6.98,6.99,7.0,7.01,23300,35500,17200,28100,4300,7.58,16006780000.0,1579589000000000.0,102492126.0
2198904,20200121.0,1600689,150017000000,1933000,18988820.0,9.76,9.76,9.75,9.74,9.73,9.72,19100,13200,1800,6400,12500,9.77,9.78,9.79,9.8,9.81,1800,125301,9800,9300,30800,9.81,16006890000.0,1579590000000000.0,108015689.0
2198905,20200121.0,1600689,150017000000,1933000,18988820.0,9.76,9.76,9.75,9.74,9.73,9.72,19100,13200,1800,6400,12500,9.77,9.78,9.79,9.8,9.81,1800,125301,9800,9300,30800,9.81,16006890000.0,1579590000000000.0,108021196.0
2198906,20200121.0,1600689,150017000000,1933000,18988820.0,9.76,9.76,9.75,9.74,9.73,9.72,19100,13200,1800,6400,12500,9.77,9.78,9.79,9.8,9.81,1800,125301,9800,9300,30800,9.81,16006890000.0,1579590000000000.0,108015689.0
2198907,20200121.0,1600689,150017000000,1933000,18988820.0,9.76,9.76,9.75,9.74,9.73,9.72,19100,13200,1800,6400,12500,9.77,9.78,9.79,9.8,9.81,1800,125301,9800,9300,30800,9.81,16006890000.0,1579590000000000.0,108021196.0
2788940,20200121.0,1600866,150017000000,18017713,82533680.0,4.59,4.59,4.58,4.57,4.56,4.55,56000,39500,142449,398400,149900,4.6,4.61,4.62,4.63,4.64,243698,253700,248300,139300,90000,4.55,16008660000.0,1579590000000000.0,108015701.0
2788941,20200121.0,1600866,150017000000,18017713,82533680.0,4.59,4.59,4.58,4.57,4.56,4.55,56000,39500,142449,398400,149900,4.6,4.61,4.62,4.63,4.64,243698,253700,248300,139300,90000,4.55,16008660000.0,1579590000000000.0,108021197.0
2788942,20200121.0,1600866,150017000000,18017713,82533680.0,4.59,4.59,4.58,4.57,4.56,4.55,56000,39500,142449,398400,149900,4.6,4.61,4.62,4.63,4.64,243698,253700,248300,139300,90000,4.55,16008660000.0,1579590000000000.0,108015701.0
2788943,20200121.0,1600866,150017000000,18017713,82533680.0,4.59,4.59,4.58,4.57,4.56,4.55,56000,39500,142449,398400,149900,4.6,4.61,4.62,4.63,4.64,243698,253700,248300,139300,90000,4.55,16008660000.0,1579590000000000.0,108021197.0


'2.30%'

-------------------------------------------------------------------------------------------
SZ lv2


8091930

7896297

8091930

8091930

7896297

SZ lv2 is complete


'2.42%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'1.57%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.36%'

-----------------------------------------------------------------------------------------------------
SH index data


564955

564955

564955

14336

16148

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'5%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:22.406457
0:00:55.538816


0

94629567

94629567

94629567

20200121finished
--------------------------------------------------------------------------------------------
20200122
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
4181620,20200122.0,1603053,150005000000,10080221,187734000.0,18.6,18.59,18.58,18.57,18.56,18.55,61800,23620,27600,13400,33700,18.6,18.61,18.62,18.63,18.64,16100,18200,3900,2500,500,19.0,16030530000.0,1579676000000000.0,111273217.0
4181621,20200122.0,1603053,150005000000,10080221,187734000.0,18.6,18.59,18.58,18.57,18.56,18.55,61800,23620,27600,13400,33700,18.6,18.61,18.62,18.63,18.64,16100,18200,3900,2500,500,19.0,16030530000.0,1579676000000000.0,111280253.0
4181622,20200122.0,1603053,150005000000,10080221,187734000.0,18.6,18.59,18.58,18.57,18.56,18.55,61800,23620,27600,13400,33700,18.6,18.61,18.62,18.63,18.64,16100,18200,3900,2500,500,19.0,16030530000.0,1579676000000000.0,111273217.0
4181623,20200122.0,1603053,150005000000,10080221,187734000.0,18.6,18.59,18.58,18.57,18.56,18.55,61800,23620,27600,13400,33700,18.6,18.61,18.62,18.63,18.64,16100,18200,3900,2500,500,19.0,16030530000.0,1579676000000000.0,111280253.0
4269909,20200122.0,1603099,150005000000,1747982,16025080.0,9.15,9.14,9.13,9.12,9.11,9.1,8400,53800,10300,8300,28000,9.15,9.16,9.17,9.18,9.19,2300,5200,2400,4100,18122,9.3,16030990000.0,1579676000000000.0,111273118.0
4269910,20200122.0,1603099,150005000000,1747982,16025080.0,9.15,9.14,9.13,9.12,9.11,9.1,8400,53800,10300,8300,28000,9.15,9.16,9.17,9.18,9.19,2300,5200,2400,4100,18122,9.3,16030990000.0,1579676000000000.0,111280254.0
4269911,20200122.0,1603099,150005000000,1747982,16025080.0,9.15,9.14,9.13,9.12,9.11,9.1,8400,53800,10300,8300,28000,9.15,9.16,9.17,9.18,9.19,2300,5200,2400,4100,18122,9.3,16030990000.0,1579676000000000.0,111273118.0
4269912,20200122.0,1603099,150005000000,1747982,16025080.0,9.15,9.14,9.13,9.12,9.11,9.1,8400,53800,10300,8300,28000,9.15,9.16,9.17,9.18,9.19,2300,5200,2400,4100,18122,9.3,16030990000.0,1579676000000000.0,111280254.0
4551215,20200122.0,1603256,150005000000,4088257,60116490.0,14.76,14.76,14.75,14.74,14.73,14.71,12700,18800,7700,1200,2800,14.77,14.78,14.79,14.8,14.81,8900,7700,1600,200,2100,14.7,16032560000.0,1579676000000000.0,111273145.0
4551216,20200122.0,1603256,150005000000,4088257,60116490.0,14.76,14.76,14.75,14.74,14.73,14.71,12700,18800,7700,1200,2800,14.77,14.78,14.79,14.8,14.81,8900,7700,1600,200,2100,14.7,16032560000.0,1579676000000000.0,111280255.0


'2.18%'

-------------------------------------------------------------------------------------------
SZ lv2


8256238

8093765

8256238

8256238

8093765

SZ lv2 is complete


'1.97%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'1.18%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.46%'

-----------------------------------------------------------------------------------------------------
SH index data


902057

902057

902057

14716

17177

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'4%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:33.507542
0:00:57.817050


0

97609225

97609225

97609225

20200122finished
--------------------------------------------------------------------------------------------
20200123
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
3165121,20200123.0,1600933,150006000000,4204118,5.881422e+07,14.00,13.98,13.95,13.94,13.93,13.92,6600,1400,100,1300,1000,14.00,14.01,14.02,14.06,14.07,15400,5200,1500,2100,1000,14.18,1.600933e+10,1.579763e+15,123257605.0
3165122,20200123.0,1600933,150006000000,4204118,5.881422e+07,14.00,13.98,13.95,13.94,13.93,13.92,6600,1400,100,1300,1000,14.00,14.01,14.02,14.06,14.07,15400,5200,1500,2100,1000,14.18,1.600933e+10,1.579763e+15,123257790.0
3165123,20200123.0,1600933,150006000000,4204118,5.881422e+07,14.00,13.98,13.95,13.94,13.93,13.92,6600,1400,100,1300,1000,14.00,14.01,14.02,14.06,14.07,15400,5200,1500,2100,1000,14.18,1.600933e+10,1.579763e+15,123257605.0
3165124,20200123.0,1600933,150006000000,4204118,5.881422e+07,14.00,13.98,13.95,13.94,13.93,13.92,6600,1400,100,1300,1000,14.00,14.01,14.02,14.06,14.07,15400,5200,1500,2100,1000,14.18,1.600933e+10,1.579763e+15,123257790.0
3425247,20200123.0,1601068,150006000000,9472727,4.802016e+07,5.01,5.01,5.00,4.99,4.98,4.97,72300,59974,61300,73900,9800,5.02,5.03,5.04,5.05,5.06,83900,42062,52900,16600,44600,5.15,1.601068e+10,1.579763e+15,123257617.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6341301,20200123.0,1688118,150006000000,3724841,1.774983e+08,46.08,46.08,46.01,45.98,45.90,45.83,446,688,6700,300,730,46.09,46.10,46.11,46.13,46.14,322,700,1388,5711,900,48.90,1.688118e+10,1.579763e+15,123257802.0
6354634,20200123.0,1688128,150006000000,5498237,1.198749e+08,21.44,21.43,21.42,21.40,21.38,21.37,6008,5000,10100,1600,2682,21.44,21.45,21.48,21.49,21.50,21835,468,5619,900,3631,22.25,1.688128e+10,1.579763e+15,123257626.0
6354635,20200123.0,1688128,150006000000,5498237,1.198749e+08,21.44,21.43,21.42,21.40,21.38,21.37,6008,5000,10100,1600,2682,21.44,21.45,21.48,21.49,21.50,21835,468,5619,900,3631,22.25,1.688128e+10,1.579763e+15,123257803.0
6354636,20200123.0,1688128,150006000000,5498237,1.198749e+08,21.44,21.43,21.42,21.40,21.38,21.37,6008,5000,10100,1600,2682,21.44,21.45,21.48,21.49,21.50,21835,468,5619,900,3631,22.25,1.688128e+10,1.579763e+15,123257626.0


'2.26%'

-------------------------------------------------------------------------------------------
SZ lv2


8628453

8441011

8628453

8628453

8441011

SZ lv2 is complete


'2.17%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'1.07%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.34%'

-----------------------------------------------------------------------------------------------------
SH index data


668619

668619

668619

14326

16205

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'6%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:49.333178
0:01:19.624701


0

108905228

108905228

108905228

20200123finished
--------------------------------------------------------------------------------------------
20200203
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
433886,20200203.0,1600160,150005000000,6943200,4.353386e+07,6.27,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,6.27,6.28,6.29,6.30,6.31,12091418,122750,2300,65439,7700,6.27,1.600160e+10,1.580713e+15,65619558.0
433887,20200203.0,1600160,150005000000,6943200,4.353386e+07,6.27,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,6.27,6.28,6.29,6.30,6.31,12091418,122750,2300,65439,7700,6.27,1.600160e+10,1.580713e+15,65626493.0
433888,20200203.0,1600160,150005000000,6943200,4.353386e+07,6.27,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,6.27,6.28,6.29,6.30,6.31,12091418,122750,2300,65439,7700,6.27,1.600160e+10,1.580713e+15,65619558.0
433889,20200203.0,1600160,150005000000,6943200,4.353386e+07,6.27,0.00,0.00,0.00,0.00,0.00,0,0,0,0,0,6.27,6.28,6.29,6.30,6.31,12091418,122750,2300,65439,7700,6.27,1.600160e+10,1.580713e+15,65626493.0
833839,20200203.0,1600323,150005000000,10427742,1.953530e+08,19.20,19.19,19.18,19.17,19.16,19.15,22000,26200,20000,31000,22700,19.20,19.21,19.22,19.23,19.24,26205,33000,51379,1200,50400,17.99,1.600323e+10,1.580713e+15,65619559.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4529664,20200203.0,1688299,150005000000,9461528,1.808186e+08,18.49,18.48,18.47,18.46,18.45,18.44,17621,900,4915,8600,3021,18.49,18.50,18.51,18.53,18.54,30355,2700,2384,11387,5000,18.52,1.688299e+10,1.580713e+15,65626607.0
4561004,20200203.0,1688366,150005000000,1507053,1.314270e+08,86.13,86.13,86.11,86.10,86.09,86.05,323,1000,1800,3490,1100,86.14,86.19,86.30,86.50,86.60,4274,200,750,200,2300,80.08,1.688366e+10,1.580713e+15,65619894.0
4561005,20200203.0,1688366,150005000000,1507053,1.314270e+08,86.13,86.13,86.11,86.10,86.09,86.05,323,1000,1800,3490,1100,86.14,86.19,86.30,86.50,86.60,4274,200,750,200,2300,80.08,1.688366e+10,1.580713e+15,65626608.0
4561006,20200203.0,1688366,150005000000,1507053,1.314270e+08,86.13,86.13,86.11,86.10,86.09,86.05,323,1000,1800,3490,1100,86.14,86.19,86.30,86.50,86.60,4274,200,750,200,2300,80.08,1.688366e+10,1.580713e+15,65619894.0


AssertionError: 

In [None]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    """Create a ``DBObj`` for *db_name* on *host* using username/password auth.

    The authentication database is ``admin`` for the ``admin`` and ``root``
    users and *db_name* for everyone else.

    Args:
        host: MongoDB host (optionally ``host:port``) placed in the URI as-is.
        db_name: Name of the database to open.
        user: Plain (not percent-encoded) user name.
        passwd: Plain (not percent-encoded) password.

    Returns:
        A ``DBObj`` bound to the constructed URI and *db_name*.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote_plus

    auth_db = 'admin' if user in ('admin', 'root') else db_name
    # Reserved characters (':', '/', '@', '%', ...) in the credentials must be
    # percent-escaped, otherwise the URI is split at the wrong place.
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (
        quote_plus(str(user)), quote_plus(str(passwd)), host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    """Store and load pandas DataFrames in MongoDB as compressed pickled chunks.

    Every stored document describes one slice of a DataFrame for a single
    (date, symbol) pair and has the fields ``ver`` (serialization version),
    ``data`` (compressed pickle bytes), ``date``, ``symbol`` and ``start``
    (row offset of the slice inside its (date, symbol) group).
    """

    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        """Connect to *uri* and select database *db_name*.

        Args:
            uri: MongoDB connection URI.
            symbol_column: DataFrame column used as the symbol key on write.
            db_name: Name of the database to operate on.
        """
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        # Maximum number of rows stored in one document.
        self.chunk_size = 20000
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        """Split a ``mongodb://user:password@example.com`` URI into its parts.

        Returns:
            A list of the pieces separated by ``:`` and ``@`` once the scheme
            and surrounding slashes are removed.
        """
        stripped = uri.strip().replace('mongodb://', '').strip('/')
        return stripped.replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        """Drop the collection called *table_name*."""
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        """Rename collection *old_table* to *new_table*."""
        self.db[old_table].rename(new_table)

    def _iter_partitions(self, df):
        """Yield ``(date, symbol, sub_df)`` for every (date, symbol) group of *df*.

        ``date`` is always a ``str`` and ``symbol`` always a built-in ``int``,
        whichever branch is taken, so stored keys match what ``build_query``
        produces.
        """
        dates = df[self.date_column]
        if len(dates.unique()) > 1:
            grouped = df.groupby([self.date_column, self.symbol_column])
            for (date, symbol), sub_df in grouped:
                yield str(date), int(symbol), sub_df
        else:
            date = str(dates.iloc[0])
            # Group by the bare column name: a one-element list makes newer
            # pandas versions yield 1-tuples as keys instead of scalars.
            for symbol, sub_df in df.groupby(self.symbol_column):
                yield date, int(symbol), sub_df

    def write(self, table_name, df):
        """Replace the stored data of every (date, symbol) group found in *df*.

        Does nothing for an empty DataFrame. Existing documents of each
        (date, symbol) pair are deleted before the new chunks are inserted.

        Args:
            table_name: Target collection name.
            df: DataFrame containing the date column and the symbol column.

        Raises:
            Exception: If *df* has no date column.
        """
        if len(df) == 0:
            return

        if self.date_column not in df.columns:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        for date, symbol, sub_df in self._iter_partitions(df):
            collection.delete_many({'date': date, 'symbol': symbol})
            self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        """Insert *df* into *collection* in slices of at most ``chunk_size`` rows.

        Each slice is serialized with version 1 (see ``ser``) and stored
        together with its *date*, *symbol* and starting row offset.
        """
        version = 1
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        """Build the MongoDB filter for a date range and/or symbol selection.

        Args:
            start_date: Inclusive lower bound; ``str`` (YYYYMMDD), ``int``,
                ``datetime.date`` or ``datetime.datetime``. ``None`` for no bound.
            end_date: Inclusive upper bound, same accepted types.
            symbol: One symbol or a list/tuple of symbols, each convertible
                with ``int()``. Falsy values add no symbol filter.

        Returns:
            A filter dict; empty when no argument restricts the query.

        Raises:
            Exception: For a date string that is not 8 characters long or a
                date of an unsupported type.
        """
        query = {}

        def parse_date(x):
            if isinstance(x, str):
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            # datetime.datetime is a subclass of datetime.date.
            if isinstance(x, datetime.date):
                return x.strftime("%Y%m%d")
            if isinstance(x, int):
                return parse_date(str(x))
            raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        if symbol:
            if isinstance(symbol, (list, tuple)):
                query['symbol'] = {'$in': [int(x) for x in symbol]}
            else:
                query['symbol'] = int(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        """Delete the documents of *table_name* matching the given filter.

        Refuses (prints a message and returns ``None``) when the filter is
        empty, so the whole table is never wiped by accident.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        """Load the matching chunks of *table_name* into one DataFrame.

        Chunks are ordered by (symbol, date, start) and concatenated with a
        fresh index.

        Returns:
            The concatenated DataFrame, or ``None`` when the filter is empty
            (a message is printed) or nothing matches.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for x in collection.find(query):
            x['data'] = self.deser(x['data'], x['ver'])
            segs.append(x)
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start']))
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def list_tables(self):
        """Return the names of the collections in the database."""
        # Database.__getattr__ returns a Collection for unknown names, so the
        # method must be looked up on the class, not on the instance.
        if hasattr(type(self.db), 'list_collection_names'):
            return self.db.list_collection_names()
        # Older PyMongo releases only provide the legacy name.
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        """Return the sorted distinct ``date`` values stored in *table_name*.

        Missing bounds default to ``'00000000'`` and ``'99999999'``.
        """
        collection = self.db[table_name]
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        query = self.build_query(start_date, end_date, symbol)
        dates = {x['date'] for x in collection.find(query, {"date": 1, '_id': 0})}
        return sorted(dates)

    def ser(self, s, version):
        """Pickle *s* (protocol 4) and compress it.

        Version 1 uses gzip (level 2), version 2 uses lzma (preset 1).

        Raises:
            Exception: For any other *version*.
        """
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        """Decompress and unpickle bytes produced by ``ser`` with the same *version*.

        NOTE(review): this unpickles whatever is stored in the database, and
        unpickling can execute arbitrary code; only use it against a trusted
        database.

        Raises:
            Exception: For a *version* other than 1 or 2.
        """
        if version == 1:
            return pickle.loads(gzip.decompress(s))
        elif version == 2:
            return pickle.loads(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    """Alias ``pandas.core.internals.managers`` on pandas older than 0.24.

    On such versions a stand-in module exposing ``BlockManager`` is registered
    in ``sys.modules`` under that name, unless a module of that name is
    already loaded.  Presumably this lets pickles that reference the newer
    module path be loaded by an older pandas -- confirm against the data.

    The version is compared numerically on (major, minor); a plain string
    comparison would order '0.9' after '0.24'.  A version string without a
    leading ``<digits>.<digits>`` is left unpatched.
    """
    import re
    import sys
    from types import ModuleType

    parsed = re.match(r'(\d+)\.(\d+)', pd.__version__)
    if parsed is None:
        return
    if (int(parsed.group(1)), int(parsed.group(2))) >= (0, 24):
        return

    from pandas.core.internals import BlockManager
    pkg_name = 'pandas.core.internals.managers'
    if pkg_name not in sys.modules:
        m = ModuleType(pkg_name)
        m.BlockManager = BlockManager
        sys.modules[pkg_name] = m


patch_pandas_pickle()


import pandas as pd
import random
import numpy as np
import glob
import pickle
import os
import datetime
import time
# Use the fully qualified option key: the bare pattern "max_columns" also
# matches "styler.render.max_columns" on pandas >= 1.4 and is then rejected
# as ambiguous.
pd.set_option("display.max_columns", 200)


# Date window to process and the connection settings for the market-data DB.
startDate = 20200324
endDate = 20200529
database_name = 'com_md_eq_cn'
user = "zhenyuy"
# NOTE(review): credential is hard-coded in source (and repeated further
# down); consider loading it from the environment or a secrets store.
password = "bnONBrzSMGoE"

db = DB("192.168.10.178", database_name, user, password)
# The dates present for symbol 1000300 in 'md_index' define which days the
# main loop iterates over.
test = db.read('md_index', start_date=startDate, end_date=endDate, symbol=[1000300])
if test is None:
    # db.read returns None when nothing matches; fail with a clear message
    # instead of a TypeError on the subscript below.
    raise RuntimeError('no md_index rows for symbol 1000300 between %s and %s'
                       % (startDate, endDate))
date_list = test['date'].unique()
del test

# Rows found only in the raw logs are accumulated here across all dates.
new_trade_data = []
new_order_data = []

for i in date_list:
    print('--------------------------------------------------------------------------------------------')
    # Announce the date being processed, then read that single day's rows
    # from the 'md_snapshot_l2' table.
    print(i)
    print('SH lv2')
    startDate = str(i)
    endDate = str(i)
    db = DB("192.168.10.178", database_name, user, password)
    SH = db.read('md_snapshot_l2', start_date=startDate, end_date=endDate)
    # Split on skey: above 2000000 goes to SZ, below 2000000 stays in SH.
    # A row with skey exactly 2000000 would be dropped from both.
    SZ = SH[SH['skey'] > 2000000]
    SH = SH[SH['skey'] < 2000000]
    # 'num' folds skey and ordering into one row key; it is used below for
    # sorting and duplicate detection.
    # NOTE(review): SH and SZ are filtered selections at this point, so these
    # column assignments may raise pandas' SettingWithCopyWarning -- confirm.
    SH['num'] = SH['skey'] * 10000 + SH['ordering']
    SZ['num'] = SZ['skey'] * 10000 + SZ['ordering']
    
    # Keep only the columns used when matching against the raw log files.
    SH = SH[['date', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open", 'num']]
    SZ = SZ[['date', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open", 'num']]

    # --- SH level-2: match database snapshots against the raw mdLog_SH file
    # and give every database row a sequenceNo / clockAtArrival where one can
    # be found.  The result is left in re1.
    startDate = str(i)
    endDate = str(i)

    # Find the log directories whose name carries the current date (second
    # '_'-separated token of the directory name).
    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]


    # NOTE(review): each pass overwrites SH1/index1, so only the last matching
    # directory is used; with no match SH1 is never assigned for this date.
    for n in range(len(dataPathLs)):
        path1 = np.array(glob.glob(dataPathLs[n] + '/mdLog_SH_***'))
        SH1 = pd.read_csv(path1[0])
        # Rows for StockID 16, 300, 852, 905 are set aside as index1; they are
        # consumed later by the 'SH index data' step.
        index1 = SH1[SH1['StockID'].isin([16, 300, 852, 905])]
        SH1 = SH1[SH1['source'] == 4]

        # Build the same skey the database uses and align column names.
        SH1['skey'] = SH1['StockID'] + 1000000
        SH1 = SH1.rename(columns={"openPrice":"open"})
        # Once volume has traded, use the per-skey maximum of 'open';
        # otherwise keep the row's own value.
        SH1["open"] = np.where(SH1["cum_volume"] > 0, SH1.groupby("skey")["open"].transform("max"), SH1["open"])
        # Strip ':' and '.' from the time string, convert to int, scale by 1000.
        SH1["time"] = SH1["time"].apply(lambda x: int((x.replace(':', "")).replace(".", "")) * 1000)

    SH1 = SH1[['clockAtArrival', 'sequenceNo', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]]
    # Round the float columns so they can be compared exactly in the merge.
    # Note that 'cols' is reused: loop variable here, merge key list just below.
    for cols in ['cum_amount', "close", 'open']:
        SH1[cols] = SH1[cols].round(2)
    cols = ['skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]
    # Restrict the log to skeys the database knows, then outer-join on every
    # snapshot field so unmatched rows from either side survive.
    SH1 = SH1[SH1['skey'].isin(SH['skey'].unique())]
    re = pd.merge(SH, SH1, on=cols, how='outer')

    # p21: rows present only in the log (no 'date' from the database side).
    p21 = re[(re['date'].isnull())][['clockAtArrival', 'sequenceNo', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]]
    # p22: rows present only in the database (no 'sequenceNo' from the log).
    p22 = re[(re['sequenceNo'].isnull())][["skey", "date", "time", 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open", 'num']]

    # Matched rows (both sides present) split by whether their 'num' occurs
    # more than once: p11 holds the ambiguous ones, p12 the unique ones.
    p11 = re[(~re['sequenceNo'].isnull()) & (~re['date'].isnull())][re[(~re['sequenceNo'].isnull()) 
                                                        & (~re['date'].isnull())]['num'].duplicated(keep=False)]
    p12 = re[(~re['sequenceNo'].isnull()) & (~re['date'].isnull())].drop_duplicates(['num'], keep=False)
    p11 = p11.sort_values(by=['num', 'sequenceNo'])
    display(p11)
    # Keep an ambiguous pairing only when its position within its 'num' group
    # equals its position within its 'sequenceNo' group.
    p11["order1"] = p11.groupby(["num"]).cumcount()
    p11["order2"] = p11.groupby(["sequenceNo"]).cumcount()
    p11 = p11[p11['order1'] == p11['order2']]

    # p11_1: one row per ambiguous 'num' that lost every pairing above; it is
    # kept with sequenceNo/clockAtArrival blanked.
    p11_1 = re[(~re['sequenceNo'].isnull()) & (~re['date'].isnull())][re[(~re['sequenceNo'].isnull()) 
                                                        & (~re['date'].isnull())]['num'].duplicated(keep=False)].drop_duplicates('num')
    p11_1 = pd.merge(p11_1, p11[['num', 'order1']], on='num', how='left')
    p11_1 = p11_1[p11_1['order1'].isnull()]
    p11_1['sequenceNo'] = np.nan
    p11_1['clockAtArrival'] = np.nan

    p11.drop(['order1', 'order2'],axis=1,inplace=True)
    p11_1.drop(['order1'],axis=1,inplace=True)
    p11 = pd.concat([p11, p11_1])
    
    p1 = pd.concat([p11, p12])
    # Database-only rows get a second chance: match log-only rows on
    # (skey, time) alone.
    p2 = pd.merge(p22, p21[['skey', 'time', 'clockAtArrival', 'sequenceNo']], on=['skey', 'time'], how='left')
    re1 = pd.concat([p1, p2])
    re1 = re1.sort_values(by='num')
    # seq1 is the sequenceNo each row is anchored to: forward-filled within
    # the skey, then back-filled.
    # NOTE(review): the bfill() runs on the ungrouped result, so it can pull a
    # value across an skey boundary -- confirm this is intended.
    re1['seq1'] = re1.groupby('skey')['sequenceNo'].ffill().bfill()
    # skeys absent from the log have no anchor at all.
    sl = list(set(SH['skey'].unique()) - set(SH1['skey'].unique()))
    re1.loc[re1['skey'].isin(sl), 'seq1'] = np.nan
    # count1: position inside the seq1 group; count2: size of that group.
    re1['count1'] = re1.groupby(['seq1']).cumcount()
    re1['count2'] = re1.groupby(['seq1'])['count1'].transform('nunique')
    re1['min_seq'] = re1.groupby('skey')['sequenceNo'].transform('min')
    # 'count' is the offset from the anchor: 0, 1, 2, ... normally, but for the
    # group anchored on the skey's smallest sequenceNo it runs up to 0 instead
    # (count1 + 1 - count2).
    re1['count'] = np.where(re1['seq1'] != re1['min_seq'], re1['count1'], re1['count1']+1-re1['count2'])
    re1.drop(["min_seq"],axis=1,inplace=True)
    re1.drop(["count1"],axis=1,inplace=True)
    re1.drop(["count2"],axis=1,inplace=True)
    # dup: position among rows sharing a sequenceNo; dup1: how many distinct
    # 'num' share it; nan: 1 for rows that need a newly assigned sequenceNo.
    re1['dup'] = np.where(~re1["sequenceNo"].isnull(), re1.groupby(['sequenceNo']).cumcount(), 0)
    re1['dup1'] = np.where(~re1["sequenceNo"].isnull(), re1.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
    re1['nan'] = np.where((re1['sequenceNo'].isnull()) | (re1['dup'] != 0), 1, 0)
    # Only the first row keeps a sequenceNo shared by several 'num' values.
    re1.loc[(re1['dup1'] > 1) & (re1['dup'] > 0), 'sequenceNo'] = np.nan
    # Sanity: the skeys without an anchor are exactly those missing from the
    # log, and no database row was lost or duplicated.
    assert((len(set(sl) - set(re1[re1['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re1[re1['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
    assert(re1.shape[0] == SH.shape[0])

    # Share of rows still lacking a sequenceNo.
    display('%.2f%%' % (re1[re1['sequenceNo'].isnull()].shape[0]/re1.shape[0] * 100))
    
    
    
    
    
    
    
    print('-------------------------------------------------------------------------------------------')
    # --- SZ level-2: same reconciliation as for SH, against the mdLog_SZ
    # file.  The result is left in re2.
    print('SZ lv2')
    startDate = str(i)
    endDate = str(i)

    # Find the log directories whose name carries the current date.
    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]


    # NOTE(review): only the last matching directory's file survives the loop.
    for n in range(len(dataPathLs)):
        path1 = np.array(glob.glob(dataPathLs[n] + '/mdLog_SZ_***'))
        SZ1 = pd.read_csv(path1[0])
        SZ1 = SZ1[SZ1['source'] == 4]

        SZ1['skey'] = SZ1['StockID'] + 2000000
        SZ1 = SZ1.rename(columns={"openPrice":"open"})
        # Once volume has traded, use the per-skey maximum of 'open'.
        SZ1["open"] = np.where(SZ1["cum_volume"] > 0, SZ1.groupby("skey")["open"].transform("max"), SZ1["open"])
        # Strip ':' and '.' from the time string, convert to int, scale by 1000.
        SZ1["time"] = SZ1["time"].apply(lambda x: int((x.replace(':', "")).replace(".", "")) * 1000)

    SZ1 = SZ1[['clockAtArrival', 'sequenceNo', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]]
    # Unlike the SH step, only cum_amount is rounded here.  'cols' is reused:
    # loop variable here, merge key list just below.
    for cols in ['cum_amount']:
        SZ1[cols] = SZ1[cols].round(2)
    cols = ['skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]
    SZ1 = SZ1[SZ1['skey'].isin(SZ['skey'].unique())]
    re = pd.merge(SZ, SZ1, on=cols, how='outer')

    # Row counts: merged, matched to the log, matched to the database, and the
    # two inputs.
    display(re.shape[0])
    display(re[~re['sequenceNo'].isnull()].shape[0])
    display(re[~re['date'].isnull()].shape[0])
    display(SZ.shape[0])
    display(SZ1.shape[0])

    # If the outer join produced log-only rows (null 'date'), report the
    # coverage and redo the merge as a left join so only database rows remain.
    # NOTE(review): the assert doubles as a branch condition and the bare
    # except would also hide unrelated errors.
    try:
        assert(re.shape[0] == re[~re['date'].isnull()].shape[0])
        print('SZ lv2 is complete')
    except:
        display('%.2f%%' % (re[~re['date'].isnull()].shape[0]/re.shape[0] * 100))
        print('92 have unique values not shared by database')
        re = pd.merge(SZ, SZ1, on=cols, how='left')

    # Both branches below run the same anchoring steps; the else branch first
    # resolves 'num' values that matched more than one log row.
    if re[re.duplicated('num', keep=False)].shape[0] == 0:
        re2 = re.sort_values(by='num')
        # seq1: sequenceNo each row is anchored to (ffill within skey, then a
        # bfill applied to the ungrouped result).
        re2['seq1'] = re2.groupby('skey')['sequenceNo'].ffill().bfill()
        # skeys absent from the log have no anchor.
        sl = list(set(SZ['skey'].unique()) - set(SZ1['skey'].unique()))
        re2.loc[re2['skey'].isin(sl), 'seq1'] = np.nan
        # count1: position inside the seq1 group; count2: size of that group.
        re2['count1'] = re2.groupby(['seq1']).cumcount()
        re2['count2'] = re2.groupby(['seq1'])['count1'].transform('nunique')
        re2['min_seq'] = re2.groupby('skey')['sequenceNo'].transform('min')
        # Offset from the anchor; for the group anchored on the skey's smallest
        # sequenceNo the offsets run up to 0 instead of starting at 0.
        re2['count'] = np.where(re2['seq1'] != re2['min_seq'], re2['count1'], re2['count1']+1-re2['count2'])
        re2.drop(["min_seq"],axis=1,inplace=True)
        re2.drop(["count1"],axis=1,inplace=True)
        re2.drop(["count2"],axis=1,inplace=True)
        # dup / dup1 / nan: position among rows sharing a sequenceNo, number of
        # distinct 'num' sharing it, and the needs-new-number flag.
        re2['dup'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo']).cumcount(), 0)
        re2['dup1'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
        re2['nan'] = np.where((re2['sequenceNo'].isnull()) | (re2['dup'] != 0), 1, 0)
        re2.loc[(re2['dup1'] > 1) & (re2['dup'] > 0), 'sequenceNo'] = np.nan
        # Sanity: unanchored skeys are exactly those missing from the log, and
        # the row count still equals the database's.
        assert((len(set(sl) - set(re2[re2['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re2[re2['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
        assert(re2.shape[0] == SZ.shape[0])

        # Share of rows still lacking a sequenceNo.
        display('%.2f%%' % (re2[re2['sequenceNo'].isnull()].shape[0]/re2.shape[0] * 100))


    else:
        # Keep an ambiguous pairing only when its position within its 'num'
        # group equals its position within its 'sequenceNo' group.
        p1 = re[re['num'].duplicated(keep=False)]
        p2 = re.drop_duplicates(['num'], keep=False)
        p1["order1"] = p1.groupby(["num"]).cumcount()
        p1["order2"] = p1.groupby(["sequenceNo"]).cumcount()
        p1 = p1[p1['order1'] == p1['order2']]
        p1.drop(['order1', 'order2'],axis=1,inplace=True)
        re = pd.concat([p1, p2])
        re2 = re.sort_values(by='num')
        # From here on: identical anchoring steps to the branch above.
        re2['seq1'] = re2.groupby('skey')['sequenceNo'].ffill().bfill()
        sl = list(set(SZ['skey'].unique()) - set(SZ1['skey'].unique()))
        re2.loc[re2['skey'].isin(sl), 'seq1'] = np.nan
        re2['count1'] = re2.groupby(['seq1']).cumcount()
        re2['count2'] = re2.groupby(['seq1'])['count1'].transform('nunique')
        re2['min_seq'] = re2.groupby('skey')['sequenceNo'].transform('min')
        re2['count'] = np.where(re2['seq1'] != re2['min_seq'], re2['count1'], re2['count1']+1-re2['count2'])
        re2.drop(["min_seq"],axis=1,inplace=True)
        re2.drop(["count1"],axis=1,inplace=True)
        re2.drop(["count2"],axis=1,inplace=True)
        re2['dup'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo']).cumcount(), 0)
        re2['dup1'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
        re2['nan'] = np.where((re2['sequenceNo'].isnull()) | (re2['dup'] != 0), 1, 0)
        re2.loc[(re2['dup1'] > 1) & (re2['dup'] > 0), 'sequenceNo'] = np.nan
        assert((len(set(sl) - set(re2[re2['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re2[re2['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
        assert(re2.shape[0] == SZ.shape[0])

        display('%.2f%%' % (re2[re2['sequenceNo'].isnull()].shape[0]/re2.shape[0] * 100))
    
    print('----------------------------------------------------------------------------------------------')
    # --- Trades: match database trades against the raw mdTradeLog file on
    # (skey, ApplSeqNum).  The result is left in re3.
    print('SH & SZ trade')
    
    startDate = str(i)
    endDate = str(i)
    # NOTE(review): these re-assign the same connection settings, including the
    # hard-coded credential, that the script already set before the loop.
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db = DB("192.168.10.178", database_name, user, password)
    trade = db.read('md_trade', start_date=startDate, end_date=endDate)[['skey', 'date', 'ApplSeqNum']]

    startDate = str(i)
    endDate = str(i)

    # Find the log directories whose name carries the current date.
    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
    # NOTE(review): only the last matching directory's file survives the loop.
    for n in range(len(dataPathLs)):
        path1 = np.array(glob.glob(dataPathLs[n] + '/mdTradeLog***'))
        trade1 = pd.read_csv(path1[0])
    # exchId == 2 rows get the 2000000 prefix, every other row the 1000000 one.
    trade1['skey'] = np.where(trade1['exchId'] == 2, trade1['SecurityID'] + 2000000, trade1['SecurityID'] + 1000000)
    trade1 = trade1[trade1['skey'].isin(trade['skey'].unique())]
    re = pd.merge(trade, trade1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='outer')
    # k records whether the log holds trades the database lacks (1) or not (0).
    # NOTE(review): the assert doubles as a branch condition and the bare
    # except would also hide unrelated errors.
    try:
        assert(re.shape[0] == trade.shape[0])
        display('trade data is complete')
        k = 0
    except:
        display('%.2f%%' % (trade.shape[0]/re.shape[0] * 100))
        k = 1
        display('trade data incomplete')
        # k1: the full log rows for trades that exist only in the log.
        k1 = pd.merge(trade1, re[re['date'].isnull()][['skey', 'ApplSeqNum']], on=['skey', 'ApplSeqNum'], how='right')
        display(k1.shape[0])
        display(k1['ExecType'].unique())
        display(k1['TransactTime'].unique())
        k1['date'] = trade['date'].iloc[0]
        # Remember them for later, across all dates.
        new_trade_data += [k1[['clockAtArrival', 'sequenceNo', 'TransactTime', 'ApplSeqNum', 'date', 'skey', 'ExecType', 'TradeBSFlag', 
       'TradePrice', 'TradeQty', 'BidApplSeqNum', 'OfferApplSeqNum']]]
        # Redo the merge as a left join so only database rows remain.
        re = pd.merge(trade, trade1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='left')
        assert(re.shape[0] == trade.shape[0])

    re3 = re.sort_values(by=['skey', 'ApplSeqNum'])
    # seq1: sequenceNo each row is anchored to (ffill within skey, then a bfill
    # applied to the ungrouped result).
    re3['seq1'] = re3.groupby('skey')['sequenceNo'].ffill().bfill()
    # skeys absent from the log have no anchor.
    sl = list(set(trade['skey'].unique()) - set(trade1['skey'].unique()))
    re3.loc[re3['skey'].isin(sl), 'seq1'] = np.nan
    # count1: position inside the seq1 group; count2: size of that group.
    re3['count1'] = re3.groupby(['seq1']).cumcount()
    re3['count2'] = re3.groupby(['seq1'])['count1'].transform('nunique')
    re3['min_seq'] = re3.groupby('skey')['sequenceNo'].transform('min')
    # Offset from the anchor; for the group anchored on the skey's smallest
    # sequenceNo the offsets run up to 0 instead of starting at 0.
    re3['count'] = np.where(re3['seq1'] != re3['min_seq'], re3['count1'], re3['count1']+1-re3['count2'])
    re3.drop(["min_seq"],axis=1,inplace=True)
    re3.drop(["count1"],axis=1,inplace=True)
    re3.drop(["count2"],axis=1,inplace=True)
    # dup / dup1 / nan: position among rows sharing a sequenceNo, number of
    # distinct ApplSeqNum sharing it, and the needs-new-number flag.
    re3['dup'] = np.where(~re3["sequenceNo"].isnull(), re3.groupby(['sequenceNo']).cumcount(), 0)
    re3['dup1'] = np.where(~re3["sequenceNo"].isnull(), re3.groupby(['sequenceNo'])['ApplSeqNum'].transform('nunique'), 0)
    re3['nan'] = np.where((re3['sequenceNo'].isnull()) | (re3['dup'] != 0), 1, 0)
    re3.loc[(re3['dup1'] > 1) & (re3['dup'] > 0), 'sequenceNo'] = np.nan
    # Sanity: unanchored skeys are exactly those missing from the log, and the
    # row count still equals the database's.
    assert((len(set(sl) - set(re3[re3['seq1'].isnull()]['skey'].unique())) == 0) & 
           (len(set(re3[re3['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
    assert(re3.shape[0] == trade.shape[0])
    # Log-only trades are appended as already-resolved rows, anchored on their
    # own sequenceNo.
    if k == 1:
        k1['seq1'] = k1['sequenceNo']
        k1['count'] = 0
        k1['nan'] = 0
        k1['dup1'] = 1
        re3 = pd.concat([re3, k1[['clockAtArrival', 'date', 'sequenceNo', 'skey', 'ApplSeqNum', 'seq1', 
                                  'count', 'nan', 'dup1']]])

    # Share of rows still lacking a sequenceNo.
    display('%.2f%%' % (re3[re3['sequenceNo'].isnull()].shape[0]/re3.shape[0] * 100))

    
    print('--------------------------------------------------------------------------------------------------')
    # --- Orders: match database orders against the raw mdOrderLog file on
    # (skey, ApplSeqNum).  The result is left in re4.
    print('SZ order data')

    startDate = str(i)
    endDate = str(i)
    # NOTE(review): these re-assign the same connection settings, including the
    # hard-coded credential, that the script already set before the loop.
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db = DB("192.168.10.178", database_name, user, password)
    order = db.read('md_order', start_date=startDate, end_date=endDate)[['skey', 'date', 'ApplSeqNum']]

    startDate = str(i)
    endDate = str(i)

    # Find the log directories whose name carries the current date.
    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
    # NOTE(review): only the last matching directory's file survives the loop.
    for n in range(len(dataPathLs)):
        path1 = np.array(glob.glob(dataPathLs[n] + '/mdOrderLog***'))
        order1 = pd.read_csv(path1[0])
    # Every order row gets the 2000000 prefix.
    order1['skey'] = order1['SecurityID'] + 2000000
    order1 = order1[order1['skey'].isin(order['skey'].unique())]
    re = pd.merge(order, order1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='outer')
    # k records whether the log holds orders the database lacks (1) or not (0).
    # NOTE(review): the assert doubles as a branch condition and the bare
    # except would also hide unrelated errors.
    try:
        assert(re.shape[0] == order.shape[0])
        display('order data is complete')
        k = 0
    except:
        display('%.2f%%' % (order.shape[0]/re.shape[0] * 100))
        k = 1
        display('order data incomplete')
        # k2: the full log rows for orders that exist only in the log.
        k2 = pd.merge(order1, re[re['date'].isnull()][['skey', 'ApplSeqNum']], on=['skey', 'ApplSeqNum'], how='right')
        display(k2.shape[0])
        display(k2['SecurityID'].unique())
        display(k2['TransactTime'].unique())
        k2['date'] = order['date'].iloc[0]
        # Remember them for later, across all dates.
        new_order_data += [k2[['clockAtArrival', 'sequenceNo', 'TransactTime', 'ApplSeqNum', 'date', 'skey', 'Side', 
       'OrderType', 'Price', 'OrderQty']]]
        # Redo the merge as a left join so only database rows remain.
        re = pd.merge(order, order1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='left')
        assert(re.shape[0] == order.shape[0])

    re4 = re.sort_values(by=['skey', 'ApplSeqNum'])
    # seq1: sequenceNo each row is anchored to (ffill within skey, then a bfill
    # applied to the ungrouped result).
    re4['seq1'] = re4.groupby('skey')['sequenceNo'].ffill().bfill()
    # skeys absent from the log have no anchor.
    sl = list(set(order['skey'].unique()) - set(order1['skey'].unique()))
    re4.loc[re4['skey'].isin(sl), 'seq1'] = np.nan
    # count1: position inside the seq1 group; count2: size of that group.
    re4['count1'] = re4.groupby(['seq1']).cumcount()
    re4['count2'] = re4.groupby(['seq1'])['count1'].transform('nunique')
    re4['min_seq'] = re4.groupby('skey')['sequenceNo'].transform('min')
    # Offset from the anchor; for the group anchored on the skey's smallest
    # sequenceNo the offsets run up to 0 instead of starting at 0.
    re4['count'] = np.where(re4['seq1'] != re4['min_seq'], re4['count1'], re4['count1']+1-re4['count2'])
    re4.drop(["min_seq"],axis=1,inplace=True)
    re4.drop(["count1"],axis=1,inplace=True)
    re4.drop(["count2"],axis=1,inplace=True)
    # dup / dup1 / nan: position among rows sharing a sequenceNo, number of
    # distinct ApplSeqNum sharing it, and the needs-new-number flag.
    re4['dup'] = np.where(~re4["sequenceNo"].isnull(), re4.groupby(['sequenceNo']).cumcount(), 0)
    re4['dup1'] = np.where(~re4["sequenceNo"].isnull(), re4.groupby(['sequenceNo'])['ApplSeqNum'].transform('nunique'), 0)
    re4['nan'] = np.where((re4['sequenceNo'].isnull()) | (re4['dup'] != 0), 1, 0)
    re4.loc[(re4['dup1'] > 1) & (re4['dup'] > 0), 'sequenceNo'] = np.nan
    # Sanity: unanchored skeys are exactly those missing from the log, and the
    # row count still equals the database's.
    assert((len(set(sl) - set(re4[re4['seq1'].isnull()]['skey'].unique())) == 0) & 
           (len(set(re4[re4['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
    assert(re4.shape[0] == order.shape[0])
    # Log-only orders are appended as already-resolved rows.
    # NOTE(review): seq1 is taken from ApplSeqNum here, whereas the trade step
    # uses sequenceNo for the same purpose -- looks like a copy/paste slip;
    # confirm which is intended.
    if k == 1:
        k2['seq1'] = k2['ApplSeqNum']
        k2['count'] = 0
        k2['nan'] = 0
        k2['dup1'] = 1
        re4 = pd.concat([re4, k2[['clockAtArrival', 'date', 'sequenceNo', 'skey', 'ApplSeqNum', 'seq1', 
                                  'count', 'nan', "dup1"]]])


    # Share of rows still lacking a sequenceNo.
    display('%.2f%%' % (re4[re4['sequenceNo'].isnull()].shape[0]/re4.shape[0] * 100))
    
    print('-----------------------------------------------------------------------------------------------------')
    # --- Index: match database index rows against index1, the index rows set
    # aside while reading the SH log.  The result is left in re5.
    print('SH index data')
    
    startDate = str(i)
    endDate = str(i)
    # NOTE(review): these re-assign the same connection settings, including the
    # hard-coded credential, that the script already set before the loop.
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db = DB("192.168.10.178", database_name, user, password)
    index = db.read('md_index', start_date=startDate, end_date=endDate)

    # Normalise the log side the same way as the SH snapshots.
    index1['skey'] = index1['StockID'] + 1000000
    index1 = index1.rename(columns={"openPrice":"open"})
    index1["open"] = np.where(index1["cum_volume"] > 0, index1.groupby("skey")["open"].transform("max"), index1["open"])
    # Rows with zero volume get close = 0 on both sides so they compare equal.
    index1['close'] = np.where(index1['cum_volume'] == 0, 0, index1['close'])
    index1["time"] = index1["time"].apply(lambda x: int((x.replace(':', "")).replace(".", "")) * 1000)
    index['close'] = np.where(index['cum_volume'] == 0, 0, index['close'])
    index['num'] = index['skey'] * 10000 + index['ordering']
    index = index[['skey', 'date', 'cum_volume', 'cum_amount', "close", "open", 'num']]
    index1 = index1[['clockAtArrival', 'sequenceNo', 'skey', 'cum_volume', 'cum_amount', "close", "open", "time"]]
    # cum_amount is rounded to one decimal here.  'cols' is reused: loop
    # variable here, merge key list just below.
    for cols in ['cum_amount']:
        index1[cols] = index1[cols].round(1)
    cols = ['skey', 'cum_volume', 'cum_amount', "close", "open"]
    index1 = index1[index1['skey'].isin(index['skey'].unique())]
    re = pd.merge(index, index1, on=cols, how='outer')

    # Row counts: merged, matched to the log, matched to the database, and the
    # two inputs.
    display(re.shape[0])
    display(re[~re['sequenceNo'].isnull()].shape[0])
    display(re[~re['date'].isnull()].shape[0])
    display(index.shape[0])
    display(index1.shape[0])
    
    # If the outer join produced log-only rows, report coverage and redo the
    # merge as a left join.
    # NOTE(review): the assert doubles as a branch condition and the bare
    # except would also hide unrelated errors.
    try:
        assert(re.shape[0] == re[~re['date'].isnull()].shape[0])
        print('index data is complete')
    except:
        display('%.2f%%' % (re[~re['date'].isnull()].shape[0]/re.shape[0] * 100))
        re = pd.merge(index, index1, on=cols, how='left')
        print('92 have unique values not shared by database')

    # Resolve 'num' values that matched several log rows: keep a pairing only
    # when its position within its 'num' group equals its position within its
    # 'sequenceNo' group.
    p11 = re[re.duplicated('num', keep=False)]
    p2 = re.drop_duplicates('num', keep=False)
    p11["order1"] = p11.groupby(["num"]).cumcount()
    p11["order2"] = p11.groupby(["sequenceNo"]).cumcount()
    p11 = p11[p11['order1'] == p11['order2']]

    # p12: one row per ambiguous 'num' that lost every pairing; kept with
    # sequenceNo/clockAtArrival blanked.
    p12 = re[re.duplicated('num', keep=False)].drop_duplicates('num')
    p12 = pd.merge(p12, p11[['num', 'order1']], on='num', how='left')
    p12 = p12[p12['order1'].isnull()]
    p12['sequenceNo'] = np.nan
    p12['clockAtArrival'] = np.nan

    p11.drop(['order1', 'order2'],axis=1,inplace=True)
    p12.drop(['order1'],axis=1,inplace=True)
    p1 = pd.concat([p11, p12])

    re = pd.concat([p1, p2])
    assert(re[re.duplicated('num', keep=False)].shape[0] == 0)

    # The two branches differ only in how seq1 is derived: filled from
    # neighbours when some rows lack a sequenceNo, copied directly otherwise.
    if re[re['sequenceNo'].isnull()].shape[0] != 0:
        re5 = re.sort_values(by='num')
        # ffill within skey, then a bfill applied to the ungrouped result.
        re5['seq1'] = re5.groupby('skey')['sequenceNo'].ffill().bfill()
        # skeys absent from the log have no anchor.
        sl = list(set(index['skey'].unique()) - set(index1['skey'].unique()))
        re5.loc[re5['skey'].isin(sl), 'seq1'] = np.nan
        # count1: position inside the seq1 group; count2: size of that group.
        re5['count1'] = re5.groupby(['seq1']).cumcount()
        re5['count2'] = re5.groupby(['seq1'])['count1'].transform('nunique')
        re5['min_seq'] = re5.groupby('skey')['sequenceNo'].transform('min')
        # Offset from the anchor; for the group anchored on the skey's smallest
        # sequenceNo the offsets run up to 0 instead of starting at 0.
        re5['count'] = np.where(re5['seq1'] != re5['min_seq'], re5['count1'], re5['count1']+1-re5['count2'])
        re5.drop(["min_seq"],axis=1,inplace=True)
        re5.drop(["count1"],axis=1,inplace=True)
        re5.drop(["count2"],axis=1,inplace=True)
        # dup / dup1 / nan: position among rows sharing a sequenceNo, number of
        # distinct 'num' sharing it, and the needs-new-number flag.
        re5['dup'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo']).cumcount(), 0)
        re5['dup1'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
        re5['nan'] = np.where((re5['sequenceNo'].isnull()) | (re5['dup'] != 0), 1, 0)
        re5.loc[(re5['dup1'] > 1) & (re5['dup'] > 0), 'sequenceNo'] = np.nan
        # Sanity: unanchored skeys are exactly those missing from the log, and
        # the row count still equals the database's.
        assert((len(set(sl) - set(re5[re5['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re5[re5['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
        assert(re5.shape[0] == index.shape[0])

        # Share of rows still lacking a sequenceNo.
        display('%.0f%%' % (re5[re5['sequenceNo'].isnull()].shape[0]/re5.shape[0] * 100))
    else:
        re5 = re.sort_values(by='num')
        # Every row already has a sequenceNo, so it is its own anchor.
        re5['seq1'] = re5['sequenceNo']
        sl = list(set(index['skey'].unique()) - set(index1['skey'].unique()))
        re5.loc[re5['skey'].isin(sl), 'seq1'] = np.nan
        re5['count1'] = re5.groupby(['seq1']).cumcount()
        re5['count2'] = re5.groupby(['seq1'])['count1'].transform('nunique')
        re5['min_seq'] = re5.groupby('skey')['sequenceNo'].transform('min')
        re5['count'] = np.where(re5['seq1'] != re5['min_seq'], re5['count1'], re5['count1']+1-re5['count2'])
        re5.drop(["min_seq"],axis=1,inplace=True)
        re5.drop(["count1"],axis=1,inplace=True)
        re5.drop(["count2"],axis=1,inplace=True)
        re5['dup'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo']).cumcount(), 0)
        re5['dup1'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
        re5['nan'] = np.where((re5['sequenceNo'].isnull()) | (re5['dup'] != 0), 1, 0)
        re5.loc[(re5['dup1'] > 1) & (re5['dup'] > 0), 'sequenceNo'] = np.nan
        assert((len(set(sl) - set(re5[re5['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re5[re5['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
        assert(re5.shape[0] == index.shape[0])

        display('%.0f%%' % (re5[re5['sequenceNo'].isnull()].shape[0]/re5.shape[0] * 100))
    
    
    print('----------------------------------------------------------------------------------------------------')
    print('final concat')
    # The raw feeds are expected to use disjoint sequenceNo values.  For every
    # pair of feeds, show the offending rows from both sides when they share
    # any.  This is a plain condition rather than `try: assert ... except:`,
    # so it still runs under `python -O` and does not swallow unrelated errors.
    for left, right in ((SZ1, SH1), (SZ1, trade1), (SZ1, order1), (SZ1, index1),
                        (SH1, index1), (SH1, trade1), (SH1, order1),
                        (trade1, order1), (trade1, index1), (index1, order1)):
        shared = list(set(left['sequenceNo']) & set(right['sequenceNo']))
        if shared:
            display(left[left['sequenceNo'].isin(shared)])
            display(right[right['sequenceNo'].isin(shared)])
    # Drop the loop's references so the frames can be freed once the feed
    # variables themselves are deleted.
    del left, right, shared

    # --- Combine the five reconciled frames (re1..re5) and give every row a
    # unique sequenceNo.  The inputs are no longer needed; free them first.
    del SH
    del SH1
    del SZ
    del SZ1
    del trade
    del trade1
    del order
    del order1
    del index
    del index1
    # Tag each frame so the rows can be separated again after the merge.
    re1['tag'] = 'SH'
    re2['tag'] = 'SZ'
    re3['tag'] = 'trade'
    re4['tag'] = 'order'
    re5['tag'] = 'index'
    
    # Reduce to a common layout; snapshot/index frames are keyed by 'num',
    # trade/order frames by 'ApplSeqNum'.
    re1 = re1[['skey', 'date', 'num', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    re2 = re2[['skey', 'date', 'num', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    re3 = re3[['skey', 'date', 'ApplSeqNum', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    re4 = re4[['skey', 'date', 'ApplSeqNum', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    re5 = re5[['skey', 'date', 'num', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    # seq2 records each row's position inside its own frame; it is the
    # tie-breaker when rows share a seq1.
    re1 = re1.sort_values(by='num').reset_index(drop=True)
    re1['seq2'] = re1.index
    re2 = re2.sort_values(by='num').reset_index(drop=True)
    re2['seq2'] = re2.index
    re3 = re3.sort_values(by=['skey', 'ApplSeqNum']).reset_index(drop=True)
    re3['seq2'] = re3.index
    re4 = re4.sort_values(by=['skey', 'ApplSeqNum']).reset_index(drop=True)
    re4['seq2'] = re4.index
    re5 = re5.sort_values(by='num').reset_index(drop=True)
    re5['seq2'] = re5.index

    # fr1 collects rows without an anchor (null seq1), fr2 rows with one.
    # Each source frame is deleted as soon as it has been split.
    fr1 = []
    fr2 = []
    fr1 += [re1[re1['seq1'].isnull()]]
    fr2 += [re1[~re1['seq1'].isnull()]]
    del re1
    display('1. here~')
    fr1 += [re2[re2['seq1'].isnull()]]
    fr2 += [re2[~re2['seq1'].isnull()]]
    del re2
    display('2. here~')
    fr1 += [re3[re3['seq1'].isnull()]]
    fr2 += [re3[~re3['seq1'].isnull()]]
    del re3
    display('3. here~')
    fr1 += [re4[re4['seq1'].isnull()]]
    fr2 += [re4[~re4['seq1'].isnull()]]
    del re4
    display('4. here~')
    fr1 += [re5[re5['seq1'].isnull()]]
    fr2 += [re5[~re5['seq1'].isnull()]]
    del re5
    display('5. here~')
    fr1 = pd.concat(fr1).reset_index(drop=True)
    fr2 = pd.concat(fr2).reset_index(drop=True)
    
    # Order the anchored rows by anchor, then by in-frame position (timed).
    startTm = datetime.datetime.now()
    fr2 = fr2.sort_values(by=['seq1', 'seq2'])
    print(datetime.datetime.now() - startTm)

    # Rows that already own a unique sequenceNo need no offset.
    fr2.loc[(fr2['nan']==0) & (fr2['dup1']==1), 'count'] = 0
    # Shift every existing sequenceNo up by the number of flagged rows seen so
    # far in the sorted order (running total of 'nan').
    fr2['sum_nan'] = fr2['nan'].cumsum()
    fr2['sequenceNo'] = fr2['sequenceNo'] + fr2['sum_nan']
    startTm = datetime.datetime.now()
    # Rows without a number inherit one from their anchor group (ffill within
    # seq1, then a bfill applied to the ungrouped result) ...
    fr2['sequenceNo'] = fr2.groupby('seq1')['sequenceNo'].ffill().bfill()
    print(datetime.datetime.now() - startTm)
    # ... and are then moved off it by their per-row offset.
    fr2['sequenceNo'] = fr2['sequenceNo'] + fr2['count']
    fr21 = fr2[~fr2['sequenceNo'].isnull()]
    fr22 = fr2[fr2['sequenceNo'].isnull()]
    display(fr22.shape[0])
    display(fr21.shape[0])
    display(fr2.shape[0])
    # Anything still unnumbered is appended after the current maximum.
    if fr22.shape[0] != 0:
        fr22['sequenceNo'] = range(int(fr21['sequenceNo'].max()) + 1, int(fr21['sequenceNo'].max()) + 1 + fr22.shape[0])
        fr2 = pd.concat([fr21, fr22])
    del fr21
    del fr22
    display(fr2.shape[0])
    # If a duplicated sequenceNo remains, repair it: shift everything above the
    # first duplicated value (by 1 when there is a gap after it, otherwise by
    # 2) and move the duplicate with the largest clockAtArrival to seq + 1.
    # NOTE(review): only one duplicated value is handled; the final assert
    # fails if there are more.  The assert also doubles as a branch condition
    # under a bare except.
    try:
        assert(fr2[fr2.duplicated('sequenceNo', keep=False)].shape[0] == 0)
    except:
        te_st = fr2[fr2.duplicated('sequenceNo', keep=False)]
        display(te_st)
        caa = te_st['clockAtArrival'].max()
        seq = te_st['sequenceNo'].iloc[0]
        m_in = fr2[fr2['sequenceNo'] > seq]['sequenceNo'].min()
        if m_in > seq + 1:
            fr2.loc[fr2['sequenceNo'] > seq, 'sequenceNo'] = fr2[fr2['sequenceNo'] > seq]['sequenceNo'] + 1
            fr2.loc[(fr2['sequenceNo'] == seq) & (fr2['clockAtArrival'] == caa), 'sequenceNo'] = seq + 1
        else:
            fr2.loc[fr2['sequenceNo'] > seq, 'sequenceNo'] = fr2[fr2['sequenceNo'] > seq]['sequenceNo'] + 2
            fr2.loc[(fr2['sequenceNo'] == seq) & (fr2['clockAtArrival'] == caa), 'sequenceNo'] = seq + 1
        assert(fr2[fr2.duplicated('sequenceNo', keep=False)].shape[0] == 0)
        
    
    # Rows without any anchor are numbered consecutively after everything else.
    fr1['sequenceNo'] = range(int(fr2['sequenceNo'].max()) + 1, int(fr2['sequenceNo'].max()) + 1 + fr1.shape[0])
    fr2 = pd.concat([fr1, fr2])
    del fr1
    assert(fr2[fr2.duplicated('sequenceNo', keep=False)].shape[0] == 0)
    
    import pickle
    os.mkdir('/mnt/e/result/' + startDate)
    SH = fr2[fr2['tag'] == 'SH'][["skey", "date", "num", 'sequenceNo', "clockAtArrival"]]
    SH.to_pickle('/mnt/e/result/' + startDate + '/SH.pkl')
    del SH

    SZ = fr2[fr2['tag'] == 'SZ'][["skey", "date", "num", 'sequenceNo', "clockAtArrival"]]
    SZ.to_pickle('/mnt/e/result/' + startDate + '/SZ.pkl')
    del SZ
    
    trade = fr2[fr2['tag'] == 'trade'][["skey", "date", "ApplSeqNum", 'sequenceNo', "clockAtArrival"]]
    trade.to_pickle('/mnt/e/result/' + startDate + '/trade.pkl')
    del trade
    
    order = fr2[fr2['tag'] == 'order'][["skey", "date", "ApplSeqNum", 'sequenceNo', "clockAtArrival"]]
    order.to_pickle('/mnt/e/result/' + startDate + '/order.pkl')
    del order
    
    index = fr2[fr2['tag'] == 'index'][["skey", "date", "num", 'sequenceNo', "clockAtArrival"]]
    index.to_pickle('/mnt/e/result/' + startDate + '/index.pkl')
    del index
    del fr2
    
    print(str(i) + 'finished')


--------------------------------------------------------------------------------------------
20200324
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
5258376,20200324.0,1603595,150005000000,3896087,111618900.0,29.08,29.08,29.07,29.06,29.0,28.97,3000,4200,2000,16600,5100,29.09,29.1,29.11,29.12,29.14,5500,16300,2200,5000,2800,29.5,16035950000.0,1585033000000000.0,128067528.0
5258377,20200324.0,1603595,150005000000,3896087,111618900.0,29.08,29.08,29.07,29.06,29.0,28.97,3000,4200,2000,16600,5100,29.09,29.1,29.11,29.12,29.14,5500,16300,2200,5000,2800,29.5,16035950000.0,1585033000000000.0,128073508.0
5258378,20200324.0,1603595,150005000000,3896087,111618900.0,29.08,29.08,29.07,29.06,29.0,28.97,3000,4200,2000,16600,5100,29.09,29.1,29.11,29.12,29.14,5500,16300,2200,5000,2800,29.5,16035950000.0,1585033000000000.0,128067528.0
5258379,20200324.0,1603595,150005000000,3896087,111618900.0,29.08,29.08,29.07,29.06,29.0,28.97,3000,4200,2000,16600,5100,29.09,29.1,29.11,29.12,29.14,5500,16300,2200,5000,2800,29.5,16035950000.0,1585033000000000.0,128073508.0
5764444,20200324.0,1603848,150005000000,939900,11250870.0,12.01,12.01,12.0,11.99,11.98,11.96,500,11400,5600,200,500,12.02,12.03,12.05,12.06,12.07,9300,6500,7900,3200,1300,12.0,16038480000.0,1585033000000000.0,128067535.0
5764445,20200324.0,1603848,150005000000,939900,11250870.0,12.01,12.01,12.0,11.99,11.98,11.96,500,11400,5600,200,500,12.02,12.03,12.05,12.06,12.07,9300,6500,7900,3200,1300,12.0,16038480000.0,1585033000000000.0,128073509.0
5764446,20200324.0,1603848,150005000000,939900,11250870.0,12.01,12.01,12.0,11.99,11.98,11.96,500,11400,5600,200,500,12.02,12.03,12.05,12.06,12.07,9300,6500,7900,3200,1300,12.0,16038480000.0,1585033000000000.0,128067535.0
5764447,20200324.0,1603848,150005000000,939900,11250870.0,12.01,12.01,12.0,11.99,11.98,11.96,500,11400,5600,200,500,12.02,12.03,12.05,12.06,12.07,9300,6500,7900,3200,1300,12.0,16038480000.0,1585033000000000.0,128073509.0
6447893,20200324.0,1688399,150005000000,1362519,102593400.0,75.98,75.98,75.97,75.91,75.86,75.8,4170,800,3000,350,393,75.99,76.0,76.05,76.09,76.15,280,3100,100,200,600,76.5,16883990000.0,1585033000000000.0,128067554.0
6447894,20200324.0,1688399,150005000000,1362519,102593400.0,75.98,75.98,75.97,75.91,75.86,75.8,4170,800,3000,350,393,75.99,76.0,76.05,76.09,76.15,280,3100,100,200,600,76.5,16883990000.0,1585033000000000.0,128073510.0


'1.70%'

-------------------------------------------------------------------------------------------
SZ lv2


8634687

8444299

8634687

8634687

8444299

SZ lv2 is complete


'2.20%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.58%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.72%'

-----------------------------------------------------------------------------------------------------
SH index data


887458

887458

887458

14693

16711

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'6%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:06:06.801888
0:01:21.753884


0

113786475

113786475

113786475

20200324finished
--------------------------------------------------------------------------------------------
20200325
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
2501142,20200325.0,1600731,144437000000,5668254,3.577929e+07,6.32,6.31,6.30,6.29,6.28,6.27,25700,34800,15300,56200,58000,6.32,6.33,6.34,6.35,6.36,31100,34100,45000,46300,43100,6.38,1.600731e+10,1.585119e+15,118364612.0
2501143,20200325.0,1600731,144437000000,5668254,3.577929e+07,6.32,6.31,6.30,6.29,6.28,6.27,25700,34800,15300,56200,58000,6.32,6.33,6.34,6.35,6.36,31100,34100,45000,46300,43100,6.38,1.600731e+10,1.585119e+15,118364615.0
4012759,20200325.0,1601860,150006000000,59000751,2.604602e+08,4.44,4.43,4.42,4.41,4.40,4.39,505800,307300,189600,586600,145500,4.44,4.45,4.46,4.47,4.48,782842,437200,387100,575800,259800,4.39,1.601861e+10,1.585120e+15,126969560.0
4012760,20200325.0,1601860,150006000000,59000751,2.604602e+08,4.44,4.43,4.42,4.41,4.40,4.39,505800,307300,189600,586600,145500,4.44,4.45,4.46,4.47,4.48,782842,437200,387100,575800,259800,4.39,1.601861e+10,1.585120e+15,126969673.0
4012761,20200325.0,1601860,150006000000,59000751,2.604602e+08,4.44,4.43,4.42,4.41,4.40,4.39,505800,307300,189600,586600,145500,4.44,4.45,4.46,4.47,4.48,782842,437200,387100,575800,259800,4.39,1.601861e+10,1.585120e+15,126969560.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6429487,20200325.0,1688288,150006000000,1165852,3.941188e+07,33.63,33.62,33.55,33.50,33.49,33.48,58,620,3900,1200,1000,33.63,33.64,33.65,33.67,33.68,18505,5033,900,200,2708,33.93,1.688288e+10,1.585120e+15,126969689.0
6445664,20200325.0,1688321,150006000000,2489904,1.190288e+08,47.30,47.30,47.20,47.14,47.13,47.12,3978,10435,9000,5778,837,47.31,47.32,47.33,47.34,47.37,4058,1400,2300,2000,200,48.00,1.688321e+10,1.585120e+15,126969576.0
6445665,20200325.0,1688321,150006000000,2489904,1.190288e+08,47.30,47.30,47.20,47.14,47.13,47.12,3978,10435,9000,5778,837,47.31,47.32,47.33,47.34,47.37,4058,1400,2300,2000,200,48.00,1.688321e+10,1.585120e+15,126969690.0
6445666,20200325.0,1688321,150006000000,2489904,1.190288e+08,47.30,47.30,47.20,47.14,47.13,47.12,3978,10435,9000,5778,837,47.31,47.32,47.33,47.34,47.37,4058,1400,2300,2000,200,48.00,1.688321e+10,1.585120e+15,126969576.0


'1.72%'

-------------------------------------------------------------------------------------------
SZ lv2


8624189

8427489

8624189

8624189

8427489

SZ lv2 is complete


'2.28%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.39%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.48%'

-----------------------------------------------------------------------------------------------------
SH index data


820343

820343

820343

14724

15757

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'7%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:38.201417
0:01:16.095061


0

112723187

112723187

112723187

20200325finished
--------------------------------------------------------------------------------------------
20200326
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.85%'

-------------------------------------------------------------------------------------------
SZ lv2


8520820

8337503

8520820

8520820

8337503

SZ lv2 is complete


'2.15%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.62%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.43%'

-----------------------------------------------------------------------------------------------------
SH index data


737753

737753

737752

14335

15942

'100.00%'

92 have unique values not shared by database


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'8%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:41.631435
0:01:20.196447


0

105998085

105998085

105998085

20200326finished
--------------------------------------------------------------------------------------------
20200327
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.95%'

-------------------------------------------------------------------------------------------
SZ lv2


8415183

8220830

8415183

8415183

8220830

SZ lv2 is complete


'2.31%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.67%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.63%'

-----------------------------------------------------------------------------------------------------
SH index data


927868

927868

927868

14704

17000

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'6%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:23.880232
0:00:57.052698


0

103289119

103289119

103289119

20200327finished
--------------------------------------------------------------------------------------------
20200330
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.90%'

-------------------------------------------------------------------------------------------
SZ lv2


8490805

8277813

8490805

8490805

8277813

SZ lv2 is complete


'2.51%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.52%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.52%'

-----------------------------------------------------------------------------------------------------
SH index data


894338

894338

894335

14713

16733

'100.00%'

92 have unique values not shared by database


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'3%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:13.405810
0:01:15.706652


0

105384922

105384922

105384922

20200330finished
--------------------------------------------------------------------------------------------
20200331
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
3592194,20200331.0,1601512,93841000000,244600,2627930.0,10.72,10.72,10.71,10.7,10.69,10.68,14200,20800,22200,12200,45100,10.73,10.74,10.75,10.76,10.77,15700,16700,3100,6500,7700,10.78,16015120000.0,1585619000000000.0,12803535.0
3592195,20200331.0,1601512,93841000000,244600,2627930.0,10.72,10.72,10.71,10.7,10.69,10.68,14200,20800,22200,12200,45100,10.73,10.74,10.75,10.76,10.77,15700,16700,3100,6500,7700,10.78,16015120000.0,1585619000000000.0,12803536.0


'1.57%'

-------------------------------------------------------------------------------------------
SZ lv2


8220566

8044639

8220566

8220566

8044639

SZ lv2 is complete


'2.14%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.00%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.00%'

-----------------------------------------------------------------------------------------------------
SH index data


703592

703185

703235

14344

16001

'99.95%'

92 have unique values not shared by database


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'10%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:16.320976
0:00:58.269683


0

97389746

97389746

97389746

20200331finished
--------------------------------------------------------------------------------------------
20200401
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.67%'

-------------------------------------------------------------------------------------------
SZ lv2


8245527

8066644

8245527

8245527

8066644

SZ lv2 is complete


'2.17%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.02%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.00%'

-----------------------------------------------------------------------------------------------------
SH index data


883212

882831

882853

14724

16509

'99.96%'

92 have unique values not shared by database


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'10%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:27.020222
0:01:00.651081


0

99998909

99998909

99998909

20200401finished
--------------------------------------------------------------------------------------------
20200402
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.57%'

-------------------------------------------------------------------------------------------
SZ lv2


8263481

8087477

8263481

8263481

8087477

SZ lv2 is complete


'2.13%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.00%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.00%'

-----------------------------------------------------------------------------------------------------
SH index data


724289

724289

724286

14316

16444

'100.00%'

92 have unique values not shared by database


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'6%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:12.826786
0:00:56.325085


0

100542072

100542072

100542072

20200402finished
--------------------------------------------------------------------------------------------
20200403
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.60%'

-------------------------------------------------------------------------------------------
SZ lv2


8185931

8013272

8185931

8185931

8013272

SZ lv2 is complete


'2.11%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.02%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.04%'

-----------------------------------------------------------------------------------------------------
SH index data


917997

917997

917997

14712

17233

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'3%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:02.634580
0:00:53.081566


0

98366428

98366428

98366428

20200403finished
--------------------------------------------------------------------------------------------
20200407
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.57%'

-------------------------------------------------------------------------------------------
SZ lv2


8597619

8419411

8597619

8597619

8419411

SZ lv2 is complete


'2.07%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.01%'

--------------------------------------------------------------------------------------------------
SZ order data


In [2]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    """Build a ``DBObj`` bound to database ``db_name`` on ``host``.

    The ``admin`` and ``root`` users authenticate against the ``admin``
    database; every other user authenticates against ``db_name`` itself.

    ``user`` and ``passwd`` are interpolated into the connection URI
    verbatim -- no percent-escaping is applied here.
    """
    privileged = user in ('admin', 'root')
    auth_db = 'admin' if privileged else db_name
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    """Store and load pandas DataFrames in a MongoDB database.

    A DataFrame is split per (date, symbol) and then into row chunks of
    ``chunk_size``; each chunk is pickled, compressed and inserted as one
    document with the keys ``ver``, ``data``, ``date``, ``symbol`` and
    ``start``.
    """

    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        """Connect to ``uri`` and select database ``db_name``.

        ``symbol_column`` names the DataFrame column used as the symbol key
        when writing.
        """
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000  # maximum number of rows per stored document
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        """Split a ``mongodb://user:password@host`` URI into its parts.

        Returns the list of pieces obtained after dropping the scheme and
        surrounding slashes and splitting on ``:``, ``@`` and spaces.
        """
        # mongodb://user:password@example.com
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        """Drop the collection ``table_name``."""
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        """Rename collection ``old_table`` to ``new_table``."""
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        """Replace the stored data for every (date, symbol) present in ``df``.

        Does nothing for an empty DataFrame. Existing documents for each
        (date, symbol) pair are deleted before the new chunks are inserted.

        Raises:
            Exception: if ``df`` has no ``date`` column.
        """
        if len(df) == 0:
            return

        if self.date_column not in df.columns:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        for date, symbol, sub_df in self._iter_groups(df):
            collection.delete_many({'date': date, 'symbol': symbol})
            self.write_single(collection, date, symbol, sub_df)

    def _iter_groups(self, df):
        """Yield ``(date, symbol, sub_df)`` for every group to be written.

        ``date`` is always a ``str`` and ``symbol`` always a built-in ``int``
        so that both write paths store keys of the same type that
        ``build_query`` later searches for.
        """
        dates = df[self.date_column]
        if len(dates.unique()) > 1:
            grouped = df.groupby([self.date_column, self.symbol_column])
            for (date, symbol), sub_df in grouped:
                yield str(date), int(symbol), sub_df
        else:
            date = str(dates.iloc[0])
            # Group by the bare column name, not a one-element list: a list
            # grouper makes newer pandas yield 1-tuples as keys, which would
            # end up as the stored ``symbol``.
            for symbol, sub_df in df.groupby(self.symbol_column):
                yield date, int(symbol), sub_df

    def write_single(self, collection, date, symbol, df):
        """Insert ``df`` into ``collection`` as chunks of ``chunk_size`` rows.

        Each document records the row offset of its chunk under ``start`` so
        that ``read`` can restore the original order.
        """
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = 1  # serialization format understood by ser/deser
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        """Build the MongoDB filter for a date range and/or symbol selection.

        Args:
            start_date, end_date: inclusive bounds given as a ``YYYYMMDD``
                string, an int, or a ``datetime.date``/``datetime.datetime``.
            symbol: a single symbol or a list/tuple of symbols; each one is
                converted with ``int``.

        Returns:
            A dict suitable for ``find``/``delete_many``; empty when no
            argument was supplied.

        Raises:
            Exception: on a date of unsupported type or wrong string length.
        """
        query = {}

        def parse_date(x):
            if isinstance(x, str):
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            # datetime.datetime is a subclass of datetime.date
            if isinstance(x, datetime.date):
                return x.strftime("%Y%m%d")
            if isinstance(x, int):
                return parse_date(str(x))
            raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        if symbol:
            if isinstance(symbol, (list, tuple)):
                query['symbol'] = {'$in': [int(x) for x in symbol]}
            else:
                query['symbol'] = int(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        """Delete the documents matching the given date range / symbols.

        Refuses to run (prints a message and returns ``None``) when no filter
        is given, so the whole table cannot be wiped by accident.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        """Load the matching chunks and concatenate them into one DataFrame.

        Chunks are ordered by (symbol, date, start) before concatenation.

        Returns:
            The concatenated DataFrame, or ``None`` when no filter was given
            or no document matched.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for x in collection.find(query):
            x['data'] = self.deser(x['data'], x['ver'])
            segs.append(x)
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start']))
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def list_tables(self):
        """Return the names of the collections in the database."""
        # ``collection_names`` was removed in PyMongo 4. Look the method up on
        # the type: attribute access on a Database instance yields a
        # Collection for unknown names, so an instance-level check would
        # always succeed.
        if hasattr(type(self.db), 'list_collection_names'):
            return self.db.list_collection_names()
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        """Return the sorted distinct ``date`` values stored in ``table_name``.

        Missing bounds default to ``'00000000'`` / ``'99999999'``.
        """
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        """Pickle ``s`` and compress it (version 1: gzip, version 2: lzma).

        Raises:
            Exception: on an unknown ``version``.
        """
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        """Inverse of ``ser``: decompress ``s`` and unpickle it.

        Raises:
            Exception: on an unknown ``version``.
        """
        def unpickle(s):
            # NOTE(review): pickle.loads executes arbitrary code embedded in
            # the payload; only use this against a trusted database.
            return pickle.loads(s)

        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    """Register a stand-in ``pandas.core.internals.managers`` on old pandas.

    On pandas older than 0.24, a module named
    ``pandas.core.internals.managers`` exposing ``BlockManager`` is inserted
    into ``sys.modules`` if it is not already there -- presumably so that
    pickles referencing that module path can be loaded. Newer pandas is left
    untouched.
    """
    import re

    # Compare (major, minor) numerically: a plain string comparison orders
    # versions lexicographically (e.g. '0.9' would not be "< '0.24'").
    match = re.match(r'(\d+)\.(\d+)', pd.__version__)
    if match is None or (int(match.group(1)), int(match.group(2))) >= (0, 24):
        return

    import sys
    from types import ModuleType
    from pandas.core.internals import BlockManager
    pkg_name = 'pandas.core.internals.managers'
    if pkg_name not in sys.modules:
        m = ModuleType(pkg_name)
        m.BlockManager = BlockManager
        sys.modules[pkg_name] = m
patch_pandas_pickle()


import pandas as pd
import random
import numpy as np
import glob
import pickle
import os
import datetime
import time
# Use the fully qualified option name: the abbreviated "max_columns" pattern
# matches more than one option on newer pandas releases and is rejected there.
pd.set_option("display.max_columns", 200)


# Run configuration.
# NOTE(review): the database credentials are hard-coded here (and repeated
# further down the script); consider loading them from the environment.
database_name = 'com_md_eq_cn'
user = "zhenyuy"
password = "bnONBrzSMGoE"
startDate = endDate = 20200430

# The dates to process are whatever md_index holds for symbol 1000300 inside
# the configured date range.
db = DB("192.168.10.178", database_name, user, password)
test = db.read('md_index', symbol=[1000300], start_date=startDate, end_date=endDate)
date_list = test['date'].unique()
del test

# Accumulators for log rows that have no counterpart in the database.
new_trade_data, new_order_data = [], []

# Main loop: one full reconciliation pass per date in date_list.
for i in date_list:
    print('--------------------------------------------------------------------------------------------')
    print(i)
    print('SH lv2')
    startDate = str(i)
    endDate = str(i)
    # A fresh DB handle is opened on every iteration.
    db = DB("192.168.10.178", database_name, user, password)
    # Read all level-2 snapshots for the date once, then split them by skey range.
    SH = db.read('md_snapshot_l2', start_date=startDate, end_date=endDate)
    # skey > 2000000 goes to SZ, skey < 2000000 stays in SH
    # (a row with skey == 2000000 would end up in neither frame).
    SZ = SH[SH['skey'] > 2000000]
    SH = SH[SH['skey'] < 2000000]
    # 'num' folds skey and the row's 'ordering' value into one numeric key that
    # the later steps sort and de-duplicate on.
    SH['num'] = SH['skey'] * 10000 + SH['ordering']
    SZ['num'] = SZ['skey'] * 10000 + SZ['ordering']
    
    # Keep only the columns that take part in the comparison with the log files.
    SH = SH[['date', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open", 'num']]
    SZ = SZ[['date', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open", 'num']]

    # ---- SH level-2: attach log sequenceNo / clockAtArrival to the database snapshots ----
    startDate = str(i)
    endDate = str(i)

    # Find the log directories for this date; the date is taken from the second
    # '_'-separated field of the directory name.
    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]


    # NOTE(review): SH1 and index1 are only bound inside this loop. With no matching
    # directory they are never (re)assigned, and with several only the last one is kept.
    for n in range(len(dataPathLs)):
        path1 = np.array(glob.glob(dataPathLs[n] + '/mdLog_SH_***'))
        SH1 = pd.read_csv(path1[0])
        # Set aside the rows for StockID 16/300/852/905; the 'SH index data' step uses them.
        index1 = SH1[SH1['StockID'].isin([16, 300, 852, 905])]
        SH1 = SH1[SH1['source'] == 4]

        # Bring the log rows onto the database schema: skey offset, 'open' column name.
        SH1['skey'] = SH1['StockID'] + 1000000
        SH1 = SH1.rename(columns={"openPrice":"open"})
        # Where cum_volume > 0, replace 'open' by the per-skey maximum of 'open'.
        SH1["open"] = np.where(SH1["cum_volume"] > 0, SH1.groupby("skey")["open"].transform("max"), SH1["open"])
        # Remove ':' and '.' from the time string, convert to int and scale by 1000.
        SH1["time"] = SH1["time"].apply(lambda x: int((x.replace(':', "")).replace(".", "")) * 1000)

    SH1 = SH1[['clockAtArrival', 'sequenceNo', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]]
    # Round the float columns to 2 decimals before they are used as merge keys.
    for cols in ['cum_amount', "close", 'open']:
        SH1[cols] = SH1[cols].round(2)
    # 'cols' is rebound here from the loop variable to the list of merge keys.
    cols = ['skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]
    # Ignore log rows for skeys the database does not have, then outer-merge on every key column.
    SH1 = SH1[SH1['skey'].isin(SH['skey'].unique())]
    re = pd.merge(SH, SH1, on=cols, how='outer')

    # p21: rows that exist only in the log (no 'date'); p22: rows that exist only in the database (no 'sequenceNo').
    p21 = re[(re['date'].isnull())][['clockAtArrival', 'sequenceNo', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]]
    p22 = re[(re['sequenceNo'].isnull())][["skey", "date", "time", 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open", 'num']]

    # Matched rows (both sides present): p11 holds those whose 'num' occurs more than once, p12 the rest.
    p11 = re[(~re['sequenceNo'].isnull()) & (~re['date'].isnull())][re[(~re['sequenceNo'].isnull()) 
                                                        & (~re['date'].isnull())]['num'].duplicated(keep=False)]
    p12 = re[(~re['sequenceNo'].isnull()) & (~re['date'].isnull())].drop_duplicates(['num'], keep=False)
    p11 = p11.sort_values(by=['num', 'sequenceNo'])
    display(p11)
    # Keep a row only when its occurrence index within its 'num' group equals its
    # occurrence index within its 'sequenceNo' group.
    p11["order1"] = p11.groupby(["num"]).cumcount()
    p11["order2"] = p11.groupby(["sequenceNo"]).cumcount()
    p11 = p11[p11['order1'] == p11['order2']]

    # p11_1: one row per duplicated 'num' that did not survive the pairing above;
    # it is kept with sequenceNo / clockAtArrival blanked out.
    p11_1 = re[(~re['sequenceNo'].isnull()) & (~re['date'].isnull())][re[(~re['sequenceNo'].isnull()) 
                                                        & (~re['date'].isnull())]['num'].duplicated(keep=False)].drop_duplicates('num')
    p11_1 = pd.merge(p11_1, p11[['num', 'order1']], on='num', how='left')
    p11_1 = p11_1[p11_1['order1'].isnull()]
    p11_1['sequenceNo'] = np.nan
    p11_1['clockAtArrival'] = np.nan

    p11.drop(['order1', 'order2'],axis=1,inplace=True)
    p11_1.drop(['order1'],axis=1,inplace=True)
    p11 = pd.concat([p11, p11_1])
    
    p1 = pd.concat([p11, p12])
    # Database-only rows get a second chance: match log-only rows on (skey, time) alone.
    p2 = pd.merge(p22, p21[['skey', 'time', 'clockAtArrival', 'sequenceNo']], on=['skey', 'time'], how='left')
    re1 = pd.concat([p1, p2])
    re1 = re1.sort_values(by='num')
    # seq1 is the anchor sequenceNo: forward-filled within each skey; the trailing
    # .bfill() runs on the resulting Series as a whole, not per group.
    re1['seq1'] = re1.groupby('skey')['sequenceNo'].ffill().bfill()
    # Skeys present in the database but absent from the log get no anchor.
    sl = list(set(SH['skey'].unique()) - set(SH1['skey'].unique()))
    re1.loc[re1['skey'].isin(sl), 'seq1'] = np.nan
    # count1: position inside the seq1 group; count2: number of distinct positions in that group.
    re1['count1'] = re1.groupby(['seq1']).cumcount()
    re1['count2'] = re1.groupby(['seq1'])['count1'].transform('nunique')
    re1['min_seq'] = re1.groupby('skey')['sequenceNo'].transform('min')
    # 'count' is the offset from the anchor; for the group anchored on the skey's smallest
    # sequenceNo the offset is shifted by (1 - count2).
    re1['count'] = np.where(re1['seq1'] != re1['min_seq'], re1['count1'], re1['count1']+1-re1['count2'])
    re1.drop(["min_seq"],axis=1,inplace=True)
    re1.drop(["count1"],axis=1,inplace=True)
    re1.drop(["count2"],axis=1,inplace=True)
    # dup: occurrence index of each non-null sequenceNo; dup1: distinct 'num' values sharing it.
    re1['dup'] = np.where(~re1["sequenceNo"].isnull(), re1.groupby(['sequenceNo']).cumcount(), 0)
    re1['dup1'] = np.where(~re1["sequenceNo"].isnull(), re1.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
    # nan = 1 marks rows without a sequenceNo of their own (missing, or a repeated occurrence).
    re1['nan'] = np.where((re1['sequenceNo'].isnull()) | (re1['dup'] != 0), 1, 0)
    re1.loc[(re1['dup1'] > 1) & (re1['dup'] > 0), 'sequenceNo'] = np.nan
    # Sanity checks: the skeys without an anchor are exactly 'sl', and no database row was lost or added.
    assert((len(set(sl) - set(re1[re1['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re1[re1['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
    assert(re1.shape[0] == SH.shape[0])

    # Share of rows left without a sequenceNo.
    display('%.2f%%' % (re1[re1['sequenceNo'].isnull()].shape[0]/re1.shape[0] * 100))
    
    
    
    
    
    
    
    # ---- SZ level-2: same reconciliation as the SH step, for the SZ frame ----
    print('-------------------------------------------------------------------------------------------')
    print('SZ lv2')
    startDate = str(i)
    endDate = str(i)

    # Find the log directories for this date (date = second '_'-separated field of the name).
    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]


    # NOTE(review): SZ1 is only bound inside this loop; with several matching
    # directories only the last one is kept.
    for n in range(len(dataPathLs)):
        path1 = np.array(glob.glob(dataPathLs[n] + '/mdLog_SZ_***'))
        SZ1 = pd.read_csv(path1[0])
        SZ1 = SZ1[SZ1['source'] == 4]

        SZ1['skey'] = SZ1['StockID'] + 2000000
        SZ1 = SZ1.rename(columns={"openPrice":"open"})
        # Where cum_volume > 0, replace 'open' by the per-skey maximum of 'open'.
        SZ1["open"] = np.where(SZ1["cum_volume"] > 0, SZ1.groupby("skey")["open"].transform("max"), SZ1["open"])
        # Remove ':' and '.' from the time string, convert to int and scale by 1000.
        SZ1["time"] = SZ1["time"].apply(lambda x: int((x.replace(':', "")).replace(".", "")) * 1000)

    SZ1 = SZ1[['clockAtArrival', 'sequenceNo', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]]
    # Only cum_amount is rounded here (the SH step also rounds close and open).
    for cols in ['cum_amount']:
        SZ1[cols] = SZ1[cols].round(2)
    # 'cols' is rebound here from the loop variable to the list of merge keys.
    cols = ['skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]
    SZ1 = SZ1[SZ1['skey'].isin(SZ['skey'].unique())]
    re = pd.merge(SZ, SZ1, on=cols, how='outer')

    # Diagnostics: merged rows, rows with a log match, rows with a database match, input sizes.
    display(re.shape[0])
    display(re[~re['sequenceNo'].isnull()].shape[0])
    display(re[~re['date'].isnull()].shape[0])
    display(SZ.shape[0])
    display(SZ1.shape[0])

    # If the outer merge produced log-only rows (no 'date'), report the ratio and
    # fall back to a left merge so only database rows remain.
    # NOTE(review): the assert is used as control flow and the bare except catches everything.
    try:
        assert(re.shape[0] == re[~re['date'].isnull()].shape[0])
        print('SZ lv2 is complete')
    except:
        display('%.2f%%' % (re[~re['date'].isnull()].shape[0]/re.shape[0] * 100))
        print('92 have unique values not shared by database')
        re = pd.merge(SZ, SZ1, on=cols, how='left')

    # Branch 1: every 'num' is unique after the merge, so the frame can be used as is.
    if re[re.duplicated('num', keep=False)].shape[0] == 0:
        re2 = re.sort_values(by='num')
        # seq1: anchor sequenceNo, forward-filled per skey; the trailing .bfill() is not per group.
        re2['seq1'] = re2.groupby('skey')['sequenceNo'].ffill().bfill()
        # Skeys present in the database but absent from the log get no anchor.
        sl = list(set(SZ['skey'].unique()) - set(SZ1['skey'].unique()))
        re2.loc[re2['skey'].isin(sl), 'seq1'] = np.nan
        # count1: position inside the seq1 group; count2: number of distinct positions in it.
        re2['count1'] = re2.groupby(['seq1']).cumcount()
        re2['count2'] = re2.groupby(['seq1'])['count1'].transform('nunique')
        re2['min_seq'] = re2.groupby('skey')['sequenceNo'].transform('min')
        # Offset from the anchor; shifted by (1 - count2) for the group anchored on the skey's smallest sequenceNo.
        re2['count'] = np.where(re2['seq1'] != re2['min_seq'], re2['count1'], re2['count1']+1-re2['count2'])
        re2.drop(["min_seq"],axis=1,inplace=True)
        re2.drop(["count1"],axis=1,inplace=True)
        re2.drop(["count2"],axis=1,inplace=True)
        # dup: occurrence index of each non-null sequenceNo; dup1: distinct 'num' values sharing it.
        re2['dup'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo']).cumcount(), 0)
        re2['dup1'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
        # nan = 1 marks rows without a sequenceNo of their own (missing, or a repeated occurrence).
        re2['nan'] = np.where((re2['sequenceNo'].isnull()) | (re2['dup'] != 0), 1, 0)
        re2.loc[(re2['dup1'] > 1) & (re2['dup'] > 0), 'sequenceNo'] = np.nan
        # Sanity checks: un-anchored skeys are exactly 'sl'; row count equals the database frame.
        assert((len(set(sl) - set(re2[re2['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re2[re2['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
        assert(re2.shape[0] == SZ.shape[0])

        display('%.2f%%' % (re2[re2['sequenceNo'].isnull()].shape[0]/re2.shape[0] * 100))


    # Branch 2: some 'num' values are duplicated; pair the k-th row of a 'num' group with
    # the k-th row of its 'sequenceNo' group, drop the other combinations, then proceed as above.
    else:
        p1 = re[re['num'].duplicated(keep=False)]
        p2 = re.drop_duplicates(['num'], keep=False)
        p1["order1"] = p1.groupby(["num"]).cumcount()
        p1["order2"] = p1.groupby(["sequenceNo"]).cumcount()
        p1 = p1[p1['order1'] == p1['order2']]
        p1.drop(['order1', 'order2'],axis=1,inplace=True)
        re = pd.concat([p1, p2])
        re2 = re.sort_values(by='num')
        re2['seq1'] = re2.groupby('skey')['sequenceNo'].ffill().bfill()
        sl = list(set(SZ['skey'].unique()) - set(SZ1['skey'].unique()))
        re2.loc[re2['skey'].isin(sl), 'seq1'] = np.nan
        re2['count1'] = re2.groupby(['seq1']).cumcount()
        re2['count2'] = re2.groupby(['seq1'])['count1'].transform('nunique')
        re2['min_seq'] = re2.groupby('skey')['sequenceNo'].transform('min')
        re2['count'] = np.where(re2['seq1'] != re2['min_seq'], re2['count1'], re2['count1']+1-re2['count2'])
        re2.drop(["min_seq"],axis=1,inplace=True)
        re2.drop(["count1"],axis=1,inplace=True)
        re2.drop(["count2"],axis=1,inplace=True)
        re2['dup'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo']).cumcount(), 0)
        re2['dup1'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
        re2['nan'] = np.where((re2['sequenceNo'].isnull()) | (re2['dup'] != 0), 1, 0)
        re2.loc[(re2['dup1'] > 1) & (re2['dup'] > 0), 'sequenceNo'] = np.nan
        assert((len(set(sl) - set(re2[re2['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re2[re2['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
        assert(re2.shape[0] == SZ.shape[0])

        display('%.2f%%' % (re2[re2['sequenceNo'].isnull()].shape[0]/re2.shape[0] * 100))
    
    # ---- Trades: match database md_trade rows with the trade log on (skey, ApplSeqNum) ----
    print('----------------------------------------------------------------------------------------------')
    print('SH & SZ trade')
    
    startDate = str(i)
    endDate = str(i)
    # NOTE(review): credentials are hard-coded and re-assigned here on every iteration.
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db = DB("192.168.10.178", database_name, user, password)
    trade = db.read('md_trade', start_date=startDate, end_date=endDate)[['skey', 'date', 'ApplSeqNum']]

    startDate = str(i)
    endDate = str(i)

    # Find the log directories for this date (date = second '_'-separated field of the name).
    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
    # NOTE(review): trade1 is only bound inside this loop; only the last directory is kept.
    for n in range(len(dataPathLs)):
        path1 = np.array(glob.glob(dataPathLs[n] + '/mdTradeLog***'))
        trade1 = pd.read_csv(path1[0])
    # exchId == 2 maps to the 2000000 skey range, everything else to the 1000000 range.
    trade1['skey'] = np.where(trade1['exchId'] == 2, trade1['SecurityID'] + 2000000, trade1['SecurityID'] + 1000000)
    trade1 = trade1[trade1['skey'].isin(trade['skey'].unique())]
    re = pd.merge(trade, trade1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='outer')
    # k = 0: the outer merge added no rows, so the log holds nothing the database lacks.
    # k = 1: log-only rows exist; they are collected in k1 / new_trade_data and the merge is
    # redone as a left merge so 're' contains database rows only.
    # NOTE(review): the assert is used as control flow and the bare except catches everything.
    try:
        assert(re.shape[0] == trade.shape[0])
        display('trade data is complete')
        k = 0
    except:
        display('%.2f%%' % (trade.shape[0]/re.shape[0] * 100))
        k = 1
        display('trade data incomplete')
        # Full log records for the (skey, ApplSeqNum) pairs that have no database 'date'.
        k1 = pd.merge(trade1, re[re['date'].isnull()][['skey', 'ApplSeqNum']], on=['skey', 'ApplSeqNum'], how='right')
        display(k1.shape[0])
        display(k1['ExecType'].unique())
        display(k1['TransactTime'].unique())
        # Stamp them with the date of the first database row.
        k1['date'] = trade['date'].iloc[0]
        new_trade_data += [k1[['clockAtArrival', 'sequenceNo', 'TransactTime', 'ApplSeqNum', 'date', 'skey', 'ExecType', 'TradeBSFlag', 
       'TradePrice', 'TradeQty', 'BidApplSeqNum', 'OfferApplSeqNum']]]
        re = pd.merge(trade, trade1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='left')
        assert(re.shape[0] == trade.shape[0])

    re3 = re.sort_values(by=['skey', 'ApplSeqNum'])
    # seq1: anchor sequenceNo, forward-filled per skey; the trailing .bfill() is not per group.
    re3['seq1'] = re3.groupby('skey')['sequenceNo'].ffill().bfill()
    # Skeys present in the database but absent from the log get no anchor.
    sl = list(set(trade['skey'].unique()) - set(trade1['skey'].unique()))
    re3.loc[re3['skey'].isin(sl), 'seq1'] = np.nan
    # count1: position inside the seq1 group; count2: number of distinct positions in it.
    re3['count1'] = re3.groupby(['seq1']).cumcount()
    re3['count2'] = re3.groupby(['seq1'])['count1'].transform('nunique')
    re3['min_seq'] = re3.groupby('skey')['sequenceNo'].transform('min')
    # Offset from the anchor; shifted by (1 - count2) for the group anchored on the skey's smallest sequenceNo.
    re3['count'] = np.where(re3['seq1'] != re3['min_seq'], re3['count1'], re3['count1']+1-re3['count2'])
    re3.drop(["min_seq"],axis=1,inplace=True)
    re3.drop(["count1"],axis=1,inplace=True)
    re3.drop(["count2"],axis=1,inplace=True)
    # dup: occurrence index of each non-null sequenceNo; dup1: distinct ApplSeqNum values sharing it.
    re3['dup'] = np.where(~re3["sequenceNo"].isnull(), re3.groupby(['sequenceNo']).cumcount(), 0)
    re3['dup1'] = np.where(~re3["sequenceNo"].isnull(), re3.groupby(['sequenceNo'])['ApplSeqNum'].transform('nunique'), 0)
    # nan = 1 marks rows without a sequenceNo of their own (missing, or a repeated occurrence).
    re3['nan'] = np.where((re3['sequenceNo'].isnull()) | (re3['dup'] != 0), 1, 0)
    re3.loc[(re3['dup1'] > 1) & (re3['dup'] > 0), 'sequenceNo'] = np.nan
    # Sanity checks: un-anchored skeys are exactly 'sl'; row count equals the database frame.
    assert((len(set(sl) - set(re3[re3['seq1'].isnull()]['skey'].unique())) == 0) & 
           (len(set(re3[re3['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
    assert(re3.shape[0] == trade.shape[0])
    # Append the log-only rows; they are anchored on their own sequenceNo with zero offset.
    if k == 1:
        k1['seq1'] = k1['sequenceNo']
        k1['count'] = 0
        k1['nan'] = 0
        k1['dup1'] = 1
        re3 = pd.concat([re3, k1[['clockAtArrival', 'date', 'sequenceNo', 'skey', 'ApplSeqNum', 'seq1', 
                                  'count', 'nan', 'dup1']]])

    # Share of rows left without a sequenceNo.
    display('%.2f%%' % (re3[re3['sequenceNo'].isnull()].shape[0]/re3.shape[0] * 100))

    
    # ---- Orders: match database md_order rows with the order log on (skey, ApplSeqNum) ----
    print('--------------------------------------------------------------------------------------------------')
    print('SZ order data')

    startDate = str(i)
    endDate = str(i)
    # NOTE(review): credentials are hard-coded and re-assigned here on every iteration.
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db = DB("192.168.10.178", database_name, user, password)
    order = db.read('md_order', start_date=startDate, end_date=endDate)[['skey', 'date', 'ApplSeqNum']]

    startDate = str(i)
    endDate = str(i)

    # Find the log directories for this date (date = second '_'-separated field of the name).
    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
    # NOTE(review): order1 is only bound inside this loop; only the last directory is kept.
    for n in range(len(dataPathLs)):
        path1 = np.array(glob.glob(dataPathLs[n] + '/mdOrderLog***'))
        order1 = pd.read_csv(path1[0])
    # Every order log row is mapped into the 2000000 skey range.
    order1['skey'] = order1['SecurityID'] + 2000000
    order1 = order1[order1['skey'].isin(order['skey'].unique())]
    re = pd.merge(order, order1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='outer')
    # k = 0: the outer merge added no rows, so the log holds nothing the database lacks.
    # k = 1: log-only rows exist; they are collected in k2 / new_order_data and the merge is
    # redone as a left merge so 're' contains database rows only.
    # NOTE(review): the assert is used as control flow and the bare except catches everything.
    try:
        assert(re.shape[0] == order.shape[0])
        display('order data is complete')
        k = 0
    except:
        display('%.2f%%' % (order.shape[0]/re.shape[0] * 100))
        k = 1
        display('order data incomplete')
        # Full log records for the (skey, ApplSeqNum) pairs that have no database 'date'.
        k2 = pd.merge(order1, re[re['date'].isnull()][['skey', 'ApplSeqNum']], on=['skey', 'ApplSeqNum'], how='right')
        display(k2.shape[0])
        display(k2['SecurityID'].unique())
        display(k2['TransactTime'].unique())
        # Stamp them with the date of the first database row.
        k2['date'] = order['date'].iloc[0]
        new_order_data += [k2[['clockAtArrival', 'sequenceNo', 'TransactTime', 'ApplSeqNum', 'date', 'skey', 'Side', 
       'OrderType', 'Price', 'OrderQty']]]
        re = pd.merge(order, order1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='left')
        assert(re.shape[0] == order.shape[0])

    re4 = re.sort_values(by=['skey', 'ApplSeqNum'])
    # seq1: anchor sequenceNo, forward-filled per skey; the trailing .bfill() is not per group.
    re4['seq1'] = re4.groupby('skey')['sequenceNo'].ffill().bfill()
    # Skeys present in the database but absent from the log get no anchor.
    sl = list(set(order['skey'].unique()) - set(order1['skey'].unique()))
    re4.loc[re4['skey'].isin(sl), 'seq1'] = np.nan
    # count1: position inside the seq1 group; count2: number of distinct positions in it.
    re4['count1'] = re4.groupby(['seq1']).cumcount()
    re4['count2'] = re4.groupby(['seq1'])['count1'].transform('nunique')
    re4['min_seq'] = re4.groupby('skey')['sequenceNo'].transform('min')
    # Offset from the anchor; shifted by (1 - count2) for the group anchored on the skey's smallest sequenceNo.
    re4['count'] = np.where(re4['seq1'] != re4['min_seq'], re4['count1'], re4['count1']+1-re4['count2'])
    re4.drop(["min_seq"],axis=1,inplace=True)
    re4.drop(["count1"],axis=1,inplace=True)
    re4.drop(["count2"],axis=1,inplace=True)
    # dup: occurrence index of each non-null sequenceNo; dup1: distinct ApplSeqNum values sharing it.
    re4['dup'] = np.where(~re4["sequenceNo"].isnull(), re4.groupby(['sequenceNo']).cumcount(), 0)
    re4['dup1'] = np.where(~re4["sequenceNo"].isnull(), re4.groupby(['sequenceNo'])['ApplSeqNum'].transform('nunique'), 0)
    # nan = 1 marks rows without a sequenceNo of their own (missing, or a repeated occurrence).
    re4['nan'] = np.where((re4['sequenceNo'].isnull()) | (re4['dup'] != 0), 1, 0)
    re4.loc[(re4['dup1'] > 1) & (re4['dup'] > 0), 'sequenceNo'] = np.nan
    # Sanity checks: un-anchored skeys are exactly 'sl'; row count equals the database frame.
    assert((len(set(sl) - set(re4[re4['seq1'].isnull()]['skey'].unique())) == 0) & 
           (len(set(re4[re4['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
    assert(re4.shape[0] == order.shape[0])
    # Append the order rows that exist only in the log (k2, built when k == 1).
    if k == 1:
        # Anchor each log-only row on its own sequenceNo with zero offset, as the
        # trade step does for k1. seq1 holds sequenceNo values everywhere else and
        # the final pass sorts on it, so filling it with ApplSeqNum would sort
        # these rows into the wrong place.
        k2['seq1'] = k2['sequenceNo']
        k2['count'] = 0
        k2['nan'] = 0
        k2['dup1'] = 1
        re4 = pd.concat([re4, k2[['clockAtArrival', 'date', 'sequenceNo', 'skey', 'ApplSeqNum', 'seq1',
                                  'count', 'nan', "dup1"]]])


    # Share of rows left without a sequenceNo.
    display('%.2f%%' % (re4[re4['sequenceNo'].isnull()].shape[0]/re4.shape[0] * 100))
    
    # ---- Index data: match database md_index rows with the index rows set aside in the SH step ----
    print('-----------------------------------------------------------------------------------------------------')
    print('SH index data')
    
    startDate = str(i)
    endDate = str(i)
    # NOTE(review): credentials are hard-coded and re-assigned here on every iteration.
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db = DB("192.168.10.178", database_name, user, password)
    index = db.read('md_index', start_date=startDate, end_date=endDate)

    # index1 was extracted from the SH log file earlier in this iteration.
    index1['skey'] = index1['StockID'] + 1000000
    index1 = index1.rename(columns={"openPrice":"open"})
    # Where cum_volume > 0, replace 'open' by the per-skey maximum of 'open'.
    index1["open"] = np.where(index1["cum_volume"] > 0, index1.groupby("skey")["open"].transform("max"), index1["open"])
    # On both sides 'close' is forced to 0 while cum_volume is 0, so those rows compare equal.
    index1['close'] = np.where(index1['cum_volume'] == 0, 0, index1['close'])
    # Remove ':' and '.' from the time string, convert to int and scale by 1000.
    index1["time"] = index1["time"].apply(lambda x: int((x.replace(':', "")).replace(".", "")) * 1000)
    index['close'] = np.where(index['cum_volume'] == 0, 0, index['close'])
    index['num'] = index['skey'] * 10000 + index['ordering']
    index = index[['skey', 'date', 'cum_volume', 'cum_amount', "close", "open", 'num']]
    index1 = index1[['clockAtArrival', 'sequenceNo', 'skey', 'cum_volume', 'cum_amount', "close", "open", "time"]]
    # cum_amount is rounded to 1 decimal here before it is used as a merge key.
    for cols in ['cum_amount']:
        index1[cols] = index1[cols].round(1)
    # 'cols' is rebound to the merge keys; note that 'time' is not one of them.
    cols = ['skey', 'cum_volume', 'cum_amount', "close", "open"]
    index1 = index1[index1['skey'].isin(index['skey'].unique())]
    re = pd.merge(index, index1, on=cols, how='outer')

    # Diagnostics: merged rows, rows with a log match, rows with a database match, input sizes.
    display(re.shape[0])
    display(re[~re['sequenceNo'].isnull()].shape[0])
    display(re[~re['date'].isnull()].shape[0])
    display(index.shape[0])
    display(index1.shape[0])
    
    # If the outer merge produced log-only rows (no 'date'), report the ratio and
    # fall back to a left merge so only database rows remain.
    # NOTE(review): the assert is used as control flow and the bare except catches everything.
    try:
        assert(re.shape[0] == re[~re['date'].isnull()].shape[0])
        print('index data is complete')
    except:
        display('%.2f%%' % (re[~re['date'].isnull()].shape[0]/re.shape[0] * 100))
        re = pd.merge(index, index1, on=cols, how='left')
        print('92 have unique values not shared by database')

    # Resolve duplicated 'num' values: keep a row only when its occurrence index within its
    # 'num' group equals its occurrence index within its 'sequenceNo' group.
    p11 = re[re.duplicated('num', keep=False)]
    p2 = re.drop_duplicates('num', keep=False)
    p11["order1"] = p11.groupby(["num"]).cumcount()
    p11["order2"] = p11.groupby(["sequenceNo"]).cumcount()
    p11 = p11[p11['order1'] == p11['order2']]

    # p12: one row per duplicated 'num' that did not survive the pairing; kept with
    # sequenceNo / clockAtArrival blanked out.
    p12 = re[re.duplicated('num', keep=False)].drop_duplicates('num')
    p12 = pd.merge(p12, p11[['num', 'order1']], on='num', how='left')
    p12 = p12[p12['order1'].isnull()]
    p12['sequenceNo'] = np.nan
    p12['clockAtArrival'] = np.nan

    p11.drop(['order1', 'order2'],axis=1,inplace=True)
    p12.drop(['order1'],axis=1,inplace=True)
    p1 = pd.concat([p11, p12])

    re = pd.concat([p1, p2])
    # After the pairing every 'num' must be unique.
    assert(re[re.duplicated('num', keep=False)].shape[0] == 0)

    # Branch 1: some rows have no sequenceNo, so the anchor is forward/back-filled.
    if re[re['sequenceNo'].isnull()].shape[0] != 0:
        re5 = re.sort_values(by='num')
        # seq1: anchor sequenceNo, forward-filled per skey; the trailing .bfill() is not per group.
        re5['seq1'] = re5.groupby('skey')['sequenceNo'].ffill().bfill()
        # Skeys present in the database but absent from the log get no anchor.
        sl = list(set(index['skey'].unique()) - set(index1['skey'].unique()))
        re5.loc[re5['skey'].isin(sl), 'seq1'] = np.nan
        # count1: position inside the seq1 group; count2: number of distinct positions in it.
        re5['count1'] = re5.groupby(['seq1']).cumcount()
        re5['count2'] = re5.groupby(['seq1'])['count1'].transform('nunique')
        re5['min_seq'] = re5.groupby('skey')['sequenceNo'].transform('min')
        # Offset from the anchor; shifted by (1 - count2) for the group anchored on the skey's smallest sequenceNo.
        re5['count'] = np.where(re5['seq1'] != re5['min_seq'], re5['count1'], re5['count1']+1-re5['count2'])
        re5.drop(["min_seq"],axis=1,inplace=True)
        re5.drop(["count1"],axis=1,inplace=True)
        re5.drop(["count2"],axis=1,inplace=True)
        # dup: occurrence index of each non-null sequenceNo; dup1: distinct 'num' values sharing it.
        re5['dup'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo']).cumcount(), 0)
        re5['dup1'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
        # nan = 1 marks rows without a sequenceNo of their own (missing, or a repeated occurrence).
        re5['nan'] = np.where((re5['sequenceNo'].isnull()) | (re5['dup'] != 0), 1, 0)
        re5.loc[(re5['dup1'] > 1) & (re5['dup'] > 0), 'sequenceNo'] = np.nan
        # Sanity checks: un-anchored skeys are exactly 'sl'; row count equals the database frame.
        assert((len(set(sl) - set(re5[re5['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re5[re5['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
        assert(re5.shape[0] == index.shape[0])

        display('%.0f%%' % (re5[re5['sequenceNo'].isnull()].shape[0]/re5.shape[0] * 100))
    # Branch 2: every row already has a sequenceNo, so it is used directly as the anchor;
    # the remaining steps are the same as in branch 1.
    else:
        re5 = re.sort_values(by='num')
        re5['seq1'] = re5['sequenceNo']
        sl = list(set(index['skey'].unique()) - set(index1['skey'].unique()))
        re5.loc[re5['skey'].isin(sl), 'seq1'] = np.nan
        re5['count1'] = re5.groupby(['seq1']).cumcount()
        re5['count2'] = re5.groupby(['seq1'])['count1'].transform('nunique')
        re5['min_seq'] = re5.groupby('skey')['sequenceNo'].transform('min')
        re5['count'] = np.where(re5['seq1'] != re5['min_seq'], re5['count1'], re5['count1']+1-re5['count2'])
        re5.drop(["min_seq"],axis=1,inplace=True)
        re5.drop(["count1"],axis=1,inplace=True)
        re5.drop(["count2"],axis=1,inplace=True)
        re5['dup'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo']).cumcount(), 0)
        re5['dup1'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
        re5['nan'] = np.where((re5['sequenceNo'].isnull()) | (re5['dup'] != 0), 1, 0)
        re5.loc[(re5['dup1'] > 1) & (re5['dup'] > 0), 'sequenceNo'] = np.nan
        assert((len(set(sl) - set(re5[re5['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re5[re5['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
        assert(re5.shape[0] == index.shape[0])

        display('%.0f%%' % (re5[re5['sequenceNo'].isnull()].shape[0]/re5.shape[0] * 100))
    
    
    print('----------------------------------------------------------------------------------------------------')
    print('final concat')
    # Diagnostic only: for every pair of log frames, show the rows whose sequenceNo
    # appears in both. Nothing is raised or modified when an overlap is found.
    # An explicit `if` replaces assert-inside-try so the check still runs under
    # `python -O` and does not swallow unrelated errors.
    overlap_pairs = [
        (SZ1, SH1), (SZ1, trade1), (SZ1, order1), (SZ1, index1),
        (SH1, index1), (SH1, trade1), (SH1, order1),
        (trade1, order1), (trade1, index1),
        (index1, order1),
    ]
    for left, right in overlap_pairs:
        # Compute the intersection once per pair and reuse it for both displays.
        shared = set(left['sequenceNo']) & set(right['sequenceNo'])
        if shared:
            shared = list(shared)
            display(left[left['sequenceNo'].isin(shared)])
            display(right[right['sequenceNo'].isin(shared)])
    # Drop the temporary references so the `del` statements that follow really
    # release the underlying frames.
    del overlap_pairs, left, right, shared

    # The raw inputs are no longer needed; drop the names so their memory can be reclaimed.
    del SH
    del SH1
    del SZ
    del SZ1
    del trade
    del trade1
    del order
    del order1
    del index
    del index1
    # Label every result frame with its origin so the rows can be separated again after the merge.
    re1['tag'] = 'SH'
    re2['tag'] = 'SZ'
    re3['tag'] = 'trade'
    re4['tag'] = 'order'
    re5['tag'] = 'index'
    
    # Reduce each frame to a common layout; snapshot/index frames are keyed by 'num',
    # trade/order frames by 'ApplSeqNum'.
    re1 = re1[['skey', 'date', 'num', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    re2 = re2[['skey', 'date', 'num', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    re3 = re3[['skey', 'date', 'ApplSeqNum', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    re4 = re4[['skey', 'date', 'ApplSeqNum', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    re5 = re5[['skey', 'date', 'num', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    # seq2 records each row's position inside its own frame after sorting; it is the
    # secondary sort key when the frames are combined.
    re1 = re1.sort_values(by='num').reset_index(drop=True)
    re1['seq2'] = re1.index
    re2 = re2.sort_values(by='num').reset_index(drop=True)
    re2['seq2'] = re2.index
    re3 = re3.sort_values(by=['skey', 'ApplSeqNum']).reset_index(drop=True)
    re3['seq2'] = re3.index
    re4 = re4.sort_values(by=['skey', 'ApplSeqNum']).reset_index(drop=True)
    re4['seq2'] = re4.index
    re5 = re5.sort_values(by='num').reset_index(drop=True)
    re5['seq2'] = re5.index

    # Split every frame into rows without an anchor (fr1) and rows with one (fr2),
    # releasing each source frame as soon as it has been split.
    fr1 = []
    fr2 = []
    fr1 += [re1[re1['seq1'].isnull()]]
    fr2 += [re1[~re1['seq1'].isnull()]]
    del re1
    display('1. here~')
    fr1 += [re2[re2['seq1'].isnull()]]
    fr2 += [re2[~re2['seq1'].isnull()]]
    del re2
    display('2. here~')
    fr1 += [re3[re3['seq1'].isnull()]]
    fr2 += [re3[~re3['seq1'].isnull()]]
    del re3
    display('3. here~')
    fr1 += [re4[re4['seq1'].isnull()]]
    fr2 += [re4[~re4['seq1'].isnull()]]
    del re4
    display('4. here~')
    fr1 += [re5[re5['seq1'].isnull()]]
    fr2 += [re5[~re5['seq1'].isnull()]]
    del re5
    display('5. here~')
    fr1 = pd.concat(fr1).reset_index(drop=True)
    fr2 = pd.concat(fr2).reset_index(drop=True)
    
    # Order the anchored rows by anchor, then by their position in the source frame (timed).
    startTm = datetime.datetime.now()
    fr2 = fr2.sort_values(by=['seq1', 'seq2'])
    print(datetime.datetime.now() - startTm)

#     fr2.loc[(fr2['nan']==0) & (fr2['dup1']==1), 'count'] = 0
    # sum_nan is the running number of rows flagged nan so far; adding it shifts each
    # known sequenceNo by the number of flagged rows that sort before or at it.
    fr2['sum_nan'] = fr2['nan'].cumsum()
    fr2['sequenceNo'] = fr2['sequenceNo'] + fr2['sum_nan']
    startTm = datetime.datetime.now()
    # Fill the missing values from the shifted value of the same anchor group
    # (the trailing .bfill() is applied to the whole Series, not per group).
    fr2['sequenceNo'] = fr2.groupby('seq1')['sequenceNo'].ffill().bfill()
    print(datetime.datetime.now() - startTm)
    # Apply the per-row offset computed in the matching steps.
    fr2['sequenceNo'] = fr2['sequenceNo'] + fr2['count']
    fr21 = fr2[~fr2['sequenceNo'].isnull()]
    fr22 = fr2[fr2['sequenceNo'].isnull()]
    display(fr22.shape[0])
    display(fr21.shape[0])
    display(fr2.shape[0])
    # Rows that still have no sequenceNo are numbered consecutively after the current maximum.
    if fr22.shape[0] != 0:
        fr22['sequenceNo'] = range(int(fr21['sequenceNo'].max()) + 1, int(fr21['sequenceNo'].max()) + 1 + fr22.shape[0])
        fr2 = pd.concat([fr21, fr22])
    del fr21
    del fr22
    display(fr2.shape[0])
    # sequenceNo must now be unique. If it is not, a repair is attempted.
    # NOTE(review): the repair only looks at the first duplicated value (te_st['sequenceNo'].iloc[0]);
    # the assert is used as control flow and the bare except catches everything.
    try:
        assert(fr2[fr2.duplicated('sequenceNo', keep=False)].shape[0] == 0)
    except:
        te_st = fr2[fr2.duplicated('sequenceNo', keep=False)]
        display(te_st)
        # caa: largest clockAtArrival among the duplicated rows; seq: the first duplicated value;
        # m_in: the smallest sequenceNo above seq.
        caa = te_st['clockAtArrival'].max()
        seq = te_st['sequenceNo'].iloc[0]
        m_in = fr2[fr2['sequenceNo'] > seq]['sequenceNo'].min()
        # Shift everything above seq (by 1 if the next value is more than one above seq, else by 2),
        # then move the duplicate with clockAtArrival == caa to seq + 1.
        if m_in > seq + 1:
            fr2.loc[fr2['sequenceNo'] > seq, 'sequenceNo'] = fr2[fr2['sequenceNo'] > seq]['sequenceNo'] + 1
            fr2.loc[(fr2['sequenceNo'] == seq) & (fr2['clockAtArrival'] == caa), 'sequenceNo'] = seq + 1
        else:
            fr2.loc[fr2['sequenceNo'] > seq, 'sequenceNo'] = fr2[fr2['sequenceNo'] > seq]['sequenceNo'] + 2
            fr2.loc[(fr2['sequenceNo'] == seq) & (fr2['clockAtArrival'] == caa), 'sequenceNo'] = seq + 1
        assert(fr2[fr2.duplicated('sequenceNo', keep=False)].shape[0] == 0)
        
    
    # Rows without any anchor are numbered consecutively after the largest assigned sequenceNo.
    fr1['sequenceNo'] = range(int(fr2['sequenceNo'].max()) + 1, int(fr2['sequenceNo'].max()) + 1 + fr1.shape[0])
    fr2 = pd.concat([fr1, fr2])
    del fr1
    assert(fr2[fr2.duplicated('sequenceNo', keep=False)].shape[0] == 0)
    
    import pickle
    os.mkdir('/mnt/e/result/' + startDate)
    SH = fr2[fr2['tag'] == 'SH'][["skey", "date", "num", 'sequenceNo', "clockAtArrival"]]
    SH.to_pickle('/mnt/e/result/' + startDate + '/SH.pkl')
    del SH

    SZ = fr2[fr2['tag'] == 'SZ'][["skey", "date", "num", 'sequenceNo', "clockAtArrival"]]
    SZ.to_pickle('/mnt/e/result/' + startDate + '/SZ.pkl')
    del SZ
    
    trade = fr2[fr2['tag'] == 'trade'][["skey", "date", "ApplSeqNum", 'sequenceNo', "clockAtArrival"]]
    trade.to_pickle('/mnt/e/result/' + startDate + '/trade.pkl')
    del trade
    
    order = fr2[fr2['tag'] == 'order'][["skey", "date", "ApplSeqNum", 'sequenceNo', "clockAtArrival"]]
    order.to_pickle('/mnt/e/result/' + startDate + '/order.pkl')
    del order
    
    index = fr2[fr2['tag'] == 'index'][["skey", "date", "num", 'sequenceNo', "clockAtArrival"]]
    index.to_pickle('/mnt/e/result/' + startDate + '/index.pkl')
    del index
    del fr2
    
    print(str(i) + 'finished')


--------------------------------------------------------------------------------------------
20200409
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
915956,20200409.0,1600261,150017000000,6894152,25530151.38,3.71,3.7,3.69,3.68,3.67,3.66,37700,224400,365000,88000,61800,3.71,3.72,3.73,3.74,3.75,402400,215200,220450,297000,190300,3.69,16002610000.0,1586416000000000.0,118347896.0
915957,20200409.0,1600261,150017000000,6894152,25530151.38,3.71,3.7,3.69,3.68,3.67,3.66,37700,224400,365000,88000,61800,3.71,3.72,3.73,3.74,3.75,402400,215200,220450,297000,190300,3.69,16002610000.0,1586416000000000.0,118353686.0
915958,20200409.0,1600261,150017000000,6894152,25530151.38,3.71,3.7,3.69,3.68,3.67,3.66,37700,224400,365000,88000,61800,3.71,3.72,3.73,3.74,3.75,402400,215200,220450,297000,190300,3.69,16002610000.0,1586416000000000.0,118347896.0
915959,20200409.0,1600261,150017000000,6894152,25530151.38,3.71,3.7,3.69,3.68,3.67,3.66,37700,224400,365000,88000,61800,3.71,3.72,3.73,3.74,3.75,402400,215200,220450,297000,190300,3.69,16002610000.0,1586416000000000.0,118353686.0
1184557,20200409.0,1600339,150017000000,20183778,56288160.84,2.79,2.79,2.78,2.77,2.76,2.75,168200,233000,706100,388400,394900,2.8,2.81,2.82,2.83,2.84,517102,685020,1450000,608600,465700,2.81,16003390000.0,1586416000000000.0,118347800.0
1184558,20200409.0,1600339,150017000000,20183778,56288160.84,2.79,2.79,2.78,2.77,2.76,2.75,168200,233000,706100,388400,394900,2.8,2.81,2.82,2.83,2.84,517102,685020,1450000,608600,465700,2.81,16003390000.0,1586416000000000.0,118353687.0
1184559,20200409.0,1600339,150017000000,20183778,56288160.84,2.79,2.79,2.78,2.77,2.76,2.75,168200,233000,706100,388400,394900,2.8,2.81,2.82,2.83,2.84,517102,685020,1450000,608600,465700,2.81,16003390000.0,1586416000000000.0,118347800.0
1184560,20200409.0,1600339,150017000000,20183778,56288160.84,2.79,2.79,2.78,2.77,2.76,2.75,168200,233000,706100,388400,394900,2.8,2.81,2.82,2.83,2.84,517102,685020,1450000,608600,465700,2.81,16003390000.0,1586416000000000.0,118353687.0
1265543,20200409.0,1600363,150017000000,5514377,78881389.1,14.27,14.27,14.26,14.24,14.23,14.22,15500,38700,300,45900,253400,14.28,14.29,14.3,14.31,14.32,9800,18100,22400,13400,4600,14.25,16003630000.0,1586416000000000.0,118347901.0
1265544,20200409.0,1600363,150017000000,5514377,78881389.1,14.27,14.27,14.26,14.24,14.23,14.22,15500,38700,300,45900,253400,14.28,14.29,14.3,14.31,14.32,9800,18100,22400,13400,4600,14.25,16003630000.0,1586416000000000.0,118353688.0


'1.76%'

-------------------------------------------------------------------------------------------
SZ lv2


8490931

8317376

8490931

8490931

8317376

SZ lv2 is complete


'2.04%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.29%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.06%'

-----------------------------------------------------------------------------------------------------
SH index data


739995

739995

739993

14342

16236

'100.00%'

92 have unique values not shared by database


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'6%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:06:00.772709
0:01:23.555088


0

104325225

104325225

104325225

20200409finished
--------------------------------------------------------------------------------------------
20200410
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
349051,20200410.0,1600098,150005000000,2492205,14481090.0,5.78,5.77,5.76,5.75,5.74,5.73,48300,19400,20700,2700,28700,5.78,5.79,5.8,5.81,5.82,17655,26300,26800,6192,15326,5.88,16000980000.0,1586502000000000.0,126706044.0
349052,20200410.0,1600098,150005000000,2492205,14481090.0,5.78,5.77,5.76,5.75,5.74,5.73,48300,19400,20700,2700,28700,5.78,5.79,5.8,5.81,5.82,17655,26300,26800,6192,15326,5.88,16000980000.0,1586502000000000.0,126712125.0
349053,20200410.0,1600098,150005000000,2492205,14481090.0,5.78,5.77,5.76,5.75,5.74,5.73,48300,19400,20700,2700,28700,5.78,5.79,5.8,5.81,5.82,17655,26300,26800,6192,15326,5.88,16000980000.0,1586502000000000.0,126706044.0
349054,20200410.0,1600098,150005000000,2492205,14481090.0,5.78,5.77,5.76,5.75,5.74,5.73,48300,19400,20700,2700,28700,5.78,5.79,5.8,5.81,5.82,17655,26300,26800,6192,15326,5.88,16000980000.0,1586502000000000.0,126712125.0
4341739,20200410.0,1603037,150005000000,1461130,23227430.0,15.78,15.76,15.75,15.74,15.72,15.71,3100,1500,17700,3200,4500,15.78,15.79,15.8,15.81,15.83,400,1800,10100,5800,4200,16.12,16030370000.0,1586502000000000.0,126706052.0
4341740,20200410.0,1603037,150005000000,1461130,23227430.0,15.78,15.76,15.75,15.74,15.72,15.71,3100,1500,17700,3200,4500,15.78,15.79,15.8,15.81,15.83,400,1800,10100,5800,4200,16.12,16030370000.0,1586502000000000.0,126712127.0
4341741,20200410.0,1603037,150005000000,1461130,23227430.0,15.78,15.76,15.75,15.74,15.72,15.71,3100,1500,17700,3200,4500,15.78,15.79,15.8,15.81,15.83,400,1800,10100,5800,4200,16.12,16030370000.0,1586502000000000.0,126706052.0
4341742,20200410.0,1603037,150005000000,1461130,23227430.0,15.78,15.76,15.75,15.74,15.72,15.71,3100,1500,17700,3200,4500,15.78,15.79,15.8,15.81,15.83,400,1800,10100,5800,4200,16.12,16030370000.0,1586502000000000.0,126712127.0
5086021,20200410.0,1603421,150005000000,3825036,58528850.0,15.14,15.14,15.13,15.12,15.11,15.1,900,25500,11500,14700,8800,15.15,15.16,15.17,15.18,15.19,9500,3000,46600,34000,3900,15.7,16034210000.0,1586502000000000.0,126706288.0
5086022,20200410.0,1603421,150005000000,3825036,58528850.0,15.14,15.14,15.13,15.12,15.11,15.1,900,25500,11500,14700,8800,15.15,15.16,15.17,15.18,15.19,9500,3000,46600,34000,3900,15.7,16034210000.0,1586502000000000.0,126712128.0


'1.80%'

-------------------------------------------------------------------------------------------
SZ lv2


8753669

8575491

8753669

8753669

8575491

SZ lv2 is complete


'2.04%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.16%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.07%'

-----------------------------------------------------------------------------------------------------
SH index data


749061

749061

749057

14333

17451

'100.00%'

92 have unique values not shared by database


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'2%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:45.410072
0:01:17.317246


0

112283446

112283446

112283446

20200410finished
--------------------------------------------------------------------------------------------
20200413
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
27715,20200413.0,1600009,150015000000,5592109,361352600.0,64.9,64.89,64.88,64.87,64.86,64.85,5700,100,4000,1500,23300,64.9,64.97,64.98,64.99,65.0,12600,400,500,400,3000,64.95,16000090000.0,1586761000000000.0,104127322.0
27716,20200413.0,1600009,150015000000,5592109,361352600.0,64.9,64.89,64.88,64.87,64.86,64.85,5700,100,4000,1500,23300,64.9,64.97,64.98,64.99,65.0,12600,400,500,400,3000,64.95,16000090000.0,1586761000000000.0,104133261.0
27717,20200413.0,1600009,150015000000,5592109,361352600.0,64.9,64.89,64.88,64.87,64.86,64.85,5700,100,4000,1500,23300,64.9,64.97,64.98,64.99,65.0,12600,400,500,400,3000,64.95,16000090000.0,1586761000000000.0,104127322.0
27718,20200413.0,1600009,150015000000,5592109,361352600.0,64.9,64.89,64.88,64.87,64.86,64.85,5700,100,4000,1500,23300,64.9,64.97,64.98,64.99,65.0,12600,400,500,400,3000,64.95,16000090000.0,1586761000000000.0,104133261.0
877294,20200413.0,1600261,150015000000,4596151,16539490.0,3.59,3.59,3.58,3.57,3.56,3.55,262500,353499,144600,118700,114800,3.6,3.61,3.62,3.63,3.64,105200,139400,81200,79800,79000,3.59,16002610000.0,1586761000000000.0,104127286.0
877295,20200413.0,1600261,150015000000,4596151,16539490.0,3.59,3.59,3.58,3.57,3.56,3.55,262500,353499,144600,118700,114800,3.6,3.61,3.62,3.63,3.64,105200,139400,81200,79800,79000,3.59,16002610000.0,1586761000000000.0,104133262.0
877296,20200413.0,1600261,150015000000,4596151,16539490.0,3.59,3.59,3.58,3.57,3.56,3.55,262500,353499,144600,118700,114800,3.6,3.61,3.62,3.63,3.64,105200,139400,81200,79800,79000,3.59,16002610000.0,1586761000000000.0,104127286.0
877297,20200413.0,1600261,150015000000,4596151,16539490.0,3.59,3.59,3.58,3.57,3.56,3.55,262500,353499,144600,118700,114800,3.6,3.61,3.62,3.63,3.64,105200,139400,81200,79800,79000,3.59,16002610000.0,1586761000000000.0,104133262.0
1225727,20200413.0,1600368,150015000000,4955550,18749040.0,3.78,3.77,3.76,3.75,3.74,3.73,107000,120800,280500,74600,48300,3.78,3.79,3.8,3.81,3.82,79975,35800,50937,71700,125080,3.8,16003680000.0,1586761000000000.0,104127291.0
1225728,20200413.0,1600368,150015000000,4955550,18749040.0,3.78,3.77,3.76,3.75,3.74,3.73,107000,120800,280500,74600,48300,3.78,3.79,3.8,3.81,3.82,79975,35800,50937,71700,125080,3.8,16003680000.0,1586761000000000.0,104133263.0


'1.97%'

-------------------------------------------------------------------------------------------
SZ lv2


8046860

7854926

8046860

8046860

7854926

SZ lv2 is complete


'2.39%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.13%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.05%'

-----------------------------------------------------------------------------------------------------
SH index data


924885

924885

924885

15208

17352

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'3%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:02.651742
0:00:49.564572


0

90888642

90888642

90888642

20200413finished
--------------------------------------------------------------------------------------------
20200414
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.79%'

-------------------------------------------------------------------------------------------
SZ lv2


8068637

7982114

8068637

8068637

7982114

SZ lv2 is complete


'1.07%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.14%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.12%'

-----------------------------------------------------------------------------------------------------
SH index data


876389

876389

876389

14708

16645

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'5%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:06.624311
0:00:52.510501


0

97139734

97139734

97139734

20200414finished
--------------------------------------------------------------------------------------------
20200415
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.78%'

-------------------------------------------------------------------------------------------
SZ lv2


8307900

8213097

8307900

8307900

8213097

SZ lv2 is complete


'1.14%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.21%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.22%'

-----------------------------------------------------------------------------------------------------
SH index data


944526

944526

944526

15188

16855

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'5%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:16.382316
0:01:15.540409


0

105995241

105995241

105995241

20200415finished
--------------------------------------------------------------------------------------------
20200416
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
3048341,20200416.0,1600958,150006000000,24291131,230542500.0,9.49,9.48,9.47,9.46,9.45,9.44,11600,116723,136700,118500,143600,9.49,9.5,9.51,9.52,9.53,172600,529100,55500,37100,117100,9.41,16009580000.0,1587020000000000.0,116247614.0
3048342,20200416.0,1600958,150006000000,24291131,230542500.0,9.49,9.48,9.47,9.46,9.45,9.44,11600,116723,136700,118500,143600,9.49,9.5,9.51,9.52,9.53,172600,529100,55500,37100,117100,9.41,16009580000.0,1587020000000000.0,116253620.0
3048343,20200416.0,1600958,150006000000,24291131,230542500.0,9.49,9.48,9.47,9.46,9.45,9.44,11600,116723,136700,118500,143600,9.49,9.5,9.51,9.52,9.53,172600,529100,55500,37100,117100,9.41,16009580000.0,1587020000000000.0,116247614.0
3048344,20200416.0,1600958,150006000000,24291131,230542500.0,9.49,9.48,9.47,9.46,9.45,9.44,11600,116723,136700,118500,143600,9.49,9.5,9.51,9.52,9.53,172600,529100,55500,37100,117100,9.41,16009580000.0,1587020000000000.0,116253620.0
4696214,20200416.0,1603322,150006000000,2404612,44243630.0,18.53,18.53,18.51,18.5,18.49,18.48,24604,1400,4800,6400,7000,18.55,18.56,18.57,18.59,18.6,700,3900,3500,2000,13200,18.57,16033220000.0,1587020000000000.0,116247532.0
4696215,20200416.0,1603322,150006000000,2404612,44243630.0,18.53,18.53,18.51,18.5,18.49,18.48,24604,1400,4800,6400,7000,18.55,18.56,18.57,18.59,18.6,700,3900,3500,2000,13200,18.57,16033220000.0,1587020000000000.0,116253622.0
4696216,20200416.0,1603322,150006000000,2404612,44243630.0,18.53,18.53,18.51,18.5,18.49,18.48,24604,1400,4800,6400,7000,18.55,18.56,18.57,18.59,18.6,700,3900,3500,2000,13200,18.57,16033220000.0,1587020000000000.0,116247532.0
4696217,20200416.0,1603322,150006000000,2404612,44243630.0,18.53,18.53,18.51,18.5,18.49,18.48,24604,1400,4800,6400,7000,18.55,18.56,18.57,18.59,18.6,700,3900,3500,2000,13200,18.57,16033220000.0,1587020000000000.0,116253622.0
4941244,20200416.0,1603505,150006000000,4860468,107171500.0,22.17,22.16,22.15,22.14,22.13,22.12,6700,13000,15200,11000,18500,22.17,22.18,22.2,22.21,22.22,2024,1000,3600,3900,130300,21.48,16035050000.0,1587020000000000.0,116247664.0
4941245,20200416.0,1603505,150006000000,4860468,107171500.0,22.17,22.16,22.15,22.14,22.13,22.12,6700,13000,15200,11000,18500,22.17,22.18,22.2,22.21,22.22,2024,1000,3600,3900,130300,21.48,16035050000.0,1587020000000000.0,116253623.0


'1.66%'

-------------------------------------------------------------------------------------------
SZ lv2


8248205

8166252

8248205

8248205

8166252

SZ lv2 is complete


'0.99%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.33%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.12%'

-----------------------------------------------------------------------------------------------------
SH index data


991994

991994

991994

14760

17216

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'4%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:38.564865
0:00:56.570018


0

102391778

102391778

102391778

20200416finished
--------------------------------------------------------------------------------------------
20200417
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.60%'

-------------------------------------------------------------------------------------------
SZ lv2


8342871

8252640

8342871

8342871

8252640

SZ lv2 is complete


'1.08%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.41%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.38%'

-----------------------------------------------------------------------------------------------------
SH index data


939237

939237

939237

14660

16445

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'7%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:40.415830
0:01:17.831287


0

112390410

112390410

112390410

20200417finished
--------------------------------------------------------------------------------------------
20200420
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
162014,20200420.0,1600053,124245000000,1588027,39902645.31,25.20,25.17,25.16,25.15,25.14,25.11,600,4300,5100,1600,2500,25.20,25.21,25.22,25.23,25.24,400,100,900,3600,2300,25.05,1.600053e+10,1.587358e+15,65359575.0
162015,20200420.0,1600053,124245000000,1588027,39902645.31,25.20,25.17,25.16,25.15,25.14,25.11,600,4300,5100,1600,2500,25.20,25.21,25.22,25.23,25.24,400,100,900,3600,2300,25.05,1.600053e+10,1.587358e+15,65359638.0
162016,20200420.0,1600053,124245000000,1588027,39902645.31,25.20,25.17,25.16,25.15,25.14,25.11,600,4300,5100,1600,2500,25.20,25.21,25.22,25.23,25.24,400,100,900,3600,2300,25.05,1.600053e+10,1.587358e+15,65359575.0
162017,20200420.0,1600053,124245000000,1588027,39902645.31,25.20,25.17,25.16,25.15,25.14,25.11,600,4300,5100,1600,2500,25.20,25.21,25.22,25.23,25.24,400,100,900,3600,2300,25.05,1.600053e+10,1.587358e+15,65359638.0
198589,20200420.0,1600061,124243000000,5412200,66094298.00,12.20,12.19,12.18,12.17,12.16,12.15,41300,80800,65500,65600,131900,12.20,12.21,12.22,12.23,12.24,24100,39200,5600,20200,38100,12.24,1.600061e+10,1.587358e+15,65359102.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6105966,20200420.0,1688188,124243000000,109685,14386699.75,131.16,131.22,131.17,131.16,131.15,131.13,400,1000,2247,500,1085,131.64,131.65,131.66,131.67,131.68,223,4985,759,850,200,131.72,1.688188e+10,1.587358e+15,65359235.0
6116951,20200420.0,1688199,124245000000,839157,47121644.14,56.16,56.03,56.02,56.01,56.00,55.99,500,200,3950,5116,1700,56.16,56.17,56.20,56.22,56.25,2930,687,4037,1400,5100,57.09,1.688199e+10,1.587358e+15,65359585.0
6116952,20200420.0,1688199,124245000000,839157,47121644.14,56.16,56.03,56.02,56.01,56.00,55.99,500,200,3950,5116,1700,56.16,56.17,56.20,56.22,56.25,2930,687,4037,1400,5100,57.09,1.688199e+10,1.587358e+15,65359686.0
6116953,20200420.0,1688199,124245000000,839157,47121644.14,56.16,56.03,56.02,56.01,56.00,55.99,500,200,3950,5116,1700,56.16,56.17,56.20,56.22,56.25,2930,687,4037,1400,5100,57.09,1.688199e+10,1.587358e+15,65359585.0


'2.05%'

-------------------------------------------------------------------------------------------
SZ lv2


8107705

7986837

8107705

8107705

7986837

SZ lv2 is complete


'1.49%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'1.41%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.90%'

-----------------------------------------------------------------------------------------------------
SH index data


554697

549211

549352

14521

15792

'99.04%'

92 have unique values not shared by database


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'41%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:17.836801
0:00:59.093786


0

99763826

99763826

99763826

20200420finished
--------------------------------------------------------------------------------------------
20200421
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
515071,20200421.0,1600148,150005000000,14395052,2.045936e+08,14.24,14.24,14.23,14.22,14.21,14.20,5631,28800,49700,26100,11600,14.25,14.26,14.27,14.28,14.29,79500,900,2200,6100,100,13.34,1.600148e+10,1.587452e+15,120808540.0
515072,20200421.0,1600148,150005000000,14395052,2.045936e+08,14.24,14.24,14.23,14.22,14.21,14.20,5631,28800,49700,26100,11600,14.25,14.26,14.27,14.28,14.29,79500,900,2200,6100,100,13.34,1.600148e+10,1.587452e+15,120814700.0
515073,20200421.0,1600148,150005000000,14395052,2.045936e+08,14.24,14.24,14.23,14.22,14.21,14.20,5631,28800,49700,26100,11600,14.25,14.26,14.27,14.28,14.29,79500,900,2200,6100,100,13.34,1.600148e+10,1.587452e+15,120808540.0
515074,20200421.0,1600148,150005000000,14395052,2.045936e+08,14.24,14.24,14.23,14.22,14.21,14.20,5631,28800,49700,26100,11600,14.25,14.26,14.27,14.28,14.29,79500,900,2200,6100,100,13.34,1.600148e+10,1.587452e+15,120814700.0
559846,20200421.0,1600160,150005000000,17371239,1.194033e+08,6.94,6.94,6.93,6.92,6.91,6.90,648363,57700,59600,89800,128700,6.95,6.96,6.97,6.98,6.99,233800,194500,86200,111500,136900,6.89,1.600160e+10,1.587452e+15,120808566.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6156902,20200421.0,1688039,150005000000,479898,4.106245e+07,84.80,84.80,84.79,84.77,84.76,84.72,3840,1513,400,200,200,85.00,85.10,85.12,85.18,85.90,200,200,275,600,400,86.05,1.688039e+10,1.587452e+15,120814916.0
6167657,20200421.0,1688066,150005000000,2311624,1.092044e+08,47.04,47.03,46.99,46.98,46.97,46.87,1200,2000,2900,1534,300,47.04,47.05,47.06,47.08,47.10,7459,4405,300,2800,4412,47.60,1.688066e+10,1.587452e+15,120808187.0
6167658,20200421.0,1688066,150005000000,2311624,1.092044e+08,47.04,47.03,46.99,46.98,46.97,46.87,1200,2000,2900,1534,300,47.04,47.05,47.06,47.08,47.10,7459,4405,300,2800,4412,47.60,1.688066e+10,1.587452e+15,120814917.0
6167659,20200421.0,1688066,150005000000,2311624,1.092044e+08,47.04,47.03,46.99,46.98,46.97,46.87,1200,2000,2900,1534,300,47.04,47.05,47.06,47.08,47.10,7459,4405,300,2800,4412,47.60,1.688066e+10,1.587452e+15,120808187.0


'1.72%'

-------------------------------------------------------------------------------------------
SZ lv2


8342196

8233748

8342196

8342196

8233748

SZ lv2 is complete


'1.30%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.61%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.48%'

-----------------------------------------------------------------------------------------------------
SH index data


873458

873458

873455

14649

16638

'100.00%'

92 have unique values not shared by database


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'1%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:19.144252
0:01:15.677213


0

106699332

106699332

106699332

20200421finished
--------------------------------------------------------------------------------------------
20200422
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.24%'

-------------------------------------------------------------------------------------------
SZ lv2


8261171

8181702

8261171

8261171

8181702

SZ lv2 is complete


'0.96%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.03%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.02%'

-----------------------------------------------------------------------------------------------------
SH index data


986047

986047

986047

14772

16815

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'4%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:24.351487
0:01:01.250377


0

101568132

101568132

101568132

20200422finished
--------------------------------------------------------------------------------------------
20200423
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
1284873,20200423.0,1600375,131826000000,3935122,17810578.64,4.55,4.55,4.54,4.53,4.52,4.51,19700,32000,43200,15200,62500,4.56,4.57,4.58,4.59,4.6,64174,116100,50100,89300,146900,4.48,16003750000.0,1587619000000000.0,79136562.0
1284874,20200423.0,1600375,131826000000,3935122,17810578.64,4.55,4.55,4.54,4.53,4.52,4.51,19700,32000,43200,15200,62500,4.56,4.57,4.58,4.59,4.6,64174,116100,50100,89300,146900,4.48,16003750000.0,1587619000000000.0,79136563.0


'1.24%'

-------------------------------------------------------------------------------------------
SZ lv2


8292689

8219866

8292689

8292689

8219866

SZ lv2 is complete


'0.88%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.36%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.01%'

-----------------------------------------------------------------------------------------------------
SH index data


897160

897160

897160

14656

17528

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'2%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:14.929705
0:01:16.200175


0

108037073

108037073

108037073

20200423finished
--------------------------------------------------------------------------------------------
20200424
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.25%'

-------------------------------------------------------------------------------------------
SZ lv2


8398312

8304912

8398312

8398312

8304912

SZ lv2 is complete


'1.11%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.16%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.05%'

-----------------------------------------------------------------------------------------------------
SH index data


827108

827108

827108

14900

16111

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'8%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:46.931338
0:01:21.339476


0

107023313

107023313

107023313

20200424finished
--------------------------------------------------------------------------------------------
20200427
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.40%'

-------------------------------------------------------------------------------------------
SZ lv2


8102581

7928528

8102581

8102581

7928528

SZ lv2 is complete


'2.15%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.27%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.02%'

-----------------------------------------------------------------------------------------------------
SH index data


743294

743294

743294

14280

17060

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'4%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:25.776710
0:00:56.233112


0

95291410

95291410

95291410

20200427finished
--------------------------------------------------------------------------------------------
20200428
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
4027766,20200428.0,1601949,150006000000,5040231,2.622190e+07,5.21,5.21,5.20,5.19,5.18,5.17,2799,42800,41300,69400,35900,5.22,5.23,5.24,5.25,5.26,13200,19732,18400,26200,15500,5.31,1.601949e+10,1.588057e+15,125395309.0
4027767,20200428.0,1601949,150006000000,5040231,2.622190e+07,5.21,5.21,5.20,5.19,5.18,5.17,2799,42800,41300,69400,35900,5.22,5.23,5.24,5.25,5.26,13200,19732,18400,26200,15500,5.31,1.601949e+10,1.588057e+15,125401568.0
4027768,20200428.0,1601949,150006000000,5040231,2.622190e+07,5.21,5.21,5.20,5.19,5.18,5.17,2799,42800,41300,69400,35900,5.22,5.23,5.24,5.25,5.26,13200,19732,18400,26200,15500,5.31,1.601949e+10,1.588057e+15,125395309.0
4027769,20200428.0,1601949,150006000000,5040231,2.622190e+07,5.21,5.21,5.20,5.19,5.18,5.17,2799,42800,41300,69400,35900,5.22,5.23,5.24,5.25,5.26,13200,19732,18400,26200,15500,5.31,1.601949e+10,1.588057e+15,125401568.0
4155851,20200428.0,1603013,150006000000,7290284,1.110309e+08,14.89,14.89,14.88,14.85,14.83,14.82,43000,47200,200,2900,2700,14.90,14.93,14.94,14.95,15.00,1000,2100,3700,20400,6800,16.55,1.603013e+10,1.588057e+15,125395310.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6399891,20200428.0,1688310,150006000000,1327766,3.683320e+07,28.00,28.00,27.99,27.85,27.84,27.83,64,2000,300,500,1758,28.01,28.03,28.05,28.06,28.10,200,500,4800,5394,1600,29.16,1.688310e+10,1.588057e+15,125401601.0
6408223,20200428.0,1688321,150006000000,1974746,9.560493e+07,48.49,48.49,48.41,48.39,48.36,48.35,2451,200,1800,1000,500,48.50,48.51,48.52,48.53,48.58,64901,2500,1670,2300,900,47.66,1.688321e+10,1.588057e+15,125395340.0
6408224,20200428.0,1688321,150006000000,1974746,9.560493e+07,48.49,48.49,48.41,48.39,48.36,48.35,2451,200,1800,1000,500,48.50,48.51,48.52,48.53,48.58,64901,2500,1670,2300,900,47.66,1.688321e+10,1.588057e+15,125401592.0
6408225,20200428.0,1688321,150006000000,1974746,9.560493e+07,48.49,48.49,48.41,48.39,48.36,48.35,2451,200,1800,1000,500,48.50,48.51,48.52,48.53,48.58,64901,2500,1670,2300,900,47.66,1.688321e+10,1.588057e+15,125395340.0


'1.36%'

-------------------------------------------------------------------------------------------
SZ lv2


8331463

8253089

8331463

8331463

8253089

SZ lv2 is complete


'0.94%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.30%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.29%'

-----------------------------------------------------------------------------------------------------
SH index data


928139

928139

928139

14652

17430

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'1%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:51.544321
0:01:23.085528


0

111186082

111186082

111186082

20200428finished
--------------------------------------------------------------------------------------------
20200429
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.77%'

-------------------------------------------------------------------------------------------
SZ lv2


7896563

7767853

7896563

7896563

7767853

SZ lv2 is complete


'1.63%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.38%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.25%'

-----------------------------------------------------------------------------------------------------
SH index data


781623

781623

781617

14755

16218

'100.00%'

92 have unique values not shared by database


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'4%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:10.736028
0:00:52.639266


0

91594174

91594174

91594174

20200429finished
--------------------------------------------------------------------------------------------
20200430
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
74569,20200430.0,1600021,150004000000,3291143,24187650.0,7.36,7.36,7.35,7.34,7.33,7.32,26236,106100,15900,14500,7000,7.37,7.38,7.39,7.4,7.41,75350,202800,300400,116200,24500,7.25,16000210000.0,1588230000000000.0,117350193.0
74570,20200430.0,1600021,150004000000,3291143,24187650.0,7.36,7.36,7.35,7.34,7.33,7.32,26236,106100,15900,14500,7000,7.37,7.38,7.39,7.4,7.41,75350,202800,300400,116200,24500,7.25,16000210000.0,1588230000000000.0,117356700.0
74571,20200430.0,1600021,150004000000,3291143,24187650.0,7.36,7.36,7.35,7.34,7.33,7.32,26236,106100,15900,14500,7000,7.37,7.38,7.39,7.4,7.41,75350,202800,300400,116200,24500,7.25,16000210000.0,1588230000000000.0,117350193.0
74572,20200430.0,1600021,150004000000,3291143,24187650.0,7.36,7.36,7.35,7.34,7.33,7.32,26236,106100,15900,14500,7000,7.37,7.38,7.39,7.4,7.41,75350,202800,300400,116200,24500,7.25,16000210000.0,1588230000000000.0,117356700.0
986043,20200430.0,1600293,150004000000,8382761,21014780.0,2.54,2.53,2.52,2.51,2.5,2.49,108050,168500,210300,261100,146200,2.54,2.55,2.56,2.57,2.58,22359,615837,123000,98300,131250,2.39,16002930000.0,1588230000000000.0,117349815.0
986044,20200430.0,1600293,150004000000,8382761,21014780.0,2.54,2.53,2.52,2.51,2.5,2.49,108050,168500,210300,261100,146200,2.54,2.55,2.56,2.57,2.58,22359,615837,123000,98300,131250,2.39,16002930000.0,1588230000000000.0,117356701.0
986045,20200430.0,1600293,150004000000,8382761,21014780.0,2.54,2.53,2.52,2.51,2.5,2.49,108050,168500,210300,261100,146200,2.54,2.55,2.56,2.57,2.58,22359,615837,123000,98300,131250,2.39,16002930000.0,1588230000000000.0,117349815.0
986046,20200430.0,1600293,150004000000,8382761,21014780.0,2.54,2.53,2.52,2.51,2.5,2.49,108050,168500,210300,261100,146200,2.54,2.55,2.56,2.57,2.58,22359,615837,123000,98300,131250,2.39,16002930000.0,1588230000000000.0,117356701.0
2570637,20200430.0,1600777,150004000000,109754226,197977200.0,1.8,1.79,1.78,1.77,1.76,1.75,3173600,3339095,4199200,826300,1458300,1.8,1.81,1.82,1.83,1.84,1716225,2429583,4020569,3484740,1848920,1.79,16007780000.0,1588230000000000.0,117350183.0
2570638,20200430.0,1600777,150004000000,109754226,197977200.0,1.8,1.79,1.78,1.77,1.76,1.75,3173600,3339095,4199200,826300,1458300,1.8,1.81,1.82,1.83,1.84,1716225,2429583,4020569,3484740,1848920,1.79,16007780000.0,1588230000000000.0,117356702.0


'1.67%'

-------------------------------------------------------------------------------------------
SZ lv2


8200814

8085204

8200814

8200814

8085204

SZ lv2 is complete


'1.41%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.22%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.14%'

-----------------------------------------------------------------------------------------------------
SH index data


860229

860229

860229

15140

17227

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'2%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:58.220781
0:01:02.025956


0

103449455

103449455

103449455

Unnamed: 0,skey,date,num,sequenceNo,seq1,clockAtArrival,nan,count,tag,dup1,seq2,ApplSeqNum,sum_nan
48070636,2002504,20200430.0,,458603.0,415653.0,1588209000000000.0,0,0.0,trade,1.0,33589207,38201.0,42950
48070637,2002504,20200430.0,,458603.0,415653.0,,1,0.0,trade,0.0,33589208,103751.0,42951
44099381,2002329,20200430.0,,474966.0,432014.0,1588209000000000.0,0,0.0,trade,1.0,29617952,42348.0,42952
44099382,2002329,20200430.0,,474966.0,432014.0,,1,0.0,trade,0.0,29617953,103801.0,42953
46704038,2002459,20200430.0,,496482.0,453528.0,1588209000000000.0,0,0.0,trade,1.0,32222609,48037.0,42954
46704039,2002459,20200430.0,,496482.0,453528.0,,1,0.0,trade,0.0,32222610,101065.0,42955
40218941,2002134,20200430.0,,599771.0,556814.0,1588209000000000.0,0,0.0,trade,1.0,25737512,78504.0,42957
40218942,2002134,20200430.0,,599771.0,556814.0,,1,0.0,trade,0.0,25737513,109692.0,42958
53070484,2002920,20200430.0,,696186.0,653150.0,1588209000000000.0,0,0.0,trade,1.0,38589055,98033.0,43036
53070485,2002920,20200430.0,,696186.0,653150.0,,1,0.0,trade,0.0,38589056,103756.0,43037


AssertionError: 

In [3]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    """Build a MongoDB connection URI and return a ``DBObj`` bound to ``db_name``.

    Args:
        host: Host part of the URI (placed after the ``@``).
        db_name: Database to open. It is also used as ``authSource`` unless
            ``user`` is ``'admin'`` or ``'root'``, in which case ``'admin'``
            is used instead.
        user: User name; percent-escaped before being placed in the URI.
        passwd: Password; percent-escaped before being placed in the URI.

    Returns:
        A ``DBObj`` constructed from the URI with ``db_name`` selected.
    """
    # Function-scope import so this block does not depend on the file-level imports.
    from urllib.parse import quote_plus

    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    # Reserved characters such as ':', '/', '@' or '%' inside the credentials
    # would otherwise be read as URI delimiters, so escape them first.
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (
        quote_plus(str(user)), quote_plus(str(passwd)), host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    """Wrapper around one MongoDB database that stores DataFrames as pickled segments.

    Every stored document carries one compressed, pickled DataFrame slice in
    ``data`` plus the fields ``ver`` (serialization version), ``date``,
    ``symbol`` and ``start`` (row offset of the slice within its group).
    """

    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        """Open a client for ``uri`` and select database ``db_name``.

        Args:
            uri: MongoDB connection URI.
            symbol_column: Name of the DataFrame column used to group rows
                into per-symbol documents when writing.
            db_name: Name of the database to use.
        """
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        # Maximum number of rows stored in a single document.
        self.chunk_size = 20000
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        """Split a ``mongodb://user:password@host`` URI into its tokens.

        The scheme and surrounding slashes are removed and ``:`` / ``@`` are
        treated as separators, e.g. ``['user', 'password', 'example.com']``.
        """
        # mongodb://user:password@example.com
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        """Drop the collection named ``table_name``."""
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        """Rename collection ``old_table`` to ``new_table``."""
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        """Store ``df`` in ``table_name``, replacing existing (date, symbol) data.

        Rows are grouped by symbol (and by date when ``df`` holds more than
        one distinct date). For each group the existing documents with the
        same ``date`` and ``symbol`` are deleted before the new segments are
        inserted. An empty ``df`` is a no-op.

        Args:
            table_name: Target collection name.
            df: DataFrame containing the date column and the symbol column.

        Raises:
            Exception: If ``df`` has no date column.
        """
        if len(df) == 0:
            return

        if self.date_column not in df.columns:
            raise Exception('DataFrame should contain date column')

        # Date of the first row; only used when the frame holds a single date.
        date = str(df[self.date_column].iloc[0])
        multi_date = len(df[self.date_column].unique()) > 1

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            # Group by the column name itself (not a one-element list) so the
            # key is a scalar on every pandas version, and cast it to int the
            # same way the multi-date branch and build_query do.
            for symbol, sub_df in df.groupby(self.symbol_column):
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        """Insert ``df`` into ``collection`` as documents of at most ``chunk_size`` rows.

        Args:
            collection: Target collection; one document is inserted per chunk.
            date: Value stored in each document's ``date`` field.
            symbol: Value stored in each document's ``symbol`` field.
            df: Rows to store; ``start`` records each chunk's row offset.
        """
        version = 1
        for start in range(0, len(df), self.chunk_size):
            # iloc keeps the slice positional regardless of the index type.
            df_seg = df.iloc[start:start + self.chunk_size]
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        """Build the MongoDB filter for a date range and/or symbol selection.

        Args:
            start_date: Inclusive lower bound; an 8-character ``YYYYMMDD``
                string, an integer, or a ``datetime.date``/``datetime``.
            end_date: Inclusive upper bound, same accepted types.
            symbol: One symbol, or a list/tuple of symbols; each is converted
                with ``int``. Falsy values (``None``, ``0``, empty list) add
                no symbol filter.

        Returns:
            A dict usable as a ``find``/``delete_many`` filter; empty when no
            argument contributes a condition.

        Raises:
            Exception: If a date string is not 8 characters long or a date
                has an unsupported type.
        """
        # Function-scope import: numbers.Integral also covers NumPy integers.
        import numbers

        def parse_date(x):
            if isinstance(x, str):
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            # datetime.datetime is a subclass of datetime.date.
            if isinstance(x, datetime.date):
                return x.strftime("%Y%m%d")
            if isinstance(x, numbers.Integral):
                return parse_date(str(x))
            raise Exception("invalid `date` type: " + str(type(x)))

        query = {}

        if start_date is not None or end_date is not None:
            date_range = {}
            if start_date is not None:
                date_range['$gte'] = parse_date(start_date)
            if end_date is not None:
                date_range['$lte'] = parse_date(end_date)
            query['date'] = date_range

        if symbol:
            if isinstance(symbol, (list, tuple)):
                query['symbol'] = {'$in': [int(x) for x in symbol]}
            else:
                query['symbol'] = int(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        """Delete the documents of ``table_name`` matching the given filters.

        Prints a message and returns ``None`` without deleting anything when
        no filter is given, so the whole table is never deleted by this call.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        """Load the matching segments and return them as one DataFrame.

        Segments are ordered by ``(symbol, date, start)`` before being
        concatenated with a fresh index.

        Returns:
            The concatenated DataFrame, or ``None`` when no filter is given
            (a message is printed) or when nothing matches.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for doc in collection.find(query):
            doc['data'] = self.deser(doc['data'], doc['ver'])
            segs.append(doc)
        if not segs:
            return None
        segs.sort(key=lambda doc: (doc['symbol'], doc['date'], doc['start']))
        return pd.concat([doc['data'] for doc in segs], ignore_index=True)

    def list_tables(self):
        """Return the names of the collections in the database."""
        # Check the class, not the instance: attribute access on a PyMongo
        # Database instance yields a Collection for any unknown name.
        if hasattr(type(self.db), 'list_collection_names'):
            return self.db.list_collection_names()
        # Older PyMongo releases only provide collection_names().
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        """Return the sorted distinct ``date`` values stored in ``table_name``.

        Missing bounds default to ``'00000000'`` and ``'99999999'``.
        """
        collection = self.db[table_name]
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        query = self.build_query(start_date, end_date, symbol)
        dates = {doc['date'] for doc in collection.find(query, {"date": 1, '_id': 0})}
        return sorted(dates)

    def ser(self, s, version):
        """Pickle ``s`` (protocol 4) and compress it.

        Version 1 uses gzip (level 2); version 2 uses lzma (preset 1).

        Raises:
            Exception: For any other ``version``.
        """
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        """Inverse of ``ser``: decompress ``s`` for ``version`` and unpickle it.

        Raises:
            Exception: For an unknown ``version``.
        """
        # NOTE(review): pickle.loads executes arbitrary code embedded in the
        # payload; only use this against a database whose contents are trusted.
        if version == 1:
            return pickle.loads(gzip.decompress(s))
        elif version == 2:
            return pickle.loads(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    """Register a stand-in ``pandas.core.internals.managers`` module on pandas < 0.24.

    When the installed pandas is older than 0.24 and no module of that name
    is present in ``sys.modules``, an empty module exposing ``BlockManager``
    (taken from ``pandas.core.internals``) is registered under that name.
    Presumably this lets old pandas unpickle DataFrames that were pickled by
    a newer pandas -- TODO confirm. On pandas 0.24 or later nothing happens.
    """
    def leading_int(text):
        # Numeric prefix of a version component ('0rc1' -> 0, '23' -> 23).
        digits = ''
        for ch in text:
            if not ch.isdigit():
                break
            digits += ch
        return int(digits) if digits else 0

    # Compare numerically: as plain strings '0.9' would sort after '0.24'.
    version = tuple(leading_int(part) for part in pd.__version__.split('.')[:2])
    if version >= (0, 24):
        return

    import sys
    from types import ModuleType
    from pandas.core.internals import BlockManager
    pkg_name = 'pandas.core.internals.managers'
    if pkg_name not in sys.modules:
        m = ModuleType(pkg_name)
        m.BlockManager = BlockManager
        sys.modules[pkg_name] = m
patch_pandas_pickle()


import pandas as pd
import random
import numpy as np
import glob
import pickle
import os
import datetime
import time
# Use the fully-qualified option key: the abbreviated "max_columns" pattern is
# ambiguous on newer pandas releases (it also matches the Styler option) and
# raises OptionError there.
pd.set_option("display.max_columns", 200)


# Date window used to discover which dates to process.
startDate = 20200501
endDate = 20200529
database_name = 'com_md_eq_cn'
# NOTE(review): credentials are committed in plain text; consider loading them
# from the environment or a secrets store instead.
user = "zhenyuy"
password = "bnONBrzSMGoE"

db = DB("192.168.10.178", database_name, user, password)
# Read md_index for a single symbol only to enumerate the dates present in the
# window; the frame itself is discarded right away.
test = db.read('md_index', start_date=startDate, end_date=endDate, symbol=[1000300])
date_list = test['date'].unique()
del test

# Accumulators for rows found in the raw logs but missing from the database.
new_trade_data = []
new_order_data = []

# Process one date per iteration; `i` is a value taken from the md_index
# 'date' column collected above.
for i in date_list:
    print('--------------------------------------------------------------------------------------------')
    print(i)
    print('SH lv2')
    startDate = str(i)
    endDate = str(i)
    # A fresh DB handle is created for every date.
    db = DB("192.168.10.178", database_name, user, password)
    SH = db.read('md_snapshot_l2', start_date=startDate, end_date=endDate)
    # Split the snapshot by skey range: > 2000000 goes to SZ, < 2000000 stays in
    # SH (a row with skey == 2000000 would land in neither frame).
    SZ = SH[SH['skey'] > 2000000]
    SH = SH[SH['skey'] < 2000000]
    # 'num' combines skey and ordering into one row key; it is used below as
    # the sort / de-duplication key for database rows.
    SH['num'] = SH['skey'] * 10000 + SH['ordering']
    SZ['num'] = SZ['skey'] * 10000 + SZ['ordering']
    
    # Keep only the columns that take part in the comparison with the raw logs.
    SH = SH[['date', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open", 'num']]
    SZ = SZ[['date', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open", 'num']]

    # ---- SH lv2: attach sequenceNo / clockAtArrival from the raw log to DB rows ----
    startDate = str(i)
    endDate = str(i)

    # Locate the per-day log directories; the date is taken from the second
    # '_'-separated field of each directory's basename.
    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]


    # NOTE(review): if several directories match, only the last one read
    # survives; if none match, SH1 / index1 are never assigned.
    for n in range(len(dataPathLs)):
        path1 = np.array(glob.glob(dataPathLs[n] + '/mdLog_SH_***'))
        SH1 = pd.read_csv(path1[0])
        # Rows for these four StockIDs are set aside (before the source filter)
        # for the index comparison performed later in this iteration.
        index1 = SH1[SH1['StockID'].isin([16, 300, 852, 905])]
        SH1 = SH1[SH1['source'] == 4]

        SH1['skey'] = SH1['StockID'] + 1000000
        SH1 = SH1.rename(columns={"openPrice":"open"})
        # Once volume has traded, use the per-skey maximum of 'open'.
        SH1["open"] = np.where(SH1["cum_volume"] > 0, SH1.groupby("skey")["open"].transform("max"), SH1["open"])
        # Strip ':' and '.' from the time string, parse as int and scale by
        # 1000 (presumably to match the database 'time' encoding -- confirm).
        SH1["time"] = SH1["time"].apply(lambda x: int((x.replace(':', "")).replace(".", "")) * 1000)

    SH1 = SH1[['clockAtArrival', 'sequenceNo', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]]
    # Round the float columns so they can be used as exact merge keys.
    for cols in ['cum_amount', "close", 'open']:
        SH1[cols] = SH1[cols].round(2)
    # `cols` is rebound here to the list of merge keys.
    cols = ['skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]
    SH1 = SH1[SH1['skey'].isin(SH['skey'].unique())]
    re = pd.merge(SH, SH1, on=cols, how='outer')

    # p21: rows present only in the log (no 'date'); p22: rows present only in
    # the database (no 'sequenceNo').
    p21 = re[(re['date'].isnull())][['clockAtArrival', 'sequenceNo', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]]
    p22 = re[(re['sequenceNo'].isnull())][["skey", "date", "time", 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open", 'num']]

    # Rows matched on both sides: p11 holds those whose 'num' appears more than
    # once (one DB row matched several log rows), p12 the unambiguous ones.
    p11 = re[(~re['sequenceNo'].isnull()) & (~re['date'].isnull())][re[(~re['sequenceNo'].isnull()) 
                                                        & (~re['date'].isnull())]['num'].duplicated(keep=False)]
    p12 = re[(~re['sequenceNo'].isnull()) & (~re['date'].isnull())].drop_duplicates(['num'], keep=False)
    p11 = p11.sort_values(by=['num', 'sequenceNo'])
    display(p11)
    # Pair the k-th occurrence of a 'num' with the k-th occurrence of a
    # 'sequenceNo'; only rows where both running counts agree are kept.
    p11["order1"] = p11.groupby(["num"]).cumcount()
    p11["order2"] = p11.groupby(["sequenceNo"]).cumcount()
    p11 = p11[p11['order1'] == p11['order2']]

    # Duplicated 'num' values that lost every candidate in the pairing above
    # are kept once, with their log fields blanked out.
    p11_1 = re[(~re['sequenceNo'].isnull()) & (~re['date'].isnull())][re[(~re['sequenceNo'].isnull()) 
                                                        & (~re['date'].isnull())]['num'].duplicated(keep=False)].drop_duplicates('num')
    p11_1 = pd.merge(p11_1, p11[['num', 'order1']], on='num', how='left')
    p11_1 = p11_1[p11_1['order1'].isnull()]
    p11_1['sequenceNo'] = np.nan
    p11_1['clockAtArrival'] = np.nan

    p11.drop(['order1', 'order2'],axis=1,inplace=True)
    p11_1.drop(['order1'],axis=1,inplace=True)
    p11 = pd.concat([p11, p11_1])
    
    p1 = pd.concat([p11, p12])
    # Database-only rows get a second chance: match log-only rows on
    # (skey, time) alone.
    p2 = pd.merge(p22, p21[['skey', 'time', 'clockAtArrival', 'sequenceNo']], on=['skey', 'time'], how='left')
    re1 = pd.concat([p1, p2])
    re1 = re1.sort_values(by='num')
    # seq1 is the anchor sequenceNo for every row: forward-filled within each
    # skey, then back-filled.
    # NOTE(review): the trailing .bfill() runs on the whole Series, not per
    # skey, so leading gaps take the next available value regardless of skey.
    re1['seq1'] = re1.groupby('skey')['sequenceNo'].ffill().bfill()
    # skeys that exist in the database but not in the log have no anchor.
    sl = list(set(SH['skey'].unique()) - set(SH1['skey'].unique()))
    re1.loc[re1['skey'].isin(sl), 'seq1'] = np.nan
    # count1: position inside the anchor group; count2: size of that group.
    re1['count1'] = re1.groupby(['seq1']).cumcount()
    re1['count2'] = re1.groupby(['seq1'])['count1'].transform('nunique')
    re1['min_seq'] = re1.groupby('skey')['sequenceNo'].transform('min')
    # 'count' is the offset from the anchor; for the group anchored at the
    # skey's smallest sequenceNo the offsets are shifted to end at 0.
    re1['count'] = np.where(re1['seq1'] != re1['min_seq'], re1['count1'], re1['count1']+1-re1['count2'])
    re1.drop(["min_seq"],axis=1,inplace=True)
    re1.drop(["count1"],axis=1,inplace=True)
    re1.drop(["count2"],axis=1,inplace=True)
    # dup: occurrence index of each sequenceNo; dup1: number of distinct 'num'
    # values sharing it.
    re1['dup'] = np.where(~re1["sequenceNo"].isnull(), re1.groupby(['sequenceNo']).cumcount(), 0)
    re1['dup1'] = np.where(~re1["sequenceNo"].isnull(), re1.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
    # 'nan' flags rows that still need a sequence number of their own.
    re1['nan'] = np.where((re1['sequenceNo'].isnull()) | (re1['dup'] != 0), 1, 0)
    re1.loc[(re1['dup1'] > 1) & (re1['dup'] > 0), 'sequenceNo'] = np.nan
    # The skeys without an anchor must be exactly the ones missing from the log.
    assert((len(set(sl) - set(re1[re1['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re1[re1['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
    # Every database row must appear exactly once in the result.
    assert(re1.shape[0] == SH.shape[0])

    # Share of rows left without a sequenceNo.
    display('%.2f%%' % (re1[re1['sequenceNo'].isnull()].shape[0]/re1.shape[0] * 100))
    
    
    
    
    
    
    
    print('-------------------------------------------------------------------------------------------')
    print('SZ lv2')
    # ---- SZ lv2: attach sequenceNo / clockAtArrival from the raw log to DB rows ----
    startDate = str(i)
    endDate = str(i)

    # Locate the per-day log directories; the date is taken from the second
    # '_'-separated field of each directory's basename.
    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]

    # NOTE(review): if several directories match, only the last one read
    # survives; if none match, SZ1 is never assigned.
    for day_dir in dataPathLs:
        path1 = np.array(glob.glob(day_dir + '/mdLog_SZ_***'))
        SZ1 = pd.read_csv(path1[0])
        SZ1 = SZ1[SZ1['source'] == 4]

        SZ1['skey'] = SZ1['StockID'] + 2000000
        SZ1 = SZ1.rename(columns={"openPrice":"open"})
        # Once volume has traded, use the per-skey maximum of 'open'.
        SZ1["open"] = np.where(SZ1["cum_volume"] > 0, SZ1.groupby("skey")["open"].transform("max"), SZ1["open"])
        # Strip ':' and '.' from the time string, parse as int, scale by 1000.
        SZ1["time"] = SZ1["time"].apply(lambda x: int((x.replace(':', "")).replace(".", "")) * 1000)

    SZ1 = SZ1[['clockAtArrival', 'sequenceNo', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]]
    # Round the float column so it can be used as an exact merge key.
    for cols in ['cum_amount']:
        SZ1[cols] = SZ1[cols].round(2)
    # `cols` is rebound here to the list of merge keys.
    cols = ['skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]
    SZ1 = SZ1[SZ1['skey'].isin(SZ['skey'].unique())]
    re = pd.merge(SZ, SZ1, on=cols, how='outer')

    # Row counts: merged, with a log match, with a database row, DB, log.
    display(re.shape[0])
    display(re[~re['sequenceNo'].isnull()].shape[0])
    display(re[~re['date'].isnull()].shape[0])
    display(SZ.shape[0])
    display(SZ1.shape[0])

    # An explicit test replaces the former `try: assert ... except:` so the
    # check still runs under `python -O` and cannot swallow unrelated errors.
    if re.shape[0] == re[~re['date'].isnull()].shape[0]:
        print('SZ lv2 is complete')
    else:
        display('%.2f%%' % (re[~re['date'].isnull()].shape[0]/re.shape[0] * 100))
        print('92 have unique values not shared by database')
        # Log-only rows are dropped: keep database rows and whatever matched.
        re = pd.merge(SZ, SZ1, on=cols, how='left')

    # When a database row matched several log rows, pair the k-th occurrence
    # of a 'num' with the k-th occurrence of a 'sequenceNo' and discard the
    # rest. The processing afterwards is the same with or without duplicates.
    if re[re.duplicated('num', keep=False)].shape[0] != 0:
        p1 = re[re['num'].duplicated(keep=False)]
        p2 = re.drop_duplicates(['num'], keep=False)
        p1["order1"] = p1.groupby(["num"]).cumcount()
        p1["order2"] = p1.groupby(["sequenceNo"]).cumcount()
        p1 = p1[p1['order1'] == p1['order2']]
        p1.drop(['order1', 'order2'],axis=1,inplace=True)
        re = pd.concat([p1, p2])

    re2 = re.sort_values(by='num')
    # seq1 is the anchor sequenceNo: forward-filled within each skey, then
    # back-filled over the whole Series (the bfill is not grouped).
    re2['seq1'] = re2.groupby('skey')['sequenceNo'].ffill().bfill()
    # skeys that exist in the database but not in the log have no anchor.
    sl = list(set(SZ['skey'].unique()) - set(SZ1['skey'].unique()))
    re2.loc[re2['skey'].isin(sl), 'seq1'] = np.nan
    # count1: position inside the anchor group; count2: size of that group.
    re2['count1'] = re2.groupby(['seq1']).cumcount()
    re2['count2'] = re2.groupby(['seq1'])['count1'].transform('nunique')
    re2['min_seq'] = re2.groupby('skey')['sequenceNo'].transform('min')
    # Offset from the anchor; the group anchored at the skey's smallest
    # sequenceNo is shifted so its offsets end at 0.
    re2['count'] = np.where(re2['seq1'] != re2['min_seq'], re2['count1'], re2['count1']+1-re2['count2'])
    re2.drop(["min_seq", "count1", "count2"], axis=1, inplace=True)
    # dup: occurrence index of each sequenceNo; dup1: distinct 'num' per sequenceNo.
    re2['dup'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo']).cumcount(), 0)
    re2['dup1'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
    # 'nan' flags rows that still need a sequence number of their own.
    re2['nan'] = np.where((re2['sequenceNo'].isnull()) | (re2['dup'] != 0), 1, 0)
    re2.loc[(re2['dup1'] > 1) & (re2['dup'] > 0), 'sequenceNo'] = np.nan
    # The skeys without an anchor must be exactly the ones missing from the log.
    assert((len(set(sl) - set(re2[re2['seq1'].isnull()]['skey'].unique())) == 0) &
           (len(set(re2[re2['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
    # Every database row must appear exactly once in the result.
    assert(re2.shape[0] == SZ.shape[0])

    # Share of rows left without a sequenceNo.
    display('%.2f%%' % (re2[re2['sequenceNo'].isnull()].shape[0]/re2.shape[0] * 100))
    
    print('----------------------------------------------------------------------------------------------')
    print('SH & SZ trade')

    # ---- trades: attach sequenceNo / clockAtArrival from the raw trade log ----
    # database_name / user / password are the values defined at the top of the
    # script; they are no longer re-declared here.
    startDate = str(i)
    endDate = str(i)

    db = DB("192.168.10.178", database_name, user, password)
    trade = db.read('md_trade', start_date=startDate, end_date=endDate)[['skey', 'date', 'ApplSeqNum']]

    # Locate the per-day log directories; the date is taken from the second
    # '_'-separated field of each directory's basename.
    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
    # NOTE(review): if several directories match, only the last one read
    # survives; if none match, trade1 is never assigned.
    for day_dir in dataPathLs:
        path1 = np.array(glob.glob(day_dir + '/mdTradeLog***'))
        trade1 = pd.read_csv(path1[0])
    # exchId == 2 maps to the 2xxxxxx skey range, everything else to 1xxxxxx.
    trade1['skey'] = np.where(trade1['exchId'] == 2, trade1['SecurityID'] + 2000000, trade1['SecurityID'] + 1000000)
    trade1 = trade1[trade1['skey'].isin(trade['skey'].unique())]
    re = pd.merge(trade, trade1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='outer')

    # An explicit test replaces the former `try: assert ... except:` so the
    # check still runs under `python -O` and cannot swallow unrelated errors.
    if re.shape[0] == trade.shape[0]:
        display('trade data is complete')
        k = 0
    else:
        # The outer merge grew, i.e. the log has trades the database lacks.
        display('%.2f%%' % (trade.shape[0]/re.shape[0] * 100))
        k = 1
        display('trade data incomplete')
        # k1: full log records of the trades missing from the database.
        k1 = pd.merge(trade1, re[re['date'].isnull()][['skey', 'ApplSeqNum']], on=['skey', 'ApplSeqNum'], how='right')
        display(k1.shape[0])
        display(k1['ExecType'].unique())
        display(k1['TransactTime'].unique())
        k1['date'] = trade['date'].iloc[0]
        new_trade_data += [k1[['clockAtArrival', 'sequenceNo', 'TransactTime', 'ApplSeqNum', 'date', 'skey', 'ExecType', 'TradeBSFlag', 
       'TradePrice', 'TradeQty', 'BidApplSeqNum', 'OfferApplSeqNum']]]
        # Continue with database rows only; the extras are appended below.
        re = pd.merge(trade, trade1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='left')
        assert(re.shape[0] == trade.shape[0])

    re3 = re.sort_values(by=['skey', 'ApplSeqNum'])
    # seq1 is the anchor sequenceNo: forward-filled within each skey, then
    # back-filled over the whole Series (the bfill is not grouped).
    re3['seq1'] = re3.groupby('skey')['sequenceNo'].ffill().bfill()
    # skeys that exist in the database but not in the log have no anchor.
    sl = list(set(trade['skey'].unique()) - set(trade1['skey'].unique()))
    re3.loc[re3['skey'].isin(sl), 'seq1'] = np.nan
    # count1: position inside the anchor group; count2: size of that group.
    re3['count1'] = re3.groupby(['seq1']).cumcount()
    re3['count2'] = re3.groupby(['seq1'])['count1'].transform('nunique')
    re3['min_seq'] = re3.groupby('skey')['sequenceNo'].transform('min')
    # Offset from the anchor; the group anchored at the skey's smallest
    # sequenceNo is shifted so its offsets end at 0.
    re3['count'] = np.where(re3['seq1'] != re3['min_seq'], re3['count1'], re3['count1']+1-re3['count2'])
    re3.drop(["min_seq", "count1", "count2"], axis=1, inplace=True)
    # dup: occurrence index of each sequenceNo; dup1: distinct ApplSeqNum per sequenceNo.
    re3['dup'] = np.where(~re3["sequenceNo"].isnull(), re3.groupby(['sequenceNo']).cumcount(), 0)
    re3['dup1'] = np.where(~re3["sequenceNo"].isnull(), re3.groupby(['sequenceNo'])['ApplSeqNum'].transform('nunique'), 0)
    # 'nan' flags rows that still need a sequence number of their own.
    re3['nan'] = np.where((re3['sequenceNo'].isnull()) | (re3['dup'] != 0), 1, 0)
    re3.loc[(re3['dup1'] > 1) & (re3['dup'] > 0), 'sequenceNo'] = np.nan
    # The skeys without an anchor must be exactly the ones missing from the log.
    assert((len(set(sl) - set(re3[re3['seq1'].isnull()]['skey'].unique())) == 0) &
           (len(set(re3[re3['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
    assert(re3.shape[0] == trade.shape[0])
    if k == 1:
        # Log-only trades carry their own sequenceNo, so they anchor themselves.
        k1['seq1'] = k1['sequenceNo']
        k1['count'] = 0
        k1['nan'] = 0
        k1['dup1'] = 1
        re3 = pd.concat([re3, k1[['clockAtArrival', 'date', 'sequenceNo', 'skey', 'ApplSeqNum', 'seq1', 
                                  'count', 'nan', 'dup1']]])

    # Share of rows left without a sequenceNo.
    display('%.2f%%' % (re3[re3['sequenceNo'].isnull()].shape[0]/re3.shape[0] * 100))

    
    print('--------------------------------------------------------------------------------------------------')
    print('SZ order data')

    # ---- orders: attach sequenceNo / clockAtArrival from the raw order log ----
    # database_name / user / password are the values defined at the top of the
    # script; they are no longer re-declared here.
    startDate = str(i)
    endDate = str(i)

    db = DB("192.168.10.178", database_name, user, password)
    order = db.read('md_order', start_date=startDate, end_date=endDate)[['skey', 'date', 'ApplSeqNum']]

    # Locate the per-day log directories; the date is taken from the second
    # '_'-separated field of each directory's basename.
    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
    # NOTE(review): if several directories match, only the last one read
    # survives; if none match, order1 is never assigned.
    for day_dir in dataPathLs:
        path1 = np.array(glob.glob(day_dir + '/mdOrderLog***'))
        order1 = pd.read_csv(path1[0])
    order1['skey'] = order1['SecurityID'] + 2000000
    order1 = order1[order1['skey'].isin(order['skey'].unique())]
    re = pd.merge(order, order1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='outer')

    # An explicit test replaces the former `try: assert ... except:` so the
    # check still runs under `python -O` and cannot swallow unrelated errors.
    if re.shape[0] == order.shape[0]:
        display('order data is complete')
        k = 0
    else:
        # The outer merge grew, i.e. the log has orders the database lacks.
        display('%.2f%%' % (order.shape[0]/re.shape[0] * 100))
        k = 1
        display('order data incomplete')
        # k2: full log records of the orders missing from the database.
        k2 = pd.merge(order1, re[re['date'].isnull()][['skey', 'ApplSeqNum']], on=['skey', 'ApplSeqNum'], how='right')
        display(k2.shape[0])
        display(k2['SecurityID'].unique())
        display(k2['TransactTime'].unique())
        k2['date'] = order['date'].iloc[0]
        new_order_data += [k2[['clockAtArrival', 'sequenceNo', 'TransactTime', 'ApplSeqNum', 'date', 'skey', 'Side', 
       'OrderType', 'Price', 'OrderQty']]]
        # Continue with database rows only; the extras are appended below.
        re = pd.merge(order, order1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='left')
        assert(re.shape[0] == order.shape[0])

    re4 = re.sort_values(by=['skey', 'ApplSeqNum'])
    # seq1 is the anchor sequenceNo: forward-filled within each skey, then
    # back-filled over the whole Series (the bfill is not grouped).
    re4['seq1'] = re4.groupby('skey')['sequenceNo'].ffill().bfill()
    # skeys that exist in the database but not in the log have no anchor.
    sl = list(set(order['skey'].unique()) - set(order1['skey'].unique()))
    re4.loc[re4['skey'].isin(sl), 'seq1'] = np.nan
    # count1: position inside the anchor group; count2: size of that group.
    re4['count1'] = re4.groupby(['seq1']).cumcount()
    re4['count2'] = re4.groupby(['seq1'])['count1'].transform('nunique')
    re4['min_seq'] = re4.groupby('skey')['sequenceNo'].transform('min')
    # Offset from the anchor; the group anchored at the skey's smallest
    # sequenceNo is shifted so its offsets end at 0.
    re4['count'] = np.where(re4['seq1'] != re4['min_seq'], re4['count1'], re4['count1']+1-re4['count2'])
    re4.drop(["min_seq", "count1", "count2"], axis=1, inplace=True)
    # dup: occurrence index of each sequenceNo; dup1: distinct ApplSeqNum per sequenceNo.
    re4['dup'] = np.where(~re4["sequenceNo"].isnull(), re4.groupby(['sequenceNo']).cumcount(), 0)
    re4['dup1'] = np.where(~re4["sequenceNo"].isnull(), re4.groupby(['sequenceNo'])['ApplSeqNum'].transform('nunique'), 0)
    # 'nan' flags rows that still need a sequence number of their own.
    re4['nan'] = np.where((re4['sequenceNo'].isnull()) | (re4['dup'] != 0), 1, 0)
    re4.loc[(re4['dup1'] > 1) & (re4['dup'] > 0), 'sequenceNo'] = np.nan
    # The skeys without an anchor must be exactly the ones missing from the log.
    assert((len(set(sl) - set(re4[re4['seq1'].isnull()]['skey'].unique())) == 0) &
           (len(set(re4[re4['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
    assert(re4.shape[0] == order.shape[0])
    if k == 1:
        # Log-only orders carry their own sequenceNo, so they anchor
        # themselves. This used to assign ApplSeqNum, which is a different
        # number space from every other seq1 value and would mis-sort these
        # rows in the final ordering; it now matches the trade section.
        k2['seq1'] = k2['sequenceNo']
        k2['count'] = 0
        k2['nan'] = 0
        k2['dup1'] = 1
        re4 = pd.concat([re4, k2[['clockAtArrival', 'date', 'sequenceNo', 'skey', 'ApplSeqNum', 'seq1', 
                                  'count', 'nan', "dup1"]]])

    # Share of rows left without a sequenceNo.
    display('%.2f%%' % (re4[re4['sequenceNo'].isnull()].shape[0]/re4.shape[0] * 100))
    
    print('-----------------------------------------------------------------------------------------------------')
    print('SH index data')

    # ---- index: attach sequenceNo / clockAtArrival from the raw log ----
    # database_name / user / password are the values defined at the top of the
    # script; they are no longer re-declared here.
    startDate = str(i)
    endDate = str(i)

    db = DB("192.168.10.178", database_name, user, password)
    index = db.read('md_index', start_date=startDate, end_date=endDate)

    # index1 was extracted from the SH log earlier in this iteration (rows
    # whose StockID is one of 16, 300, 852, 905).
    index1['skey'] = index1['StockID'] + 1000000
    index1 = index1.rename(columns={"openPrice":"open"})
    # Once volume has traded, use the per-skey maximum of 'open'.
    index1["open"] = np.where(index1["cum_volume"] > 0, index1.groupby("skey")["open"].transform("max"), index1["open"])
    # Zero out 'close' on both sides while nothing has traded so the rows compare equal.
    index1['close'] = np.where(index1['cum_volume'] == 0, 0, index1['close'])
    index1["time"] = index1["time"].apply(lambda x: int((x.replace(':', "")).replace(".", "")) * 1000)
    index['close'] = np.where(index['cum_volume'] == 0, 0, index['close'])
    index['num'] = index['skey'] * 10000 + index['ordering']
    index = index[['skey', 'date', 'cum_volume', 'cum_amount', "close", "open", 'num']]
    index1 = index1[['clockAtArrival', 'sequenceNo', 'skey', 'cum_volume', 'cum_amount', "close", "open", "time"]]
    # Round the float column so it can be used as an exact merge key.
    for cols in ['cum_amount']:
        index1[cols] = index1[cols].round(1)
    # `cols` is rebound here to the list of merge keys.
    cols = ['skey', 'cum_volume', 'cum_amount', "close", "open"]
    index1 = index1[index1['skey'].isin(index['skey'].unique())]
    re = pd.merge(index, index1, on=cols, how='outer')

    # Row counts: merged, with a log match, with a database row, DB, log.
    display(re.shape[0])
    display(re[~re['sequenceNo'].isnull()].shape[0])
    display(re[~re['date'].isnull()].shape[0])
    display(index.shape[0])
    display(index1.shape[0])

    # An explicit test replaces the former `try: assert ... except:` so the
    # check still runs under `python -O` and cannot swallow unrelated errors.
    if re.shape[0] == re[~re['date'].isnull()].shape[0]:
        print('index data is complete')
    else:
        display('%.2f%%' % (re[~re['date'].isnull()].shape[0]/re.shape[0] * 100))
        # Log-only rows are dropped: keep database rows and whatever matched.
        re = pd.merge(index, index1, on=cols, how='left')
        print('92 have unique values not shared by database')

    # Database rows that matched several log rows: pair the k-th occurrence of
    # a 'num' with the k-th occurrence of a 'sequenceNo'.
    p11 = re[re.duplicated('num', keep=False)]
    p2 = re.drop_duplicates('num', keep=False)
    p11["order1"] = p11.groupby(["num"]).cumcount()
    p11["order2"] = p11.groupby(["sequenceNo"]).cumcount()
    p11 = p11[p11['order1'] == p11['order2']]

    # Duplicated 'num' values that lost every candidate in the pairing above
    # are kept once, with their log fields blanked out.
    p12 = re[re.duplicated('num', keep=False)].drop_duplicates('num')
    p12 = pd.merge(p12, p11[['num', 'order1']], on='num', how='left')
    p12 = p12[p12['order1'].isnull()]
    p12['sequenceNo'] = np.nan
    p12['clockAtArrival'] = np.nan

    p11.drop(['order1', 'order2'],axis=1,inplace=True)
    p12.drop(['order1'],axis=1,inplace=True)
    p1 = pd.concat([p11, p12])

    re = pd.concat([p1, p2])
    assert(re[re.duplicated('num', keep=False)].shape[0] == 0)

    # The two former branches differed only in how seq1 is seeded, so the
    # shared processing now appears once.
    has_unmatched = re[re['sequenceNo'].isnull()].shape[0] != 0
    re5 = re.sort_values(by='num')
    if has_unmatched:
        # Anchor gaps on the previous sequenceNo of the same skey, then
        # back-fill over the whole Series (the bfill is not grouped).
        re5['seq1'] = re5.groupby('skey')['sequenceNo'].ffill().bfill()
    else:
        re5['seq1'] = re5['sequenceNo']
    # skeys that exist in the database but not in the log have no anchor.
    sl = list(set(index['skey'].unique()) - set(index1['skey'].unique()))
    re5.loc[re5['skey'].isin(sl), 'seq1'] = np.nan
    # count1: position inside the anchor group; count2: size of that group.
    re5['count1'] = re5.groupby(['seq1']).cumcount()
    re5['count2'] = re5.groupby(['seq1'])['count1'].transform('nunique')
    re5['min_seq'] = re5.groupby('skey')['sequenceNo'].transform('min')
    # Offset from the anchor; the group anchored at the skey's smallest
    # sequenceNo is shifted so its offsets end at 0.
    re5['count'] = np.where(re5['seq1'] != re5['min_seq'], re5['count1'], re5['count1']+1-re5['count2'])
    re5.drop(["min_seq", "count1", "count2"], axis=1, inplace=True)
    # dup: occurrence index of each sequenceNo; dup1: distinct 'num' per sequenceNo.
    re5['dup'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo']).cumcount(), 0)
    re5['dup1'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
    # 'nan' flags rows that still need a sequence number of their own.
    re5['nan'] = np.where((re5['sequenceNo'].isnull()) | (re5['dup'] != 0), 1, 0)
    re5.loc[(re5['dup1'] > 1) & (re5['dup'] > 0), 'sequenceNo'] = np.nan
    # The skeys without an anchor must be exactly the ones missing from the log.
    assert((len(set(sl) - set(re5[re5['seq1'].isnull()]['skey'].unique())) == 0) &
           (len(set(re5[re5['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
    assert(re5.shape[0] == index.shape[0])

    # Share of rows left without a sequenceNo.
    display('%.0f%%' % (re5[re5['sequenceNo'].isnull()].shape[0]/re5.shape[0] * 100))
    
    
    print('----------------------------------------------------------------------------------------------------')
    print('final concat')
    # The raw-log frames should not share any sequenceNo. For every pair (in
    # the original order) show the offending rows from both sides if they do.
    # A plain `if` replaces the former `try: assert ... except:` stanzas, so
    # the check still runs under `python -O` and the intersection is computed
    # once per pair instead of three times.
    overlap_pairs = [
        (SZ1, SH1), (SZ1, trade1), (SZ1, order1), (SZ1, index1),
        (SH1, index1), (SH1, trade1), (SH1, order1),
        (trade1, order1), (trade1, index1),
        (index1, order1),
    ]
    for left, right in overlap_pairs:
        shared = list(set(left['sequenceNo']) & set(right['sequenceNo']))
        if shared:
            display(left[left['sequenceNo'].isin(shared)])
            display(right[right['sequenceNo'].isin(shared)])
    # Drop the extra references so the `del` statements that follow this
    # section can actually release the frames.
    del overlap_pairs, left, right, shared

    # The source frames are no longer needed; drop them before building the
    # combined frame.
    del SH
    del SH1
    del SZ
    del SZ1
    del trade
    del trade1
    del order
    del order1
    del index
    del index1
    # Tag each result with its origin so it can be split apart again after the
    # combined renumbering.
    re1['tag'] = 'SH'
    re2['tag'] = 'SZ'
    re3['tag'] = 'trade'
    re4['tag'] = 'order'
    re5['tag'] = 'index'
    
    # Reduce every frame to the common column set; snapshot/index frames are
    # keyed by 'num', trade/order frames by 'ApplSeqNum'.
    re1 = re1[['skey', 'date', 'num', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    re2 = re2[['skey', 'date', 'num', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    re3 = re3[['skey', 'date', 'ApplSeqNum', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    re4 = re4[['skey', 'date', 'ApplSeqNum', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    re5 = re5[['skey', 'date', 'num', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    # seq2 records each row's position in its own frame's natural order; it is
    # the tie-breaker when rows share the same seq1.
    re1 = re1.sort_values(by='num').reset_index(drop=True)
    re1['seq2'] = re1.index
    re2 = re2.sort_values(by='num').reset_index(drop=True)
    re2['seq2'] = re2.index
    re3 = re3.sort_values(by=['skey', 'ApplSeqNum']).reset_index(drop=True)
    re3['seq2'] = re3.index
    re4 = re4.sort_values(by=['skey', 'ApplSeqNum']).reset_index(drop=True)
    re4['seq2'] = re4.index
    re5 = re5.sort_values(by='num').reset_index(drop=True)
    re5['seq2'] = re5.index

    # fr1 collects rows without an anchor (seq1 is NaN), fr2 rows with one.
    # Each source frame is deleted as soon as it has been split.
    fr1 = []
    fr2 = []
    fr1 += [re1[re1['seq1'].isnull()]]
    fr2 += [re1[~re1['seq1'].isnull()]]
    del re1
    display('1. here~')
    fr1 += [re2[re2['seq1'].isnull()]]
    fr2 += [re2[~re2['seq1'].isnull()]]
    del re2
    display('2. here~')
    fr1 += [re3[re3['seq1'].isnull()]]
    fr2 += [re3[~re3['seq1'].isnull()]]
    del re3
    display('3. here~')
    fr1 += [re4[re4['seq1'].isnull()]]
    fr2 += [re4[~re4['seq1'].isnull()]]
    del re4
    display('4. here~')
    fr1 += [re5[re5['seq1'].isnull()]]
    fr2 += [re5[~re5['seq1'].isnull()]]
    del re5
    display('5. here~')
    fr1 = pd.concat(fr1).reset_index(drop=True)
    fr2 = pd.concat(fr2).reset_index(drop=True)
    
    # ---- assign a unique sequenceNo to every row of the combined frame ----
    # Order anchored rows by anchor, then by their position in the source
    # frame; the elapsed time of the sort is printed.
    startTm = datetime.datetime.now()
    fr2 = fr2.sort_values(by=['seq1', 'seq2'])
    print(datetime.datetime.now() - startTm)

    # Rows that own an unambiguous sequenceNo need no offset from their anchor.
    fr2.loc[(fr2['nan']==0) & (fr2['dup1']==1), 'count'] = 0
    # sum_nan is the running number of rows that still need a number; adding it
    # shifts every known sequenceNo to leave room for the rows inserted so far.
    fr2['sum_nan'] = fr2['nan'].cumsum()
    fr2['sequenceNo'] = fr2['sequenceNo'] + fr2['sum_nan']
    startTm = datetime.datetime.now()
    # Fill gaps from the previous value within the same anchor group, then
    # back-fill over the whole Series (the bfill is not grouped).
    fr2['sequenceNo'] = fr2.groupby('seq1')['sequenceNo'].ffill().bfill()
    print(datetime.datetime.now() - startTm)
    fr2['sequenceNo'] = fr2['sequenceNo'] + fr2['count']
    # Rows that still have no number are appended after the current maximum.
    fr21 = fr2[~fr2['sequenceNo'].isnull()]
    fr22 = fr2[fr2['sequenceNo'].isnull()]
    display(fr22.shape[0])
    display(fr21.shape[0])
    display(fr2.shape[0])
    if fr22.shape[0] != 0:
        fr22['sequenceNo'] = range(int(fr21['sequenceNo'].max()) + 1, int(fr21['sequenceNo'].max()) + 1 + fr22.shape[0])
        fr2 = pd.concat([fr21, fr22])
    del fr21
    del fr22
    display(fr2.shape[0])
    # NOTE(review): the assert is used as control flow with a bare `except:`;
    # under `python -O` the repair branch below would never run.
    try:
        assert(fr2[fr2.duplicated('sequenceNo', keep=False)].shape[0] == 0)
    except:
        # Repair path: only the first duplicated sequenceNo (`seq`) is handled.
        # Everything after it is shifted up (by 1 if there is already a gap
        # after `seq`, otherwise by 2) and the duplicate with the latest
        # clockAtArrival is moved to seq + 1.
        te_st = fr2[fr2.duplicated('sequenceNo', keep=False)]
        display(te_st)
        caa = te_st['clockAtArrival'].max()
        seq = te_st['sequenceNo'].iloc[0]
        m_in = fr2[fr2['sequenceNo'] > seq]['sequenceNo'].min()
        if m_in > seq + 1:
            fr2.loc[fr2['sequenceNo'] > seq, 'sequenceNo'] = fr2[fr2['sequenceNo'] > seq]['sequenceNo'] + 1
            fr2.loc[(fr2['sequenceNo'] == seq) & (fr2['clockAtArrival'] == caa), 'sequenceNo'] = seq + 1
        else:
            fr2.loc[fr2['sequenceNo'] > seq, 'sequenceNo'] = fr2[fr2['sequenceNo'] > seq]['sequenceNo'] + 2
            fr2.loc[(fr2['sequenceNo'] == seq) & (fr2['clockAtArrival'] == caa), 'sequenceNo'] = seq + 1
        assert(fr2[fr2.duplicated('sequenceNo', keep=False)].shape[0] == 0)
        
    
    # Rows without any anchor are numbered after everything else.
    fr1['sequenceNo'] = range(int(fr2['sequenceNo'].max()) + 1, int(fr2['sequenceNo'].max()) + 1 + fr1.shape[0])
    fr2 = pd.concat([fr1, fr2])
    del fr1
    # Final guarantee: sequenceNo is unique across all tagged rows.
    assert(fr2[fr2.duplicated('sequenceNo', keep=False)].shape[0] == 0)
    
    import pickle
    os.mkdir('/mnt/e/result/' + startDate)
    SH = fr2[fr2['tag'] == 'SH'][["skey", "date", "num", 'sequenceNo', "clockAtArrival"]]
    SH.to_pickle('/mnt/e/result/' + startDate + '/SH.pkl')
    del SH

    SZ = fr2[fr2['tag'] == 'SZ'][["skey", "date", "num", 'sequenceNo', "clockAtArrival"]]
    SZ.to_pickle('/mnt/e/result/' + startDate + '/SZ.pkl')
    del SZ
    
    trade = fr2[fr2['tag'] == 'trade'][["skey", "date", "ApplSeqNum", 'sequenceNo', "clockAtArrival"]]
    trade.to_pickle('/mnt/e/result/' + startDate + '/trade.pkl')
    del trade
    
    order = fr2[fr2['tag'] == 'order'][["skey", "date", "ApplSeqNum", 'sequenceNo', "clockAtArrival"]]
    order.to_pickle('/mnt/e/result/' + startDate + '/order.pkl')
    del order
    
    index = fr2[fr2['tag'] == 'index'][["skey", "date", "num", 'sequenceNo', "clockAtArrival"]]
    index.to_pickle('/mnt/e/result/' + startDate + '/index.pkl')
    del index
    del fr2
    
    print(str(i) + 'finished')


--------------------------------------------------------------------------------------------
20200506
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
5310913,20200506.0,1603657,150005000000,1398389,34208126.18,24.61,24.61,24.6,24.59,24.58,24.57,7600,3700,3100,3100,200,24.62,24.63,24.64,24.65,24.66,4400,6800,17200,59500,6000,24.0,16036570000.0,1588748000000000.0,119488939.0
5310914,20200506.0,1603657,150005000000,1398389,34208126.18,24.61,24.61,24.6,24.59,24.58,24.57,7600,3700,3100,3100,200,24.62,24.63,24.64,24.65,24.66,4400,6800,17200,59500,6000,24.0,16036570000.0,1588748000000000.0,119495116.0
5310915,20200506.0,1603657,150005000000,1398389,34208126.18,24.61,24.61,24.6,24.59,24.58,24.57,7600,3700,3100,3100,200,24.62,24.63,24.64,24.65,24.66,4400,6800,17200,59500,6000,24.0,16036570000.0,1588748000000000.0,119488939.0
5310916,20200506.0,1603657,150005000000,1398389,34208126.18,24.61,24.61,24.6,24.59,24.58,24.57,7600,3700,3100,3100,200,24.62,24.63,24.64,24.65,24.66,4400,6800,17200,59500,6000,24.0,16036570000.0,1588748000000000.0,119495116.0


'1.57%'

-------------------------------------------------------------------------------------------
SZ lv2


8146325

8054622

8146325

8146325

8054622

SZ lv2 is complete


'1.13%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.40%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.40%'

-----------------------------------------------------------------------------------------------------
SH index data


992277

992277

992277

15172

17042

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'6%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:38.152847
0:01:19.869427


0

105430435

105430435

105430435

20200506finished
--------------------------------------------------------------------------------------------
20200507
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
23005,20200507.0,1600008,150014000000,37912863,129783700.0,3.41,3.41,3.4,3.39,3.38,3.37,3709446,236400,240400,344100,414300,3.42,3.43,3.44,3.45,3.46,360400,1071659,900100,1282362,1199200,3.4,16000080000.0,1588835000000000.0,118009131.0
23006,20200507.0,1600008,150014000000,37912863,129783700.0,3.41,3.41,3.4,3.39,3.38,3.37,3709446,236400,240400,344100,414300,3.42,3.43,3.44,3.45,3.46,360400,1071659,900100,1282362,1199200,3.4,16000080000.0,1588835000000000.0,118009179.0
23007,20200507.0,1600008,150014000000,37912863,129783700.0,3.41,3.41,3.4,3.39,3.38,3.37,3709446,236400,240400,344100,414300,3.42,3.43,3.44,3.45,3.46,360400,1071659,900100,1282362,1199200,3.4,16000080000.0,1588835000000000.0,118009131.0
23008,20200507.0,1600008,150014000000,37912863,129783700.0,3.41,3.41,3.4,3.39,3.38,3.37,3709446,236400,240400,344100,414300,3.42,3.43,3.44,3.45,3.46,360400,1071659,900100,1282362,1199200,3.4,16000080000.0,1588835000000000.0,118009179.0
486611,20200507.0,1600141,150014000000,9530093,86769070.0,9.11,9.1,9.09,9.08,9.07,9.06,148500,237200,43900,48200,103400,9.11,9.12,9.13,9.14,9.15,54400,52900,7800,38100,89532,9.15,16001410000.0,1588835000000000.0,118009139.0
486612,20200507.0,1600141,150014000000,9530093,86769070.0,9.11,9.1,9.09,9.08,9.07,9.06,148500,237200,43900,48200,103400,9.11,9.12,9.13,9.14,9.15,54400,52900,7800,38100,89532,9.15,16001410000.0,1588835000000000.0,118009180.0
486613,20200507.0,1600141,150014000000,9530093,86769070.0,9.11,9.1,9.09,9.08,9.07,9.06,148500,237200,43900,48200,103400,9.11,9.12,9.13,9.14,9.15,54400,52900,7800,38100,89532,9.15,16001410000.0,1588835000000000.0,118009139.0
486614,20200507.0,1600141,150014000000,9530093,86769070.0,9.11,9.1,9.09,9.08,9.07,9.06,148500,237200,43900,48200,103400,9.11,9.12,9.13,9.14,9.15,54400,52900,7800,38100,89532,9.15,16001410000.0,1588835000000000.0,118009180.0
741751,20200507.0,1600217,140738000000,19224512,115070000.0,5.96,5.96,5.95,5.94,5.93,5.92,78800,43627,119400,22200,42100,5.97,5.98,5.99,6.0,6.01,47900,35500,76900,213101,49100,5.91,16002170000.0,1588832000000000.0,95523878.0
741752,20200507.0,1600217,140738000000,19224512,115070000.0,5.96,5.96,5.95,5.94,5.93,5.92,78800,43627,119400,22200,42100,5.97,5.98,5.99,6.0,6.01,47900,35500,76900,213101,49100,5.91,16002170000.0,1588832000000000.0,95523880.0


'1.61%'

-------------------------------------------------------------------------------------------
SZ lv2


8008708

7902469

8008708

8008708

7902469

SZ lv2 is complete


'1.33%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.40%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.41%'

-----------------------------------------------------------------------------------------------------
SH index data


697815

697815

697815

14280

17185

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'1%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:26.799331
0:01:18.557507


0

104243163

104243163

104243163

20200507finished
--------------------------------------------------------------------------------------------
20200508
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.25%'

-------------------------------------------------------------------------------------------
SZ lv2


8153099

8078039

8153099

8153099

8078039

SZ lv2 is complete


'0.92%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.07%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.00%'

-----------------------------------------------------------------------------------------------------
SH index data


725526

725526

725526

14272

16742

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'4%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:34.529593
0:01:22.007850


0

104478465

104478465

104478465

20200508finished
--------------------------------------------------------------------------------------------
20200511
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
147598,20200511.0,1600048,150005000000,58748185,9.299882e+08,15.75,15.75,15.74,15.73,15.72,15.71,8900,163300,153200,254300,232900,15.76,15.77,15.78,15.79,15.80,17400,8800,71400,200500,114161,15.88,1.600049e+10,1.589180e+15,118303396.0
147599,20200511.0,1600048,150005000000,58748185,9.299882e+08,15.75,15.75,15.74,15.73,15.72,15.71,8900,163300,153200,254300,232900,15.76,15.77,15.78,15.79,15.80,17400,8800,71400,200500,114161,15.88,1.600049e+10,1.589180e+15,118309998.0
147600,20200511.0,1600048,150005000000,58748185,9.299882e+08,15.75,15.75,15.74,15.73,15.72,15.71,8900,163300,153200,254300,232900,15.76,15.77,15.78,15.79,15.80,17400,8800,71400,200500,114161,15.88,1.600049e+10,1.589180e+15,118303396.0
147601,20200511.0,1600048,150005000000,58748185,9.299882e+08,15.75,15.75,15.74,15.73,15.72,15.71,8900,163300,153200,254300,232900,15.76,15.77,15.78,15.79,15.80,17400,8800,71400,200500,114161,15.88,1.600049e+10,1.589180e+15,118309998.0
343114,20200511.0,1600101,150005000000,4008062,2.461584e+07,6.13,6.12,6.11,6.10,6.09,6.08,266640,71400,378200,91700,59700,6.13,6.14,6.15,6.16,6.17,20400,46200,28200,38100,24500,6.22,1.600101e+10,1.589180e+15,118303422.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5917292,20200511.0,1688007,150005000000,1655560,4.105486e+07,24.67,24.67,24.63,24.62,24.61,24.60,11837,2000,734,272,1400,24.68,24.69,24.70,24.73,24.80,1731,200,22908,2000,3500,24.90,1.688007e+10,1.589180e+15,118309977.0
6191376,20200511.0,1688288,150005000000,3338084,1.198437e+08,35.01,35.01,35.00,34.99,34.98,34.97,2955,25871,2177,4179,42138,35.08,35.11,35.12,35.13,35.15,100,200,300,200,4397,33.80,1.688288e+10,1.589180e+15,118303627.0
6191377,20200511.0,1688288,150005000000,3338084,1.198437e+08,35.01,35.01,35.00,34.99,34.98,34.97,2955,25871,2177,4179,42138,35.08,35.11,35.12,35.13,35.15,100,200,300,200,4397,33.80,1.688288e+10,1.589180e+15,118310013.0
6191378,20200511.0,1688288,150005000000,3338084,1.198437e+08,35.01,35.01,35.00,34.99,34.98,34.97,2955,25871,2177,4179,42138,35.08,35.11,35.12,35.13,35.15,100,200,300,200,4397,33.80,1.688288e+10,1.589180e+15,118303627.0


'1.33%'

-------------------------------------------------------------------------------------------
SZ lv2


8064079

7989031

8064079

8064079

7989031

SZ lv2 is complete


'0.93%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.12%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.00%'

-----------------------------------------------------------------------------------------------------
SH index data


734469

734469

734469

14281

17313

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'1%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:37.579608
0:01:20.606123


0

104495263

104495263

104495263

20200511finished
--------------------------------------------------------------------------------------------
20200512
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.38%'

-------------------------------------------------------------------------------------------
SZ lv2


7925227

7846522

7925227

7925227

7846522

SZ lv2 is complete


'0.99%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.10%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.05%'

-----------------------------------------------------------------------------------------------------
SH index data


708500

708500

708500

14288

15755

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'7%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:33.367548
0:00:58.872595


0

98517233

98517233

98517233

20200512finished
--------------------------------------------------------------------------------------------
20200513
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.34%'

-------------------------------------------------------------------------------------------
SZ lv2


7789170

7721865

7789170

7789170

7721865

SZ lv2 is complete


'0.86%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.10%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.03%'

-----------------------------------------------------------------------------------------------------
SH index data


845253

845253

845253

14773

16479

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'8%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:22.744550
0:00:56.261799


0

95353509

95353509

95353509

20200513finished
--------------------------------------------------------------------------------------------
20200514
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
291589,20200514.0,1600088,150016000000,3275131,41594040.0,12.68,12.67,12.66,12.65,12.64,12.63,10800,2100,5700,4100,37400,12.68,12.69,12.7,12.71,12.72,2000,12900,5400,14800,20900,12.64,16000880000.0,1589440000000000.0,113117192.0
291590,20200514.0,1600088,150016000000,3275131,41594040.0,12.68,12.67,12.66,12.65,12.64,12.63,10800,2100,5700,4100,37400,12.68,12.69,12.7,12.71,12.72,2000,12900,5400,14800,20900,12.64,16000880000.0,1589440000000000.0,113122861.0
291591,20200514.0,1600088,150016000000,3275131,41594040.0,12.68,12.67,12.66,12.65,12.64,12.63,10800,2100,5700,4100,37400,12.68,12.69,12.7,12.71,12.72,2000,12900,5400,14800,20900,12.64,16000880000.0,1589440000000000.0,113117192.0
291592,20200514.0,1600088,150016000000,3275131,41594040.0,12.68,12.67,12.66,12.65,12.64,12.63,10800,2100,5700,4100,37400,12.68,12.69,12.7,12.71,12.72,2000,12900,5400,14800,20900,12.64,16000880000.0,1589440000000000.0,113122861.0
395674,20200514.0,1600116,150016000000,12809998,111128500.0,8.67,8.67,8.66,8.65,8.64,8.63,177800,392100,34347,24200,26100,8.68,8.69,8.7,8.71,8.72,60800,18000,36900,16200,16500,8.47,16001160000.0,1589440000000000.0,113117194.0
395675,20200514.0,1600116,150016000000,12809998,111128500.0,8.67,8.67,8.66,8.65,8.64,8.63,177800,392100,34347,24200,26100,8.68,8.69,8.7,8.71,8.72,60800,18000,36900,16200,16500,8.47,16001160000.0,1589440000000000.0,113122862.0
395676,20200514.0,1600116,150016000000,12809998,111128500.0,8.67,8.67,8.66,8.65,8.64,8.63,177800,392100,34347,24200,26100,8.68,8.69,8.7,8.71,8.72,60800,18000,36900,16200,16500,8.47,16001160000.0,1589440000000000.0,113117194.0
395677,20200514.0,1600116,150016000000,12809998,111128500.0,8.67,8.67,8.66,8.65,8.64,8.63,177800,392100,34347,24200,26100,8.68,8.69,8.7,8.71,8.72,60800,18000,36900,16200,16500,8.47,16001160000.0,1589440000000000.0,113122862.0
619118,20200514.0,1600187,150016000000,5211697,11558820.0,2.2,2.2,2.19,2.18,2.17,2.16,615400,233600,79800,56700,65500,2.21,2.22,2.23,2.24,2.25,20695,596202,222200,333300,444433,2.24,16001870000.0,1589440000000000.0,113117117.0
619119,20200514.0,1600187,150016000000,5211697,11558820.0,2.2,2.2,2.19,2.18,2.17,2.16,615400,233600,79800,56700,65500,2.21,2.22,2.23,2.24,2.25,20695,596202,222200,333300,444433,2.24,16001870000.0,1589440000000000.0,113122863.0


'1.39%'

-------------------------------------------------------------------------------------------
SZ lv2


7922048

7846301

7922048

7922048

7846301

SZ lv2 is complete


'0.96%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.07%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.02%'

-----------------------------------------------------------------------------------------------------
SH index data


989784

989784

989784

15164

17484

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'5%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:35.476648
0:00:58.305018


0

99531318

99531318

99531318

20200514finished
--------------------------------------------------------------------------------------------
20200515
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
3078256,20200515.0,1601015,150004000000,2767590,7373589.0,2.66,2.65,2.64,2.63,2.62,2.61,271800,67500,100700,27800,89000,2.66,2.67,2.68,2.69,2.7,11400,161520,286165,161640,138010,2.66,16010150000.0,1589526000000000.0,112589068.0
3078257,20200515.0,1601015,150004000000,2767590,7373589.0,2.66,2.65,2.64,2.63,2.62,2.61,271800,67500,100700,27800,89000,2.66,2.67,2.68,2.69,2.7,11400,161520,286165,161640,138010,2.66,16010150000.0,1589526000000000.0,112595783.0
3078258,20200515.0,1601015,150004000000,2767590,7373589.0,2.66,2.65,2.64,2.63,2.62,2.61,271800,67500,100700,27800,89000,2.66,2.67,2.68,2.69,2.7,11400,161520,286165,161640,138010,2.66,16010150000.0,1589526000000000.0,112589068.0
3078259,20200515.0,1601015,150004000000,2767590,7373589.0,2.66,2.65,2.64,2.63,2.62,2.61,271800,67500,100700,27800,89000,2.66,2.67,2.68,2.69,2.7,11400,161520,286165,161640,138010,2.66,16010150000.0,1589526000000000.0,112595783.0
4070599,20200515.0,1603076,150005000000,1430568,34803315.0,24.31,24.31,24.3,24.28,24.27,24.26,2700,1900,2400,1000,3300,24.32,24.33,24.34,24.35,24.36,2000,2200,4600,2100,500,23.73,16030760000.0,1589526000000000.0,112596324.0
4070600,20200515.0,1603076,150005000000,1430568,34803315.0,24.31,24.31,24.3,24.28,24.27,24.26,2700,1900,2400,1000,3300,24.32,24.33,24.34,24.35,24.36,2000,2200,4600,2100,500,23.73,16030760000.0,1589526000000000.0,112602134.0
4070601,20200515.0,1603076,150005000000,1430568,34803315.0,24.31,24.31,24.3,24.28,24.27,24.26,2700,1900,2400,1000,3300,24.32,24.33,24.34,24.35,24.36,2000,2200,4600,2100,500,23.73,16030760000.0,1589526000000000.0,112596324.0
4070602,20200515.0,1603076,150005000000,1430568,34803315.0,24.31,24.31,24.3,24.28,24.27,24.26,2700,1900,2400,1000,3300,24.32,24.33,24.34,24.35,24.36,2000,2200,4600,2100,500,23.73,16030760000.0,1589526000000000.0,112602134.0
4259516,20200515.0,1603168,150004000000,1039903,7707494.06,7.39,7.38,7.37,7.36,7.35,7.34,35885,39300,25900,15500,14800,7.39,7.4,7.41,7.42,7.43,5400,10000,1000,5000,3500,7.56,16031680000.0,1589526000000000.0,112589056.0
4259517,20200515.0,1603168,150004000000,1039903,7707494.06,7.39,7.38,7.37,7.36,7.35,7.34,35885,39300,25900,15500,14800,7.39,7.4,7.41,7.42,7.43,5400,10000,1000,5000,3500,7.56,16031680000.0,1589526000000000.0,112595791.0


'1.38%'

-------------------------------------------------------------------------------------------
SZ lv2


7945060

7865798

7945060

7945060

7865798

SZ lv2 is complete


'1.00%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.08%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.02%'

-----------------------------------------------------------------------------------------------------
SH index data


912462

912462

912461

14625

16486

'100.00%'

92 have unique values not shared by database


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'5%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:35.663228
0:00:59.528888


0

99081068

99081068

99081068

20200515finished
--------------------------------------------------------------------------------------------
20200518
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'91.19%'

-------------------------------------------------------------------------------------------
SZ lv2


7994331

712743

7994331

7994331

712743

SZ lv2 is complete


'91.08%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'82.38%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'79.02%'

-----------------------------------------------------------------------------------------------------
SH index data


484574

471813

484574

14261

3254

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'90%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:02:24.430501
0:00:19.441200


0

108273601

108273601

108273601

20200518finished
--------------------------------------------------------------------------------------------
20200519
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.61%'

-------------------------------------------------------------------------------------------
SZ lv2


7820488

7743282

7820488

7820488

7743282

SZ lv2 is complete


'0.99%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.19%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.06%'

-----------------------------------------------------------------------------------------------------
SH index data


762848

762848

762848

14284

17524

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'1%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:26.767547
0:00:56.284404


0

96385897

96385897

96385897

20200519finished
--------------------------------------------------------------------------------------------
20200520
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
4457801,20200520.0,1603260,95635000000,901904,22353284.79,24.67,24.67,24.66,24.65,24.64,24.63,2279,4300,1100,4600,100,24.68,24.69,24.7,24.73,24.74,2333,404,2500,200,14700,24.47,16032600000.0,1589940000000000.0,29018676.0
4457802,20200520.0,1603260,95635000000,901904,22353284.79,24.67,24.67,24.66,24.65,24.64,24.63,2279,4300,1100,4600,100,24.68,24.69,24.7,24.73,24.74,2333,404,2500,200,14700,24.47,16032600000.0,1589940000000000.0,29018678.0


'1.91%'

-------------------------------------------------------------------------------------------
SZ lv2


7957016

7820140

7957016

7957016

7820140

SZ lv2 is complete


'1.72%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.17%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.07%'

-----------------------------------------------------------------------------------------------------
SH index data


694510

694510

694510

14652

15135

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'7%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:28.026284
0:01:18.614477


0

105502694

105502694

105502694

20200520finished
--------------------------------------------------------------------------------------------
20200521
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.67%'

-------------------------------------------------------------------------------------------
SZ lv2


8029398

7853805

8029398

8029398

7853805

SZ lv2 is complete


'2.19%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.19%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.04%'

-----------------------------------------------------------------------------------------------------
SH index data


933950

933950

933949

14655

16346

'100.00%'

92 have unique values not shared by database


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'7%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:40.734741
0:00:59.659851


0

101383909

101383909

101383909

20200521finished
--------------------------------------------------------------------------------------------
20200522
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.72%'

-------------------------------------------------------------------------------------------
SZ lv2


8113570

7964107

8113570

8113570

7964107

SZ lv2 is complete


'1.84%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.20%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.05%'

-----------------------------------------------------------------------------------------------------
SH index data


716742

716742

716742

14273

16530

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'5%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:37.856832
0:00:58.646890


0

100341420

100341420

100341420

20200522finished
--------------------------------------------------------------------------------------------
20200525
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
2983420,20200525.0,1601015,150005000000,6137475,16305040.0,2.66,2.66,2.65,2.64,2.63,2.62,52475,61300,93710,80700,29630,2.67,2.68,2.69,2.7,2.71,122870,233020,213800,282620,129550,2.61,16010150000.0,1590390000000000.0,99586409.0
2983421,20200525.0,1601015,150005000000,6137475,16305040.0,2.66,2.66,2.65,2.64,2.63,2.62,52475,61300,93710,80700,29630,2.67,2.68,2.69,2.7,2.71,122870,233020,213800,282620,129550,2.61,16010150000.0,1590390000000000.0,99592417.0
2983422,20200525.0,1601015,150005000000,6137475,16305040.0,2.66,2.66,2.65,2.64,2.63,2.62,52475,61300,93710,80700,29630,2.67,2.68,2.69,2.7,2.71,122870,233020,213800,282620,129550,2.61,16010150000.0,1590390000000000.0,99586409.0
2983423,20200525.0,1601015,150005000000,6137475,16305040.0,2.66,2.66,2.65,2.64,2.63,2.62,52475,61300,93710,80700,29630,2.67,2.68,2.69,2.7,2.71,122870,233020,213800,282620,129550,2.61,16010150000.0,1590390000000000.0,99592417.0
3090941,20200525.0,1601118,150005000000,31669283,165962900.0,5.19,5.18,5.17,5.16,5.15,5.14,13500,79400,344539,215890,144100,5.19,5.2,5.21,5.22,5.23,598000,197400,132900,101200,23900,5.28,16011180000.0,1590390000000000.0,99586336.0
3090942,20200525.0,1601118,150005000000,31669283,165962900.0,5.19,5.18,5.17,5.16,5.15,5.14,13500,79400,344539,215890,144100,5.19,5.2,5.21,5.22,5.23,598000,197400,132900,101200,23900,5.28,16011180000.0,1590390000000000.0,99592418.0
3090943,20200525.0,1601118,150005000000,31669283,165962900.0,5.19,5.18,5.17,5.16,5.15,5.14,13500,79400,344539,215890,144100,5.19,5.2,5.21,5.22,5.23,598000,197400,132900,101200,23900,5.28,16011180000.0,1590390000000000.0,99586336.0
3090944,20200525.0,1601118,150005000000,31669283,165962900.0,5.19,5.18,5.17,5.16,5.15,5.14,13500,79400,344539,215890,144100,5.19,5.2,5.21,5.22,5.23,598000,197400,132900,101200,23900,5.28,16011180000.0,1590390000000000.0,99592418.0
4123989,20200525.0,1603166,150005000000,9028682,56379520.0,6.46,6.46,6.45,6.44,6.43,6.42,4023522,15800,5100,5000,1800,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,5.9,16031660000.0,1590390000000000.0,99586420.0
4123990,20200525.0,1603166,150005000000,9028682,56379520.0,6.46,6.46,6.45,6.44,6.43,6.42,4023522,15800,5100,5000,1800,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,5.9,16031660000.0,1590390000000000.0,99592442.0


'1.85%'

-------------------------------------------------------------------------------------------
SZ lv2


7552785

7384014

7552785

7552785

7384014

SZ lv2 is complete


'2.23%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'1.76%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.76%'

-----------------------------------------------------------------------------------------------------
SH index data


880431

880431

880431

14639

16246

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'5%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:03:59.861906
0:00:50.651043


0

86824427

86824427

86824427

20200525finished
--------------------------------------------------------------------------------------------
20200526
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.88%'

-------------------------------------------------------------------------------------------
SZ lv2


7819501

7653740

7819501

7819501

7653740

SZ lv2 is complete


'2.12%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'1.20%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.49%'

-----------------------------------------------------------------------------------------------------
SH index data


899021

899021

899021

14656

17719

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'1%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:08.863256
0:00:53.422529


0

89315068

89315068

89315068

20200526finished
--------------------------------------------------------------------------------------------
20200527
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
1171413,20200527.0,1600373,150023000000,9887219,110627400.0,11.23,11.22,11.21,11.2,11.19,11.18,40800,13600,43200,26600,1500,11.23,11.24,11.25,11.26,11.27,17200,25700,15700,37280,22100,11.41,16003730000.0,1590563000000000.0,108156565.0
1171414,20200527.0,1600373,150023000000,9887219,110627400.0,11.23,11.22,11.21,11.2,11.19,11.18,40800,13600,43200,26600,1500,11.23,11.24,11.25,11.26,11.27,17200,25700,15700,37280,22100,11.41,16003730000.0,1590563000000000.0,108156813.0
1171415,20200527.0,1600373,150023000000,9887219,110627400.0,11.23,11.22,11.21,11.2,11.19,11.18,40800,13600,43200,26600,1500,11.23,11.24,11.25,11.26,11.27,17200,25700,15700,37280,22100,11.41,16003730000.0,1590563000000000.0,108156565.0
1171416,20200527.0,1600373,150023000000,9887219,110627400.0,11.23,11.22,11.21,11.2,11.19,11.18,40800,13600,43200,26600,1500,11.23,11.24,11.25,11.26,11.27,17200,25700,15700,37280,22100,11.41,16003730000.0,1590563000000000.0,108156813.0
2113289,20200527.0,1600685,150023000000,18024628,301823100.0,16.56,16.55,16.54,16.53,16.52,16.51,26900,35600,31200,43500,37800,16.56,16.57,16.58,16.59,16.6,382000,20100,12100,10800,25500,16.93,16006850000.0,1590563000000000.0,108156594.0
2113290,20200527.0,1600685,150023000000,18024628,301823100.0,16.56,16.55,16.54,16.53,16.52,16.51,26900,35600,31200,43500,37800,16.56,16.57,16.58,16.59,16.6,382000,20100,12100,10800,25500,16.93,16006850000.0,1590563000000000.0,108156814.0
2113291,20200527.0,1600685,150023000000,18024628,301823100.0,16.56,16.55,16.54,16.53,16.52,16.51,26900,35600,31200,43500,37800,16.56,16.57,16.58,16.59,16.6,382000,20100,12100,10800,25500,16.93,16006850000.0,1590563000000000.0,108156594.0
2113292,20200527.0,1600685,150023000000,18024628,301823100.0,16.56,16.55,16.54,16.53,16.52,16.51,26900,35600,31200,43500,37800,16.56,16.57,16.58,16.59,16.6,382000,20100,12100,10800,25500,16.93,16006850000.0,1590563000000000.0,108156814.0


'1.85%'

-------------------------------------------------------------------------------------------
SZ lv2


7845046

7677783

7845046

7845046

7677783

SZ lv2 is complete


'2.13%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'0.76%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.37%'

-----------------------------------------------------------------------------------------------------
SH index data


941379

941379

941377

15154

16833

'100.00%'

92 have unique values not shared by database


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'8%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:18.874923
0:00:54.645088


0

94845515

94845515

94845515

20200527finished
--------------------------------------------------------------------------------------------
20200528
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
45414,20200528.0,1600015,150025000000,13188834,84326873.88,6.39,6.38,6.37,6.36,6.35,6.34,422554,574800,208200,175500,82700,6.39,6.4,6.41,6.42,6.43,39434,708764,230300,426396,1131134,6.36,16000150000.0,1590649000000000.0,110880856.0
45415,20200528.0,1600015,150025000000,13188834,84326873.88,6.39,6.38,6.37,6.36,6.35,6.34,422554,574800,208200,175500,82700,6.39,6.4,6.41,6.42,6.43,39434,708764,230300,426396,1131134,6.36,16000150000.0,1590649000000000.0,110881077.0
45416,20200528.0,1600015,150025000000,13188834,84326873.88,6.39,6.38,6.37,6.36,6.35,6.34,422554,574800,208200,175500,82700,6.39,6.4,6.41,6.42,6.43,39434,708764,230300,426396,1131134,6.36,16000150000.0,1590649000000000.0,110880856.0
45417,20200528.0,1600015,150025000000,13188834,84326873.88,6.39,6.38,6.37,6.36,6.35,6.34,422554,574800,208200,175500,82700,6.39,6.4,6.41,6.42,6.43,39434,708764,230300,426396,1131134,6.36,16000150000.0,1590649000000000.0,110881077.0
397099,20200528.0,1600119,150025000000,8026130,43190018.24,5.51,5.5,5.49,5.48,5.47,5.46,36800,30000,2600,600,20000,5.51,0.0,0.0,0.0,0.0,92732,0,0,0,0,5.3,16001190000.0,1590649000000000.0,110880929.0
397100,20200528.0,1600119,150025000000,8026130,43190018.24,5.51,5.5,5.49,5.48,5.47,5.46,36800,30000,2600,600,20000,5.51,0.0,0.0,0.0,0.0,92732,0,0,0,0,5.3,16001190000.0,1590649000000000.0,110881078.0
397101,20200528.0,1600119,150025000000,8026130,43190018.24,5.51,5.5,5.49,5.48,5.47,5.46,36800,30000,2600,600,20000,5.51,0.0,0.0,0.0,0.0,92732,0,0,0,0,5.3,16001190000.0,1590649000000000.0,110880929.0
397102,20200528.0,1600119,150025000000,8026130,43190018.24,5.51,5.5,5.49,5.48,5.47,5.46,36800,30000,2600,600,20000,5.51,0.0,0.0,0.0,0.0,92732,0,0,0,0,5.3,16001190000.0,1590649000000000.0,110881078.0
456118,20200528.0,1600136,150025000000,4252017,33804300.16,7.96,7.95,7.94,7.93,7.92,7.91,6500,1200,600,7000,16772,7.96,7.97,7.98,7.99,8.0,2000,43900,10500,10600,32200,8.04,16001360000.0,1590649000000000.0,110880866.0
456119,20200528.0,1600136,150025000000,4252017,33804300.16,7.96,7.95,7.94,7.93,7.92,7.91,6500,1200,600,7000,16772,7.96,7.97,7.98,7.99,8.0,2000,43900,10500,10600,32200,8.04,16001360000.0,1590649000000000.0,110881079.0


'1.88%'

-------------------------------------------------------------------------------------------
SZ lv2


8030782

7845789

8030782

8030782

7845789

SZ lv2 is complete


'2.30%'

----------------------------------------------------------------------------------------------
SH & SZ trade


'trade data is complete'

'1.01%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.39%'

-----------------------------------------------------------------------------------------------------
SH index data


723635

723635

723635

14793

17006

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'3%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:15.356772
0:00:57.574851


0

97326245

97326245

97326245

20200528finished
--------------------------------------------------------------------------------------------
20200529
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo


'1.90%'

-------------------------------------------------------------------------------------------
SZ lv2


7861475

7702353

7861475

7861475

7702353

SZ lv2 is complete


'2.02%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'1.15%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.57%'

-----------------------------------------------------------------------------------------------------
SH index data


738697

738697

738697

14276

16458

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'5%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:04:58.384419
0:00:56.296228


0

93734087

93734087

93734087

20200529finished


In [18]:
# Show every row of p2 whose 'num' value occurs more than once;
# keep=False flags all occurrences of a repeated value, not just the later ones.
p2[p2['num'].duplicated(keep=False)]

Unnamed: 0,skey,date,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
100414,1603499,20200311.0,150006000000,1909424,30524037.52,15.66,15.66,15.65,15.64,15.63,15.62,2000,11700,2000,5100,10100,15.67,15.68,15.7,15.71,15.72,6900,9900,1000,1400,400,16.35,16034990000.0,1583910000000000.0,152274826.0
100415,1603499,20200311.0,150006000000,1909424,30524037.52,15.66,15.66,15.65,15.64,15.63,15.62,2000,11700,2000,5100,10100,15.67,15.68,15.7,15.71,15.72,6900,9900,1000,1400,400,16.35,16034990000.0,1583910000000000.0,152280611.0
100416,1603499,20200311.0,150006000000,1909424,30524037.52,15.66,15.66,15.65,15.64,15.63,15.62,2000,11700,2000,5100,10100,15.67,15.68,15.7,15.71,15.72,6900,9900,1000,1400,400,16.35,16034990000.0,1583910000000000.0,152274826.0
100417,1603499,20200311.0,150006000000,1909424,30524037.52,15.66,15.66,15.65,15.64,15.63,15.62,2000,11700,2000,5100,10100,15.67,15.68,15.7,15.71,15.72,6900,9900,1000,1400,400,16.35,16034990000.0,1583910000000000.0,152280611.0
103309,1603579,20200311.0,150006000000,808027,23182335.35,28.39,28.39,28.38,28.31,28.3,28.28,1000,800,200,383,600,28.4,28.42,28.5,28.55,28.57,1900,1900,1400,400,500,28.77,16035790000.0,1583910000000000.0,152274807.0
103310,1603579,20200311.0,150006000000,808027,23182335.35,28.39,28.39,28.38,28.31,28.3,28.28,1000,800,200,383,600,28.4,28.42,28.5,28.55,28.57,1900,1900,1400,400,500,28.77,16035790000.0,1583910000000000.0,152280597.0
103311,1603579,20200311.0,150006000000,808027,23182335.35,28.39,28.39,28.38,28.31,28.3,28.28,1000,800,200,383,600,28.4,28.42,28.5,28.55,28.57,1900,1900,1400,400,500,28.77,16035790000.0,1583910000000000.0,152274807.0
103312,1603579,20200311.0,150006000000,808027,23182335.35,28.39,28.39,28.38,28.31,28.3,28.28,1000,800,200,383,600,28.4,28.42,28.5,28.55,28.57,1900,1900,1400,400,500,28.77,16035790000.0,1583910000000000.0,152280597.0


In [16]:
# Show every row of p21 that shares its ('skey', 'time') pair with another row;
# keep=False flags all rows of each repeated pair.
p21[p21.duplicated(['skey', 'time'], keep=False)]

Unnamed: 0,clockAtArrival,sequenceNo,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open
6888692,1583910000000000.0,152274807.0,1603579,150006000000,804527,23082970.35,28.3,28.39,0.0,0.0,0.0,0.0,3500,1000,0,0,0,28.39,0.0,0.0,0.0,0.0,3500,0,0,0,0,28.77
6888693,1583910000000000.0,152280597.0,1603579,150006000000,804527,23082970.35,28.3,28.39,0.0,0.0,0.0,0.0,3500,1000,0,0,0,28.39,0.0,0.0,0.0,0.0,3500,0,0,0,0,28.77
6888694,1583910000000000.0,152274826.0,1603499,150006000000,1866024,29844393.52,15.63,15.66,0.0,0.0,0.0,0.0,43400,2000,0,0,0,15.66,0.0,0.0,0.0,0.0,43400,0,0,0,0,16.35
6888695,1583910000000000.0,152280611.0,1603499,150006000000,1866024,29844393.52,15.63,15.66,0.0,0.0,0.0,0.0,43400,2000,0,0,0,15.66,0.0,0.0,0.0,0.0,43400,0,0,0,0,16.35


In [1]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    """Build a ``DBObj`` for database ``db_name`` on ``host``.

    Parameters:
        host: MongoDB host (optionally ``host:port``) placed after the ``@``.
        db_name: database to select; also used as ``authSource`` unless the
            user is ``admin`` or ``root``, in which case ``admin`` is used.
        user: user name (raw, not percent-encoded).
        passwd: password (raw, not percent-encoded).

    Returns:
        A ``DBObj`` built from the resulting ``mongodb://`` URI.
    """
    from urllib.parse import quote_plus

    auth_db = 'admin' if user in ('admin', 'root') else db_name
    # Credentials must be percent-escaped inside a MongoDB URI; otherwise
    # characters such as '@', ':', '/' or '%' break URI parsing.
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (
        quote_plus(str(user)), quote_plus(str(passwd)), host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    """Store and load pandas DataFrames in MongoDB as compressed pickles.

    Each stored document carries the keys ``ver`` (serialisation version),
    ``data`` (compressed pickle of a DataFrame slice), ``date``, ``symbol``
    and ``start`` (row offset of the slice within the frame it was cut from).
    """

    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        """Open a client for ``uri`` and select the database ``db_name``.

        ``symbol_column`` names the DataFrame column used as the per-document
        ``symbol`` key when writing.
        """
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000  # maximum number of rows per stored document
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        """Split ``mongodb://user:password@example.com`` into its parts.

        Returns the list obtained by treating ``:`` and ``@`` as separators
        after the scheme and surrounding slashes are removed.
        """
        # mongodb://user:password@example.com
        bare = uri.strip().replace('mongodb://', '').strip('/')
        return bare.replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        """Drop the collection ``table_name``."""
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        """Rename collection ``old_table`` to ``new_table``."""
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        """Write ``df`` into ``table_name``, replacing existing (date, symbol) data.

        The frame is grouped by symbol (and by date when it holds more than
        one distinct date). For each group the existing documents with the
        same ``date``/``symbol`` are deleted before the group is inserted.
        An empty frame is a no-op.

        Raises:
            Exception: if ``df`` has no ``date`` column.
        """
        if len(df) == 0:
            return

        if self.date_column not in df.columns:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        multi_date = len(df[self.date_column].unique()) > 1
        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            date = str(df[self.date_column].iloc[0])
            # Group by the bare column name: a one-element list makes newer
            # pandas yield 1-tuples as keys. Cast to a plain int so the stored
            # key has the same type as in the multi-date branch.
            for symbol, sub_df in df.groupby(self.symbol_column):
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        """Insert ``df`` into ``collection`` in slices of ``chunk_size`` rows.

        Every slice becomes one document tagged with ``date``, ``symbol`` and
        the row offset ``start`` at which the slice begins.
        """
        version = 1
        for start in range(0, len(df), self.chunk_size):
            # iloc is always positional, whatever the frame's index looks like.
            df_seg = df.iloc[start:start + self.chunk_size]
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        """Build the MongoDB filter for a date range and/or symbol selection.

        Parameters:
            start_date, end_date: inclusive bounds; each may be an 8-character
                ``YYYYMMDD`` string, an int, or a ``datetime.date``/``datetime``.
            symbol: a single symbol or a list/tuple of symbols; each is
                converted with ``int``. Falsy values add no symbol filter.

        Returns:
            A dict suitable for ``find``/``delete_many``; empty when no
            criterion was given.

        Raises:
            Exception: on a date string that is not 8 characters long or a
                date of an unsupported type.
        """
        def parse_date(x):
            if isinstance(x, str):
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            if isinstance(x, datetime.date):  # also covers datetime.datetime
                return x.strftime("%Y%m%d")
            if isinstance(x, int):
                return parse_date(str(x))
            raise Exception("invalid `date` type: " + str(type(x)))

        query = {}

        date_range = {}
        if start_date is not None:
            date_range['$gte'] = parse_date(start_date)
        if end_date is not None:
            date_range['$lte'] = parse_date(end_date)
        if date_range:
            query['date'] = date_range

        if symbol:
            if isinstance(symbol, (list, tuple)):
                query['symbol'] = {'$in': [int(x) for x in symbol]}
            else:
                query['symbol'] = int(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        """Delete the documents of ``table_name`` matching the given criteria.

        Refuses (prints a message and returns ``None``) when no criterion is
        given, so the whole table cannot be wiped by accident.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        """Load matching documents and concatenate them into one DataFrame.

        Segments are ordered by (symbol, date, start) before concatenation and
        the result gets a fresh index.

        Returns:
            The concatenated DataFrame, or ``None`` when no criterion was
            given or nothing matched.
        """
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for doc in collection.find(query):
            doc['data'] = self.deser(doc['data'], doc['ver'])
            segs.append(doc)
        if not segs:
            return None
        segs.sort(key=lambda doc: (doc['symbol'], doc['date'], doc['start']))
        return pd.concat([doc['data'] for doc in segs], ignore_index=True)

    def list_tables(self):
        """Return the names of the collections in the database."""
        # collection_names() was removed in PyMongo 4. Look the method up on
        # the class: attribute access on a Database *instance* falls back to
        # returning a Collection, so hasattr(self.db, ...) is always True.
        if hasattr(type(self.db), 'list_collection_names'):
            return self.db.list_collection_names()
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        """Return the sorted distinct ``date`` values stored in ``table_name``.

        Missing bounds default to ``'00000000'`` and ``'99999999'``.
        """
        collection = self.db[table_name]
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        query = self.build_query(start_date, end_date, symbol)
        dates = {doc['date'] for doc in collection.find(query, {"date": 1, '_id': 0})}
        return sorted(dates)

    def ser(self, s, version):
        """Pickle ``s`` and compress it: version 1 uses gzip, version 2 uses lzma.

        Raises:
            Exception: for any other ``version``.
        """
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        """Inverse of :meth:`ser`: decompress ``s`` and unpickle the payload.

        Raises:
            Exception: for an unknown ``version``.
        """
        # NOTE(security): pickle.loads executes arbitrary code embedded in the
        # payload; only read from databases whose writers are trusted.
        if version == 1:
            return pickle.loads(gzip.decompress(s))
        elif version == 2:
            return pickle.loads(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    """Register a stand-in ``pandas.core.internals.managers`` module on old pandas.

    When the installed pandas is older than 0.24 and that module is not
    already in ``sys.modules``, a synthetic module exposing ``BlockManager``
    is installed under that name. Presumably this lets pickles that reference
    the newer module path be loaded on the older pandas -- confirm against
    the versions actually in use.
    """
    import re

    # Compare (major, minor) numerically; comparing the raw strings is
    # lexicographic and mis-orders versions such as '0.9' vs '0.24'.
    parsed = re.match(r'(\d+)\.(\d+)', pd.__version__)
    if parsed is None or tuple(int(part) for part in parsed.groups()) >= (0, 24):
        return

    import sys
    from types import ModuleType
    from pandas.core.internals import BlockManager
    pkg_name = 'pandas.core.internals.managers'
    if pkg_name not in sys.modules:
        m = ModuleType(pkg_name)
        m.BlockManager = BlockManager
        sys.modules[pkg_name] = m


patch_pandas_pickle()


import pandas as pd
import random
import numpy as np
import glob
import pickle
import os
import datetime
import time
# Show up to 200 columns when frames are displayed. The fully qualified key is
# used because the bare "max_columns" pattern matches more than one option on
# recent pandas (display.max_columns and styler.render.max_columns), which
# makes set_option raise OptionError.
pd.set_option("display.max_columns", 200)


# Date range passed to the md_index probe below; each distinct date it returns
# is processed by the main loop.
startDate = 20200430
endDate = 20200430
database_name = 'com_md_eq_cn'
# NOTE(review): credentials are hard-coded in the notebook (and repeated inside
# the loop). Consider loading them from the environment or a secrets store and
# rotating this password.
user = "zhenyuy"
password = "bnONBrzSMGoE"

db = DB("192.168.10.178", database_name, user, password)
# Probe md_index for a single symbol only to learn which dates exist in range.
test = db.read('md_index', start_date=startDate, end_date=endDate, symbol=[1000300])
date_list = test['date'].unique()
del test

# Accumulators for log rows that have no counterpart in the database
# (appended to by the trade and order sections of the loop).
new_trade_data = []
new_order_data = []

# Per-date driver: for every date found by the md_index probe, align database
# rows with the raw log files of the same date (directories matching
# 'logs_***_zs_92_01_day_data') so each database row receives a sequenceNo /
# clockAtArrival, then pickle the per-date result.
for i in date_list:
    print('--------------------------------------------------------------------------------------------')
    print(i)
    print('SH lv2')
    startDate = str(i)
    endDate = str(i)
    db = DB("192.168.10.178", database_name, user, password)
    # One snapshot read serves both exchanges: skey > 2000000 goes to SZ,
    # skey < 2000000 goes to SH (a row with skey == 2000000 lands in neither).
    SH = db.read('md_snapshot_l2', start_date=startDate, end_date=endDate)
    SZ = SH[SH['skey'] > 2000000]
    SH = SH[SH['skey'] < 2000000]
    # 'num' combines skey and the row's 'ordering' into one sortable key.
    # NOTE(review): SH and SZ are filtered slices here, so these assignments
    # may trigger pandas' SettingWithCopyWarning -- confirm.
    SH['num'] = SH['skey'] * 10000 + SH['ordering']
    SZ['num'] = SZ['skey'] * 10000 + SZ['ordering']
    
    SH = SH[['date', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open", 'num']]
    SZ = SZ[['date', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open", 'num']]

    startDate = str(i)
    endDate = str(i)

    # Locate the log directories and keep those whose date token (second
    # '_'-separated field of the directory name) falls within the range.
    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]


    # NOTE(review): SH1 and index1 are overwritten on every pass, so only the
    # last matching directory is used, and both are undefined if none matches.
    for n in range(len(dataPathLs)):
        path1 = np.array(glob.glob(dataPathLs[n] + '/mdLog_SH_***'))
        SH1 = pd.read_csv(path1[0])
        # Rows for StockID 16/300/852/905 are set aside as index1; they are
        # consumed later by the 'SH index data' section.
        index1 = SH1[SH1['StockID'].isin([16, 300, 852, 905])]
        SH1 = SH1[SH1['source'] == 4]

        SH1['skey'] = SH1['StockID'] + 1000000
        SH1 = SH1.rename(columns={"openPrice":"open"})
        # Once volume has traded, use the per-skey maximum of 'open'.
        SH1["open"] = np.where(SH1["cum_volume"] > 0, SH1.groupby("skey")["open"].transform("max"), SH1["open"])
        # Strip ':' and '.' from the time string, then scale by 1000 so it is
        # comparable with the database 'time' column used in the merge below.
        SH1["time"] = SH1["time"].apply(lambda x: int((x.replace(':', "")).replace(".", "")) * 1000)

    SH1 = SH1[['clockAtArrival', 'sequenceNo', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]]
    # Round float columns before they are used as exact-match merge keys.
    for cols in ['cum_amount', "close", 'open']:
        SH1[cols] = SH1[cols].round(2)
    cols = ['skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]
    SH1 = SH1[SH1['skey'].isin(SH['skey'].unique())]
    # Outer merge on every snapshot field: 'date' is null for log-only rows,
    # 'sequenceNo' is null for database-only rows.
    re = pd.merge(SH, SH1, on=cols, how='outer')

    # p21: rows present only in the log. p22: rows present only in the database.
    p21 = re[(re['date'].isnull())][['clockAtArrival', 'sequenceNo', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]]
    p22 = re[(re['sequenceNo'].isnull())][["skey", "date", "time", 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open", 'num']]

    # Matched rows, split by whether their database key 'num' occurs more than
    # once (p11) or exactly once (p12).
    p11 = re[(~re['sequenceNo'].isnull()) & (~re['date'].isnull())][re[(~re['sequenceNo'].isnull()) 
                                                        & (~re['date'].isnull())]['num'].duplicated(keep=False)]
    p12 = re[(~re['sequenceNo'].isnull()) & (~re['date'].isnull())].drop_duplicates(['num'], keep=False)
    p11 = p11.sort_values(by=['num', 'sequenceNo'])
    display(p11)
    # Keep a duplicated row only when its position within its 'num' group
    # equals its position within its 'sequenceNo' group.
    p11["order1"] = p11.groupby(["num"]).cumcount()
    p11["order2"] = p11.groupby(["sequenceNo"]).cumcount()
    p11 = p11[p11['order1'] == p11['order2']]

    # Duplicated 'num' values with no surviving row in p11 are kept once, with
    # their log fields blanked out.
    p11_1 = re[(~re['sequenceNo'].isnull()) & (~re['date'].isnull())][re[(~re['sequenceNo'].isnull()) 
                                                        & (~re['date'].isnull())]['num'].duplicated(keep=False)].drop_duplicates('num')
    p11_1 = pd.merge(p11_1, p11[['num', 'order1']], on='num', how='left')
    p11_1 = p11_1[p11_1['order1'].isnull()]
    p11_1['sequenceNo'] = np.nan
    p11_1['clockAtArrival'] = np.nan

    p11.drop(['order1', 'order2'],axis=1,inplace=True)
    p11_1.drop(['order1'],axis=1,inplace=True)
    p11 = pd.concat([p11, p11_1])
    
    p1 = pd.concat([p11, p12])
    # Second chance for database-only rows: match log-only rows on (skey, time).
    p2 = pd.merge(p22, p21[['skey', 'time', 'clockAtArrival', 'sequenceNo']], on=['skey', 'time'], how='left')
    re1 = pd.concat([p1, p2])
    re1 = re1.sort_values(by='num')
    # seq1 = nearest known sequenceNo. The ffill is per skey, but the trailing
    # .bfill() runs on the whole column, not per skey.
    re1['seq1'] = re1.groupby('skey')['sequenceNo'].ffill().bfill()
    # skeys that never appear in the log get no anchor at all.
    sl = list(set(SH['skey'].unique()) - set(SH1['skey'].unique()))
    re1.loc[re1['skey'].isin(sl), 'seq1'] = np.nan
    # count1 is the row's position within its seq1 group; count2 equals the
    # group size (number of distinct positions).
    re1['count1'] = re1.groupby(['seq1']).cumcount()
    re1['count2'] = re1.groupby(['seq1'])['count1'].transform('nunique')
    re1['min_seq'] = re1.groupby('skey')['sequenceNo'].transform('min')
    # Offset from the anchor: the position in the group, except in the group
    # anchored at the skey's smallest sequenceNo, where offsets end at 0.
    re1['count'] = np.where(re1['seq1'] != re1['min_seq'], re1['count1'], re1['count1']+1-re1['count2'])
    re1.drop(["min_seq"],axis=1,inplace=True)
    re1.drop(["count1"],axis=1,inplace=True)
    re1.drop(["count2"],axis=1,inplace=True)
    # dup: position among rows sharing a sequenceNo. dup1: distinct 'num'
    # values sharing that sequenceNo. nan: 1 for rows that lack a sequenceNo or
    # are a repeated use of one.
    re1['dup'] = np.where(~re1["sequenceNo"].isnull(), re1.groupby(['sequenceNo']).cumcount(), 0)
    re1['dup1'] = np.where(~re1["sequenceNo"].isnull(), re1.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
    re1['nan'] = np.where((re1['sequenceNo'].isnull()) | (re1['dup'] != 0), 1, 0)
    # When several 'num' values share one sequenceNo, only the first keeps it.
    re1.loc[(re1['dup1'] > 1) & (re1['dup'] > 0), 'sequenceNo'] = np.nan
    # The skeys without an anchor must be exactly the skeys missing from the log.
    assert((len(set(sl) - set(re1[re1['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re1[re1['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
    assert(re1.shape[0] == SH.shape[0])

    # Share of database rows left without a directly matched sequenceNo.
    display('%.2f%%' % (re1[re1['sequenceNo'].isnull()].shape[0]/re1.shape[0] * 100))
    
    
    
    
    
    
    
    # ---- SZ lv2: match database snapshot rows (SZ) against the SZ log ----
    print('-------------------------------------------------------------------------------------------')
    print('SZ lv2')
    startDate = str(i)
    endDate = str(i)

    # Same directory discovery as the SH section: keep log directories whose
    # date token lies within [startDate, endDate].
    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]


    # NOTE(review): SZ1 is overwritten on every pass; only the last directory
    # is used, and SZ1 is undefined if no directory matches.
    for n in range(len(dataPathLs)):
        path1 = np.array(glob.glob(dataPathLs[n] + '/mdLog_SZ_***'))
        SZ1 = pd.read_csv(path1[0])
        SZ1 = SZ1[SZ1['source'] == 4]

        SZ1['skey'] = SZ1['StockID'] + 2000000
        SZ1 = SZ1.rename(columns={"openPrice":"open"})
        # Once volume has traded, use the per-skey maximum of 'open'.
        SZ1["open"] = np.where(SZ1["cum_volume"] > 0, SZ1.groupby("skey")["open"].transform("max"), SZ1["open"])
        # Strip ':' and '.' from the time string, then scale by 1000.
        SZ1["time"] = SZ1["time"].apply(lambda x: int((x.replace(':', "")).replace(".", "")) * 1000)

    SZ1 = SZ1[['clockAtArrival', 'sequenceNo', 'skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]]
    # Unlike the SH section, only cum_amount is rounded here.
    for cols in ['cum_amount']:
        SZ1[cols] = SZ1[cols].round(2)
    cols = ['skey', 'time', 'cum_volume', 'cum_amount', "close", "bid1p", "bid2p", "bid3p", "bid4p", "bid5p", "bid1q", "bid2q",
               "bid3q", "bid4q", "bid5q", "ask1p", "ask2p", "ask3p", "ask4p", "ask5p", "ask1q", "ask2q", "ask3q",
               "ask4q", "ask5q", "open"]
    SZ1 = SZ1[SZ1['skey'].isin(SZ['skey'].unique())]
    # Outer merge on every snapshot field: 'date' is null for log-only rows,
    # 'sequenceNo' is null for database-only rows.
    re = pd.merge(SZ, SZ1, on=cols, how='outer')

    # Diagnostics: merged rows, rows with a log match, rows with a database
    # match, and the two input sizes.
    display(re.shape[0])
    display(re[~re['sequenceNo'].isnull()].shape[0])
    display(re[~re['date'].isnull()].shape[0])
    display(SZ.shape[0])
    display(SZ1.shape[0])

    # If the log holds rows the database lacks, report the coverage and fall
    # back to a left merge so only database rows remain.
    try:
        assert(re.shape[0] == re[~re['date'].isnull()].shape[0])
        print('SZ lv2 is complete')
    except:
        display('%.2f%%' % (re[~re['date'].isnull()].shape[0]/re.shape[0] * 100))
        print('92 have unique values not shared by database')
        re = pd.merge(SZ, SZ1, on=cols, how='left')

    # Both branches below run the same anchoring logic; the else branch first
    # resolves database keys ('num') that matched more than one log row.
    if re[re.duplicated('num', keep=False)].shape[0] == 0:
        re2 = re.sort_values(by='num')
        # seq1 = nearest known sequenceNo. The ffill is per skey, but the
        # trailing .bfill() runs on the whole column, not per skey.
        re2['seq1'] = re2.groupby('skey')['sequenceNo'].ffill().bfill()
        # skeys that never appear in the log get no anchor.
        sl = list(set(SZ['skey'].unique()) - set(SZ1['skey'].unique()))
        re2.loc[re2['skey'].isin(sl), 'seq1'] = np.nan
        # count1: position within the seq1 group; count2: size of that group.
        re2['count1'] = re2.groupby(['seq1']).cumcount()
        re2['count2'] = re2.groupby(['seq1'])['count1'].transform('nunique')
        re2['min_seq'] = re2.groupby('skey')['sequenceNo'].transform('min')
        # Offset from the anchor; in the group anchored at the skey's smallest
        # sequenceNo the offsets end at 0 instead of starting there.
        re2['count'] = np.where(re2['seq1'] != re2['min_seq'], re2['count1'], re2['count1']+1-re2['count2'])
        re2.drop(["min_seq"],axis=1,inplace=True)
        re2.drop(["count1"],axis=1,inplace=True)
        re2.drop(["count2"],axis=1,inplace=True)
        # dup / dup1 / nan: see how sequenceNo values are shared between rows.
        re2['dup'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo']).cumcount(), 0)
        re2['dup1'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
        re2['nan'] = np.where((re2['sequenceNo'].isnull()) | (re2['dup'] != 0), 1, 0)
        # When several 'num' values share one sequenceNo, only the first keeps it.
        re2.loc[(re2['dup1'] > 1) & (re2['dup'] > 0), 'sequenceNo'] = np.nan
        assert((len(set(sl) - set(re2[re2['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re2[re2['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
        assert(re2.shape[0] == SZ.shape[0])

        display('%.2f%%' % (re2[re2['sequenceNo'].isnull()].shape[0]/re2.shape[0] * 100))


    else:
        # Rows whose 'num' is duplicated are paired positionally: a row is kept
        # only if its rank within 'num' equals its rank within 'sequenceNo'.
        p1 = re[re['num'].duplicated(keep=False)]
        p2 = re.drop_duplicates(['num'], keep=False)
        p1["order1"] = p1.groupby(["num"]).cumcount()
        p1["order2"] = p1.groupby(["sequenceNo"]).cumcount()
        p1 = p1[p1['order1'] == p1['order2']]
        p1.drop(['order1', 'order2'],axis=1,inplace=True)
        re = pd.concat([p1, p2])
        re2 = re.sort_values(by='num')
        # From here on the steps mirror the if-branch above.
        re2['seq1'] = re2.groupby('skey')['sequenceNo'].ffill().bfill()
        sl = list(set(SZ['skey'].unique()) - set(SZ1['skey'].unique()))
        re2.loc[re2['skey'].isin(sl), 'seq1'] = np.nan
        re2['count1'] = re2.groupby(['seq1']).cumcount()
        re2['count2'] = re2.groupby(['seq1'])['count1'].transform('nunique')
        re2['min_seq'] = re2.groupby('skey')['sequenceNo'].transform('min')
        re2['count'] = np.where(re2['seq1'] != re2['min_seq'], re2['count1'], re2['count1']+1-re2['count2'])
        re2.drop(["min_seq"],axis=1,inplace=True)
        re2.drop(["count1"],axis=1,inplace=True)
        re2.drop(["count2"],axis=1,inplace=True)
        re2['dup'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo']).cumcount(), 0)
        re2['dup1'] = np.where(~re2["sequenceNo"].isnull(), re2.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
        re2['nan'] = np.where((re2['sequenceNo'].isnull()) | (re2['dup'] != 0), 1, 0)
        re2.loc[(re2['dup1'] > 1) & (re2['dup'] > 0), 'sequenceNo'] = np.nan
        assert((len(set(sl) - set(re2[re2['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re2[re2['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
        assert(re2.shape[0] == SZ.shape[0])

        display('%.2f%%' % (re2[re2['sequenceNo'].isnull()].shape[0]/re2.shape[0] * 100))
    
    # ---- Trades: match database md_trade rows against the trade log ----
    print('----------------------------------------------------------------------------------------------')
    print('SH & SZ trade')
    
    startDate = str(i)
    endDate = str(i)
    # NOTE(review): credentials are hard-coded and re-assigned on every
    # iteration with the same values as at the top of the script.
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db = DB("192.168.10.178", database_name, user, password)
    # Only the identifying columns are needed from the database side.
    trade = db.read('md_trade', start_date=startDate, end_date=endDate)[['skey', 'date', 'ApplSeqNum']]

    startDate = str(i)
    endDate = str(i)

    # Keep log directories whose date token lies within [startDate, endDate].
    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
    # NOTE(review): trade1 is overwritten on every pass; only the last
    # directory is used, and trade1 is undefined if no directory matches.
    for n in range(len(dataPathLs)):
        path1 = np.array(glob.glob(dataPathLs[n] + '/mdTradeLog***'))
        trade1 = pd.read_csv(path1[0])
    # exchId == 2 maps to the 2000000 skey range, everything else to 1000000.
    trade1['skey'] = np.where(trade1['exchId'] == 2, trade1['SecurityID'] + 2000000, trade1['SecurityID'] + 1000000)
    trade1 = trade1[trade1['skey'].isin(trade['skey'].unique())]
    re = pd.merge(trade, trade1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='outer')
    # k records whether the log contained trades absent from the database
    # (k == 1); those rows are collected in k1 and appended to re3 further down.
    try:
        assert(re.shape[0] == trade.shape[0])
        display('trade data is complete')
        k = 0
    except:
        display('%.2f%%' % (trade.shape[0]/re.shape[0] * 100))
        k = 1
        display('trade data incomplete')
        # Full log records for the (skey, ApplSeqNum) pairs missing from the database.
        k1 = pd.merge(trade1, re[re['date'].isnull()][['skey', 'ApplSeqNum']], on=['skey', 'ApplSeqNum'], how='right')
        display(k1.shape[0])
        display(k1['ExecType'].unique())
        display(k1['TransactTime'].unique())
        k1['date'] = trade['date'].iloc[0]
        new_trade_data += [k1[['clockAtArrival', 'sequenceNo', 'TransactTime', 'ApplSeqNum', 'date', 'skey', 'ExecType', 'TradeBSFlag', 
       'TradePrice', 'TradeQty', 'BidApplSeqNum', 'OfferApplSeqNum']]]
        # Redo the merge keeping database rows only.
        re = pd.merge(trade, trade1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='left')
        assert(re.shape[0] == trade.shape[0])

    re3 = re.sort_values(by=['skey', 'ApplSeqNum'])
    # seq1 = nearest known sequenceNo. The ffill is per skey, but the trailing
    # .bfill() runs on the whole column, not per skey.
    re3['seq1'] = re3.groupby('skey')['sequenceNo'].ffill().bfill()
    # skeys that never appear in the log get no anchor.
    sl = list(set(trade['skey'].unique()) - set(trade1['skey'].unique()))
    re3.loc[re3['skey'].isin(sl), 'seq1'] = np.nan
    re3['count1'] = re3.groupby(['seq1']).cumcount()
    re3['count2'] = re3.groupby(['seq1'])['count1'].transform('nunique')
    re3['min_seq'] = re3.groupby('skey')['sequenceNo'].transform('min')
    # The offset is computed differently from the snapshot sections: cc is the
    # largest in-group position of a row that actually carries the anchor
    # sequenceNo, and 'count' is the position relative to that row.
    re3['cc'] = np.where(re3['sequenceNo'] == re3['seq1'], re3['count1'], 0)
    re3['cc'] = re3.groupby(['seq1'])['cc'].transform('max')
    re3['count'] = re3['count1']-re3['cc']
    re3.drop(["min_seq"],axis=1,inplace=True)
    re3.drop(["count1"],axis=1,inplace=True)
    re3.drop(["count2"],axis=1,inplace=True)
    re3.drop(["cc"],axis=1,inplace=True)
    # dup: position among rows sharing a sequenceNo. dup1: distinct ApplSeqNum
    # values sharing it. nan: 1 for rows lacking or re-using a sequenceNo.
    re3['dup'] = np.where(~re3["sequenceNo"].isnull(), re3.groupby(['sequenceNo']).cumcount(), 0)
    re3['dup1'] = np.where(~re3["sequenceNo"].isnull(), re3.groupby(['sequenceNo'])['ApplSeqNum'].transform('nunique'), 0)
    re3['nan'] = np.where((re3['sequenceNo'].isnull()) | (re3['dup'] != 0), 1, 0)
    re3.loc[(re3['dup1'] > 1) & (re3['dup'] > 0), 'sequenceNo'] = np.nan
    # The skeys without an anchor must be exactly the skeys missing from the log.
    assert((len(set(sl) - set(re3[re3['seq1'].isnull()]['skey'].unique())) == 0) & 
           (len(set(re3[re3['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
    assert(re3.shape[0] == trade.shape[0])
    # Log-only trades are appended as already-resolved rows (own sequenceNo as
    # anchor, zero offset).
    if k == 1:
        k1['seq1'] = k1['sequenceNo']
        k1['count'] = 0
        k1['nan'] = 0
        k1['dup1'] = 1
        re3 = pd.concat([re3, k1[['clockAtArrival', 'date', 'sequenceNo', 'skey', 'ApplSeqNum', 'seq1', 
                                  'count', 'nan', 'dup1']]])

    # Share of rows left without a directly matched sequenceNo.
    display('%.2f%%' % (re3[re3['sequenceNo'].isnull()].shape[0]/re3.shape[0] * 100))

    
    # ---- Orders: match database md_order rows against the order log ----
    print('--------------------------------------------------------------------------------------------------')
    print('SZ order data')

    startDate = str(i)
    endDate = str(i)
    # NOTE(review): credentials are hard-coded and re-assigned on every
    # iteration with the same values as at the top of the script.
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db = DB("192.168.10.178", database_name, user, password)
    # Only the identifying columns are needed from the database side.
    order = db.read('md_order', start_date=startDate, end_date=endDate)[['skey', 'date', 'ApplSeqNum']]

    startDate = str(i)
    endDate = str(i)

    # Keep log directories whose date token lies within [startDate, endDate].
    readPath = '/mnt/e/result/logs_***_zs_92_01_day_data'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([os.path.basename(i).split('_')[1] for i in dataPathLs])
    dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
    # NOTE(review): order1 is overwritten on every pass; only the last
    # directory is used, and order1 is undefined if no directory matches.
    for n in range(len(dataPathLs)):
        path1 = np.array(glob.glob(dataPathLs[n] + '/mdOrderLog***'))
        order1 = pd.read_csv(path1[0])
    # All order-log rows are mapped into the 2000000 skey range.
    order1['skey'] = order1['SecurityID'] + 2000000
    order1 = order1[order1['skey'].isin(order['skey'].unique())]
    re = pd.merge(order, order1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='outer')
    # k records whether the log contained orders absent from the database
    # (k == 1); those rows are collected in k2 and appended to re4 further down.
    try:
        assert(re.shape[0] == order.shape[0])
        display('order data is complete')
        k = 0
    except:
        display('%.2f%%' % (order.shape[0]/re.shape[0] * 100))
        k = 1
        display('order data incomplete')
        # Full log records for the (skey, ApplSeqNum) pairs missing from the database.
        k2 = pd.merge(order1, re[re['date'].isnull()][['skey', 'ApplSeqNum']], on=['skey', 'ApplSeqNum'], how='right')
        display(k2.shape[0])
        display(k2['SecurityID'].unique())
        display(k2['TransactTime'].unique())
        k2['date'] = order['date'].iloc[0]
        new_order_data += [k2[['clockAtArrival', 'sequenceNo', 'TransactTime', 'ApplSeqNum', 'date', 'skey', 'Side', 
       'OrderType', 'Price', 'OrderQty']]]
        # Redo the merge keeping database rows only.
        re = pd.merge(order, order1[['skey', 'ApplSeqNum', 'sequenceNo', 'clockAtArrival']], on=['skey', 'ApplSeqNum'],
                 how='left')
        assert(re.shape[0] == order.shape[0])

    re4 = re.sort_values(by=['skey', 'ApplSeqNum'])
    # seq1 = nearest known sequenceNo. The ffill is per skey, but the trailing
    # .bfill() runs on the whole column, not per skey.
    re4['seq1'] = re4.groupby('skey')['sequenceNo'].ffill().bfill()
    # skeys that never appear in the log get no anchor.
    sl = list(set(order['skey'].unique()) - set(order1['skey'].unique()))
    re4.loc[re4['skey'].isin(sl), 'seq1'] = np.nan
    # count1: position within the seq1 group; count2: size of that group.
    re4['count1'] = re4.groupby(['seq1']).cumcount()
    re4['count2'] = re4.groupby(['seq1'])['count1'].transform('nunique')
    re4['min_seq'] = re4.groupby('skey')['sequenceNo'].transform('min')
    # Offset from the anchor; in the group anchored at the skey's smallest
    # sequenceNo the offsets end at 0 instead of starting there.
    re4['count'] = np.where(re4['seq1'] != re4['min_seq'], re4['count1'], re4['count1']+1-re4['count2'])
    re4.drop(["min_seq"],axis=1,inplace=True)
    re4.drop(["count1"],axis=1,inplace=True)
    re4.drop(["count2"],axis=1,inplace=True)
    # dup: position among rows sharing a sequenceNo. dup1: distinct ApplSeqNum
    # values sharing it. nan: 1 for rows lacking or re-using a sequenceNo.
    re4['dup'] = np.where(~re4["sequenceNo"].isnull(), re4.groupby(['sequenceNo']).cumcount(), 0)
    re4['dup1'] = np.where(~re4["sequenceNo"].isnull(), re4.groupby(['sequenceNo'])['ApplSeqNum'].transform('nunique'), 0)
    re4['nan'] = np.where((re4['sequenceNo'].isnull()) | (re4['dup'] != 0), 1, 0)
    re4.loc[(re4['dup1'] > 1) & (re4['dup'] > 0), 'sequenceNo'] = np.nan
    # The skeys without an anchor must be exactly the skeys missing from the log.
    assert((len(set(sl) - set(re4[re4['seq1'].isnull()]['skey'].unique())) == 0) & 
           (len(set(re4[re4['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
    assert(re4.shape[0] == order.shape[0])
    if k == 1:
        # NOTE(review): the trade section anchors its log-only rows with
        # seq1 = sequenceNo, whereas this uses ApplSeqNum. seq1 is later used
        # as a sequenceNo-valued sort/group key, so this looks unintended --
        # confirm before relying on the ordering of these rows.
        k2['seq1'] = k2['ApplSeqNum']
        k2['count'] = 0
        k2['nan'] = 0
        k2['dup1'] = 1
        re4 = pd.concat([re4, k2[['clockAtArrival', 'date', 'sequenceNo', 'skey', 'ApplSeqNum', 'seq1', 
                                  'count', 'nan', "dup1"]]])


    # Share of rows left without a directly matched sequenceNo.
    display('%.2f%%' % (re4[re4['sequenceNo'].isnull()].shape[0]/re4.shape[0] * 100))
    
    # ---- Index data: match database md_index rows against index rows of the SH log ----
    print('-----------------------------------------------------------------------------------------------------')
    print('SH index data')
    
    startDate = str(i)
    endDate = str(i)
    # NOTE(review): credentials are hard-coded and re-assigned on every
    # iteration with the same values as at the top of the script.
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db = DB("192.168.10.178", database_name, user, password)
    index = db.read('md_index', start_date=startDate, end_date=endDate)

    # index1 was extracted from the SH log earlier in this iteration (rows with
    # StockID in [16, 300, 852, 905]); it is normalised here the same way the
    # SH log rows were.
    index1['skey'] = index1['StockID'] + 1000000
    index1 = index1.rename(columns={"openPrice":"open"})
    index1["open"] = np.where(index1["cum_volume"] > 0, index1.groupby("skey")["open"].transform("max"), index1["open"])
    # On both sides, 'close' is forced to 0 while no volume has traded so the
    # two sources agree on those rows.
    index1['close'] = np.where(index1['cum_volume'] == 0, 0, index1['close'])
    index1["time"] = index1["time"].apply(lambda x: int((x.replace(':', "")).replace(".", "")) * 1000)
    index['close'] = np.where(index['cum_volume'] == 0, 0, index['close'])
    index['num'] = index['skey'] * 10000 + index['ordering']
    index = index[['skey', 'date', 'cum_volume', 'cum_amount', "close", "open", 'num']]
    index1 = index1[['clockAtArrival', 'sequenceNo', 'skey', 'cum_volume', 'cum_amount', "close", "open", "time"]]
    # cum_amount is rounded to one decimal here (two in the snapshot sections).
    for cols in ['cum_amount']:
        index1[cols] = index1[cols].round(1)
    # 'time' is not part of the merge key for index rows.
    cols = ['skey', 'cum_volume', 'cum_amount', "close", "open"]
    index1 = index1[index1['skey'].isin(index['skey'].unique())]
    re = pd.merge(index, index1, on=cols, how='outer')

    # Diagnostics: merged rows, rows with a log match, rows with a database
    # match, and the two input sizes.
    display(re.shape[0])
    display(re[~re['sequenceNo'].isnull()].shape[0])
    display(re[~re['date'].isnull()].shape[0])
    display(index.shape[0])
    display(index1.shape[0])
    
    # If the log holds rows the database lacks, report the coverage and fall
    # back to a left merge so only database rows remain.
    try:
        assert(re.shape[0] == re[~re['date'].isnull()].shape[0])
        print('index data is complete')
    except:
        display('%.2f%%' % (re[~re['date'].isnull()].shape[0]/re.shape[0] * 100))
        re = pd.merge(index, index1, on=cols, how='left')
        print('92 have unique values not shared by database')

    # Resolve database keys ('num') that matched several log rows: a row is
    # kept only if its rank within 'num' equals its rank within 'sequenceNo'.
    p11 = re[re.duplicated('num', keep=False)]
    p2 = re.drop_duplicates('num', keep=False)
    p11["order1"] = p11.groupby(["num"]).cumcount()
    p11["order2"] = p11.groupby(["sequenceNo"]).cumcount()
    p11 = p11[p11['order1'] == p11['order2']]

    # Duplicated 'num' values with no surviving row in p11 are kept once, with
    # their log fields blanked out.
    p12 = re[re.duplicated('num', keep=False)].drop_duplicates('num')
    p12 = pd.merge(p12, p11[['num', 'order1']], on='num', how='left')
    p12 = p12[p12['order1'].isnull()]
    p12['sequenceNo'] = np.nan
    p12['clockAtArrival'] = np.nan

    p11.drop(['order1', 'order2'],axis=1,inplace=True)
    p12.drop(['order1'],axis=1,inplace=True)
    p1 = pd.concat([p11, p12])

    re = pd.concat([p1, p2])
    # After pairing, every database key must appear exactly once.
    assert(re[re.duplicated('num', keep=False)].shape[0] == 0)

    # The two branches differ only in how seq1 is derived: filled from
    # neighbours when some rows lack a sequenceNo, copied directly otherwise.
    if re[re['sequenceNo'].isnull()].shape[0] != 0:
        re5 = re.sort_values(by='num')
        # The ffill is per skey, but the trailing .bfill() runs on the whole
        # column, not per skey.
        re5['seq1'] = re5.groupby('skey')['sequenceNo'].ffill().bfill()
        # skeys that never appear in the log get no anchor.
        sl = list(set(index['skey'].unique()) - set(index1['skey'].unique()))
        re5.loc[re5['skey'].isin(sl), 'seq1'] = np.nan
        # count1: position within the seq1 group; count2: size of that group.
        re5['count1'] = re5.groupby(['seq1']).cumcount()
        re5['count2'] = re5.groupby(['seq1'])['count1'].transform('nunique')
        re5['min_seq'] = re5.groupby('skey')['sequenceNo'].transform('min')
        # Offset from the anchor; in the group anchored at the skey's smallest
        # sequenceNo the offsets end at 0 instead of starting there.
        re5['count'] = np.where(re5['seq1'] != re5['min_seq'], re5['count1'], re5['count1']+1-re5['count2'])
        re5.drop(["min_seq"],axis=1,inplace=True)
        re5.drop(["count1"],axis=1,inplace=True)
        re5.drop(["count2"],axis=1,inplace=True)
        re5['dup'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo']).cumcount(), 0)
        re5['dup1'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
        re5['nan'] = np.where((re5['sequenceNo'].isnull()) | (re5['dup'] != 0), 1, 0)
        re5.loc[(re5['dup1'] > 1) & (re5['dup'] > 0), 'sequenceNo'] = np.nan
        assert((len(set(sl) - set(re5[re5['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re5[re5['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
        assert(re5.shape[0] == index.shape[0])

        display('%.0f%%' % (re5[re5['sequenceNo'].isnull()].shape[0]/re5.shape[0] * 100))
    else:
        re5 = re.sort_values(by='num')
        # Every row already has a sequenceNo, so it serves as its own anchor.
        re5['seq1'] = re5['sequenceNo']
        sl = list(set(index['skey'].unique()) - set(index1['skey'].unique()))
        re5.loc[re5['skey'].isin(sl), 'seq1'] = np.nan
        re5['count1'] = re5.groupby(['seq1']).cumcount()
        re5['count2'] = re5.groupby(['seq1'])['count1'].transform('nunique')
        re5['min_seq'] = re5.groupby('skey')['sequenceNo'].transform('min')
        re5['count'] = np.where(re5['seq1'] != re5['min_seq'], re5['count1'], re5['count1']+1-re5['count2'])
        re5.drop(["min_seq"],axis=1,inplace=True)
        re5.drop(["count1"],axis=1,inplace=True)
        re5.drop(["count2"],axis=1,inplace=True)
        re5['dup'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo']).cumcount(), 0)
        re5['dup1'] = np.where(~re5["sequenceNo"].isnull(), re5.groupby(['sequenceNo'])['num'].transform('nunique'), 0)
        re5['nan'] = np.where((re5['sequenceNo'].isnull()) | (re5['dup'] != 0), 1, 0)
        re5.loc[(re5['dup1'] > 1) & (re5['dup'] > 0), 'sequenceNo'] = np.nan
        assert((len(set(sl) - set(re5[re5['seq1'].isnull()]['skey'].unique())) == 0) & 
               (len(set(re5[re5['seq1'].isnull()]['skey'].unique()) - set(sl)) == 0))
        assert(re5.shape[0] == index.shape[0])

        display('%.0f%%' % (re5[re5['sequenceNo'].isnull()].shape[0]/re5.shape[0] * 100))
    
    
    print('----------------------------------------------------------------------------------------------------')
    print('final concat')
    # The raw log frames are expected to use disjoint sequenceNo values. For
    # every pair that overlaps, show the offending rows from both frames
    # (left frame first). An explicit condition replaces the former
    # `try: assert ... except:` blocks, which silently skipped the check when
    # Python runs with -O and repeated the same code ten times.
    overlap_checks = [
        (SZ1, SH1),
        (SZ1, trade1),
        (SZ1, order1),
        (SZ1, index1),
        (SH1, index1),
        (SH1, trade1),
        (SH1, order1),
        (trade1, order1),
        (trade1, index1),
        (index1, order1),
    ]
    for left, right in overlap_checks:
        shared = set(left['sequenceNo']) & set(right['sequenceNo'])
        if shared:
            shared = list(shared)
            display(left[left['sequenceNo'].isin(shared)])
            display(right[right['sequenceNo'].isin(shared)])
    # Drop the temporary references so that deleting the frames afterwards
    # really releases their memory.
    del overlap_checks, left, right, shared

    del SH
    del SH1
    del SZ
    del SZ1
    del trade
    del trade1
    del order
    del order1
    del index
    del index1
    re1['tag'] = 'SH'
    re2['tag'] = 'SZ'
    re3['tag'] = 'trade'
    re4['tag'] = 'order'
    re5['tag'] = 'index'
    
    re1 = re1[['skey', 'date', 'num', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    re2 = re2[['skey', 'date', 'num', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    re3 = re3[['skey', 'date', 'ApplSeqNum', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    re4 = re4[['skey', 'date', 'ApplSeqNum', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    re5 = re5[['skey', 'date', 'num', 'sequenceNo', 'seq1', 'clockAtArrival', 'nan', 'count', 'tag', 'dup1']]
    re1 = re1.sort_values(by='num').reset_index(drop=True)
    re1['seq2'] = re1.index
    re2 = re2.sort_values(by='num').reset_index(drop=True)
    re2['seq2'] = re2.index
    re3 = re3.sort_values(by=['skey', 'ApplSeqNum']).reset_index(drop=True)
    re3['seq2'] = re3.index
    re4 = re4.sort_values(by=['skey', 'ApplSeqNum']).reset_index(drop=True)
    re4['seq2'] = re4.index
    re5 = re5.sort_values(by='num').reset_index(drop=True)
    re5['seq2'] = re5.index

    fr1 = []
    fr2 = []
    fr1 += [re1[re1['seq1'].isnull()]]
    fr2 += [re1[~re1['seq1'].isnull()]]
    del re1
    display('1. here~')
    fr1 += [re2[re2['seq1'].isnull()]]
    fr2 += [re2[~re2['seq1'].isnull()]]
    del re2
    display('2. here~')
    fr1 += [re3[re3['seq1'].isnull()]]
    fr2 += [re3[~re3['seq1'].isnull()]]
    del re3
    display('3. here~')
    fr1 += [re4[re4['seq1'].isnull()]]
    fr2 += [re4[~re4['seq1'].isnull()]]
    del re4
    display('4. here~')
    fr1 += [re5[re5['seq1'].isnull()]]
    fr2 += [re5[~re5['seq1'].isnull()]]
    del re5
    display('5. here~')
    fr1 = pd.concat(fr1).reset_index(drop=True)
    fr2 = pd.concat(fr2).reset_index(drop=True)
    
    startTm = datetime.datetime.now()
    fr2 = fr2.sort_values(by=['seq1', 'seq2'])
    print(datetime.datetime.now() - startTm)

#     fr2.loc[(fr2['nan']==0) & (fr2['dup1']==1), 'count'] = 0
    fr2['sum_nan'] = fr2['nan'].cumsum()
    fr2['sequenceNo'] = fr2['sequenceNo'] + fr2['sum_nan']
    startTm = datetime.datetime.now()
    fr2['sequenceNo'] = fr2.groupby('seq1')['sequenceNo'].ffill().bfill()
    print(datetime.datetime.now() - startTm)
    fr2['sequenceNo'] = fr2['sequenceNo'] + fr2['count']
    fr21 = fr2[~fr2['sequenceNo'].isnull()]
    fr22 = fr2[fr2['sequenceNo'].isnull()]
    display(fr22.shape[0])
    display(fr21.shape[0])
    display(fr2.shape[0])
    if fr22.shape[0] != 0:
        fr22['sequenceNo'] = range(int(fr21['sequenceNo'].max()) + 1, int(fr21['sequenceNo'].max()) + 1 + fr22.shape[0])
        fr2 = pd.concat([fr21, fr22])
    del fr21
    del fr22
    display(fr2.shape[0])
    try:
        assert(fr2[fr2.duplicated('sequenceNo', keep=False)].shape[0] == 0)
    except:
        te_st = fr2[fr2.duplicated('sequenceNo', keep=False)]
        display(te_st)
        caa = te_st['clockAtArrival'].max()
        seq = te_st['sequenceNo'].iloc[0]
        m_in = fr2[fr2['sequenceNo'] > seq]['sequenceNo'].min()
        if m_in > seq + 1:
            fr2.loc[fr2['sequenceNo'] > seq, 'sequenceNo'] = fr2[fr2['sequenceNo'] > seq]['sequenceNo'] + 1
            fr2.loc[(fr2['sequenceNo'] == seq) & (fr2['clockAtArrival'] == caa), 'sequenceNo'] = seq + 1
        else:
            fr2.loc[fr2['sequenceNo'] > seq, 'sequenceNo'] = fr2[fr2['sequenceNo'] > seq]['sequenceNo'] + 2
            fr2.loc[(fr2['sequenceNo'] == seq) & (fr2['clockAtArrival'] == caa), 'sequenceNo'] = seq + 1
        assert(fr2[fr2.duplicated('sequenceNo', keep=False)].shape[0] == 0)
        
    
    # Give the fr1 rows sequence numbers that continue directly after the
    # largest sequenceNo in fr2, then stack fr1 on top of fr2.
    tail_start = int(fr2['sequenceNo'].max()) + 1
    fr1['sequenceNo'] = range(tail_start, tail_start + len(fr1))
    fr2 = pd.concat([fr1, fr2])
    del fr1
    # Sanity check: sequenceNo must be unique across the combined frame.
    assert not fr2.duplicated('sequenceNo', keep=False).any()
    
    # Write one pickle per feed into /mnt/e/result/<startDate>/.
    # makedirs(exist_ok=True) creates missing parent directories and does not
    # raise when the date directory already exists, so a date can be re-run;
    # existing .pkl files for that date are overwritten.
    out_dir = '/mnt/e/result/' + startDate
    os.makedirs(out_dir, exist_ok=True)
    # Each entry is (value of the 'tag' column, feed-specific id column).
    # The tag is also the output file name.
    for feed_tag, id_col in (('SH', 'num'), ('SZ', 'num'), ('trade', 'ApplSeqNum'),
                             ('order', 'ApplSeqNum'), ('index', 'num')):
        feed_part = fr2.loc[fr2['tag'] == feed_tag,
                            ['skey', 'date', id_col, 'sequenceNo', 'clockAtArrival']]
        feed_part.to_pickle(out_dir + '/' + feed_tag + '.pkl')
        del feed_part  # free the slice before building the next one
    del fr2
    
    print(str(i) + 'finished')


--------------------------------------------------------------------------------------------
20200430
SH lv2


Unnamed: 0,date,skey,time,cum_volume,cum_amount,close,bid1p,bid2p,bid3p,bid4p,bid5p,bid1q,bid2q,bid3q,bid4q,bid5q,ask1p,ask2p,ask3p,ask4p,ask5p,ask1q,ask2q,ask3q,ask4q,ask5q,open,num,clockAtArrival,sequenceNo
74569,20200430.0,1600021,150004000000,3291143,24187650.0,7.36,7.36,7.35,7.34,7.33,7.32,26236,106100,15900,14500,7000,7.37,7.38,7.39,7.4,7.41,75350,202800,300400,116200,24500,7.25,16000210000.0,1588230000000000.0,117350193.0
74570,20200430.0,1600021,150004000000,3291143,24187650.0,7.36,7.36,7.35,7.34,7.33,7.32,26236,106100,15900,14500,7000,7.37,7.38,7.39,7.4,7.41,75350,202800,300400,116200,24500,7.25,16000210000.0,1588230000000000.0,117356700.0
74571,20200430.0,1600021,150004000000,3291143,24187650.0,7.36,7.36,7.35,7.34,7.33,7.32,26236,106100,15900,14500,7000,7.37,7.38,7.39,7.4,7.41,75350,202800,300400,116200,24500,7.25,16000210000.0,1588230000000000.0,117350193.0
74572,20200430.0,1600021,150004000000,3291143,24187650.0,7.36,7.36,7.35,7.34,7.33,7.32,26236,106100,15900,14500,7000,7.37,7.38,7.39,7.4,7.41,75350,202800,300400,116200,24500,7.25,16000210000.0,1588230000000000.0,117356700.0
986043,20200430.0,1600293,150004000000,8382761,21014780.0,2.54,2.53,2.52,2.51,2.5,2.49,108050,168500,210300,261100,146200,2.54,2.55,2.56,2.57,2.58,22359,615837,123000,98300,131250,2.39,16002930000.0,1588230000000000.0,117349815.0
986044,20200430.0,1600293,150004000000,8382761,21014780.0,2.54,2.53,2.52,2.51,2.5,2.49,108050,168500,210300,261100,146200,2.54,2.55,2.56,2.57,2.58,22359,615837,123000,98300,131250,2.39,16002930000.0,1588230000000000.0,117356701.0
986045,20200430.0,1600293,150004000000,8382761,21014780.0,2.54,2.53,2.52,2.51,2.5,2.49,108050,168500,210300,261100,146200,2.54,2.55,2.56,2.57,2.58,22359,615837,123000,98300,131250,2.39,16002930000.0,1588230000000000.0,117349815.0
986046,20200430.0,1600293,150004000000,8382761,21014780.0,2.54,2.53,2.52,2.51,2.5,2.49,108050,168500,210300,261100,146200,2.54,2.55,2.56,2.57,2.58,22359,615837,123000,98300,131250,2.39,16002930000.0,1588230000000000.0,117356701.0
2570637,20200430.0,1600777,150004000000,109754226,197977200.0,1.8,1.79,1.78,1.77,1.76,1.75,3173600,3339095,4199200,826300,1458300,1.8,1.81,1.82,1.83,1.84,1716225,2429583,4020569,3484740,1848920,1.79,16007780000.0,1588230000000000.0,117350183.0
2570638,20200430.0,1600777,150004000000,109754226,197977200.0,1.8,1.79,1.78,1.77,1.76,1.75,3173600,3339095,4199200,826300,1458300,1.8,1.81,1.82,1.83,1.84,1716225,2429583,4020569,3484740,1848920,1.79,16007780000.0,1588230000000000.0,117356702.0


'1.67%'

-------------------------------------------------------------------------------------------
SZ lv2


8200814

8085204

8200814

8200814

8085204

SZ lv2 is complete


'1.41%'

----------------------------------------------------------------------------------------------
SH & SZ trade


  interactivity=interactivity, compiler=compiler, result=result)


'trade data is complete'

'0.22%'

--------------------------------------------------------------------------------------------------
SZ order data


  interactivity=interactivity, compiler=compiler, result=result)


'order data is complete'

'0.14%'

-----------------------------------------------------------------------------------------------------
SH index data


860229

860229

860229

15140

17227

index data is complete


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


'2%'

----------------------------------------------------------------------------------------------------
final concat


'1. here~'

'2. here~'

'3. here~'

'4. here~'

'5. here~'

0:05:11.021673
0:01:16.995497


0

103449455

103449455

103449455

20200430finished
