In [1]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz


def DB(host, db_name, user, passwd):
    auth_db = db_name if user not in ('admin', 'root') else 'admin'
    uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)
    return DBObj(uri, db_name=db_name)


class DBObj(object):
    def __init__(self, uri, symbol_column='skey', db_name='white_db'):
        self.db_name = db_name
        self.uri = uri
        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        # mongodb://user:password@example.com
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        if len(df) == 0: return

        multi_date = False

        if self.date_column in df.columns:
            date = str(df.head(1)[self.date_column].iloc[0])
            multi_date = len(df[self.date_column].unique()) > 1
        else:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            for symbol, sub_df in df.groupby([self.symbol_column]):
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = 1
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}

        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)

        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for x in collection.find(query):
            x['data'] = self.deser(x['data'], x['ver'])
            segs.append(x)
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start']))
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def list_tables(self):
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        pickle_protocol = 4
        if version == 1:
            return gzip.compress(pickle.dumps(s, protocol=pickle_protocol), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s, protocol=pickle_protocol), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        def unpickle(s):
            return pickle.loads(s)

        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        else:
            raise Exception('unknown version')


def patch_pandas_pickle():
    if pd.__version__ < '0.24':
        import sys
        from types import ModuleType
        from pandas.core.internals import BlockManager
        pkg_name = 'pandas.core.internals.managers'
        if pkg_name not in sys.modules:
            m = ModuleType(pkg_name)
            m.BlockManager = BlockManager
            sys.modules[pkg_name] = m
patch_pandas_pickle()











import pandas as pd
import random
import numpy as np
import glob
import pickle
import os
import datetime
import time
pd.set_option("max_columns", 200)

startTm = datetime.datetime.now()
readPath = '/home/work516/day_stock/***'
dataPathLs = np.array(glob.glob(readPath))
dataPathLs = dataPathLs[[np.array([os.path.basename(i).split('.')[0][:2] == 'SH' for i in dataPathLs])]]
db = pd.DataFrame()
for p in dataPathLs:
    dayData = pd.read_csv(p, compression='gzip')
    db = pd.concat([db, dayData])
print(datetime.datetime.now() - startTm)

year = "2017"
startDate = '20171221'
endDate = '20171231'
readPath = '/mnt/usb/data/' + year + '/***/***'
dataPathLs = np.array(glob.glob(readPath))
dateLs = np.array([os.path.basename(i).split('_')[0] for i in dataPathLs])
dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
date_list = pd.read_csv("/home/work516/KR_upload_code/trading_days.csv")
wr_ong = []
mi_ss = []
less = []

for data in dataPathLs:
    if len(np.array(glob.glob(data + '/SH/***'))) == 0:
        if int(os.path.basename(data)) not in date_list["Date"].values:
            continue
        else:
            print(os.path.basename(data) + " less data!!!!!!!!!!!!!!!!!")
            less.append(data)
            continue
    startTm = datetime.datetime.now()
    date = os.path.basename(data)
    rar_path = data + '/SH/snapshot.7z'
    path = '/mnt/e/unzip_data/2017/SH'
    path1 = path + '/' + date
    un_path = path1
    cmd = '7za x {} -o{}'.format(rar_path, un_path)
    os.system(cmd)
    print(datetime.datetime.now() - startTm)
    print(date + ' unzip finished')

    readPath = path1 + '/snapshot/***2/***'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([int(os.path.basename(i).split('.')[0]) for i in dataPathLs])
    dataPathLs = dataPathLs[((dateLs >= 600000) & (dateLs <= 700000))]
    SH = []
    ll = []
    startTm = datetime.datetime.now()
    for i in dataPathLs:
        try:
            df = pd.read_csv(i, usecols = [0,1,3,5,7,9,10,11,15,17,18,19,20,21,22,23,25,26,28,29,30,31,32,33,37,39,40,41,
                                          42,46,47,49,50])
        except:
            print("empty data")
            print(i)
            ll.append(int(os.path.basename(i).split('.')[0]))
            continue
        df["StockID"] = int(os.path.basename(i).split('.')[0])
        SH += [df]
    del df
    SH = pd.concat(SH).reset_index(drop=True)
    print(datetime.datetime.now() - startTm)
    
    startTm = datetime.datetime.now()
    SH["skey"] = SH["StockID"] + 1000000
    SH.drop(["StockID"],axis=1,inplace=True)
    SH["date"] = int(SH["QuotTime"].iloc[0]//1000000000)
    SH["time"] = (SH['QuotTime'] - int(SH['QuotTime'].iloc[0]//1000000000*1000000000)).astype(np.int64) * 1000
    SH["clockAtArrival"] = SH["QuotTime"].astype(str).apply(lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp()*1e6))
    SH.drop(["QuotTime"],axis=1,inplace=True)
    SH['datetime'] = SH["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
    print(datetime.datetime.now() - startTm)

    startTm = datetime.datetime.now()
    SH["BidPrice"] = SH["BidPrice"].apply(lambda x: [float(i) for i in x[1:-1].split(',')])
    SH["OfferPrice"] = SH["OfferPrice"].apply(lambda x: [float(i) for i in x[1:-1].split(',')])
    SH["BidOrderQty"] = SH["BidOrderQty"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["OfferOrderQty"] = SH["OfferOrderQty"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["BidNumOrders"] = SH["BidNumOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["OfferNumOrders"] = SH["OfferNumOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])

    for i in range(1, 11):
        SH["bid" + str(i) + 'p'] = SH["BidPrice"].apply(lambda x: x[i-1],2)
    SH.drop(["BidPrice"],axis=1,inplace=True)
    print("1")
    for i in range(1, 11):
        SH["ask" + str(i) + 'p'] = SH["OfferPrice"].apply(lambda x: x[i-1],2)
    SH.drop(["OfferPrice"],axis=1,inplace=True)
    print("2")
    for i in range(1, 11):
        SH["bid" + str(i) + 'q'] = SH["BidOrderQty"].apply(lambda x: x[i-1])
    SH.drop(["BidOrderQty"],axis=1,inplace=True)
    print("3")
    for i in range(1, 11):
        SH["ask" + str(i) + 'q'] = SH["OfferOrderQty"].apply(lambda x: x[i-1])
    SH.drop(["OfferOrderQty"],axis=1,inplace=True)
    print("4")
    for i in range(1, 11):
        SH["bid" + str(i) + 'n'] = SH["BidNumOrders"].apply(lambda x: x[i-1])
        SH["bid" + str(i) + 'n'] = SH["bid" + str(i) + 'n'].astype('int32')
    SH.drop(["BidNumOrders"],axis=1,inplace=True)
    print("5")
    for i in range(1, 11):
        SH["ask" + str(i) + 'n'] = SH["OfferNumOrders"].apply(lambda x: x[i-1])
        SH["ask" + str(i) + 'n'] = SH["ask" + str(i) + 'n'].astype('int32') 
    SH.drop(["OfferNumOrders"],axis=1,inplace=True)
    print("6")
    
    SH["BidOrders"] = SH["BidOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["OfferOrders"] = SH["OfferOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])

    for i in range(1, 51):
        SH["bid1Top" + str(i) + 'q'] = SH["BidOrders"].apply(lambda x: x[i-1])
        SH["bid1Top" + str(i) + 'q'] = SH["bid1Top" + str(i) + 'q'].astype('int32') 
    SH.drop(["BidOrders"],axis=1,inplace=True)
    print("7")
    
    for i in range(1, 51):
        SH["ask1Top" + str(i) + 'q'] = SH["OfferOrders"].apply(lambda x: x[i-1])
        SH["ask1Top" + str(i) + 'q'] = SH["ask1Top" + str(i) + 'q'].astype('int32') 
    SH.drop(["OfferOrders"],axis=1,inplace=True)
    print("8")
    print(datetime.datetime.now() - startTm)
    
    
    startTm = datetime.datetime.now()
    SH.columns = ['cum_trades_cnt', 'ask_trade_max_duration', 'total_bid_orders',
       'cum_canceled_sell_amount', 'total_ask_quantity', 'cum_canceled_buy_orders',
       'total_ask_vwap', 'cum_canceled_sell_volume', 'cum_volume', 'open',
       'high', 'prev_close', 'low', 'total_bid_vwap',
       'cum_canceled_sell_orders', 'total_ask_orders', 'total_ask_levels',
       'total_bid_quantity', 'cum_canceled_buy_volume', 'bid_trade_max_duration',
       'total_bid_levels', 'close', 'cum_amount', 'cum_canceled_buy_amount', 'skey', 'date', 'time', 'clockAtArrival',
       'datetime', 'bid1p', 'bid2p', 'bid3p', 'bid4p', 'bid5p', 'bid6p',
       'bid7p', 'bid8p', 'bid9p', 'bid10p', 'ask1p', 'ask2p', 'ask3p',
       'ask4p', 'ask5p', 'ask6p', 'ask7p', 'ask8p', 'ask9p', 'ask10p',
       'bid1q', 'bid2q', 'bid3q', 'bid4q', 'bid5q', 'bid6q', 'bid7q',
       'bid8q', 'bid9q', 'bid10q', 'ask1q', 'ask2q', 'ask3q', 'ask4q',
       'ask5q', 'ask6q', 'ask7q', 'ask8q', 'ask9q', 'ask10q', 'bid1n',
       'bid2n', 'bid3n', 'bid4n', 'bid5n', 'bid6n', 'bid7n', 'bid8n',
       'bid9n', 'bid10n', 'ask1n', 'ask2n', 'ask3n', 'ask4n', 'ask5n',
       'ask6n', 'ask7n', 'ask8n', 'ask9n', 'ask10n', 'bid1Top1q',
       'bid1Top2q', 'bid1Top3q', 'bid1Top4q', 'bid1Top5q', 'bid1Top6q',
       'bid1Top7q', 'bid1Top8q', 'bid1Top9q', 'bid1Top10q', 'bid1Top11q',
       'bid1Top12q', 'bid1Top13q', 'bid1Top14q', 'bid1Top15q',
       'bid1Top16q', 'bid1Top17q', 'bid1Top18q', 'bid1Top19q',
       'bid1Top20q', 'bid1Top21q', 'bid1Top22q', 'bid1Top23q',
       'bid1Top24q', 'bid1Top25q', 'bid1Top26q', 'bid1Top27q',
       'bid1Top28q', 'bid1Top29q', 'bid1Top30q', 'bid1Top31q',
       'bid1Top32q', 'bid1Top33q', 'bid1Top34q', 'bid1Top35q',
       'bid1Top36q', 'bid1Top37q', 'bid1Top38q', 'bid1Top39q',
       'bid1Top40q', 'bid1Top41q', 'bid1Top42q', 'bid1Top43q',
       'bid1Top44q', 'bid1Top45q', 'bid1Top46q', 'bid1Top47q',
       'bid1Top48q', 'bid1Top49q', 'bid1Top50q', 'ask1Top1q', 'ask1Top2q',
       'ask1Top3q', 'ask1Top4q', 'ask1Top5q', 'ask1Top6q', 'ask1Top7q',
       'ask1Top8q', 'ask1Top9q', 'ask1Top10q', 'ask1Top11q', 'ask1Top12q',
       'ask1Top13q', 'ask1Top14q', 'ask1Top15q', 'ask1Top16q',
       'ask1Top17q', 'ask1Top18q', 'ask1Top19q', 'ask1Top20q',
       'ask1Top21q', 'ask1Top22q', 'ask1Top23q', 'ask1Top24q',
       'ask1Top25q', 'ask1Top26q', 'ask1Top27q', 'ask1Top28q',
       'ask1Top29q', 'ask1Top30q', 'ask1Top31q', 'ask1Top32q',
       'ask1Top33q', 'ask1Top34q', 'ask1Top35q', 'ask1Top36q',
       'ask1Top37q', 'ask1Top38q', 'ask1Top39q', 'ask1Top40q',
       'ask1Top41q', 'ask1Top42q', 'ask1Top43q', 'ask1Top44q',
       'ask1Top45q', 'ask1Top46q', 'ask1Top47q', 'ask1Top48q',
       'ask1Top49q', 'ask1Top50q']
    SH = SH.fillna(0)
#     SH["p1"] = SH["bid1p"] + SH["ask1p"]
#     tt = SH[(SH["cum_volume"] > 0) & (SH["time"] < 145700000000)].groupby("skey")['p1'].min()
#     SH.drop("p1", axis=1, inplace=True)
#     try:
#         assert(tt[tt == 0].shape[0] == 0)
#     except:
#         display(tt[tt == 0])
#     SH = SH[~((SH["bid1p"] == 0) & (SH["ask1p"] == 0))]
    SH["ordering"] = SH.groupby("skey").cumcount()
    SH["ordering"] = SH["ordering"] + 1
    
    SH["has_missing"] = 0
    
    for col in ["skey", "date", "cum_trades_cnt", "total_bid_orders",
        'total_ask_orders', 'total_bid_levels', 'total_ask_levels', 'cum_canceled_buy_orders','cum_canceled_sell_orders',
            "ordering", 'bid_trade_max_duration', 'ask_trade_max_duration','has_missing']:
        SH[col] = SH[col].astype('int32')
    
#     for cols in ["prev_close", 'open', "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p',
#              'bid2p','bid1p','ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p']:
# #         SH[cols] = SH[cols].apply(lambda x: round(x, 2)).astype('float64')
#         print(cols)
#         print(SH[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())
    
#     for cols in ['cum_amount', "cum_canceled_sell_amount", "cum_canceled_buy_amount"]:
# #         SH[cols] = SH[cols].apply(lambda x: round(x, 2)).astype('float64')
#         print(cols)
#         print(SH[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())
        
    for cols in ['total_bid_vwap', "total_ask_vwap"]:
#         print(cols)
#         print(SH[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())
        SH[cols] = SH[cols].apply(lambda x: round(x, 3))
        
   
    assert(sum(SH[SH["open"] != 0].groupby("skey")["open"].nunique() != 1) == 0)
    assert(sum(SH[SH["prev_close"] != 0].groupby("skey")["prev_close"].nunique() != 1) == 0)
    SH["prev_close"] = np.where(SH["time"] >= 91500000000, SH.groupby("skey")["prev_close"].transform("max"), SH["prev_close"]) 
    SH["open"] = np.where(SH["cum_volume"] > 0, SH.groupby("skey")["open"].transform("max"), SH["open"])
    assert(sum(SH[SH["open"] != 0].groupby("skey")["open"].nunique() != 1) == 0)
    assert(sum(SH[SH["prev_close"] != 0].groupby("skey")["prev_close"].nunique() != 1) == 0)
    assert(SH[SH["cum_volume"] > 0]["open"].min() > 0)
    print(datetime.datetime.now() - startTm)
    
    
    # check 1
    startTm = datetime.datetime.now()
    da_te = str(SH["date"].iloc[0]) 
    da_te = da_te[:4] + '-' + da_te[4:6] + '-' + da_te[6:8]
    db1 = db[db["date"] == da_te]
    db1["ID"] = db1["ID"].str[2:].astype(int) + 1000000
    db1["date"] = (db1["date"].str[:4] + db1["date"].str[5:7] + db1["date"].str[8:]).astype(int)
    SH["cum_max"] = SH.groupby("skey")["cum_volume"].transform(max)
    s2 = SH[SH["cum_volume"] == SH["cum_max"]].groupby("skey").first().reset_index()
    dd = SH[SH["cum_volume"] == SH["cum_max"]].groupby("skey")["time"].first().reset_index()
    SH.drop("cum_max", axis=1, inplace=True)
    s2 = s2.rename(columns={"skey": "ID", 'open':"d_open", "prev_close":"d_yclose","high":"d_high", "low":"d_low", "close":"d_close", "cum_volume":"d_volume", "cum_amount":"d_amount"})
    if SH["date"].iloc[0] < 20180820:
        s2["auction"] = 0
    else:
        dd["auction"] = np.where(dd["time"]<=145700000000, 0, 1)
        dd = dd.rename(columns={"skey": "ID"})
        s2 = pd.merge(s2, dd[["ID", "auction"]], on="ID")
    s2 = s2[["ID", "date", "d_open", "d_yclose", "d_high", "d_low", "d_close", "d_volume", "d_amount", "auction"]]
    re = pd.merge(db1, s2, on=["ID", "date", "d_open", "d_yclose","d_high", "d_low", "d_volume"], how="outer")
    try:
        assert(sum(re["d_amount_y"].isnull()) == 0)
    except:
        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        print(re[re["d_amount_y"].isnull()])
        wr_ong += [re[re["d_amount_y"].isnull()]]
    print(datetime.datetime.now() - startTm)
    
    # check 2
    # first part
    startTm = datetime.datetime.now()
    date = pd.DataFrame(pd.date_range(start='2019-06-10 08:30:00', end='2019-06-10 18:00:00', freq='s'), columns=["Orig"])
    date["time"] = date["Orig"].apply(lambda x: int(x.strftime("%H%M%S"))*1000)
    date["group"] = date["time"]//10000
    SH["group"] = SH["time"]//10000000
    gl = date[((date["time"] >= 93000000) & (date["time"] <= 113000000))|((date["time"] >= 130000000) & (date["time"] <= 150000000))]["group"].unique()
    l = set(gl) - set(SH["group"].unique())
    SH["has_missing1"] = 0 
    if len(l) != 0:
        print("massive missing")
        print(l)
        SH["order"] = SH.groupby(["skey", "time"]).cumcount()
        for i in l:
            SH["t"] = SH[SH["group"] > i].groupby("StockID")["time"].transform("min")
            SH["has_missing1"] = np.where((SH["time"] == SH["t"]) & (SH["order"] == 0), 1, 0)
        SH.drop(["order", "t", "group"], axis=1, inplace=True)   
    else:
        print("no massive missing")
        SH.drop(["group"], axis=1, inplace=True)
    



    # second part

    SH["time_interval"] = SH.groupby("skey")["datetime"].apply(lambda x: x - x.shift(1))
    SH["time_interval"] = SH["time_interval"].apply(lambda x: x.seconds)
    SH["tn_update"] = SH.groupby("skey")["cum_trades_cnt"].apply(lambda x: x-x.shift(1))

    f1 = SH[(SH["time"] >= 93000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f1 = f1.rename(columns={"time": "time1"})
    f2 = SH[(SH["time"] >= 130000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f2 = f2.rename(columns={"time": "time2"})
    f3 = SH[(SH["time"] >= 150000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f3 = f3.rename(columns={"time": "time3"})
    SH = pd.merge(SH, f1, on="skey", how="left")
    del f1
    SH = pd.merge(SH, f2, on="skey", how="left")
    del f2
    SH = pd.merge(SH, f3, on="skey", how="left")
    del f3
    p99 = SH[(SH["time"] > 93000000000) & (SH["time"] < 145700000000) & (SH["time"] != SH["time2"]) & (SH["tn_update"] != 0)]\
    .groupby("skey")["tn_update"].apply(lambda x: x.describe([0.99])["99%"]).reset_index()
    p99 = p99.rename(columns={"tn_update":"99%"})
    SH = pd.merge(SH, p99, on="skey", how="left")

    SH["has_missing2"] = 0
    SH["has_missing2"] = np.where((SH["time_interval"] > 60) & (SH["tn_update"] > SH["99%"]) & 
         (SH["time"] > SH["time1"]) & (SH["time"] != SH["time2"]) & (SH["time"] != SH["time3"]) & (SH["time"] != 100000000000), 1, 0)
    SH.drop(["time_interval", "tn_update", "time1", "time2", "time3", "99%"], axis=1, inplace=True) 

    SH["has_missing"] = np.where((SH["has_missing1"] == 1) | (SH["has_missing2"] == 1), 1, 0)
    SH.drop(["has_missing1", "has_missing2"], axis=1, inplace=True) 
    if SH[SH["has_missing"] == 1].shape[0] != 0:
        print("has missing!!!!!!!!!!!!!!!!!!!!!!!")
        print(SH[SH["has_missing"] == 1].shape[0])
        mi_ss += [SH[SH["has_missing"] == 1]]
    print(datetime.datetime.now() - startTm)
    
    
    
    startTm = datetime.datetime.now()
    SH["has_missing"] = SH["has_missing"].astype('int32')
    SH = SH[["skey", "date", "time", "clockAtArrival", "datetime", "ordering", "has_missing", "cum_trades_cnt", "cum_volume", "cum_amount", "prev_close",
                            "open", "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p','bid2p','bid1p',
                            'ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p', 'bid10q','bid9q','bid8q',
                             'bid7q','bid6q','bid5q','bid4q','bid3q','bid2q','bid1q', 'ask1q','ask2q','ask3q','ask4q','ask5q','ask6q',
                             'ask7q','ask8q','ask9q','ask10q', 'bid10n', 'bid9n', 'bid8n', 'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 
                             'ask1n', 'ask2n', 'ask3n', 'ask4n', 'ask5n', 'ask6n','ask7n', 'ask8n', 'ask9n', 'ask10n','bid1Top1q','bid1Top2q','bid1Top3q','bid1Top4q','bid1Top5q','bid1Top6q',
        'bid1Top7q','bid1Top8q','bid1Top9q','bid1Top10q','bid1Top11q','bid1Top12q','bid1Top13q','bid1Top14q','bid1Top15q','bid1Top16q','bid1Top17q','bid1Top18q',
        'bid1Top19q','bid1Top20q','bid1Top21q','bid1Top22q','bid1Top23q','bid1Top24q','bid1Top25q','bid1Top26q','bid1Top27q','bid1Top28q','bid1Top29q',
        'bid1Top30q','bid1Top31q','bid1Top32q','bid1Top33q','bid1Top34q','bid1Top35q','bid1Top36q','bid1Top37q','bid1Top38q','bid1Top39q','bid1Top40q',
        'bid1Top41q','bid1Top42q','bid1Top43q','bid1Top44q','bid1Top45q','bid1Top46q','bid1Top47q','bid1Top48q','bid1Top49q','bid1Top50q', 'ask1Top1q',
        'ask1Top2q','ask1Top3q','ask1Top4q','ask1Top5q','ask1Top6q','ask1Top7q','ask1Top8q','ask1Top9q','ask1Top10q','ask1Top11q','ask1Top12q','ask1Top13q',
        'ask1Top14q','ask1Top15q','ask1Top16q','ask1Top17q','ask1Top18q','ask1Top19q','ask1Top20q','ask1Top21q','ask1Top22q','ask1Top23q',
        'ask1Top24q','ask1Top25q','ask1Top26q','ask1Top27q','ask1Top28q','ask1Top29q','ask1Top30q','ask1Top31q','ask1Top32q','ask1Top33q',
        'ask1Top34q','ask1Top35q','ask1Top36q','ask1Top37q','ask1Top38q','ask1Top39q','ask1Top40q','ask1Top41q','ask1Top42q','ask1Top43q',
        'ask1Top44q','ask1Top45q','ask1Top46q','ask1Top47q','ask1Top48q','ask1Top49q','ask1Top50q',"total_bid_quantity", "total_ask_quantity","total_bid_vwap", "total_ask_vwap",
        "total_bid_orders",'total_ask_orders','total_bid_levels', 'total_ask_levels', 'bid_trade_max_duration', 'ask_trade_max_duration', 'cum_canceled_buy_orders', 'cum_canceled_buy_volume',
        "cum_canceled_buy_amount", "cum_canceled_sell_orders", 'cum_canceled_sell_volume',"cum_canceled_sell_amount"]]
    
    display(SH["date"].iloc[0])
    print("SH finished")
    
    database_name = 'com_md_eq_cn'
    user = "zhenyuy"
    password = "bnONBrzSMGoE"

    db1 = DB("192.168.10.223", database_name, user, password)
    db1.write('md_snapshot_l2', SH)
    
    del SH
    print(datetime.datetime.now() - startTm)

wr_ong = pd.concat(wr_ong).reset_index(drop=True)
print(wr_ong)
mi_ss = pd.concat(mi_ss).reset_index(drop=True)
print(mi_ss)
print(less)



0:02:49.960861
0:00:30.837842
20171221 unzip finished
0:00:42.180450
0:01:16.141935
1
2
3
4
5
6
7
8
0:09:32.621643
0:00:38.556106


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


0:00:03.644240
no massive missing
0:01:54.038428


20171221

SH finished
0:00:49.503493
0:00:29.770743
20171222 unzip finished
0:00:43.316478
0:01:13.956920
1
2
3
4
5
6
7
8
0:08:42.488465
0:00:31.001987
0:00:03.276001
no massive missing
0:01:45.872211


20171222

SH finished
0:00:48.239804
0:00:30.815254
20171225 unzip finished
0:00:48.438521
0:01:18.646083
1
2
3
4
5
6
7
8
0:09:04.893046
0:00:32.370597
0:00:03.442836
no massive missing
0:01:50.792061


20171225

SH finished
0:00:48.093796
0:00:31.092955
20171226 unzip finished
0:00:45.187833
0:01:20.740660
1
2
3
4
5
6
7
8
0:09:37.207188
0:00:37.872338
0:00:03.539421
no massive missing
0:02:07.362572


20171226

SH finished
0:00:57.158847
0:00:31.670637
20171227 unzip finished
0:00:47.947829
0:01:30.226104
1
2
3
4
5
6
7
8
0:09:23.338418
0:00:34.687073
0:00:03.548308
no massive missing
0:01:55.308764


20171227

SH finished
0:00:58.958541
0:00:38.771532
20171228 unzip finished
0:00:47.848440
0:01:20.218259
1
2
3
4
5
6
7
8
0:09:27.425180
0:00:34.968190
0:00:03.288688
no massive missing
0:01:55.635146


20171228

SH finished
0:01:09.630626
0:00:30.334153
20171229 unzip finished
0:00:46.769373
0:01:17.007209
1
2
3
4
5
6
7
8
0:09:05.701341
0:00:33.097044
0:00:03.426779
no massive missing
0:01:49.626338


20171229

SH finished
0:01:08.846568


ValueError: No objects to concatenate

In [1]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz

class DB(object):
    def __init__(self, uri, symbol_column='skey'):
        self.db_name = 'white_db'
        user, passwd, host = self.parse_uri(uri)
        auth_db = 'admin' if user in ('admin', 'root') else self.db_name
        self.uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)

        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        # mongodb://user:password@example.com
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        if len(df) == 0: return

        multi_date = False

        if self.date_column in df.columns:
            date = str(df.head(1)[self.date_column].iloc[0])
            multi_date = len(df[self.date_column].unique()) > 1
        else:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            for symbol, sub_df in df.groupby([self.symbol_column]):
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = 1
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}

        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)

        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for x in collection.find(query):
            x['data'] = self.deser(x['data'], x['ver'])
            segs.append(x)
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start']))
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def list_tables(self):
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        if version == 1:
            return gzip.compress(pickle.dumps(s), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        def unpickle(s):
            return pickle.loads(s)

        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        else:
            raise Exception('unknown version')











import pandas as pd
import random
import numpy as np
import glob
import pickle
import os
import datetime
import time
pd.set_option("max_columns", 200)

startTm = datetime.datetime.now()
readPath = '/home/work516/day_stock/***'
dataPathLs = np.array(glob.glob(readPath))
dataPathLs = dataPathLs[[np.array([os.path.basename(i).split('.')[0][:2] == 'SH' for i in dataPathLs])]]
db = pd.DataFrame()
for p in dataPathLs:
    dayData = pd.read_csv(p, compression='gzip')
    db = pd.concat([db, dayData])
print(datetime.datetime.now() - startTm)

year = "2018"
startDate = '20180102'
endDate = '20180731'
readPath = '/mnt/usb/' + year + '/***/***'
dataPathLs = np.array(glob.glob(readPath))
dateLs = np.array([os.path.basename(i).split('_')[0] for i in dataPathLs])
dataPathLs = dataPathLs[(dateLs >= startDate) & (dateLs <= endDate)]
date_list = pd.read_csv("/home/work516/KR_upload_code/trading_days.csv")
wr_ong = []
mi_ss = []
less = []

for data in dataPathLs:
    if len(np.array(glob.glob(data + '/SH/***'))) == 0:
        if int(os.path.basename(data)) not in date_list["Date"].values:
            continue
        else:
            print(os.path.basename(data) + " less data!!!!!!!!!!!!!!!!!")
            less.append(data)
            continue
    startTm = datetime.datetime.now()
    date = os.path.basename(data)
    rar_path = data + '/SH/snapshot.7z'
    path = '/mnt/e/unzip_data/2018/SH'
    path1 = path + '/' + date
    un_path = path1
    cmd = '7za x {} -o{}'.format(rar_path, un_path)
    os.system(cmd)
    print(datetime.datetime.now() - startTm)
    print(date + ' unzip finished')

    readPath = path1 + '/snapshot/***2/***'
    dataPathLs = np.array(glob.glob(readPath))
    dateLs = np.array([int(os.path.basename(i).split('.')[0]) for i in dataPathLs])
    dataPathLs = dataPathLs[((dateLs >= 600000) & (dateLs <= 700000))]
    SH = []
    ll = []
    startTm = datetime.datetime.now()
    for i in dataPathLs:
        try:
            df = pd.read_csv(i, usecols = [0,1,3,5,7,9,10,11,15,17,18,19,20,21,22,23,25,26,28,29,30,31,32,33,37,39,40,41,
                                          42,46,47,49,50])
        except:
            print("empty data")
            print(i)
            ll.append(int(os.path.basename(i).split('.')[0]))
            continue
        df["StockID"] = int(os.path.basename(i).split('.')[0])
        SH += [df]
    del df
    SH = pd.concat(SH).reset_index(drop=True)
    print(datetime.datetime.now() - startTm)
    
    startTm = datetime.datetime.now()
    SH["skey"] = SH["StockID"] + 1000000
    SH.drop(["StockID"],axis=1,inplace=True)
    SH["date"] = int(SH["QuotTime"].iloc[0]//1000000000)
    SH["time"] = (SH['QuotTime'] - int(SH['QuotTime'].iloc[0]//1000000000*1000000000)).astype(np.int64) * 1000
    SH["clockAtArrival"] = SH["QuotTime"].astype(str).apply(lambda x: np.int64(datetime.datetime.strptime(x, '%Y%m%d%H%M%S%f').timestamp()*1e6))
    SH.drop(["QuotTime"],axis=1,inplace=True)
    SH['datetime'] = SH["clockAtArrival"].apply(lambda x: datetime.datetime.fromtimestamp(x/1e6))
    print(datetime.datetime.now() - startTm)

    startTm = datetime.datetime.now()
    SH["BidPrice"] = SH["BidPrice"].apply(lambda x: [float(i) for i in x[1:-1].split(',')])
    SH["OfferPrice"] = SH["OfferPrice"].apply(lambda x: [float(i) for i in x[1:-1].split(',')])
    SH["BidOrderQty"] = SH["BidOrderQty"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["OfferOrderQty"] = SH["OfferOrderQty"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["BidNumOrders"] = SH["BidNumOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["OfferNumOrders"] = SH["OfferNumOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])

    for i in range(1, 11):
        SH["bid" + str(i) + 'p'] = SH["BidPrice"].apply(lambda x: x[i-1],2)
    SH.drop(["BidPrice"],axis=1,inplace=True)
    print("1")
    for i in range(1, 11):
        SH["ask" + str(i) + 'p'] = SH["OfferPrice"].apply(lambda x: x[i-1],2)
    SH.drop(["OfferPrice"],axis=1,inplace=True)
    print("2")
    for i in range(1, 11):
        SH["bid" + str(i) + 'q'] = SH["BidOrderQty"].apply(lambda x: x[i-1])
    SH.drop(["BidOrderQty"],axis=1,inplace=True)
    print("3")
    for i in range(1, 11):
        SH["ask" + str(i) + 'q'] = SH["OfferOrderQty"].apply(lambda x: x[i-1])
    SH.drop(["OfferOrderQty"],axis=1,inplace=True)
    print("4")
    for i in range(1, 11):
        SH["bid" + str(i) + 'n'] = SH["BidNumOrders"].apply(lambda x: x[i-1])
        SH["bid" + str(i) + 'n'] = SH["bid" + str(i) + 'n'].astype('int32')
    SH.drop(["BidNumOrders"],axis=1,inplace=True)
    print("5")
    for i in range(1, 11):
        SH["ask" + str(i) + 'n'] = SH["OfferNumOrders"].apply(lambda x: x[i-1])
        SH["ask" + str(i) + 'n'] = SH["ask" + str(i) + 'n'].astype('int32') 
    SH.drop(["OfferNumOrders"],axis=1,inplace=True)
    print("6")
    
    SH["BidOrders"] = SH["BidOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])
    SH["OfferOrders"] = SH["OfferOrders"].apply(lambda x: [int(i) for i in x[1:-1].split(',')])

    for i in range(1, 51):
        SH["bid1Top" + str(i) + 'q'] = SH["BidOrders"].apply(lambda x: x[i-1])
        SH["bid1Top" + str(i) + 'q'] = SH["bid1Top" + str(i) + 'q'].astype('int32') 
    SH.drop(["BidOrders"],axis=1,inplace=True)
    print("7")
    
    for i in range(1, 51):
        SH["ask1Top" + str(i) + 'q'] = SH["OfferOrders"].apply(lambda x: x[i-1])
        SH["ask1Top" + str(i) + 'q'] = SH["ask1Top" + str(i) + 'q'].astype('int32') 
    SH.drop(["OfferOrders"],axis=1,inplace=True)
    print("8")
    print(datetime.datetime.now() - startTm)
    
    
    startTm = datetime.datetime.now()
    SH.columns = ['cum_trades_cnt', 'ask_trade_max_duration', 'total_bid_orders',
       'cum_canceled_sell_amount', 'total_ask_quantity', 'cum_canceled_buy_orders',
       'total_ask_vwap', 'cum_canceled_sell_volume', 'cum_volume', 'open',
       'high', 'prev_close', 'low', 'total_bid_vwap',
       'cum_canceled_sell_orders', 'total_ask_orders', 'total_ask_levels',
       'total_bid_quantity', 'cum_canceled_buy_volume', 'bid_trade_max_duration',
       'total_bid_levels', 'close', 'cum_amount', 'cum_canceled_buy_amount', 'skey', 'date', 'time', 'clockAtArrival',
       'datetime', 'bid1p', 'bid2p', 'bid3p', 'bid4p', 'bid5p', 'bid6p',
       'bid7p', 'bid8p', 'bid9p', 'bid10p', 'ask1p', 'ask2p', 'ask3p',
       'ask4p', 'ask5p', 'ask6p', 'ask7p', 'ask8p', 'ask9p', 'ask10p',
       'bid1q', 'bid2q', 'bid3q', 'bid4q', 'bid5q', 'bid6q', 'bid7q',
       'bid8q', 'bid9q', 'bid10q', 'ask1q', 'ask2q', 'ask3q', 'ask4q',
       'ask5q', 'ask6q', 'ask7q', 'ask8q', 'ask9q', 'ask10q', 'bid1n',
       'bid2n', 'bid3n', 'bid4n', 'bid5n', 'bid6n', 'bid7n', 'bid8n',
       'bid9n', 'bid10n', 'ask1n', 'ask2n', 'ask3n', 'ask4n', 'ask5n',
       'ask6n', 'ask7n', 'ask8n', 'ask9n', 'ask10n', 'bid1Top1q',
       'bid1Top2q', 'bid1Top3q', 'bid1Top4q', 'bid1Top5q', 'bid1Top6q',
       'bid1Top7q', 'bid1Top8q', 'bid1Top9q', 'bid1Top10q', 'bid1Top11q',
       'bid1Top12q', 'bid1Top13q', 'bid1Top14q', 'bid1Top15q',
       'bid1Top16q', 'bid1Top17q', 'bid1Top18q', 'bid1Top19q',
       'bid1Top20q', 'bid1Top21q', 'bid1Top22q', 'bid1Top23q',
       'bid1Top24q', 'bid1Top25q', 'bid1Top26q', 'bid1Top27q',
       'bid1Top28q', 'bid1Top29q', 'bid1Top30q', 'bid1Top31q',
       'bid1Top32q', 'bid1Top33q', 'bid1Top34q', 'bid1Top35q',
       'bid1Top36q', 'bid1Top37q', 'bid1Top38q', 'bid1Top39q',
       'bid1Top40q', 'bid1Top41q', 'bid1Top42q', 'bid1Top43q',
       'bid1Top44q', 'bid1Top45q', 'bid1Top46q', 'bid1Top47q',
       'bid1Top48q', 'bid1Top49q', 'bid1Top50q', 'ask1Top1q', 'ask1Top2q',
       'ask1Top3q', 'ask1Top4q', 'ask1Top5q', 'ask1Top6q', 'ask1Top7q',
       'ask1Top8q', 'ask1Top9q', 'ask1Top10q', 'ask1Top11q', 'ask1Top12q',
       'ask1Top13q', 'ask1Top14q', 'ask1Top15q', 'ask1Top16q',
       'ask1Top17q', 'ask1Top18q', 'ask1Top19q', 'ask1Top20q',
       'ask1Top21q', 'ask1Top22q', 'ask1Top23q', 'ask1Top24q',
       'ask1Top25q', 'ask1Top26q', 'ask1Top27q', 'ask1Top28q',
       'ask1Top29q', 'ask1Top30q', 'ask1Top31q', 'ask1Top32q',
       'ask1Top33q', 'ask1Top34q', 'ask1Top35q', 'ask1Top36q',
       'ask1Top37q', 'ask1Top38q', 'ask1Top39q', 'ask1Top40q',
       'ask1Top41q', 'ask1Top42q', 'ask1Top43q', 'ask1Top44q',
       'ask1Top45q', 'ask1Top46q', 'ask1Top47q', 'ask1Top48q',
       'ask1Top49q', 'ask1Top50q']
    SH = SH.fillna(0)
#     SH["p1"] = SH["bid1p"] + SH["ask1p"]
#     tt = SH[(SH["cum_volume"] > 0) & (SH["time"] < 145700000000)].groupby("skey")['p1'].min()
#     SH.drop("p1", axis=1, inplace=True)
#     try:
#         assert(tt[tt == 0].shape[0] == 0)
#     except:
#         display(tt[tt == 0])
#     SH = SH[~((SH["bid1p"] == 0) & (SH["ask1p"] == 0))]
    SH["ordering"] = SH.groupby("skey").cumcount()
    SH["ordering"] = SH["ordering"] + 1
    
    SH["has_missing"] = 0
    
    for col in ["skey", "date", "cum_trades_cnt", "total_bid_orders",
        'total_ask_orders', 'total_bid_levels', 'total_ask_levels', 'cum_canceled_buy_orders','cum_canceled_sell_orders',
            "ordering", 'bid_trade_max_duration', 'ask_trade_max_duration','has_missing']:
        SH[col] = SH[col].astype('int32')
    
#     for cols in ["prev_close", 'open', "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p',
#              'bid2p','bid1p','ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p']:
# #         SH[cols] = SH[cols].apply(lambda x: round(x, 2)).astype('float64')
#         print(cols)
#         print(SH[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())
    
#     for cols in ['cum_amount', "cum_canceled_sell_amount", "cum_canceled_buy_amount"]:
# #         SH[cols] = SH[cols].apply(lambda x: round(x, 2)).astype('float64')
#         print(cols)
#         print(SH[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())
        
    for cols in ['total_bid_vwap', "total_ask_vwap"]:
#         print(cols)
#         print(SH[cols].astype(str).apply(lambda x: len(str(x.split('.')[1]))).unique())
        SH[cols] = SH[cols].apply(lambda x: round(x, 3))
        
   
    assert(sum(SH[SH["open"] != 0].groupby("skey")["open"].nunique() != 1) == 0)
    assert(sum(SH[SH["prev_close"] != 0].groupby("skey")["prev_close"].nunique() != 1) == 0)
    SH["prev_close"] = np.where(SH["time"] >= 91500000000, SH.groupby("skey")["prev_close"].transform("max"), SH["prev_close"]) 
    SH["open"] = np.where(SH["cum_volume"] > 0, SH.groupby("skey")["open"].transform("max"), SH["open"])
    assert(sum(SH[SH["open"] != 0].groupby("skey")["open"].nunique() != 1) == 0)
    assert(sum(SH[SH["prev_close"] != 0].groupby("skey")["prev_close"].nunique() != 1) == 0)
    assert(SH[SH["cum_volume"] > 0]["open"].min() > 0)
    print(datetime.datetime.now() - startTm)
    
    
    # check 1
    startTm = datetime.datetime.now()
    da_te = str(SH["date"].iloc[0]) 
    da_te = da_te[:4] + '-' + da_te[4:6] + '-' + da_te[6:8]
    db1 = db[db["date"] == da_te]
    db1["ID"] = db1["ID"].str[2:].astype(int) + 1000000
    db1["date"] = (db1["date"].str[:4] + db1["date"].str[5:7] + db1["date"].str[8:]).astype(int)
    SH["cum_max"] = SH.groupby("skey")["cum_volume"].transform(max)
    s2 = SH[SH["cum_volume"] == SH["cum_max"]].groupby("skey").first().reset_index()
    dd = SH[SH["cum_volume"] == SH["cum_max"]].groupby("skey")["time"].first().reset_index()
    SH.drop("cum_max", axis=1, inplace=True)
    s2 = s2.rename(columns={"skey": "ID", 'open':"d_open", "prev_close":"d_yclose","high":"d_high", "low":"d_low", "close":"d_close", "cum_volume":"d_volume", "cum_amount":"d_amount"})
    if SH["date"].iloc[0] < 20180820:
        s2["auction"] = 0
    else:
        dd["auction"] = np.where(dd["time"]<=145700000000, 0, 1)
        dd = dd.rename(columns={"skey": "ID"})
        s2 = pd.merge(s2, dd[["ID", "auction"]], on="ID")
    s2 = s2[["ID", "date", "d_open", "d_yclose", "d_high", "d_low", "d_close", "d_volume", "d_amount", "auction"]]
    re = pd.merge(db1, s2, on=["ID", "date", "d_open", "d_yclose","d_high", "d_low", "d_volume"], how="outer")
    try:
        assert(sum(re["d_amount_y"].isnull()) == 0)
    except:
        print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
        print(re[re["d_amount_y"].isnull()])
        wr_ong += [re[re["d_amount_y"].isnull()]]
    print(datetime.datetime.now() - startTm)
    
    # check 2
    # first part
    startTm = datetime.datetime.now()
    date = pd.DataFrame(pd.date_range(start='2019-06-10 08:30:00', end='2019-06-10 18:00:00', freq='s'), columns=["Orig"])
    date["time"] = date["Orig"].apply(lambda x: int(x.strftime("%H%M%S"))*1000)
    date["group"] = date["time"]//30000
    SH["group"] = SH["time"]//30000000
    gl = date[((date["time"] >= 93000000) & (date["time"] <= 113000000))|((date["time"] >= 130000000) & (date["time"] <= 150000000))]["group"].unique()
    l = set(gl) - set(SH["group"].unique())
    SH["has_missing1"] = 0 
    if len(l) != 0:
        print("massive missing")
        print(l)
        SH["order"] = SH.groupby(["skey", "time"]).cumcount()
        for i in l:
            SH["t"] = SH[SH["group"] > i].groupby("StockID")["time"].transform("min")
            SH["has_missing1"] = np.where((SH["time"] == SH["t"]) & (SH["order"] == 0), 1, 0)
        SH.drop(["order", "t", "group"], axis=1, inplace=True)   
    else:
        print("no massive missing")
        SH.drop(["group"], axis=1, inplace=True)
    



    # second part

    SH["time_interval"] = SH.groupby("skey")["datetime"].apply(lambda x: x - x.shift(1))
    SH["time_interval"] = SH["time_interval"].apply(lambda x: x.seconds)
    SH["tn_update"] = SH.groupby("skey")["cum_trades_cnt"].apply(lambda x: x-x.shift(1))

    f1 = SH[(SH["time"] >= 93000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f1 = f1.rename(columns={"time": "time1"})
    f2 = SH[(SH["time"] >= 130000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f2 = f2.rename(columns={"time": "time2"})
    f3 = SH[(SH["time"] >= 150000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f3 = f3.rename(columns={"time": "time3"})
    SH = pd.merge(SH, f1, on="skey", how="left")
    del f1
    SH = pd.merge(SH, f2, on="skey", how="left")
    del f2
    SH = pd.merge(SH, f3, on="skey", how="left")
    del f3
    p99 = SH[(SH["time"] > 93000000000) & (SH["time"] < 145700000000) & (SH["time"] != SH["time2"]) & (SH["tn_update"] != 0)]\
    .groupby("skey")["tn_update"].apply(lambda x: x.describe([0.99])["99%"]).reset_index()
    p99 = p99.rename(columns={"tn_update":"99%"})
    SH = pd.merge(SH, p99, on="skey", how="left")

    SH["has_missing2"] = 0
    SH["has_missing2"] = np.where((SH["time_interval"] > 60) & (SH["tn_update"] > SH["99%"]) & 
         (SH["time"] > SH["time1"]) & (SH["time"] != SH["time2"]) & (SH["time"] != SH["time3"]) & (SH["time"] != 100000000000), 1, 0)
    SH.drop(["time_interval", "tn_update", "time1", "time2", "time3", "99%"], axis=1, inplace=True) 

    SH["has_missing"] = np.where((SH["has_missing1"] == 1) | (SH["has_missing2"] == 1), 1, 0)
    SH.drop(["has_missing1", "has_missing2"], axis=1, inplace=True) 
    if SH[SH["has_missing"] == 1].shape[0] != 0:
        print("has missing!!!!!!!!!!!!!!!!!!!!!!!")
        print(SH[SH["has_missing"] == 1].shape[0])
        mi_ss += [SH[SH["has_missing"] == 1]]
    print(datetime.datetime.now() - startTm)
    
    
    
    startTm = datetime.datetime.now()
    SH["has_missing"] = SH["has_missing"].astype('int32')
    SH = SH[["skey", "date", "time", "clockAtArrival", "datetime", "ordering", "has_missing", "cum_trades_cnt", "cum_volume", "cum_amount", "prev_close",
                            "open", "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p','bid2p','bid1p',
                            'ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p', 'bid10q','bid9q','bid8q',
                             'bid7q','bid6q','bid5q','bid4q','bid3q','bid2q','bid1q', 'ask1q','ask2q','ask3q','ask4q','ask5q','ask6q',
                             'ask7q','ask8q','ask9q','ask10q', 'bid10n', 'bid9n', 'bid8n', 'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 
                             'ask1n', 'ask2n', 'ask3n', 'ask4n', 'ask5n', 'ask6n','ask7n', 'ask8n', 'ask9n', 'ask10n','bid1Top1q','bid1Top2q','bid1Top3q','bid1Top4q','bid1Top5q','bid1Top6q',
        'bid1Top7q','bid1Top8q','bid1Top9q','bid1Top10q','bid1Top11q','bid1Top12q','bid1Top13q','bid1Top14q','bid1Top15q','bid1Top16q','bid1Top17q','bid1Top18q',
        'bid1Top19q','bid1Top20q','bid1Top21q','bid1Top22q','bid1Top23q','bid1Top24q','bid1Top25q','bid1Top26q','bid1Top27q','bid1Top28q','bid1Top29q',
        'bid1Top30q','bid1Top31q','bid1Top32q','bid1Top33q','bid1Top34q','bid1Top35q','bid1Top36q','bid1Top37q','bid1Top38q','bid1Top39q','bid1Top40q',
        'bid1Top41q','bid1Top42q','bid1Top43q','bid1Top44q','bid1Top45q','bid1Top46q','bid1Top47q','bid1Top48q','bid1Top49q','bid1Top50q', 'ask1Top1q',
        'ask1Top2q','ask1Top3q','ask1Top4q','ask1Top5q','ask1Top6q','ask1Top7q','ask1Top8q','ask1Top9q','ask1Top10q','ask1Top11q','ask1Top12q','ask1Top13q',
        'ask1Top14q','ask1Top15q','ask1Top16q','ask1Top17q','ask1Top18q','ask1Top19q','ask1Top20q','ask1Top21q','ask1Top22q','ask1Top23q',
        'ask1Top24q','ask1Top25q','ask1Top26q','ask1Top27q','ask1Top28q','ask1Top29q','ask1Top30q','ask1Top31q','ask1Top32q','ask1Top33q',
        'ask1Top34q','ask1Top35q','ask1Top36q','ask1Top37q','ask1Top38q','ask1Top39q','ask1Top40q','ask1Top41q','ask1Top42q','ask1Top43q',
        'ask1Top44q','ask1Top45q','ask1Top46q','ask1Top47q','ask1Top48q','ask1Top49q','ask1Top50q',"total_bid_quantity", "total_ask_quantity","total_bid_vwap", "total_ask_vwap",
        "total_bid_orders",'total_ask_orders','total_bid_levels', 'total_ask_levels', 'bid_trade_max_duration', 'ask_trade_max_duration', 'cum_canceled_buy_orders', 'cum_canceled_buy_volume',
        "cum_canceled_buy_amount", "cum_canceled_sell_orders", 'cum_canceled_sell_volume',"cum_canceled_sell_amount"]]
    
    display(SH["date"].iloc[0])
    print("SH finished")
    
    db1 = DB("mongodb://user_rw:faa96dfc@192.168.10.223")
    db1.write('snapshot', SH)
    
    del SH
    print(datetime.datetime.now() - startTm)

wr_ong = pd.concat(wr_ong).reset_index(drop=True)
print(wr_ong)
mi_ss = pd.concat(mi_ss).reset_index(drop=True)
print(mi_ss)
print(less)



0:02:59.964729
0:00:30.595898
20180102 unzip finished
0:00:40.450600
0:01:11.328064
1
2
3
4
5
6
7
8
0:08:37.025018
0:00:32.409448


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
355  1601360  20180102    49.0   50.57   47.8      50.57     45.97   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
355      0.970913     0.100065      0.037972       0.013023        0.010651   

       d_volume    d_amount_x   TORate  allZT  hasZT  isZT  allDT  hasDT  \
355  24117001.0  1.194709e+09  0.06072    0.0    1.0   1.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
355   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
355         NaN      NaN  
0:00:03.299227
no massive missing
0:01:50.613524


20180102

SH finished
0:00:45.858295
0:00:33.481722
20180103 unzip finished
0:00:42.783249
0:01:17.293036
1
2
3
4
5
6
7
8
0:09:13.338815
0:00:33.769279


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
355  1601360  20180103    53.0   55.63  52.61      55.63     50.57   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
355      0.970913     0.100059      0.128169       0.008848        0.009557   

       d_volume   d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
355  18047055.0  982933573.0  0.045438    0.0    1.0   1.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
355   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
355         NaN      NaN  
0:00:03.339777
no massive missing
0:01:57.297762


20180103

SH finished
0:00:55.162355
0:00:31.366676
20180104 unzip finished
0:00:41.453744
0:01:14.266645
1
2
3
4
5
6
7
8
0:08:58.048898
0:00:33.865847


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
353  1601360  20180104    61.0   61.19   59.6      61.19     55.63   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
353      0.970913     0.099946      0.375984       0.004584        0.002094   

      d_volume   d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  isDT  \
353  7024221.0  427732585.0  0.017685    0.0    1.0   1.0    0.0    0.0   0.0   

     tmrHalted  haltedDays  marketShares  totalShares  d_close_y  d_amount_y  \
353        0.0         0.0   397182443.0  397182443.0        NaN         NaN   

     auction  
353      NaN  
0:00:03.377441
no massive missing
0:01:59.632728


20180104

SH finished
0:00:54.160439
0:00:31.272841
20180105 unzip finished
0:00:43.793662
0:01:17.590030
1
2
3
4
5
6
7
8
0:09:13.217271
0:00:33.111722


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
352  1601360  20180105    66.5    66.5  55.07      55.42     61.19   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
352      0.970913    -0.094296      0.227736      -0.000044        0.000279   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
352  72643793.0  4.249961e+09  0.182898    0.0    0.0   0.0    0.0    1.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
352   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
352         NaN      NaN  
0:00:03.275072
no massive missing
0:01:54.958138


20180105

SH finished
0:00:51.128083
0:00:31.895417
20180108 unzip finished
0:00:44.134529
0:01:18.563658
1
2
3
4
5
6
7
8
0:09:17.899427
0:00:33.479088


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
352  1601360  20180108   54.55    55.4  52.33      53.36     55.42   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
352      0.970913    -0.037171      0.160757       0.004508        0.001592   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
352  40804367.0  2.184401e+09  0.102735    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
352   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
352         NaN      NaN  
0:00:03.403666
no massive missing
0:01:59.278259


20180108

SH finished
0:00:49.455141
0:00:32.399935
20180109 unzip finished
0:00:42.651775
0:01:16.752168
1
2
3
4
5
6
7
8
0:09:06.082166
0:00:33.911910


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
354  1601360  20180109    53.0    53.1  51.34      52.53     53.36   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
354      0.970913    -0.015555      0.038758      -0.000068       -0.001607   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
354  28344914.0  1.479833e+09  0.071365    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
354   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
354         NaN      NaN  
0:00:03.284278
no massive missing
0:01:56.877928


20180109

SH finished
0:01:04.656143
0:00:48.009624
20180110 unzip finished
0:00:45.761506
0:01:19.698435
1
2
3
4
5
6
7
8
0:09:14.890409
0:00:32.806445


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
354  1601360  20180110   53.19   54.01  50.07      51.11     52.53   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
354      0.970913    -0.027032     -0.081251      -0.006176       -0.005289   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
354  38493554.0  1.997918e+09  0.096917    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
354   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
354         NaN      NaN  
0:00:03.283652
no massive missing
0:01:56.456116


20180110

SH finished
0:00:49.351961
0:00:32.497557
20180111 unzip finished
0:00:41.385190
0:01:14.739770
1
2
3
4
5
6
7
8
0:09:02.355163
0:00:31.654285


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
357  1601360  20180111   50.93   53.75  49.03      52.52     51.11   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
357      0.970913     0.027588      -0.14169       0.003122        0.005129   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
357  39093172.0  2.008223e+09  0.098426    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
357   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
357         NaN      NaN  
0:00:03.207524
no massive missing
0:01:52.808554


20180111

SH finished
0:00:48.220271
0:00:30.666257
20180112 unzip finished
0:00:41.601699
0:01:15.832177
1
2
3
4
5
6
7
8
0:09:04.243642
0:00:32.556340


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
355  1601360  20180112   51.85   52.14   50.1      50.21     52.52   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
355      0.970913    -0.043983     -0.094009      -0.004134        -0.00663   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
355  25743348.0  1.311933e+09  0.064815    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
355   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
355         NaN      NaN  
0:00:03.267997
no massive missing
0:01:54.190598


20180112

SH finished
0:01:01.295751
0:00:34.196035
20180115 unzip finished
0:00:42.431957
0:01:19.474230
1
2
3
4
5
6
7
8
0:09:16.297940
0:00:33.456896


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
356  1601360  20180115   48.62    49.2  46.27      47.05     50.21   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
356      0.970913    -0.062936     -0.118253      -0.022814       -0.027852   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
356  27161903.0  1.300160e+09  0.068386    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
356   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
356         NaN      NaN  
0:00:03.309707
no massive missing
0:02:02.234621


20180115

SH finished
0:01:06.463461
0:00:33.687503
20180116 unzip finished
0:00:41.549369
0:01:17.865672
1
2
3
4
5
6
7
8
0:09:07.089279
0:00:32.114931


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
355  1601360  20180116    46.8    49.0  46.21      48.21     47.05   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
355      0.970913     0.024655     -0.082239       0.007637        0.004527   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
355  21086023.0  1.010841e+09  0.053089    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
355   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
355         NaN      NaN  
0:00:03.242022
no massive missing
0:01:53.065574


20180116

SH finished
0:00:48.263092
0:00:32.421370
20180117 unzip finished
0:00:42.474266
0:01:18.730389
1
2
3
4
5
6
7
8
0:09:19.549645
0:00:34.172916


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
354  1601360  20180117    47.2   49.86  46.91      48.27     48.21   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
354      0.970913     0.001245     -0.055566      -0.005344       -0.002559   

       d_volume   d_amount_x   TORate  allZT  hasZT  isZT  allDT  hasDT  isDT  \
354  20240477.0  984998524.0  0.05096    0.0    0.0   0.0    0.0    0.0   0.0   

     tmrHalted  haltedDays  marketShares  totalShares  d_close_y  d_amount_y  \
354        0.0         0.0   397182443.0  397182443.0        NaN         NaN   

     auction  
354      NaN  
0:00:03.273972
no massive missing
0:01:55.876896


20180117

SH finished
0:00:53.717010
0:00:30.551687
20180118 unzip finished
0:00:42.547173
0:01:16.561529
1
2
3
4
5
6
7
8
0:09:03.341954
0:00:32.788577


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
354  1601360  20180118   47.61   50.64  47.28      49.53     48.27   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
354      0.970913     0.026103     -0.056931       0.003086        0.001782   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
354  22698685.0  1.115041e+09  0.057149    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
354   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
354         NaN      NaN  
0:00:03.264560
no massive missing
0:01:57.823321


20180118

SH finished
0:00:53.199451
0:00:31.557762
20180119 unzip finished
0:00:42.253009
0:01:19.141487
1
2
3
4
5
6
7
8
0:09:21.919337
0:00:34.695387


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
353  1601360  20180119   49.42    50.0  47.71      47.94     49.53   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
353      0.970913    -0.032102      -0.04521      -0.002294       -0.002433   

       d_volume   d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
353  18219939.0  889073693.0  0.045873    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
353   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
353         NaN      NaN  
0:00:03.259012
no massive missing
0:01:54.908924


20180119

SH finished
0:01:05.163701
0:00:31.153094
20180122 unzip finished
0:00:42.376752
0:01:20.654252
1
2
3
4
5
6
7
8
0:09:17.243129
0:00:33.166164


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
354  1601360  20180122    47.1   47.44   45.5      46.49     47.94   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
354      0.970913    -0.030246     -0.011902       0.011998        0.010988   

       d_volume   d_amount_x   TORate  allZT  hasZT  isZT  allDT  hasDT  isDT  \
354  15942930.0  742261255.0  0.04014    0.0    0.0   0.0    0.0    0.0   0.0   

     tmrHalted  haltedDays  marketShares  totalShares  d_close_y  d_amount_y  \
354        0.0         0.0   397182443.0  397182443.0        NaN         NaN   

     auction  
354      NaN  
0:00:03.314858
no massive missing
0:01:56.584637


20180122

SH finished
0:00:56.552250
0:00:31.480218
20180123 unzip finished
0:00:43.385110
0:01:17.892906
1
2
3
4
5
6
7
8
0:09:07.816666
0:00:33.026345


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
354  1601360  20180123    46.6    47.3  45.86      46.39     46.49   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
354      0.970913    -0.002151     -0.037752       0.001159        0.000774   

       d_volume   d_amount_x   TORate  allZT  hasZT  isZT  allDT  hasDT  isDT  \
354  10223540.0  476271128.0  0.02574    0.0    0.0   0.0    0.0    0.0   0.0   

     tmrHalted  haltedDays  marketShares  totalShares  d_close_y  d_amount_y  \
354        0.0         0.0   397182443.0  397182443.0        NaN         NaN   

     auction  
354      NaN  
0:00:03.189495
no massive missing
0:01:56.728132


20180123

SH finished
0:00:49.902715
0:00:32.574763
20180124 unzip finished
0:00:43.526999
0:01:18.713935
1
2
3
4
5
6
7
8
0:09:21.065303
0:00:33.420225


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
355  1601360  20180124    46.3   47.78   45.9      47.08     46.39   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
355      0.970913     0.014874     -0.024653       0.006184        0.007469   

       d_volume   d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
355  14004193.0  656790229.0  0.035259    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
355   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
355         NaN      NaN  
0:00:03.347901
no massive missing
0:01:57.229747


20180124

SH finished
0:00:50.101939
0:00:32.101268
20180125 unzip finished
0:00:44.750210
0:01:22.462836
1
2
3
4
5
6
7
8
0:09:33.126551
0:00:34.701794


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
354  1601360  20180125    46.9    47.5  46.06      46.12     47.08   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
354      0.970913    -0.020391     -0.068847      -0.001812       -0.001382   

       d_volume   d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
354  12428410.0  580615701.0  0.031291    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
354   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
354         NaN      NaN  
0:00:03.301909
no massive missing
0:01:57.356237


20180125

SH finished
0:00:50.776794
0:00:30.902420
20180126 unzip finished
0:00:41.285393
0:01:17.774800
1
2
3
4
5
6
7
8
0:09:06.076208
0:00:31.927263


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
353  1601360  20180126    45.8    46.6  45.11      45.45     46.12   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
353      0.970913    -0.014527      -0.05194       0.000143       -0.000938   

       d_volume   d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
353  11679159.0  534878432.0  0.029405    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
353   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
353         NaN      NaN  
0:00:03.168505
no massive missing
0:01:52.301002


20180126

SH finished
0:00:48.007717
0:00:30.822025
20180129 unzip finished
0:00:41.347923
0:01:16.896576
1
2
3
4
5
6
7
8
0:09:16.601186
0:00:32.894926


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
352  1601360  20180129    46.2    49.8   45.6      48.64     45.45   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
352      0.970913     0.070187      0.046247      -0.011553       -0.011839   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
352  24606194.0  1.171856e+09  0.061952    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
352   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
352         NaN      NaN  
0:00:03.285772
no massive missing
0:01:54.906946


20180129

SH finished
0:01:00.669511
0:00:30.083962
20180130 unzip finished
0:00:39.826412
0:01:14.142760
1
2
3
4
5
6
7
8
0:08:45.331999
0:00:32.135454


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
352  1601360  20180130    49.0    53.5   49.0       53.5     48.64   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
352      0.970913     0.099918      0.153266       0.000564       -0.002472   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
352  37399423.0  1.921412e+09  0.094162    0.0    1.0   1.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
352   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
352         NaN      NaN  
0:00:03.987250
no massive missing
0:01:50.404214


20180130

SH finished
0:00:47.563306
0:00:32.394752
20180131 unzip finished
0:00:40.978742
0:01:20.674032
1
2
3
4
5
6
7
8
0:09:29.568477
0:00:35.306723


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
352  1601360  20180131    55.0   55.88   51.2      51.26      53.5   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
352      0.970913    -0.041869      0.088785      -0.019657       -0.023613   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
352  37234004.0  1.980966e+09  0.093745    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
352   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
352         NaN      NaN  
0:00:03.419556
no massive missing
0:02:02.618988


20180131

SH finished
0:00:54.207294
0:00:34.194468
20180201 unzip finished
0:00:44.632037
0:01:23.312733
1
2
3
4
5
6
7
8
0:09:46.110593
0:00:36.487385


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
352  1601360  20180201   49.97    51.6  49.01      50.03     51.26   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
352      0.970913    -0.023995      0.084779      -0.033057       -0.041902   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
352  21872506.0  1.099551e+09  0.055069    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
352   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
352         NaN      NaN  
0:00:03.566883
no massive missing
0:02:04.252369


20180201

SH finished
0:00:59.454123
0:00:31.609946
20180202 unzip finished
0:00:41.740958
0:01:17.902786
1
2
3
4
5
6
7
8
0:08:57.621194
0:00:32.346647


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
352  1601360  20180202    54.0   55.03  52.02      55.03     50.03   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
352      0.970913      0.09994      0.210781        0.00336       -0.002856   

       d_volume    d_amount_x   TORate  allZT  hasZT  isZT  allDT  hasDT  \
352  25471230.0  1.380523e+09  0.06413    0.0    1.0   1.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
352   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
352         NaN      NaN  
0:00:03.297403
no massive missing
0:01:53.137230


20180202

SH finished
0:00:49.200319
0:00:28.846936
20180205 unzip finished
0:00:40.408737
0:01:11.866277
1
2
3
4
5
6
7
8
0:08:27.667010
0:00:30.801970


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
349  1601360  20180205    55.0   60.53  53.88      60.53     55.03   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
349      0.970913     0.099945      0.244449      -0.001103       -0.004016   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
349  36025537.0  2.099217e+09  0.090703    0.0    1.0   1.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
349   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
349         NaN      NaN  
0:00:03.162326
no massive missing
0:01:50.281402


20180205

SH finished
0:01:00.496516
0:00:32.260075
20180206 unzip finished
0:00:42.207830
0:01:20.143769
1
2
3
4
5
6
7
8
0:09:11.480537
0:00:37.512796


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
348  1601360  20180206    58.7   60.45  54.48      54.48     60.53   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
348      0.970913     -0.09995      0.018318      -0.049008       -0.049389   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
348  34965214.0  2.001063e+09  0.088033    0.0    0.0   0.0    0.0    1.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
348   1.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
348         NaN      NaN  
0:00:03.390419
no massive missing
0:02:00.468246


20180206

SH finished
0:00:58.702828
0:00:33.366414
20180207 unzip finished
0:00:42.550885
0:01:18.222264
1
2
3
4
5
6
7
8
0:08:56.591768
0:00:33.355514


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
345  1601360  20180207    56.4   59.48   54.5      57.99     54.48   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
345      0.970913     0.064427      0.131291       0.001239        0.002538   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
345  38767633.0  2.228451e+09  0.097607    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
345   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
345         NaN      NaN  
0:00:03.256475
no massive missing
0:01:54.636767


20180207

SH finished
0:00:50.903094
0:00:30.182848
20180208 unzip finished
0:00:38.765808
0:01:12.992970
1
2
3
4
5
6
7
8
0:08:30.737455
0:00:30.348267


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
342  1601360  20180208   56.88   60.89  55.15      57.17     57.99   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
342      0.970913     -0.01414      0.142714       0.009656        0.012985   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
342  36779918.0  2.155862e+09  0.092602    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
342   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
342         NaN      NaN  
0:00:03.047831
no massive missing
0:01:45.535375


20180208

SH finished
0:00:44.506248
0:00:32.260588
20180209 unzip finished
0:00:39.772433
0:01:14.744656
1
2
3
4
5
6
7
8
0:08:49.285505
0:00:32.942219


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
342  1601360  20180209    53.8    54.7  51.45      51.45     57.17   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
342      0.970913    -0.100052     -0.065055      -0.036692       -0.029688   

       d_volume    d_amount_x   TORate  allZT  hasZT  isZT  allDT  hasDT  \
342  34030404.0  1.781182e+09  0.08568    0.0    0.0   0.0    0.0    1.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
342   1.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
342         NaN      NaN  
0:00:03.344806
no massive missing
0:01:58.346399


20180209

SH finished
0:00:50.456761
0:00:28.453169
20180212 unzip finished
0:00:37.761136
0:01:09.602429
1
2
3
4
5
6
7
8
0:08:15.496036
0:00:30.771224


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
345  1601360  20180212    52.5    53.5  51.45      52.29     51.45   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
345      0.970913     0.016327     -0.136131       0.025959        0.024902   

       d_volume   d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
345  18486114.0  972092458.0  0.046543    0.0    0.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
345   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
345         NaN      NaN  
0:00:03.162888
no massive missing
0:01:48.771013


20180212

SH finished
0:00:46.406080
0:00:27.256236
20180213 unzip finished
0:00:36.159473
0:01:07.202543
1
2
3
4
5
6
7
8
0:07:59.020917
0:00:28.781073


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
347  1601360  20180213    53.8   57.52  53.11      57.52     52.29   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
347      0.970913     0.100019        0.0558       0.006642        0.002057   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
347  22514233.0  1.254491e+09  0.056685    0.0    1.0   1.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
347   0.0        0.0         0.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
347         NaN      NaN  
0:00:02.862994
no massive missing
0:01:41.815829


20180213

SH finished
0:00:46.796017
0:00:24.935387
20180214 unzip finished
0:00:34.104611
0:01:02.979389
1
2
3
4
5
6
7
8
0:07:21.903429
0:00:27.352675


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
347  1601360  20180214    61.5   63.27   60.0      63.24     57.52   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
347      0.970913     0.099444      0.090533       0.003117        0.002277   

       d_volume    d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  \
347  40461380.0  2.522157e+09  0.101871    0.0    1.0   0.0    0.0    0.0   

     isDT  tmrHalted  haltedDays  marketShares  totalShares  d_close_y  \
347   0.0        1.0         4.0   397182443.0  397182443.0        NaN   

     d_amount_y  auction  
347         NaN      NaN  
0:00:02.727439
no massive missing
0:01:36.524592


20180214

SH finished
0:00:38.818844
0:00:26.477307
20180222 unzip finished
0:00:36.970135
0:01:06.865694
1
2
3
4
5
6
7
8
0:07:55.961656
0:00:28.305791


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


0:00:02.891109
no massive missing
0:01:40.041450


20180222

SH finished
0:00:44.554304
0:00:48.135567
20180223 unzip finished
0:00:35.451086
0:01:06.690731
1
2
3
4
5
6
7
8
0:08:07.115484
0:00:30.400302
0:00:03.236827
no massive missing
0:01:44.838418


20180223

SH finished
0:00:42.432375
0:00:31.632898
20180226 unzip finished
0:00:40.221996
0:01:15.296542
1
2
3
4
5
6
7
8
0:09:02.985757
0:00:32.804891
0:00:03.342050
no massive missing
0:01:56.022972


20180226

SH finished
0:00:48.040368
0:00:31.200204
20180227 unzip finished
0:00:39.032016
0:01:12.198236
1
2
3
4
5
6
7
8
0:08:44.486489
0:00:32.102328
0:00:03.250075
no massive missing
0:01:53.167904


20180227

SH finished
0:00:56.732860
0:00:28.629590
20180228 unzip finished
0:00:38.907283
0:01:11.960787
1
2
3
4
5
6
7
8
0:08:34.294196
0:00:30.516429
0:00:03.016931
no massive missing
0:01:46.401241


20180228

SH finished
0:00:49.457986
0:00:29.565491
20180301 unzip finished
0:00:39.984863
0:01:15.801469
1
2
3
4
5
6
7
8
0:08:59.315597
0:00:32.383117
0:00:03.210352
no massive missing
0:01:49.514236


20180301

SH finished
0:00:46.133698
0:00:29.862813
20180302 unzip finished
0:00:39.352773
0:01:11.239555
1
2
3
4
5
6
7
8
0:08:41.094572
0:00:29.787008
0:00:03.113188
no massive missing
0:01:43.613081


20180302

SH finished
0:00:43.900691
0:00:28.248365
20180305 unzip finished
0:00:37.184162
0:01:11.420032
1
2
3
4
5
6
7
8
0:08:30.945151
0:00:30.573001
0:00:03.124586
no massive missing
0:01:50.347789


20180305

SH finished
0:00:45.197845
0:00:31.351654
20180306 unzip finished
0:00:41.753750
0:01:19.573288
1
2
3
4
5
6
7
8
0:09:08.257004
0:00:31.896544
0:00:03.249891
no massive missing
0:01:54.256443


20180306

SH finished
0:00:48.624192
0:00:30.755684
20180307 unzip finished
0:00:40.563830
0:01:14.073899
1
2
3
4
5
6
7
8
0:08:53.146383
0:00:31.992184
0:00:03.193410
no massive missing
0:01:51.140521


20180307

SH finished
0:00:47.686857
0:00:29.567660
20180308 unzip finished
0:00:39.061101
0:01:13.814603
1
2
3
4
5
6
7
8
0:08:43.143820
0:00:31.455882
0:00:03.171413
no massive missing
0:01:49.744642


20180308

SH finished
0:00:54.756896
0:00:30.841836
20180309 unzip finished
0:00:46.548767
0:01:17.662781
1
2
3
4
5
6
7
8
0:09:39.708739
0:00:50.183883
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
           ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
1290  1603817  20180309   10.56    10.8  10.45      10.74     10.58   

      d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
1290      0.996175     0.015123      0.026769       0.012307        0.016645   

       d_volume  d_amount_x    TORate  allZT  hasZT  isZT  allDT  hasDT  isDT  \
1290  7313246.0  78079928.0  0.040629    0.0    0.0   0.0    0.0    0.0   0.0   

      tmrHalted  haltedDays  marketShares  totalShares  d_close_y  d_amount_y  \
1290        0.0         0.0   180000000.0  450000000.0        NaN         NaN   

      auction  
1290      NaN  
0:00:03.405628
no massive missing
0:01:56.901331


20180309

SH finished
0:00:50.718782
0:00:32.200505
20180312 unzip finished
0:00:41.994817
0:01:19.201440
1
2
3
4
5
6
7
8
0:09:42.789939
0:00:34.308103


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


0:00:03.390494
no massive missing
0:01:58.468832


20180312

SH finished
0:00:50.971512
0:00:32.174670
20180313 unzip finished
0:00:42.127461
0:01:17.991023
1
2
3
4
5
6
7
8
0:09:19.758660
0:00:33.118606
0:00:03.358079
no massive missing
0:02:02.711578


20180313

SH finished
0:01:03.217300
0:00:31.006294
20180314 unzip finished
0:00:41.129516
0:01:14.787019
1
2
3
4
5
6
7
8
0:09:03.284402
0:00:32.811325
0:00:03.264000
no massive missing
0:01:52.528116


20180314

SH finished
0:00:53.192104
0:00:29.607926
20180315 unzip finished
0:00:41.936637
0:01:15.990367
1
2
3
4
5
6
7
8
0:09:11.810244
0:00:34.829594
0:00:03.268727
no massive missing
0:01:56.884024


20180315

SH finished
0:00:49.562492
0:00:28.726834
20180316 unzip finished
0:00:40.510253
0:01:14.781359
1
2
3
4
5
6
7
8
0:09:09.180291
0:00:33.128035
0:00:03.223996
no massive missing
0:01:53.857666


20180316

SH finished
0:00:47.033108
0:00:28.429159
20180319 unzip finished
0:00:41.759345
0:01:14.849879
1
2
3
4
5
6
7
8
0:08:44.560476
0:00:30.813065
0:00:03.111518
no massive missing
0:01:50.243744


20180319

SH finished
0:00:47.611088
0:00:30.985697
20180320 unzip finished
0:00:40.463477
0:01:16.198253
1
2
3
4
5
6
7
8
0:09:07.132585
0:00:32.761286
0:00:03.451062
no massive missing
0:01:59.811084


20180320

SH finished
0:01:00.086230
0:00:31.147816
20180321 unzip finished
0:00:41.999089
0:01:17.246485
1
2
3
4
5
6
7
8
0:09:12.375681
0:00:35.469611
0:00:03.642059
no massive missing
0:02:00.422263


20180321

SH finished
0:00:58.243121
0:00:30.970729
20180322 unzip finished
0:00:41.371628
0:01:13.884305
1
2
3
4
5
6
7
8
0:08:55.103295
0:00:34.587208
0:00:03.424345
no massive missing
0:01:58.515553


20180322

SH finished
0:01:09.235339
0:00:34.913042
20180323 unzip finished
0:00:44.266192
0:01:22.988047
1
2
3
4
5
6
7
8
0:10:00.777672
0:00:37.565711
0:00:03.821559
no massive missing
0:02:10.183680


20180323

SH finished
0:01:11.538946
0:00:31.133049
20180326 unzip finished
0:00:41.840347
0:01:28.759506
1
2
3
4
5
6
7
8
0:09:14.189770
0:00:35.864041
0:00:03.484247
no massive missing
0:01:57.930142


20180326

SH finished
0:00:55.067370
0:00:33.317252
20180327 unzip finished
0:00:46.284476
0:01:18.787873
1
2
3
4
5
6
7
8
0:09:24.656024
0:00:33.900833
0:00:03.368151
no massive missing
0:01:57.585383


20180327

SH finished
0:00:59.201419
0:00:30.940008
20180328 unzip finished
0:00:39.667182
0:01:16.140548
1
2
3
4
5
6
7
8
0:08:45.994143
0:00:33.115693
0:00:03.290503
no massive missing
0:01:53.565922


20180328

SH finished
0:01:00.682905
0:00:32.118693
20180329 unzip finished
0:00:40.660282
0:01:14.637890
1
2
3
4
5
6
7
8
0:08:44.277241
0:00:32.844441
0:00:03.312511
no massive missing
0:01:52.922925


20180329

SH finished
0:00:58.871079
0:00:29.701579
20180330 unzip finished
0:00:39.962419
0:01:24.902699
1
2
3
4
5
6
7
8
0:08:55.565121
0:00:33.386386
0:00:03.444324
no massive missing
0:02:02.871498


20180330

SH finished
0:00:58.368720
0:00:31.526822
20180402 unzip finished
0:00:41.422741
0:01:16.818432
1
2
3
4
5
6
7
8
0:09:04.458399
0:00:34.004166
0:00:03.391478
no massive missing
0:01:56.908647


20180402

SH finished
0:01:04.604170
0:00:29.618865
20180403 unzip finished
0:00:40.263559
0:01:15.731827
1
2
3
4
5
6
7
8
0:08:48.569975
0:00:33.542228
0:00:03.327709
no massive missing
0:01:54.379962


20180403

SH finished
0:00:57.526567
0:00:30.045091
20180404 unzip finished
0:00:40.857197
0:01:14.823617
1
2
3
4
5
6
7
8
0:08:56.716505
0:00:32.692099
0:00:03.332885
no massive missing
0:01:52.510819


20180404

SH finished
0:00:50.412864
0:00:29.733434
20180409 unzip finished
0:00:38.960680
0:01:13.084795
1
2
3
4
5
6
7
8
0:08:30.790150
0:00:32.275989
0:00:03.195208
no massive missing
0:01:59.393209


20180409

SH finished
0:00:46.501279
0:00:30.116714
20180410 unzip finished
0:00:40.851090
0:01:17.537182
1
2
3
4
5
6
7
8
0:08:49.172210
0:00:34.032690
0:00:03.452279
no massive missing
0:01:56.260931


20180410

SH finished
0:00:55.060551
0:00:29.687255
20180411 unzip finished
0:00:41.052061
0:01:17.008797
1
2
3
4
5
6
7
8
0:09:28.123682
0:00:38.389215
0:00:03.848004
no massive missing
0:02:06.387350


20180411

SH finished
0:00:52.179068
0:00:29.951744
20180412 unzip finished
0:00:43.211632
0:01:14.791501
1
2
3
4
5
6
7
8
0:09:20.402383
0:00:36.128435
0:00:03.664808
no massive missing
0:02:03.560192


20180412

SH finished
0:01:01.700905
0:00:30.274832
20180413 unzip finished
0:00:42.192559
0:01:16.739694
1
2
3
4
5
6
7
8
0:09:18.334023
0:00:36.300532
0:00:03.727933
no massive missing
0:02:03.028871


20180413

SH finished
0:00:52.856414
0:00:29.372410
20180416 unzip finished
0:00:40.446651
0:01:14.654092
1
2
3
4
5
6
7
8
0:09:07.029779
0:00:37.819369
0:00:03.874550
no massive missing
0:02:06.564178


20180416

SH finished
0:00:53.658781
0:00:30.115153
20180417 unzip finished
0:00:40.899737
0:01:15.949032
1
2
3
4
5
6
7
8
0:09:10.075234
0:00:37.731949
0:00:03.752963
no massive missing
0:02:06.689813


20180417

SH finished
0:00:50.939941
0:00:30.601808
20180418 unzip finished
0:00:40.558921
0:01:14.748893
1
2
3
4
5
6
7
8
0:09:20.912744
0:00:36.511954
0:00:03.695994
no massive missing
0:02:11.449712


20180418

SH finished
0:00:54.070492
0:00:29.338292
20180419 unzip finished
0:00:41.032946
0:01:12.979978
1
2
3
4
5
6
7
8
0:09:10.805280
0:00:35.236975
0:00:03.641919
no massive missing
0:01:58.308006


20180419

SH finished
0:00:58.679936
0:00:31.152245
20180420 unzip finished
0:00:41.791777
0:01:23.602503
1
2
3
4
5
6
7
8
0:09:16.403511
0:00:36.248947
0:00:03.738701
no massive missing
0:02:12.008494


20180420

SH finished
0:00:52.903345
0:00:28.041216
20180423 unzip finished
0:00:38.992400
0:01:14.457023
1
2
3
4
5
6
7
8
0:08:51.956075
0:00:32.273768
0:00:03.227183
no massive missing
0:01:58.123263


20180423

SH finished
0:00:50.322146
0:00:29.865712
20180424 unzip finished
0:00:39.657450
0:01:15.148391
1
2
3
4
5
6
7
8
0:08:38.850567
0:00:33.322563
0:00:03.270097
no massive missing
0:01:56.293095


20180424

SH finished
0:00:54.205298
0:00:28.446037
20180425 unzip finished
0:00:38.396695
0:01:20.015189
1
2
3
4
5
6
7
8
0:08:19.634685
0:00:31.236801
0:00:03.195539
no massive missing
0:01:48.515460


20180425

SH finished
0:00:49.186475
0:00:27.618011
20180426 unzip finished
0:00:38.305347
0:01:11.647256
1
2
3
4
5
6
7
8
0:08:20.123591
0:00:30.512965
0:00:03.087559
no massive missing
0:01:44.463625


20180426

SH finished
0:00:46.225514
0:00:28.179796
20180427 unzip finished
0:00:36.761232
0:01:08.392923
1
2
3
4
5
6
7
8
0:08:13.159507
0:00:30.490024
0:00:03.122788
no massive missing
0:01:46.185838


20180427

SH finished
0:00:46.792607
0:00:28.556128
20180502 unzip finished
0:00:37.745299
0:01:10.766120
1
2
3
4
5
6
7
8
0:08:27.074135
0:00:31.320101
0:00:03.140322
no massive missing
0:01:54.301047


20180502

SH finished
0:00:58.627830
0:00:29.258735
20180503 unzip finished
0:00:38.795752
0:01:13.782988
1
2
3
4
5
6
7
8
0:08:43.499195
0:00:32.743029
0:00:03.302692
no massive missing
0:01:53.295053


20180503

SH finished
0:00:56.218925
0:00:28.243242
20180504 unzip finished
0:00:38.608160
0:01:10.370857
1
2
3
4
5
6
7
8
0:08:22.409909
0:00:31.627722
0:00:03.193749
no massive missing
0:01:47.602107


20180504

SH finished
0:01:03.333356
0:00:29.155853
20180507 unzip finished
0:00:39.958339
0:01:14.270236
1
2
3
4
5
6
7
8
0:09:11.317651
0:00:34.467006
0:00:03.473771
no massive missing
0:01:57.157174


20180507

SH finished
0:00:53.648727
0:00:29.000821
20180508 unzip finished
0:00:41.012249
0:01:15.290022
1
2
3
4
5
6
7
8
0:09:04.117480
0:00:33.979359
0:00:03.513406
no massive missing
0:01:55.537910


20180508

SH finished
0:00:56.606234
0:00:27.793220
20180509 unzip finished
0:00:43.526399
0:01:12.648595
1
2
3
4
5
6
7
8
0:09:14.245509
0:00:49.192374
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
          ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
982  1600980  20180509    14.5   14.59  14.35      14.51     14.56   

     d_cumprodCAA  d_dayReturn  d_5dayReturn  d_ICDayReturn  d_CSIDayReturn  \
982           1.0    -0.003434      0.023272      -0.002082       -0.000484   

      d_volume  d_amount_x   TORate  allZT  hasZT  isZT  allDT  hasDT  isDT  \
982  1804425.0  26094064.0  0.01259    0.0    0.0   0.0    0.0    0.0   0.0   

     tmrHalted  haltedDays  marketShares  totalShares  d_close_y  d_amount_y  \
982        0.0         0.0   143325928.0  152209880.0        NaN         NaN   

     auction  
982      NaN  
0:00:03.411473
no massive missing
0:01:50.583267


20180509

SH finished
0:00:51.919965
0:00:28.901998
20180510 unzip finished
0:00:40.943632
0:01:14.827731
1
2
3
4
5
6
7
8
0:09:12.168065
0:00:34.619301


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


0:00:03.686894
no massive missing
0:01:57.415767


20180510

SH finished
0:01:03.179969
0:00:28.372901
20180511 unzip finished
0:00:39.809108
0:01:22.321930
1
2
3
4
5
6
7
8
0:09:10.355834
0:00:34.371253
0:00:03.562884
no massive missing
0:02:02.803557


20180511

SH finished
0:00:58.771584
0:00:28.092857
20180514 unzip finished
0:00:39.508562
0:01:11.445382
1
2
3
4
5
6
7
8
0:08:58.642418
0:00:32.321381
0:00:03.446989
no massive missing
0:01:51.528468


20180514

SH finished
0:00:47.289366
0:00:28.460623
20180515 unzip finished
0:00:38.483130
0:01:10.402636
1
2
3
4
5
6
7
8
0:08:51.661444
0:00:32.203937
0:00:03.344639
no massive missing
0:01:53.310785


20180515

SH finished
0:01:02.736540
0:00:29.098673
20180516 unzip finished
0:00:40.407363
0:01:14.424407
1
2
3
4
5
6
7
8
0:09:15.973788
0:00:33.325874
0:00:03.455362
no massive missing
0:01:51.674873


20180516

SH finished
0:01:02.323491
0:00:26.737591
20180517 unzip finished
0:00:40.282081
0:01:10.243136
1
2
3
4
5
6
7
8
0:08:45.162661
0:00:33.417207
0:00:03.481868
no massive missing
0:01:54.255336


20180517

SH finished
0:00:45.777180
0:00:29.763797
20180518 unzip finished
0:00:40.528701
0:01:13.537561
1
2
3
4
5
6
7
8
0:09:25.900645
0:00:34.138270
0:00:03.489618
no massive missing
0:01:55.727178


20180518

SH finished
0:01:01.701834
0:00:30.502072
20180521 unzip finished
0:00:43.109558
0:01:15.984265
1
2
3
4
5
6
7
8
0:09:51.778525
0:00:36.266229
0:00:03.751711
no massive missing
0:02:04.546322


20180521

SH finished
0:01:04.479117
0:00:29.669195
20180522 unzip finished
0:00:42.149386
0:01:23.532042
1
2
3
4
5
6
7
8
0:09:38.585747
0:00:35.568961
0:00:03.303600
no massive missing
0:02:03.946825


20180522

SH finished
0:00:48.937455
0:00:30.192191
20180523 unzip finished
0:00:43.460115
0:01:19.321167
1
2
3
4
5
6
7
8
0:09:07.003151
0:00:33.116231
0:00:03.489524
no massive missing
0:01:56.269534


20180523

SH finished
0:00:50.494646
0:00:30.259261
20180524 unzip finished
0:00:40.513280
0:01:14.165309
1
2
3
4
5
6
7
8
0:08:54.676443
0:00:32.165712
0:00:03.219380
no massive missing
0:01:49.205805


20180524

SH finished
0:01:04.943598
0:00:30.496770
20180525 unzip finished
0:00:40.075764
0:01:13.394237
1
2
3
4
5
6
7
8
0:08:54.066366
0:00:36.006768
0:00:03.502308
no massive missing
0:01:55.505092


20180525

SH finished
0:00:47.396655
0:00:29.440677
20180528 unzip finished
0:00:39.879589
0:01:14.140667
1
2
3
4
5
6
7
8
0:08:46.761374
0:00:33.009824
0:00:03.296884
no massive missing
0:01:55.861380


20180528

SH finished
0:00:48.129930
0:00:31.241947
20180529 unzip finished
0:00:40.055761
0:01:14.623025
1
2
3
4
5
6
7
8
0:08:56.198790
0:00:32.367457
0:00:03.246148
no massive missing
0:01:50.439430


20180529

SH finished
0:00:51.179296
0:00:31.971701
20180530 unzip finished
0:00:41.325626
0:01:16.377588
1
2
3
4
5
6
7
8
0:09:05.004161
0:00:33.570657
0:00:03.338861
no massive missing
0:01:56.470401


20180530

SH finished
0:00:58.068875
0:00:30.185085
20180531 unzip finished
0:00:43.049602
0:01:12.792278
1
2
3
4
5
6
7
8
0:08:51.718803
0:00:33.271718
0:00:03.321137
no massive missing
0:01:55.072954


20180531

SH finished
0:01:05.642510
0:00:30.886429
20180601 unzip finished
0:00:40.509231
0:01:13.454566
1
2
3
4
5
6
7
8
0:08:47.821678
0:00:32.658183
0:00:03.262851
no massive missing
0:01:51.538159


20180601

SH finished
0:00:48.453661
0:00:28.058774
20180604 unzip finished
0:00:38.791329
0:01:10.281467
1
2
3
4
5
6
7
8
0:08:36.281909
0:00:31.216834
0:00:03.168960
no massive missing
0:01:48.697195


20180604

SH finished
0:00:46.370874
0:00:28.818766
20180605 unzip finished
0:00:39.720170
0:01:11.100301
1
2
3
4
5
6
7
8
0:08:41.911430
0:00:31.486363
0:00:03.194649
no massive missing
0:01:50.887334


20180605

SH finished
0:01:12.879344
0:00:29.223073
20180606 unzip finished
0:00:41.003613
0:01:09.764255
1
2
3
4
5
6
7
8
0:08:40.314613
0:00:31.986416
0:00:03.177772
no massive missing
0:01:47.638631


20180606

SH finished
0:00:45.924906
0:00:32.637510
20180607 unzip finished
0:00:39.407301
0:01:10.931835
1
2
3
4
5
6
7
8
0:08:35.386852
0:00:32.208207
0:00:03.352497
no massive missing
0:01:50.769865


20180607

SH finished
0:01:01.968562
0:00:30.859729
20180608 unzip finished
0:00:42.841204
0:01:15.482339
1
2
3
4
5
6
7
8
0:08:57.430599
0:00:35.928001
0:00:03.401316
no massive missing
0:01:56.332883


20180608

SH finished
0:00:56.410712
0:00:27.770651
20180611 unzip finished
0:00:39.341651
0:01:07.765952
1
2
3
4
5
6
7
8
0:08:24.410444
0:00:30.525842
0:00:03.098290
no massive missing
0:01:46.932226


20180611

SH finished
0:00:47.621257
0:00:29.189355
20180612 unzip finished
0:00:39.015116
0:01:10.294493
1
2
3
4
5
6
7
8
0:08:42.368910
0:00:31.956349
0:00:03.209868
no massive missing
0:01:50.791164


20180612

SH finished
0:01:12.183046
0:00:27.648148
20180613 unzip finished
0:00:37.624082
0:01:06.282394
1
2
3
4
5
6
7
8
0:08:19.077991
0:00:31.186207
0:00:03.143807
no massive missing
0:01:49.516858


20180613

SH finished
0:01:09.793485
0:00:28.634603
20180614 unzip finished
0:00:39.444011
0:01:09.452160
1
2
3
4
5
6
7
8
0:08:32.357285
0:00:32.253986
0:00:03.262002
no massive missing
0:01:51.877303


20180614

SH finished
0:00:46.661053
0:00:32.715503
20180615 unzip finished
0:00:40.578497
0:01:14.073239
1
2
3
4
5
6
7
8
0:08:55.949895
0:00:32.674999
0:00:03.268805
no massive missing
0:01:53.073882


20180615

SH finished
0:00:48.289590
0:00:33.061133
20180619 unzip finished
0:00:41.466282
0:01:15.670008
1
2
3
4
5
6
7
8
0:09:26.529988
0:00:35.505923
0:00:03.516977
no massive missing
0:02:03.464168


20180619

SH finished
0:00:57.998800
0:00:32.297547
20180620 unzip finished
0:00:41.262352
0:01:14.039527
1
2
3
4
5
6
7
8
0:08:54.639240
0:00:32.635862
0:00:03.248010
no massive missing
0:01:53.175310


20180620

SH finished
0:01:15.961487
0:00:31.868656
20180621 unzip finished
0:00:39.705775
0:01:11.195481
1
2
3
4
5
6
7
8
0:09:00.416217
0:00:32.432021
0:00:03.268937
no massive missing
0:01:54.321589


20180621

SH finished
0:01:02.492707
0:00:28.239115
20180622 unzip finished
0:00:38.747805
0:01:09.870272
1
2
3
4
5
6
7
8
0:08:31.653324
0:00:31.596936
0:00:03.183190
no massive missing
0:01:53.991113


20180622

SH finished
0:00:56.854441
0:00:27.630438
20180625 unzip finished
0:00:38.041255
0:01:05.458451
1
2
3
4
5
6
7
8
0:07:45.317954
0:00:28.607240
0:00:02.941033
no massive missing
0:01:41.541446


20180625

SH finished
0:00:49.512831
0:00:27.653379
20180626 unzip finished
0:00:38.308276
0:01:07.665759
1
2
3
4
5
6
7
8
0:08:09.735359
0:00:30.066067
0:00:03.047116
no massive missing
0:01:44.419405


20180626

SH finished
0:00:43.284751
0:00:27.378617
20180627 unzip finished
0:00:36.594983
0:01:06.987875
1
2
3
4
5
6
7
8
0:08:08.694161
0:00:30.284760
0:00:03.035574
no massive missing
0:01:46.716244


20180627

SH finished
0:00:48.636587
0:00:27.185527
20180628 unzip finished
0:00:36.533057
0:01:05.227019
1
2
3
4
5
6
7
8
0:08:02.534281
0:00:30.298749
0:00:03.080893
no massive missing
0:01:44.971234


20180628

SH finished
0:00:43.752020
0:00:29.973017
20180629 unzip finished
0:00:38.312643
0:01:10.372117
1
2
3
4
5
6
7
8
0:08:41.078085
0:00:32.251392
0:00:03.182711
no massive missing
0:01:49.536826


20180629

SH finished
0:00:53.246966
0:00:28.506952
20180702 unzip finished
0:00:38.451885
0:01:09.868633
1
2
3
4
5
6
7
8
0:08:31.251451
0:00:31.519226
0:00:03.148444
no massive missing
0:01:48.481177


20180702

SH finished
0:00:50.917986
0:00:30.205887
20180703 unzip finished
0:00:39.229526
0:01:12.079771
1
2
3
4
5
6
7
8
0:08:40.566100
0:00:32.423692
0:00:03.267047
no massive missing
0:01:54.345982


20180703

SH finished
0:01:14.533993
0:00:29.803877
20180704 unzip finished
0:00:38.842134
0:01:11.801133
1
2
3
4
5
6
7
8
0:08:34.649880
0:00:32.415740
0:00:03.274260
no massive missing
0:01:52.904444


20180704

SH finished
0:00:52.272787
0:00:29.193201
20180705 unzip finished
0:00:38.682906
0:01:13.427307
1
2
3
4
5
6
7
8
0:08:41.694766
0:00:30.865533
0:00:03.126932
no massive missing
0:01:46.261705


20180705

SH finished
0:01:15.007957
0:00:30.042881
20180706 unzip finished
0:00:39.662016
0:01:14.003959
1
2
3
4
5
6
7
8
0:08:52.190948
0:00:33.349949
0:00:03.397869
no massive missing
0:01:53.791714


20180706

SH finished
0:01:03.851932
0:00:27.751700
20180709 unzip finished
0:00:37.431061
0:01:06.384441
1
2
3
4
5
6
7
8
0:08:15.399466
0:00:29.960503
0:00:03.044878
no massive missing
0:01:45.410265


20180709

SH finished
0:01:01.225388
0:00:28.325124
20180710 unzip finished
0:00:37.667393
0:01:07.005167
1
2
3
4
5
6
7
8
0:08:14.841614
0:00:30.866775
0:00:03.145732
no massive missing
0:01:47.118365


20180710

SH finished
0:00:43.574974
0:00:29.558327
20180711 unzip finished
0:00:38.871507
0:01:10.833289
1
2
3
4
5
6
7
8
0:08:35.619890
0:00:32.695356
0:00:03.248043
no massive missing
0:01:51.528796


20180711

SH finished
0:01:06.156108
0:00:29.513326
20180712 unzip finished
0:00:40.268090
0:01:15.286779
1
2
3
4
5
6
7
8
0:09:00.116972
0:00:34.645606
0:00:03.439276
no massive missing
0:01:58.494431


20180712

SH finished
0:00:49.086380
0:00:29.119393
20180713 unzip finished
0:00:38.694803
0:01:10.266635
1
2
3
4
5
6
7
8
0:08:20.020361
0:00:30.391655
0:00:03.077346
no massive missing
0:01:44.925988


20180713

SH finished
0:00:50.366831
0:00:26.850490
20180716 unzip finished
0:00:38.111945
0:01:08.782104
1
2
3
4
5
6
7
8
0:08:01.136475
0:00:30.604710
0:00:03.088743
no massive missing
0:01:44.851325


20180716

SH finished
0:00:44.431537
0:00:26.141226
20180717 unzip finished
0:00:39.064694
0:01:11.403866
1
2
3
4
5
6
7
8
0:08:20.122700
0:00:30.654486
0:00:03.157142
no massive missing
0:01:48.497626


20180717

SH finished
0:00:44.844720
0:00:28.577267
20180718 unzip finished
0:00:39.284630
0:01:11.550231
1
2
3
4
5
6
7
8
0:08:31.967840
0:00:30.737516
0:00:03.092102
no massive missing
0:01:49.500025


20180718

SH finished
0:00:44.448829
0:00:28.283710
20180719 unzip finished
0:00:37.023724
0:01:08.361417
1
2
3
4
5
6
7
8
0:08:16.557601
0:00:30.215238
0:00:03.102560
no massive missing
0:01:45.141321


20180719

SH finished
0:00:51.602147
0:00:28.817594
20180720 unzip finished
0:00:39.076911
0:01:12.517384
1
2
3
4
5
6
7
8
0:08:29.994520
0:00:31.059683
0:00:03.163180
no massive missing
0:01:47.652480


20180720

SH finished
0:00:45.354458
0:00:30.236988
20180723 unzip finished
0:00:38.814490
0:01:10.684416
1
2
3
4
5
6
7
8
0:08:38.152173
0:00:31.286392
0:00:03.125603
no massive missing
0:01:48.915623


20180723

SH finished
0:00:48.643516
0:00:34.032236
20180724 unzip finished
0:00:43.220537
0:01:18.977112
1
2
3
4
5
6
7
8
0:09:29.881381
0:00:34.957898
0:00:03.508001
no massive missing
0:02:00.957383


20180724

SH finished
0:00:51.444785
0:00:30.310710
20180725 unzip finished
0:00:41.241999
0:01:14.451479
1
2
3
4
5
6
7
8
0:09:01.706173
0:00:32.588765
0:00:03.260611
no massive missing
0:01:53.073765


20180725

SH finished
0:00:49.044602
0:00:30.516478
20180726 unzip finished
0:00:41.545890
0:01:14.594069
1
2
3
4
5
6
7
8
0:09:02.100543
0:00:33.443537
0:00:03.306095
no massive missing
0:01:55.319326


20180726

SH finished
0:00:50.836466
0:00:29.309765
20180727 unzip finished
0:00:40.072050
0:01:12.915873
1
2
3
4
5
6
7
8
0:08:43.420178
0:00:32.552552
0:00:03.297633
no massive missing
0:01:52.634051


20180727

SH finished
0:00:52.374745
0:00:31.977549
20180730 unzip finished
0:00:39.731771
0:01:12.840817
1
2
3
4
5
6
7
8
0:08:50.508765
0:00:32.202543
0:00:03.214048
no massive missing
0:01:51.573176


20180730

SH finished
0:00:50.745454
0:00:28.387985
20180731 unzip finished
0:00:38.256445
0:01:09.896298
1
2
3
4
5
6
7
8
0:08:16.863971
0:00:31.247066
0:00:03.165368
no massive missing
0:01:49.216149


20180731

SH finished
0:00:44.528023
         ID      date  d_open  d_high  d_low  d_close_x  d_yclose  \
0   1601360  20180102   49.00   50.57  47.80      50.57     45.97   
1   1601360  20180103   53.00   55.63  52.61      55.63     50.57   
2   1601360  20180104   61.00   61.19  59.60      61.19     55.63   
3   1601360  20180105   66.50   66.50  55.07      55.42     61.19   
4   1601360  20180108   54.55   55.40  52.33      53.36     55.42   
5   1601360  20180109   53.00   53.10  51.34      52.53     53.36   
6   1601360  20180110   53.19   54.01  50.07      51.11     52.53   
7   1601360  20180111   50.93   53.75  49.03      52.52     51.11   
8   1601360  20180112   51.85   52.14  50.10      50.21     52.52   
9   1601360  20180115   48.62   49.20  46.27      47.05     50.21   
10  1601360  20180116   46.80   49.00  46.21      48.21     47.05   
11  1601360  20180117   47.20   49.86  46.91      48.27     48.21   
12  1601360  20180118   47.61   50.64  47.28      49.53     48.27   
13  160

ValueError: No objects to concatenate

In [1]:
import pymongo
import pandas as pd
import pickle
import datetime
import time
import gzip
import lzma
import pytz

class DB(object):
    def __init__(self, uri, symbol_column='skey'):
        self.db_name = 'white_db'
        user, passwd, host = self.parse_uri(uri)
        auth_db = 'admin' if user in ('admin', 'root') else self.db_name
        self.uri = 'mongodb://%s:%s@%s/?authSource=%s' % (user, passwd, host, auth_db)

        self.client = pymongo.MongoClient(self.uri)
        self.db = self.client[self.db_name]
        self.chunk_size = 20000
        self.symbol_column = symbol_column
        self.date_column = 'date'

    def parse_uri(self, uri):
        # mongodb://user:password@example.com
        return uri.strip().replace('mongodb://', '').strip('/').replace(':', ' ').replace('@', ' ').split(' ')

    def drop_table(self, table_name):
        self.db.drop_collection(table_name)

    def rename_table(self, old_table, new_table):
        self.db[old_table].rename(new_table)

    def write(self, table_name, df):
        if len(df) == 0: return

        multi_date = False

        if self.date_column in df.columns:
            date = str(df.head(1)[self.date_column].iloc[0])
            multi_date = len(df[self.date_column].unique()) > 1
        else:
            raise Exception('DataFrame should contain date column')

        collection = self.db[table_name]
        collection.create_index([('date', pymongo.ASCENDING), ('symbol', pymongo.ASCENDING)], background=True)
        collection.create_index([('symbol', pymongo.ASCENDING), ('date', pymongo.ASCENDING)], background=True)

        if multi_date:
            for (date, symbol), sub_df in df.groupby([self.date_column, self.symbol_column]):
                date = str(date)
                symbol = int(symbol)
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)
        else:
            for symbol, sub_df in df.groupby([self.symbol_column]):
                collection.delete_many({'date': date, 'symbol': symbol})
                self.write_single(collection, date, symbol, sub_df)

    def write_single(self, collection, date, symbol, df):
        for start in range(0, len(df), self.chunk_size):
            end = min(start + self.chunk_size, len(df))
            df_seg = df[start:end]
            version = 1
            seg = {'ver': version, 'data': self.ser(df_seg, version), 'date': date, 'symbol': symbol, 'start': start}
            collection.insert_one(seg)

    def build_query(self, start_date=None, end_date=None, symbol=None):
        query = {}

        def parse_date(x):
            if type(x) == str:
                if len(x) != 8:
                    raise Exception("`date` must be YYYYMMDD format")
                return x
            elif type(x) == datetime.datetime or type(x) == datetime.date:
                return x.strftime("%Y%m%d")
            elif type(x) == int:
                return parse_date(str(x))
            else:
                raise Exception("invalid `date` type: " + str(type(x)))

        if start_date is not None or end_date is not None:
            query['date'] = {}
            if start_date is not None:
                query['date']['$gte'] = parse_date(start_date)
            if end_date is not None:
                query['date']['$lte'] = parse_date(end_date)

        def parse_symbol(x):
            if type(x) == int:
                return x
            else:
                return int(x)

        if symbol:
            if type(symbol) == list or type(symbol) == tuple:
                query['symbol'] = {'$in': [parse_symbol(x) for x in symbol]}
            else:
                query['symbol'] = parse_symbol(symbol)

        return query

    def delete(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot delete the whole table')
            return None

        collection.delete_many(query)

    def read(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]

        query = self.build_query(start_date, end_date, symbol)
        if not query:
            print('cannot read the whole table')
            return None

        segs = []
        for x in collection.find(query):
            x['data'] = self.deser(x['data'], x['ver'])
            segs.append(x)
        segs.sort(key=lambda x: (x['symbol'], x['date'], x['start']))
        return pd.concat([x['data'] for x in segs], ignore_index=True) if segs else None

    def list_tables(self):
        return self.db.collection_names()

    def list_dates(self, table_name, start_date=None, end_date=None, symbol=None):
        collection = self.db[table_name]
        dates = set()
        if start_date is None:
            start_date = '00000000'
        if end_date is None:
            end_date = '99999999'
        for x in collection.find(self.build_query(start_date, end_date, symbol), {"date": 1, '_id': 0}):
            dates.add(x['date'])
        return sorted(list(dates))

    def ser(self, s, version):
        if version == 1:
            return gzip.compress(pickle.dumps(s), compresslevel=2)
        elif version == 2:
            return lzma.compress(pickle.dumps(s), preset=1)
        else:
            raise Exception('unknown version')

    def deser(self, s, version):
        def unpickle(s):
            return pickle.loads(s)

        if version == 1:
            return unpickle(gzip.decompress(s))
        elif version == 2:
            return unpickle(lzma.decompress(s))
        else:
            raise Exception('unknown version')

In [2]:
import pandas as pd
import random
import numpy as np
import glob
import pickle
import os
import datetime
import time

startDate = 20170101
endDate = 20181231
targetStockLs = [2000001]

db = DB("mongodb://user_rw:faa96dfc@192.168.10.223")
mdData = db.read('trade', start_date=startDate, end_date=endDate, symbol=targetStockLs)
date_list = mdData["date"].unique()
mi_ss = []

for i in date_list:
    startDate = str(i)
    endDate = str(i)
    data = db.read('snapshot', start_date=startDate, end_date=endDate)
    SH = data[data["skey"] < 2000000]
    SZ = data[data["skey"] > 2000000]
    del data
    
    # check 2
    # first part
    startTm = datetime.datetime.now()
    date = pd.DataFrame(pd.date_range(start='2019-06-10 08:30:00', end='2019-06-10 18:00:00', freq='s'), columns=["Orig"])
    date["time"] = date["Orig"].apply(lambda x: int(x.strftime("%H%M%S"))*1000)
    date["group"] = date["time"]//10000
    SH["group"] = SH["time"]//10000000
    gl = date[((date["time"] >= 93000000) & (date["time"] <= 113000000))|((date["time"] >= 130000000) & (date["time"] <= 150000000))]["group"].unique()
    l = set(gl) - set(SH["group"].unique())
    SH["has_missing1"] = 0 
    if len(l) != 0:
        print("massive missing")
        print(l)
        SH["order"] = SH.groupby(["skey", "time"]).cumcount()
        for i in l:
            SH["t"] = SH[SH["group"] > i].groupby("StockID")["time"].transform("min")
            SH["has_missing1"] = np.where((SH["time"] == SH["t"]) & (SH["order"] == 0), 1, 0)
        SH.drop(["order", "t", "group"], axis=1, inplace=True)   
    else:
        print("no massive missing")
        SH.drop(["group"], axis=1, inplace=True)
    



    # second part

    SH["time_interval"] = SH.groupby("skey")["datetime"].apply(lambda x: x - x.shift(1))
    SH["time_interval"] = SH["time_interval"].apply(lambda x: x.seconds)
    SH["tn_update"] = SH.groupby("skey")["cum_trades_cnt"].apply(lambda x: x-x.shift(1))

    f1 = SH[(SH["time"] >= 93000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f1 = f1.rename(columns={"time": "time1"})
    f2 = SH[(SH["time"] >= 130000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f2 = f2.rename(columns={"time": "time2"})
    f3 = SH[(SH["time"] >= 150000000000) & (SH["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f3 = f3.rename(columns={"time": "time3"})
    SH = pd.merge(SH, f1, on="skey", how="left")
    del f1
    SH = pd.merge(SH, f2, on="skey", how="left")
    del f2
    SH = pd.merge(SH, f3, on="skey", how="left")
    del f3
    p99 = SH[(SH["time"] > 93000000000) & (SH["time"] < 145700000000) & (SH["time"] != SH["time2"]) & (SH["tn_update"] != 0)]\
    .groupby("skey")["tn_update"].apply(lambda x: x.describe([0.99])["99%"]).reset_index()
    p99 = p99.rename(columns={"tn_update":"99%"})
    SH = pd.merge(SH, p99, on="skey", how="left")

    SH["has_missing2"] = 0
    SH["has_missing2"] = np.where((SH["time_interval"] > 60) & (SH["tn_update"] > SH["99%"]) & 
         (SH["time"] > SH["time1"]) & (SH["time"] != SH["time2"]) & (SH["time"] != SH["time3"]) & (SH["time"] != 100000000000), 1, 0)
    SH.drop(["time_interval", "tn_update", "time1", "time2", "time3", "99%"], axis=1, inplace=True) 

    SH["has_missing"] = np.where((SH["has_missing1"] == 1) | (SH["has_missing2"] == 1), 1, 0)
    SH.drop(["has_missing1", "has_missing2"], axis=1, inplace=True) 
    if SH[SH["has_missing"] == 1].shape[0] != 0:
        print("has missing!!!!!!!!!!!!!!!!!!!!!!!")
        print(SH[SH["has_missing"] == 1].shape[0])
        mi_ss += [SH[SH["has_missing"] == 1]]
    print(datetime.datetime.now() - startTm)
    
    
    
    startTm = datetime.datetime.now()
    SH["has_missing"] = SH["has_missing"].astype('int32')
    SH = SH[["skey", "date", "time", "clockAtArrival", "datetime", "ordering", "has_missing", "cum_trades_cnt", "cum_volume", "cum_amount", "prev_close",
                            "open", "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p','bid2p','bid1p',
                            'ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p', 'bid10q','bid9q','bid8q',
                             'bid7q','bid6q','bid5q','bid4q','bid3q','bid2q','bid1q', 'ask1q','ask2q','ask3q','ask4q','ask5q','ask6q',
                             'ask7q','ask8q','ask9q','ask10q', 'bid10n', 'bid9n', 'bid8n', 'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 
                             'ask1n', 'ask2n', 'ask3n', 'ask4n', 'ask5n', 'ask6n','ask7n', 'ask8n', 'ask9n', 'ask10n','bid1Top1q','bid1Top2q','bid1Top3q','bid1Top4q','bid1Top5q','bid1Top6q',
        'bid1Top7q','bid1Top8q','bid1Top9q','bid1Top10q','bid1Top11q','bid1Top12q','bid1Top13q','bid1Top14q','bid1Top15q','bid1Top16q','bid1Top17q','bid1Top18q',
        'bid1Top19q','bid1Top20q','bid1Top21q','bid1Top22q','bid1Top23q','bid1Top24q','bid1Top25q','bid1Top26q','bid1Top27q','bid1Top28q','bid1Top29q',
        'bid1Top30q','bid1Top31q','bid1Top32q','bid1Top33q','bid1Top34q','bid1Top35q','bid1Top36q','bid1Top37q','bid1Top38q','bid1Top39q','bid1Top40q',
        'bid1Top41q','bid1Top42q','bid1Top43q','bid1Top44q','bid1Top45q','bid1Top46q','bid1Top47q','bid1Top48q','bid1Top49q','bid1Top50q', 'ask1Top1q',
        'ask1Top2q','ask1Top3q','ask1Top4q','ask1Top5q','ask1Top6q','ask1Top7q','ask1Top8q','ask1Top9q','ask1Top10q','ask1Top11q','ask1Top12q','ask1Top13q',
        'ask1Top14q','ask1Top15q','ask1Top16q','ask1Top17q','ask1Top18q','ask1Top19q','ask1Top20q','ask1Top21q','ask1Top22q','ask1Top23q',
        'ask1Top24q','ask1Top25q','ask1Top26q','ask1Top27q','ask1Top28q','ask1Top29q','ask1Top30q','ask1Top31q','ask1Top32q','ask1Top33q',
        'ask1Top34q','ask1Top35q','ask1Top36q','ask1Top37q','ask1Top38q','ask1Top39q','ask1Top40q','ask1Top41q','ask1Top42q','ask1Top43q',
        'ask1Top44q','ask1Top45q','ask1Top46q','ask1Top47q','ask1Top48q','ask1Top49q','ask1Top50q',"total_bid_quantity", "total_ask_quantity","total_bid_vwap", "total_ask_vwap",
        "total_bid_orders",'total_ask_orders','total_bid_levels', 'total_ask_levels', 'bid_trade_max_duration', 'ask_trade_max_duration', 'cum_canceled_buy_orders', 'cum_canceled_buy_volume',
        "cum_canceled_buy_amount", "cum_canceled_sell_orders", 'cum_canceled_sell_volume',"cum_canceled_sell_amount"]]
    
    display(SH["date"].iloc[0])
    print("SH finished")
    
    db1 = DB("mongodb://user_rw:faa96dfc@192.168.10.223")
    db1.write('snapshot', SH)
    
    del SH
    print(datetime.datetime.now() - startTm)
    
    
    # check 2
    # first part
    startTm = datetime.datetime.now()
    date = pd.DataFrame(pd.date_range(start='2019-06-10 08:30:00', end='2019-06-10 18:00:00', freq='s'), columns=["Orig"])
    date["time"] = date["Orig"].apply(lambda x: int(x.strftime("%H%M%S"))*1000)
    date["group"] = date["time"]//10000
    SZ["group"] = SZ["time"]//10000000
    gl = date[((date["time"] >= 93000000) & (date["time"] <= 113000000))|((date["time"] >= 130000000) & (date["time"] <= 150000000))]["group"].unique()
    l = set(gl) - set(SZ["group"].unique())
    SZ["has_missing1"] = 0 
    if len(l) != 0:
        print("massive missing")
        print(l)
        SZ["order"] = SZ.groupby(["skey", "time"]).cumcount()
        for i in l:
            SZ["t"] = SZ[SZ["group"] > i].groupby("skey")["time"].transform("min")
            SZ["has_missing1"] = np.where((SZ["time"] == SZ["t"]) & (SZ["order"] == 0), 1, 0)
        SZ.drop(["order", "t", "group"], axis=1, inplace=True)   
    else:
        print("no massive missing")
        SZ.drop(["group"], axis=1, inplace=True)
    



    # second part

    SZ["time_interval"] = SZ.groupby("skey")["datetime"].apply(lambda x: x - x.shift(1))
    SZ["time_interval"] = SZ["time_interval"].apply(lambda x: x.seconds)
    SZ["tn_update"] = SZ.groupby("skey")["cum_trades_cnt"].apply(lambda x: x-x.shift(1))

    f1 = SZ[(SZ["time"] >= 93000000000) & (SZ["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f1 = f1.rename(columns={"time": "time1"})
    f2 = SZ[(SZ["time"] >= 130000000000) & (SZ["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f2 = f2.rename(columns={"time": "time2"})
    f3 = SZ[(SZ["time"] >= 150000000000) & (SZ["tn_update"] != 0)].groupby("skey")["time"].min().reset_index()
    f3 = f3.rename(columns={"time": "time3"})
    SZ = pd.merge(SZ, f1, on="skey", how="left")
    del f1
    SZ = pd.merge(SZ, f2, on="skey", how="left")
    del f2
    SZ = pd.merge(SZ, f3, on="skey", how="left")
    del f3
    p99 = SZ[(SZ["time"] > 93000000000) & (SZ["time"] < 145700000000) & (SZ["time"] != SZ["time2"]) & (SZ["tn_update"] != 0)]\
    .groupby("skey")["tn_update"].apply(lambda x: x.describe([0.99])["99%"]).reset_index()
    p99 = p99.rename(columns={"tn_update":"99%"})
    SZ = pd.merge(SZ, p99, on="skey", how="left")

    SZ["has_missing2"] = 0
    SZ["has_missing2"] = np.where((SZ["time_interval"] > 60) & (SZ["tn_update"] > SZ["99%"]) & 
         (SZ["time"] > SZ["time1"]) & (SZ["time"] != SZ["time2"]) & (SZ["time"] != SZ["time3"])& (SZ["time"] != 100000000000), 1, 0)
    SZ.drop(["time_interval", "tn_update", "time1", "time2", "time3", "99%"], axis=1, inplace=True) 

    SZ["has_missing"] = np.where((SZ["has_missing1"] == 1) | (SZ["has_missing2"] == 1), 1, 0)
    SZ.drop(["has_missing1", "has_missing2"], axis=1, inplace=True) 
    if SZ[SZ["has_missing"] == 1].shape[0] != 0:
        print("has missing!!!!!!!!!!!!!!!!!!!!!!!")
        print(SZ[SZ["has_missing"] == 1].shape[0])
        mi_ss += [SZ[SZ["has_missing"] == 1]]
    print(datetime.datetime.now() - startTm)

    
    
    startTm = datetime.datetime.now()
    SZ["has_missing"] = SZ["has_missing"].astype('int32')
    SZ = SZ[["skey", "date", "time", "clockAtArrival", "datetime", "ordering", "has_missing", "cum_trades_cnt", "cum_volume", "cum_amount", "prev_close",
                            "open", "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p','bid2p','bid1p',
                            'ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p', 'bid10q','bid9q','bid8q',
                             'bid7q','bid6q','bid5q','bid4q','bid3q','bid2q','bid1q', 'ask1q','ask2q','ask3q','ask4q','ask5q','ask6q',
                             'ask7q','ask8q','ask9q','ask10q', 'bid10n', 'bid9n', 'bid8n', 'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 
                             'ask1n', 'ask2n', 'ask3n', 'ask4n', 'ask5n', 'ask6n','ask7n', 'ask8n', 'ask9n', 'ask10n','bid1Top1q','bid1Top2q','bid1Top3q','bid1Top4q','bid1Top5q','bid1Top6q',
        'bid1Top7q','bid1Top8q','bid1Top9q','bid1Top10q','bid1Top11q','bid1Top12q','bid1Top13q','bid1Top14q','bid1Top15q','bid1Top16q','bid1Top17q','bid1Top18q',
        'bid1Top19q','bid1Top20q','bid1Top21q','bid1Top22q','bid1Top23q','bid1Top24q','bid1Top25q','bid1Top26q','bid1Top27q','bid1Top28q','bid1Top29q',
        'bid1Top30q','bid1Top31q','bid1Top32q','bid1Top33q','bid1Top34q','bid1Top35q','bid1Top36q','bid1Top37q','bid1Top38q','bid1Top39q','bid1Top40q',
        'bid1Top41q','bid1Top42q','bid1Top43q','bid1Top44q','bid1Top45q','bid1Top46q','bid1Top47q','bid1Top48q','bid1Top49q','bid1Top50q', 'ask1Top1q',
        'ask1Top2q','ask1Top3q','ask1Top4q','ask1Top5q','ask1Top6q','ask1Top7q','ask1Top8q','ask1Top9q','ask1Top10q','ask1Top11q','ask1Top12q','ask1Top13q',
        'ask1Top14q','ask1Top15q','ask1Top16q','ask1Top17q','ask1Top18q','ask1Top19q','ask1Top20q','ask1Top21q','ask1Top22q','ask1Top23q',
        'ask1Top24q','ask1Top25q','ask1Top26q','ask1Top27q','ask1Top28q','ask1Top29q','ask1Top30q','ask1Top31q','ask1Top32q','ask1Top33q',
        'ask1Top34q','ask1Top35q','ask1Top36q','ask1Top37q','ask1Top38q','ask1Top39q','ask1Top40q','ask1Top41q','ask1Top42q','ask1Top43q',
        'ask1Top44q','ask1Top45q','ask1Top46q','ask1Top47q','ask1Top48q','ask1Top49q','ask1Top50q',"total_bid_quantity", "total_ask_quantity","total_bid_vwap", "total_ask_vwap",
        "total_bid_orders",'total_ask_orders','total_bid_levels', 'total_ask_levels', 'bid_trade_max_duration', 'ask_trade_max_duration', 'cum_canceled_buy_orders', 'cum_canceled_buy_volume',
        "cum_canceled_buy_amount", "cum_canceled_sell_orders", 'cum_canceled_sell_volume',"cum_canceled_sell_amount"]]
    
    display(SZ["date"].iloc[0])
    print("SZ finished")
    
    
    db1 = DB("mongodb://user_rw:faa96dfc@192.168.10.223")
    db1.write('snapshot', SZ)
    
    del SZ
    print(datetime.datetime.now() - startTm)
    
mi_ss = pd.concat(mi_ss).reset_index(drop=True)
print(mi_ss)

no massive missing
0:01:54.095509


20170901

SH finished
0:00:58.665002
no massive missing
0:02:29.632333


20170901

SZ finished
0:01:18.356350
no massive missing
0:01:48.178389


20170904

SH finished
0:01:07.684085
no massive missing
0:02:43.305191


20170904

SZ finished
0:01:18.086354
no massive missing
0:01:54.102501


20170905

SH finished
0:00:51.341660
no massive missing
0:02:40.244063


20170905

SZ finished
0:01:10.300391
no massive missing
0:01:57.969777


20170906

SH finished
0:00:57.883809
no massive missing
0:02:44.320563


20170906

SZ finished
0:01:21.173632
no massive missing
0:02:03.371182


20170907

SH finished
0:00:56.407915
no massive missing
0:02:50.078141


20170907

SZ finished
0:01:15.413218
no massive missing
0:01:57.218114


20170908

SH finished
0:00:51.645264
no massive missing
0:02:46.932000


20170908

SZ finished
0:01:15.844438
no massive missing
0:01:58.657909


20170911

SH finished
0:01:06.102546
no massive missing
0:02:45.633359


20170911

SZ finished
0:01:07.727061
no massive missing
0:02:05.500904


20170912

SH finished
0:01:00.387342
no massive missing
0:02:54.106877


20170912

SZ finished
0:01:15.562236
no massive missing
0:01:56.002731


20170913

SH finished
0:00:51.687236
no massive missing
0:02:34.766853


20170913

SZ finished
0:01:11.822279
no massive missing
0:01:58.129149


20170914

SH finished
0:00:52.126492
no massive missing
0:02:41.079857


20170914

SZ finished
0:01:33.338271
no massive missing
0:01:57.219327


20170915

SH finished
0:01:04.470154
no massive missing
0:02:32.971037


20170915

SZ finished
0:01:11.084847
no massive missing
0:01:52.697051


20170918

SH finished
0:01:03.522113
no massive missing
0:02:28.629447


20170918

SZ finished
0:01:05.486158
no massive missing
0:01:57.461654


20170919

SH finished
0:00:51.037039
no massive missing
0:02:38.774153


20170919

SZ finished
0:01:11.724801
no massive missing
0:01:58.096853


20170920

SH finished
0:00:52.135289
no massive missing
0:02:37.427528


20170920

SZ finished
0:01:12.833039
no massive missing
0:01:58.831162


20170921

SH finished
0:00:52.675875
no massive missing
0:02:37.290292


20170921

SZ finished
0:01:06.686608
no massive missing
0:01:55.768769


20170922

SH finished
0:01:00.450534
no massive missing
0:02:38.066198


20170922

SZ finished
0:01:05.864986
no massive missing
0:01:54.782074


20170925

SH finished
0:00:52.492444
no massive missing
0:02:34.047211


20170925

SZ finished
0:01:04.540173
no massive missing
0:01:51.483394


20170926

SH finished
0:00:47.284561
no massive missing
0:02:30.698736


20170926

SZ finished
0:01:02.248726
no massive missing
0:01:48.602929


20170927

SH finished
0:00:59.152746
no massive missing
0:02:28.800036


20170927

SZ finished
0:01:02.575040
no massive missing
0:01:48.353457


20170928

SH finished
0:00:51.756698
no massive missing
0:02:29.373224


20170928

SZ finished
0:01:03.120702
no massive missing
0:01:46.799501


20170929

SH finished
0:00:46.298655
no massive missing
0:02:25.350169


20170929

SZ finished
0:01:06.771479
no massive missing
0:01:53.076621


20171009

SH finished
0:00:48.801763
no massive missing
0:02:31.793503


20171009

SZ finished
0:01:03.551574
no massive missing
0:01:56.970370


20171010

SH finished
0:01:02.452933
no massive missing
0:02:40.087983


20171010

SZ finished
0:01:08.739574
no massive missing
0:01:59.923015


20171011

SH finished
0:00:52.226489
no massive missing
0:02:42.983920


20171011

SZ finished
0:01:10.245704
no massive missing
0:01:53.917765


20171012

SH finished
0:00:56.158444
no massive missing
0:02:39.527185


20171012

SZ finished
0:01:05.698227
no massive missing
0:01:55.181818


20171013

SH finished
0:00:55.736650
no massive missing
0:02:37.604721


20171013

SZ finished
0:01:08.561083
no massive missing
0:02:01.431512


20171016

SH finished
0:00:54.021493
no massive missing
0:02:42.825583


20171016

SZ finished
0:01:09.845923
no massive missing
0:01:51.383218


20171017

SH finished
0:00:53.430192
no massive missing
0:02:34.136136


20171017

SZ finished
0:01:02.038010
no massive missing
0:01:52.348872


20171018

SH finished
0:00:53.262276
no massive missing
0:02:28.940748


20171018

SZ finished
0:01:04.661208
no massive missing
0:01:53.747650


20171019

SH finished
0:00:55.248713
no massive missing
0:02:34.670939


20171019

SZ finished
0:01:09.924454
no massive missing
0:01:49.562552


20171020

SH finished
0:00:49.412927
no massive missing
0:02:22.724099


20171020

SZ finished
0:00:59.791514
no massive missing
0:01:45.666735


20171023

SH finished
0:00:45.754031
no massive missing
0:02:19.887450


20171023

SZ finished
0:01:00.799652
no massive missing
0:01:52.674017


20171024

SH finished
0:00:47.791457
no massive missing
0:02:27.141246


20171024

SZ finished
0:01:05.595660
no massive missing
0:01:48.956746


20171025

SH finished
0:00:47.379407
no massive missing
0:02:23.528682


20171025

SZ finished
0:01:01.493091
no massive missing
0:01:56.642337


20171026

SH finished
0:00:50.848721
no massive missing
0:02:31.916112


20171026

SZ finished
0:01:06.951003
no massive missing
0:01:54.648642


20171027

SH finished
0:00:52.065772
no massive missing
0:02:30.611177


20171027

SZ finished
0:01:04.483549
no massive missing
0:02:01.901525


20171030

SH finished
0:01:03.953131
no massive missing
0:02:40.035429


20171030

SZ finished
0:01:08.344945
no massive missing
0:01:55.868993


20171031

SH finished
0:00:50.352221
no massive missing
0:02:32.803839


20171031

SZ finished
0:01:02.577928
no massive missing
0:01:55.354243


20171101

SH finished
0:00:50.676637
no massive missing
0:02:33.391235


20171101

SZ finished
0:01:03.648931
no massive missing
0:01:55.543742


20171102

SH finished
0:01:05.282097
no massive missing
0:02:34.823892


20171102

SZ finished
0:01:06.240694
no massive missing
0:01:59.099607


20171103

SH finished
0:00:51.674528
no massive missing
0:02:34.867330


20171103

SZ finished
0:01:12.032140
no massive missing
0:01:55.532862


20171106

SH finished
0:00:50.578563
no massive missing
0:02:31.858559


20171106

SZ finished
0:01:05.096568
no massive missing
0:01:55.740684


20171107

SH finished
0:00:55.737875
no massive missing
0:02:31.618844


20171107

SZ finished
0:01:04.730020
no massive missing
0:02:01.083065


20171108

SH finished
0:00:53.420591
no massive missing
0:02:40.019977


20171108

SZ finished
0:01:17.525427
no massive missing
0:01:54.805976


20171109

SH finished
0:00:51.260135
no massive missing
0:02:28.748297


20171109

SZ finished
0:01:04.217810
no massive missing
0:01:59.702401


20171110

SH finished
0:00:51.291405
no massive missing
0:02:38.379681


20171110

SZ finished
0:01:19.029317
no massive missing
0:01:58.635387


20171113

SH finished
0:01:02.695878
no massive missing
0:02:41.545036


20171113

SZ finished
0:01:08.715212
no massive missing
0:02:04.235311


20171114

SH finished
0:00:54.045298
no massive missing
0:02:41.396513


20171114

SZ finished
0:01:17.644968
no massive missing
0:01:58.660201


20171115

SH finished
0:00:50.698881
no massive missing
0:02:35.159363


20171115

SZ finished
0:01:12.006892
no massive missing
0:01:51.056622


20171116

SH finished
0:00:55.318681
no massive missing
0:02:30.917026


20171116

SZ finished
0:01:04.316506
no massive missing
0:02:05.316970


20171117

SH finished
0:01:00.193125
no massive missing
0:02:48.361449


20171117

SZ finished
0:01:11.438207
no massive missing
0:01:55.216754


20171120

SH finished
0:00:53.017557
no massive missing
0:02:31.372779


20171120

SZ finished
0:01:10.222285
no massive missing
0:01:53.594324


20171121

SH finished
0:01:08.238634
no massive missing
0:02:35.862353


20171121

SZ finished
0:01:04.309226
no massive missing
0:01:53.589868


20171122

SH finished
0:00:59.263988
no massive missing
0:02:33.738599


20171122

SZ finished
0:01:05.190838
no massive missing
0:01:54.944468


20171123

SH finished
0:01:05.220787
no massive missing
0:02:34.671117


20171123

SZ finished
0:01:07.242707
no massive missing
0:01:48.007661


20171124

SH finished
0:00:46.498470
no massive missing
0:02:18.664897


20171124

SZ finished
0:01:04.481011
no massive missing
0:01:46.123924


20171127

SH finished
0:00:46.046260
no massive missing
0:02:19.010788


20171127

SZ finished
0:01:00.540942
no massive missing
0:01:42.121247


20171128

SH finished
0:00:56.112336
no massive missing
0:02:21.095633


20171128

SZ finished
0:00:57.758706
no massive missing
0:01:51.745831


20171129

SH finished
0:00:49.858714
no massive missing
0:02:25.186769


20171129

SZ finished
0:01:02.210725
no massive missing
0:01:46.316256


20171130

SH finished
0:00:49.777194
no massive missing
0:02:19.218001


20171130

SZ finished
0:01:09.035493
no massive missing
0:01:46.558039


20171201

SH finished
0:00:45.659281
no massive missing
0:02:22.031525


20171201

SZ finished
0:00:58.790738
no massive missing
0:01:52.012157


20171204

SH finished
0:00:52.518640
no massive missing
0:02:23.688625


20171204

SZ finished
0:01:08.016719
no massive missing
0:01:58.064890


20171205

SH finished
0:01:02.758482
no massive missing
0:02:32.953056


20171205

SZ finished
0:01:08.379683
no massive missing
0:01:49.248933


20171206

SH finished
0:00:48.958202
no massive missing
0:02:22.112841


20171206

SZ finished
0:01:01.209838
no massive missing
0:01:48.570737


20171207

SH finished
0:01:05.248763
no massive missing
0:02:22.823750


20171207

SZ finished
0:01:23.712046
no massive missing
0:01:50.737382


20171208

SH finished
0:00:48.927464
no massive missing
0:02:18.520623


20171208

SZ finished
0:01:01.103493
no massive missing
0:01:45.931341


20171211

SH finished
0:00:46.449160
no massive missing
0:02:18.076581


20171211

SZ finished
0:00:59.331596
no massive missing
0:01:45.667778


20171212

SH finished
0:00:48.029418
no massive missing
0:02:21.411927


20171212

SZ finished
0:00:58.717019
no massive missing
0:01:40.878145


20171213

SH finished
0:00:49.507018
no massive missing
0:02:13.933869


20171213

SZ finished
0:01:00.239149
no massive missing
0:01:42.420620


20171214

SH finished
0:00:44.755565
no massive missing
0:02:17.010099


20171214

SZ finished
0:00:56.795760
no massive missing
0:01:45.886412


20171215

SH finished
0:00:52.211787
no massive missing
0:02:21.307738


20171215

SZ finished
0:00:59.938029
no massive missing
0:01:44.627553


20171218

SH finished
0:00:50.017461
no massive missing
0:02:17.406813


20171218

SZ finished
0:00:59.612724
no massive missing
0:01:42.095612


20171219

SH finished
0:00:45.735310
no massive missing
0:02:15.968982


20171219

SZ finished
0:00:55.469717
no massive missing
0:01:45.813416


20171220

SH finished
0:00:50.840842
no massive missing
0:02:21.107025


20171220

SZ finished
0:01:01.106978
no massive missing
0:01:48.180029


20171221

SH finished
0:00:47.881476
no massive missing
0:02:25.221841


20171221

SZ finished
0:01:00.949444
no massive missing
0:01:39.530127


20171222

SH finished
0:00:57.328569
no massive missing
0:02:14.113252


20171222

SZ finished
0:00:56.195576
no massive missing
0:01:46.808610


20171225

SH finished
0:00:46.475137
no massive missing
0:02:19.575166


20171225

SZ finished
0:01:01.221802
no massive missing
0:01:50.811372


20171226

SH finished
0:00:47.409411
no massive missing
0:02:20.205159


20171226

SZ finished
0:00:59.650242
no massive missing
0:01:49.609290


20171227

SH finished
0:00:53.816394
no massive missing
0:02:20.088854


20171227

SZ finished
0:01:02.220985
no massive missing
0:01:53.246279


20171228

SH finished
0:00:48.983517
no massive missing
0:02:23.728780


20171228

SZ finished
0:01:00.779966
no massive missing
0:01:46.327434


20171229

SH finished
0:00:57.640513
no massive missing
0:02:18.622479


20171229

SZ finished
0:01:01.555924
no massive missing
0:01:54.111294


20180102

SH finished
0:00:55.289935
no massive missing
0:02:23.118286


20180102

SZ finished
0:01:00.281713
no massive missing
0:02:00.905690


20180103

SH finished
0:00:56.462463
no massive missing
0:02:33.350554


20180103

SZ finished
0:01:07.801547
no massive missing
0:01:54.321819


20180104

SH finished
0:00:54.246157
no massive missing
0:02:30.751712


20180104

SZ finished
0:01:01.935316
no massive missing
0:01:57.354616


20180105

SH finished
0:00:59.181478
no massive missing
0:02:23.354931


20180105

SZ finished
0:01:02.396832
no massive missing
0:01:55.619016


20180108

SH finished
0:01:11.005732
no massive missing
0:02:25.846653


20180108

SZ finished
0:01:16.550929
no massive missing
0:01:57.327080


20180109

SH finished
0:01:02.841116
no massive missing
0:02:25.264777


20180109

SZ finished
0:01:02.641413
no massive missing
0:01:58.019966


20180110

SH finished
0:00:56.156672
no massive missing
0:02:31.319835


20180110

SZ finished
0:01:06.084525
no massive missing
0:01:55.315161


20180111

SH finished
0:00:50.170628
no massive missing
0:02:27.648346


20180111

SZ finished
0:01:05.722174
no massive missing
0:01:49.346407


20180112

SH finished
0:00:52.692211
no massive missing
0:02:24.366100


20180112

SZ finished
0:01:00.193820
no massive missing
0:01:58.936372


20180115

SH finished
0:00:53.562651
no massive missing
0:02:34.547069


20180115

SZ finished
0:01:05.748856
no massive missing
0:01:55.064679


20180116

SH finished
0:00:55.475347
no massive missing
0:02:27.794750


20180116

SZ finished
0:01:14.048868
no massive missing
0:01:56.979371


20180117

SH finished
0:00:51.916064
no massive missing
0:02:30.371817


20180117

SZ finished
0:01:20.646459
no massive missing
0:01:53.070136


20180118

SH finished
0:00:58.458748
no massive missing
0:02:21.691317


20180118

SZ finished
0:01:00.591304
no massive missing
0:01:52.772193


20180119

SH finished
0:00:51.470922
no massive missing
0:02:25.909045


20180119

SZ finished
0:01:00.332337
no massive missing
0:01:56.414919


20180122

SH finished
0:00:54.914117
no massive missing
0:02:33.784525


20180122

SZ finished
0:01:06.316304
no massive missing
0:01:51.041146


20180123

SH finished
0:00:51.237028
no massive missing
0:02:23.176995


20180123

SZ finished
0:01:01.988607
no massive missing
0:01:56.826706


20180124

SH finished
0:00:56.954028
no massive missing
0:02:35.559384


20180124

SZ finished
0:01:16.341155
no massive missing
0:01:59.681460


20180125

SH finished
0:00:52.029410
no massive missing
0:02:35.816221


20180125

SZ finished
0:01:08.775602
no massive missing
0:01:56.411802


20180126

SH finished
0:00:50.193125
no massive missing
0:02:26.511714


20180126

SZ finished
0:01:03.307826
no massive missing
0:01:55.861673


20180129

SH finished
0:00:51.161243
no massive missing
0:02:32.460885


20180129

SZ finished
0:01:06.637575
no massive missing
0:01:49.870006


20180130

SH finished
0:00:51.071985
no massive missing
0:02:21.716131


20180130

SZ finished
0:00:58.630223
no massive missing
0:01:58.393407


20180131

SH finished
0:01:03.426274
no massive missing
0:02:33.233945


20180131

SZ finished
0:01:04.413080
no massive missing
0:02:04.617983


20180201

SH finished
0:00:55.246854
no massive missing
0:02:39.346744


20180201

SZ finished
0:01:08.058615
no massive missing
0:01:52.180626


20180202

SH finished
0:00:52.784892
no massive missing
0:02:20.263109


20180202

SZ finished
0:01:03.915545
no massive missing
0:01:45.082960


20180205

SH finished
0:00:49.934366
no massive missing
0:02:09.651968


20180205

SZ finished
0:00:55.684007
no massive missing
0:01:57.523357


20180206

SH finished
0:00:52.210523
no massive missing
0:02:26.045194


20180206

SZ finished
0:01:12.371633
no massive missing
0:01:57.276997


20180207

SH finished
0:00:56.269439
no massive missing
0:02:19.078501


20180207

SZ finished
0:01:14.913701
no massive missing
0:01:43.434553


20180208

SH finished
0:00:51.637592
no massive missing
0:02:11.018750


20180208

SZ finished
0:00:56.232771
no massive missing
0:01:52.706114


20180209

SH finished
0:00:57.995083
no massive missing
0:02:23.538090


20180209

SZ finished
0:01:00.336879
no massive missing
0:01:44.466965


20180212

SH finished
0:00:50.340818
no massive missing
0:02:10.617489


20180212

SZ finished
0:00:54.590872
no massive missing
0:01:41.236567


20180213

SH finished
0:00:44.140822
no massive missing
0:02:09.769381


20180213

SZ finished
0:00:54.246801
no massive missing
0:01:30.283799


20180214

SH finished
0:00:39.164217
no massive missing
0:01:53.315454


20180214

SZ finished
0:00:47.855514
no massive missing
0:01:36.652331


20180222

SH finished
0:00:44.441785
no massive missing
0:02:03.829642


20180222

SZ finished
0:00:54.436911
no massive missing
0:01:40.475511


20180223

SH finished
0:00:43.761688
no massive missing
0:02:08.039577


20180223

SZ finished
0:00:55.472924
no massive missing
0:01:51.398200


20180226

SH finished
0:00:54.995925
no massive missing
0:02:28.623789


20180226

SZ finished
0:01:05.128420
no massive missing
0:01:51.803997


20180227

SH finished
0:00:57.798179
no massive missing
0:02:27.643949


20180227

SZ finished
0:01:03.152387
no massive missing
0:01:44.586791


20180228

SH finished
0:00:47.911340
no massive missing
0:02:21.446766


20180228

SZ finished
0:01:08.952136
no massive missing
0:01:51.270546


20180301

SH finished
0:00:57.622631
no massive missing
0:02:31.100954


20180301

SZ finished
0:01:11.394956
no massive missing
0:01:46.703387


20180302

SH finished
0:00:46.377778
no massive missing
0:02:27.111940


20180302

SZ finished
0:01:11.301163
no massive missing
0:01:45.534713


20180305

SH finished
0:00:50.216278
no massive missing
0:02:17.712078


20180305

SZ finished
0:01:03.283684
no massive missing
0:01:52.991405


20180306

SH finished
0:01:00.473223
no massive missing
0:02:34.475095


20180306

SZ finished
0:01:15.070630
no massive missing
0:01:49.094609


20180307

SH finished
0:00:49.044442
no massive missing
0:02:29.714914


20180307

SZ finished
0:01:03.419556
no massive missing
0:01:50.107908


20180308

SH finished
0:01:02.957051
no massive missing
0:02:26.906894


20180308

SZ finished
0:01:18.931645
no massive missing
0:01:53.768904


20180309

SH finished
0:01:10.625670
no massive missing
0:02:45.119639


20180309

SZ finished
0:01:12.376052
no massive missing
0:01:59.411058


20180312

SH finished
0:00:54.784323
massive missing
{9450, 9451, 9452, 9453, 9454, 9455}
has missing!!!!!!!!!!!!!!!!!!!!!!!
1917
0:03:24.402758


20180312

SZ finished
0:01:10.408181
no massive missing
0:01:56.979365


20180313

SH finished
0:01:01.288349
no massive missing
0:02:43.233399


20180313

SZ finished
0:01:09.893843
no massive missing
0:01:52.474399


20180314

SH finished
0:00:48.465443
no massive missing
0:02:32.242149


20180314

SZ finished
0:01:16.132506
no massive missing
0:01:55.983421


20180315

SH finished
0:00:53.792255
no massive missing
0:02:36.567060


20180315

SZ finished
0:01:08.036809
no massive missing
0:01:50.877369


20180316

SH finished
0:00:49.717240
no massive missing
0:02:22.023512


20180316

SZ finished
0:01:02.024844
no massive missing
0:01:50.355527


20180319

SH finished
0:00:51.211951
no massive missing
0:02:26.952046


20180319

SZ finished
0:01:06.344791
no massive missing
0:01:52.518713


20180320

SH finished
0:00:50.538520
no massive missing
0:02:31.434767


20180320

SZ finished
0:01:09.123302
no massive missing
0:01:58.181294


20180321

SH finished
0:00:58.650246
no massive missing
0:02:41.320647


20180321

SZ finished
0:01:07.515578
no massive missing
0:01:55.583952


20180322

SH finished
0:00:59.540447
no massive missing
0:02:34.383314


20180322

SZ finished
0:01:06.271846
no massive missing
0:02:08.409089


20180323

SH finished
0:01:08.905423
no massive missing
0:02:58.642374


20180323

SZ finished
0:01:16.147062
no massive missing
0:01:55.402265


20180326

SH finished
0:00:53.904882
no massive missing
0:02:38.822513


20180326

SZ finished
0:01:10.315433
no massive missing
0:02:02.801665


20180327

SH finished
0:00:54.613838
no massive missing
0:02:39.386351


20180327

SZ finished
0:01:24.457805
no massive missing
0:01:55.255778


20180328

SH finished
0:01:17.346790
no massive missing
0:02:36.632019


20180328

SZ finished
0:01:52.995547
no massive missing
0:01:56.678960


20180329

SH finished
0:00:58.985140
no massive missing
0:02:37.465330


20180329

SZ finished
0:01:07.972195
no massive missing
0:01:53.460293


20180330

SH finished
0:00:50.880343
massive missing
{9500}
has missing!!!!!!!!!!!!!!!!!!!!!!!
1924
0:02:57.579574


20180330

SZ finished
0:01:22.482547
no massive missing
0:01:59.571890


20180402

SH finished
0:00:53.659407
no massive missing
0:02:50.543890


20180402

SZ finished
0:01:25.385325
no massive missing
0:01:53.528606


20180403

SH finished
0:00:53.935805
no massive missing
0:02:42.124078


20180403

SZ finished
0:01:12.553167
no massive missing
0:01:56.694735


20180404

SH finished
0:00:51.175703
no massive missing
0:02:42.178492


20180404

SZ finished
0:01:10.213054
no massive missing
0:01:52.621428


20180409

SH finished
0:01:02.047275
no massive missing
0:02:36.620252


20180409

SZ finished
0:01:10.237689
no massive missing
0:01:58.031490


20180410

SH finished
0:00:55.997294
no massive missing
0:02:42.178211


20180410

SZ finished
0:01:08.987182
no massive missing
0:01:58.841014


20180411

SH finished
0:01:09.188987
no massive missing
0:02:42.840922


20180411

SZ finished
0:01:10.356055
no massive missing
0:01:58.644406


20180412

SH finished
0:01:03.240120
no massive missing
0:02:40.777630


20180412

SZ finished
0:01:11.594825
no massive missing
0:01:56.830940


20180413

SH finished
0:00:49.491677
no massive missing
0:02:31.240642


20180413

SZ finished
0:01:07.223688
no massive missing
0:01:54.252835


20180416

SH finished
0:00:53.503163
no massive missing
0:02:39.421032


20180416

SZ finished
0:01:06.615955
no massive missing
0:01:57.025643


20180417

SH finished
0:00:54.335624
no massive missing
0:02:41.459302


20180417

SZ finished
0:01:14.834211
no massive missing
0:01:53.740886


20180418

SH finished
0:00:51.651449
no massive missing
0:02:44.420927


20180418

SZ finished
0:01:14.591554
no massive missing
0:01:53.543939


20180419

SH finished
0:00:52.879550
no massive missing
0:02:31.637335


20180419

SZ finished
0:01:08.811475
no massive missing
0:01:52.907497


20180420

SH finished
0:00:52.275093
no massive missing
0:02:34.981127


20180420

SZ finished
0:01:10.169547
no massive missing
0:01:47.749726


20180423

SH finished
0:00:48.622882
no massive missing
0:02:27.067314


20180423

SZ finished
0:01:06.112263
no massive missing
0:01:49.481130


20180424

SH finished
0:01:02.321681
no massive missing
0:02:34.058869


20180424

SZ finished
0:01:11.818880
no massive missing
0:01:46.482207


20180425

SH finished
0:00:49.266752
no massive missing
0:02:36.769605


20180425

SZ finished
0:01:05.370643
no massive missing
0:01:49.216367


20180426

SH finished
0:00:48.392680
no massive missing
0:02:33.158630


20180426

SZ finished
0:01:08.054978
no massive missing
0:01:49.768380


20180427

SH finished
0:00:56.500773
no massive missing
0:02:29.249445


20180427

SZ finished
0:01:05.779716
no massive missing
0:01:46.702879


20180502

SH finished
0:00:49.273019
no massive missing
0:02:25.633171


20180502

SZ finished
0:01:16.390515
no massive missing
0:01:56.199223


20180503

SH finished
0:00:54.902014
no massive missing
0:02:36.405211


20180503

SZ finished
0:01:08.991486
no massive missing
0:01:51.764039


20180504

SH finished
0:00:53.712993
no massive missing
0:02:30.052606


20180504

SZ finished
0:01:14.650861
no massive missing
0:01:57.466705


20180507

SH finished
0:00:52.382169
no massive missing
0:02:40.592900


20180507

SZ finished
0:01:09.429541
no massive missing
0:01:51.648037


20180508

SH finished
0:01:01.365083
no massive missing
0:02:41.810951


20180508

SZ finished
0:01:17.016931
no massive missing
0:01:49.843001


20180509

SH finished
0:01:07.259455
no massive missing
0:02:39.135863


20180509

SZ finished
0:01:10.906421
no massive missing
0:01:57.819810


20180510

SH finished
0:00:54.149295
no massive missing
0:02:41.168783


20180510

SZ finished
0:01:09.409356
no massive missing
0:01:53.068412


20180511

SH finished
0:00:58.295728
no massive missing
0:02:39.617223


20180511

SZ finished
0:01:09.254437
no massive missing
0:01:51.852293


20180514

SH finished
0:00:58.284033
no massive missing
0:02:32.262363


20180514

SZ finished
0:01:06.260940
no massive missing
0:01:50.246625


20180515

SH finished
0:00:53.169272
no massive missing
0:02:32.866850


20180515

SZ finished
0:01:12.152009
no massive missing
0:01:53.942455


20180516

SH finished
0:00:51.618566
no massive missing
0:02:36.885369


20180516

SZ finished
0:01:12.657184
no massive missing
0:01:50.140401


20180517

SH finished
0:00:48.252726
no massive missing
0:02:28.489073


20180517

SZ finished
0:01:13.936532
no massive missing
0:01:52.469683


20180518

SH finished
0:00:49.750472
no massive missing
0:02:31.967205


20180518

SZ finished
0:01:08.770702
no massive missing
0:01:56.691070


20180521

SH finished
0:00:56.641407
no massive missing
0:02:40.895915


20180521

SZ finished
0:01:15.920615
no massive missing
0:01:57.778139


20180522

SH finished
0:00:56.129848
no massive missing
0:02:42.996159


20180522

SZ finished
0:01:16.493858
no massive missing
0:01:58.733444


20180523

SH finished
0:00:55.452481
no massive missing
0:02:42.333469


20180523

SZ finished
0:01:09.595513
no massive missing
0:01:50.233434


20180524

SH finished
0:00:55.537307
no massive missing
0:02:35.251242


20180524

SZ finished
0:01:09.986402
no massive missing
0:01:55.089427


20180525

SH finished
0:01:06.674908
no massive missing
0:02:40.360688


20180525

SZ finished
0:01:08.080139
no massive missing
0:01:55.730728


20180528

SH finished
0:01:05.502482
no massive missing
0:02:35.973667


20180528

SZ finished
0:01:24.960664
no massive missing
0:01:54.790374


20180529

SH finished
0:00:57.808996
no massive missing
0:02:32.281753


20180529

SZ finished
0:01:08.525464
no massive missing
0:01:58.158473


20180530

SH finished
0:00:52.361798
no massive missing
0:02:37.789094


20180530

SZ finished
0:01:13.734078
no massive missing
0:01:56.561942


20180531

SH finished
0:00:52.758551
no massive missing
0:02:29.862846


20180531

SZ finished
0:01:17.921222
no massive missing
0:01:54.092639


20180601

SH finished
0:00:52.719252
no massive missing
0:02:36.646017


20180601

SZ finished
0:01:13.345263
no massive missing
0:01:46.990957


20180604

SH finished
0:00:55.664462
no massive missing
0:02:23.677607


20180604

SZ finished
0:01:03.080797
no massive missing
0:01:51.842758


20180605

SH finished
0:00:49.355665
no massive missing
0:02:26.491891


20180605

SZ finished
0:01:05.716000
no massive missing
0:01:50.695225


20180606

SH finished
0:00:55.753452
no massive missing
0:02:25.146126


20180606

SZ finished
0:01:03.068462
no massive missing
0:01:50.082312


20180607

SH finished
0:01:00.851153
no massive missing
0:02:26.855612


20180607

SZ finished
0:01:12.184449
no massive missing
0:01:53.393670


20180608

SH finished
0:01:16.718661
no massive missing
0:02:28.646419


20180608

SZ finished
0:01:19.270473
no massive missing
0:01:44.322428


20180611

SH finished
0:01:11.394574
no massive missing
0:02:16.321932


20180611

SZ finished
0:01:43.116532
no massive missing
0:01:44.946479


20180612

SH finished
0:01:10.411105
no massive missing
0:02:25.194606


20180612

SZ finished
0:01:19.627915
no massive missing
0:01:42.220677


20180613

SH finished
0:00:53.853524
no massive missing
0:02:15.257775


20180613

SZ finished
0:01:29.718298
no massive missing
0:01:46.586799


20180614

SH finished
0:01:01.636288
no massive missing
0:02:23.068828


20180614

SZ finished
0:01:22.398765
no massive missing
0:02:02.567491


20180615

SH finished
0:01:10.934906
no massive missing
0:02:25.788332


20180615

SZ finished
0:01:30.557371
no massive missing
0:01:58.490865


20180619

SH finished
0:00:56.528691
no massive missing
0:02:24.659256


20180619

SZ finished
0:01:13.835553
no massive missing
0:01:45.119996


20180620

SH finished
0:01:04.135828
no massive missing
0:02:27.942070


20180620

SZ finished
0:01:27.144197
no massive missing
0:01:52.945656


20180621

SH finished
0:01:03.447051
no massive missing
0:02:32.180801


20180621

SZ finished
0:01:08.568817
no massive missing
0:01:48.491627


20180622

SH finished
0:00:49.314576
no massive missing
0:02:22.195817


20180622

SZ finished
0:01:22.479136
no massive missing
0:01:41.322034


20180625

SH finished
0:00:44.427532
no massive missing
0:02:18.069856


20180625

SZ finished
0:01:01.585358
no massive missing
0:01:41.925880


20180626

SH finished
0:00:46.717772
no massive missing
0:02:26.372985


20180626

SZ finished
0:01:01.176789
no massive missing
0:01:43.324793


20180627

SH finished
0:00:46.020068
no massive missing
0:02:21.780292


20180627

SZ finished
0:01:02.306915
no massive missing
0:01:41.161760


20180628

SH finished
0:00:46.353602
no massive missing
0:02:23.508174


20180628

SZ finished
0:01:00.784778
no massive missing
0:01:50.447148


20180629

SH finished
0:00:53.872740
no massive missing
0:02:32.593614


20180629

SZ finished
0:01:16.730769
no massive missing
0:01:47.724584


20180702

SH finished
0:00:47.888610
no massive missing
0:02:31.187850


20180702

SZ finished
0:01:05.883831
no massive missing
0:01:49.416412


20180703

SH finished
0:01:00.170641
no massive missing
0:02:34.623195


20180703

SZ finished
0:01:10.169679
no massive missing
0:01:49.510117


20180704

SH finished
0:00:52.941154
no massive missing
0:02:29.768970


20180704

SZ finished
0:01:03.632171
no massive missing
0:01:47.757620


20180705

SH finished
0:00:57.824900
no massive missing
0:02:28.400578


20180705

SZ finished
0:01:10.944371
no massive missing
0:01:51.444559


20180706

SH finished
0:01:12.643597
no massive missing
0:02:42.533922


20180706

SZ finished
0:01:27.690117
no massive missing
0:01:45.030183


20180709

SH finished
0:01:07.974198
no massive missing
0:02:26.908604


20180709

SZ finished
0:01:12.615443
no massive missing
0:01:44.884222


20180710

SH finished
0:01:03.068155
no massive missing
0:02:28.793418


20180710

SZ finished
0:01:22.000288
no massive missing
0:01:45.947905


20180711

SH finished
0:00:47.879840
no massive missing
0:02:30.827983


20180711

SZ finished
0:01:07.241192
no massive missing
0:01:50.644719


20180712

SH finished
0:00:49.191948
no massive missing
0:02:37.916852


20180712

SZ finished
0:01:20.762330
no massive missing
0:01:46.402936


20180713

SH finished
0:00:53.987840
no massive missing
0:02:31.814806


20180713

SZ finished
0:01:08.688853
no massive missing
0:01:44.081215


20180716

SH finished
0:00:45.268256
no massive missing
0:02:27.812020


20180716

SZ finished
0:01:12.440931
no massive missing
0:01:43.677211


20180717

SH finished
0:00:45.632960
no massive missing
0:02:27.343589


20180717

SZ finished
0:01:03.439663
no massive missing
0:01:44.334847


20180718

SH finished
0:00:48.858060
no massive missing
0:02:33.615769


20180718

SZ finished
0:01:04.686068
no massive missing
0:01:46.761761


20180719

SH finished
0:00:53.872659
no massive missing
0:02:29.907335


20180719

SZ finished
0:01:05.863728
no massive missing
0:01:48.167131


20180720

SH finished
0:00:49.779747
no massive missing
0:02:28.313168


20180720

SZ finished
0:01:04.172705
no massive missing
0:01:51.435358


20180723

SH finished
0:00:53.266862
no massive missing
0:02:34.042490


20180723

SZ finished
0:01:14.589893
no massive missing
0:02:00.969468


20180724

SH finished
0:01:00.946824
no massive missing
0:02:44.273356


20180724

SZ finished
0:01:24.431343
no massive missing
0:01:51.441346


20180725

SH finished
0:00:51.379306
no massive missing
0:02:36.339226


20180725

SZ finished
0:01:08.498839
no massive missing
0:01:56.124118


20180726

SH finished
0:00:50.814243
no massive missing
0:02:41.672598


20180726

SZ finished
0:01:08.694762
no massive missing
0:01:51.159072


20180727

SH finished
0:00:50.851191
no massive missing
0:02:31.552461


20180727

SZ finished
0:01:05.690058
no massive missing
0:01:50.449719


20180730

SH finished
0:00:49.978109
no massive missing
0:02:31.110174


20180730

SZ finished
0:01:09.732833
no massive missing
0:01:46.476896


20180731

SH finished
0:00:51.455172
no massive missing
0:02:24.026680


20180731

SZ finished
0:01:03.086474
         skey      date         time    clockAtArrival            datetime  \
0     2000001  20180312  94606000000  1520819166000000 2018-03-12 09:46:06   
1     2000002  20180312  94606000000  1520819166000000 2018-03-12 09:46:06   
2     2000004  20180312  94609000000  1520819169000000 2018-03-12 09:46:09   
3     2000005  20180312  94606000000  1520819166000000 2018-03-12 09:46:06   
4     2000006  20180312  94606000000  1520819166000000 2018-03-12 09:46:06   
...       ...       ...          ...               ...                 ...   
3836  2300737  20180330  95015000000  1522374615000000 2018-03-30 09:50:15   
3837  2300738  20180330  95015000000  1522374615000000 2018-03-30 09:50:15   
3838  2300739  20180330  95015000000  1522374615000000 2018-03-30 09:50:15   
3839  2300740  20180330  95015000000  1522374615000000 2018-03-30 09:50:15   
3840  2300741  20180330  95015000000  1522374615000000 2018-03-30 09:50:15   

      ordering  has_missing  cum_tra

In [3]:
startDate = 20180330
endDate = 20180330
targetStockLs = [2000001]

db = DB("mongodb://user_rw:faa96dfc@192.168.10.223")
mdData = db.read('trade', start_date=startDate, end_date=endDate, symbol=targetStockLs)
date_list = mdData["date"].unique()
mi_ss = []

for i in date_list:
    startDate = str(i)
    endDate = str(i)
    data = db.read('snapshot', start_date=startDate, end_date=endDate)
    SH1 = data[data["skey"] < 2000000]
    SZ1 = data[data["skey"] > 2000000]

In [4]:
del data
display(SH.shape[0])
display(pd.merge(SH, SH1, on=["skey", "date", "time", "clockAtArrival", "datetime", "ordering", "has_missing", "cum_trades_cnt", "cum_volume", "cum_amount", "prev_close",
                            "open", "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p','bid2p','bid1p',
                            'ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p', 'bid10q','bid9q','bid8q',
                             'bid7q','bid6q','bid5q','bid4q','bid3q','bid2q','bid1q', 'ask1q','ask2q','ask3q','ask4q','ask5q','ask6q',
                             'ask7q','ask8q','ask9q','ask10q', 'bid10n', 'bid9n', 'bid8n', 'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 
                             'ask1n', 'ask2n', 'ask3n', 'ask4n', 'ask5n', 'ask6n','ask7n', 'ask8n', 'ask9n', 'ask10n','bid1Top1q','bid1Top2q','bid1Top3q','bid1Top4q','bid1Top5q','bid1Top6q',
        'bid1Top7q','bid1Top8q','bid1Top9q','bid1Top10q','bid1Top11q','bid1Top12q','bid1Top13q','bid1Top14q','bid1Top15q','bid1Top16q','bid1Top17q','bid1Top18q',
        'bid1Top19q','bid1Top20q','bid1Top21q','bid1Top22q','bid1Top23q','bid1Top24q','bid1Top25q','bid1Top26q','bid1Top27q','bid1Top28q','bid1Top29q',
        'bid1Top30q','bid1Top31q','bid1Top32q','bid1Top33q','bid1Top34q','bid1Top35q','bid1Top36q','bid1Top37q','bid1Top38q','bid1Top39q','bid1Top40q',
        'bid1Top41q','bid1Top42q','bid1Top43q','bid1Top44q','bid1Top45q','bid1Top46q','bid1Top47q','bid1Top48q','bid1Top49q','bid1Top50q', 'ask1Top1q',
        'ask1Top2q','ask1Top3q','ask1Top4q','ask1Top5q','ask1Top6q','ask1Top7q','ask1Top8q','ask1Top9q','ask1Top10q','ask1Top11q','ask1Top12q','ask1Top13q',
        'ask1Top14q','ask1Top15q','ask1Top16q','ask1Top17q','ask1Top18q','ask1Top19q','ask1Top20q','ask1Top21q','ask1Top22q','ask1Top23q',
        'ask1Top24q','ask1Top25q','ask1Top26q','ask1Top27q','ask1Top28q','ask1Top29q','ask1Top30q','ask1Top31q','ask1Top32q','ask1Top33q',
        'ask1Top34q','ask1Top35q','ask1Top36q','ask1Top37q','ask1Top38q','ask1Top39q','ask1Top40q','ask1Top41q','ask1Top42q','ask1Top43q',
        'ask1Top44q','ask1Top45q','ask1Top46q','ask1Top47q','ask1Top48q','ask1Top49q','ask1Top50q',"total_bid_quantity", "total_ask_quantity","total_bid_vwap", "total_ask_vwap",
        "total_bid_orders",'total_ask_orders','total_bid_levels', 'total_ask_levels', 'bid_trade_max_duration', 'ask_trade_max_duration', 'cum_canceled_buy_orders', 'cum_canceled_buy_volume',
        "cum_canceled_buy_amount", "cum_canceled_sell_orders", 'cum_canceled_sell_volume',"cum_canceled_sell_amount"],
                how='outer').shape[0])
display(SZ.shape[0])
display(pd.merge(SZ, SZ1, on=["skey", "date", "time", "clockAtArrival", "datetime", "ordering", "has_missing", "cum_trades_cnt", "cum_volume", "cum_amount", "prev_close",
                            "open", "high", "low", "close", 'bid10p','bid9p','bid8p','bid7p','bid6p','bid5p','bid4p','bid3p','bid2p','bid1p',
                            'ask1p','ask2p','ask3p','ask4p','ask5p','ask6p','ask7p','ask8p','ask9p','ask10p', 'bid10q','bid9q','bid8q',
                             'bid7q','bid6q','bid5q','bid4q','bid3q','bid2q','bid1q', 'ask1q','ask2q','ask3q','ask4q','ask5q','ask6q',
                             'ask7q','ask8q','ask9q','ask10q', 'bid10n', 'bid9n', 'bid8n', 'bid7n', 'bid6n', 'bid5n', 'bid4n', 'bid3n', 'bid2n', 'bid1n', 
                             'ask1n', 'ask2n', 'ask3n', 'ask4n', 'ask5n', 'ask6n','ask7n', 'ask8n', 'ask9n', 'ask10n','bid1Top1q','bid1Top2q','bid1Top3q','bid1Top4q','bid1Top5q','bid1Top6q',
        'bid1Top7q','bid1Top8q','bid1Top9q','bid1Top10q','bid1Top11q','bid1Top12q','bid1Top13q','bid1Top14q','bid1Top15q','bid1Top16q','bid1Top17q','bid1Top18q',
        'bid1Top19q','bid1Top20q','bid1Top21q','bid1Top22q','bid1Top23q','bid1Top24q','bid1Top25q','bid1Top26q','bid1Top27q','bid1Top28q','bid1Top29q',
        'bid1Top30q','bid1Top31q','bid1Top32q','bid1Top33q','bid1Top34q','bid1Top35q','bid1Top36q','bid1Top37q','bid1Top38q','bid1Top39q','bid1Top40q',
        'bid1Top41q','bid1Top42q','bid1Top43q','bid1Top44q','bid1Top45q','bid1Top46q','bid1Top47q','bid1Top48q','bid1Top49q','bid1Top50q', 'ask1Top1q',
        'ask1Top2q','ask1Top3q','ask1Top4q','ask1Top5q','ask1Top6q','ask1Top7q','ask1Top8q','ask1Top9q','ask1Top10q','ask1Top11q','ask1Top12q','ask1Top13q',
        'ask1Top14q','ask1Top15q','ask1Top16q','ask1Top17q','ask1Top18q','ask1Top19q','ask1Top20q','ask1Top21q','ask1Top22q','ask1Top23q',
        'ask1Top24q','ask1Top25q','ask1Top26q','ask1Top27q','ask1Top28q','ask1Top29q','ask1Top30q','ask1Top31q','ask1Top32q','ask1Top33q',
        'ask1Top34q','ask1Top35q','ask1Top36q','ask1Top37q','ask1Top38q','ask1Top39q','ask1Top40q','ask1Top41q','ask1Top42q','ask1Top43q',
        'ask1Top44q','ask1Top45q','ask1Top46q','ask1Top47q','ask1Top48q','ask1Top49q','ask1Top50q',"total_bid_quantity", "total_ask_quantity","total_bid_vwap", "total_ask_vwap",
        "total_bid_orders",'total_ask_orders','total_bid_levels', 'total_ask_levels', 'bid_trade_max_duration', 'ask_trade_max_duration', 'cum_canceled_buy_orders', 'cum_canceled_buy_volume',
        "cum_canceled_buy_amount", "cum_canceled_sell_orders", 'cum_canceled_sell_volume',"cum_canceled_sell_amount"],
                how='outer').shape[0])
pd.set_option("max_rows", 200)
display(SH.dtypes)
display(SZ.dtypes)


4913590

4913590

6898515

6898515

skey                                 int32
date                                 int32
time                                 int64
clockAtArrival                       int64
datetime                    datetime64[ns]
ordering                             int32
has_missing                          int32
cum_trades_cnt                       int32
cum_volume                           int64
cum_amount                         float64
prev_close                         float64
open                               float64
high                               float64
low                                float64
close                              float64
bid10p                             float64
bid9p                              float64
bid8p                              float64
bid7p                              float64
bid6p                              float64
bid5p                              float64
bid4p                              float64
bid3p                              float64
bid2p      

skey                                 int32
date                                 int32
time                                 int64
clockAtArrival                       int64
datetime                    datetime64[ns]
ordering                             int32
has_missing                          int32
cum_trades_cnt                       int32
cum_volume                           int64
cum_amount                         float64
prev_close                         float64
open                               float64
high                               float64
low                                float64
close                              float64
bid10p                             float64
bid9p                              float64
bid8p                              float64
bid7p                              float64
bid6p                              float64
bid5p                              float64
bid4p                              float64
bid3p                              float64
bid2p      

In [3]:
pd.set_option("max_columns", 200)
d = pd.read_csv('/mnt/e/unzip_data/2018/SH/20180309/snapshot/Level1/603817.csv')
d

Unnamed: 0,NumTrades,LastPx,ImageStatus,Amount,AveragePx,TotalLongPosition,MsgSeqNum,OfferPrice,BidPrice,OfferQty,PeRatio2,SendingTime,Volume,PeRatio1,BidOrderQty,TradingPhaseCode,QuotTime,OpenPx,PreWeightedAvgPx,HighPx,ClosePx,WeightedAvgPxChg,PreClosePx,LowPx
0,0,0.00,1,0.0,0.00000,0,19,"[0.0,0.0,0.0,0.0,0.0]","[0.0,0.0,0.0,0.0,0.0]","[0,0,0,0,0]",0.0,20180309083011000,0,0.0,"[0,0,0,0,0]",S 11,20180309083010790,0.00,0.0,0.0,0.00,0.0,10.58,0.00
1,0,0.00,1,0.0,0.00000,0,176,"[0.0,0.0,0.0,0.0,0.0]","[0.0,0.0,0.0,0.0,0.0]","[0,0,0,0,0]",0.0,20180309083041000,0,0.0,"[0,0,0,0,0]",S 11,20180309083040830,0.00,0.0,0.0,0.00,0.0,10.58,0.00
2,0,0.00,1,0.0,0.00000,0,336,"[0.0,0.0,0.0,0.0,0.0]","[0.0,0.0,0.0,0.0,0.0]","[0,0,0,0,0]",0.0,20180309083111000,0,0.0,"[0,0,0,0,0]",S 11,20180309083110860,0.00,0.0,0.0,0.00,0.0,10.58,0.00
3,0,0.00,1,0.0,0.00000,0,493,"[0.0,0.0,0.0,0.0,0.0]","[0.0,0.0,0.0,0.0,0.0]","[0,0,0,0,0]",0.0,20180309083141000,0,0.0,"[0,0,0,0,0]",S 11,20180309083140890,0.00,0.0,0.0,0.00,0.0,10.58,0.00
4,0,0.00,1,0.0,0.00000,0,653,"[0.0,0.0,0.0,0.0,0.0]","[0.0,0.0,0.0,0.0,0.0]","[0,0,0,0,0]",0.0,20180309083211000,0,0.0,"[0,0,0,0,0]",S 11,20180309083210930,0.00,0.0,0.0,0.00,0.0,10.58,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3918,5379,10.74,1,78079928.0,10.67651,0,416831,"[10.75,10.76,10.77,10.78,10.79]","[10.74,10.73,10.72,10.71,10.7]","[40600,29200,23100,34500,53000]",0.0,20180309152740000,7313246,0.0,"[23100,26900,21700,41900,81000]",E111,20180309152740620,10.56,0.0,10.8,10.74,0.0,10.58,10.45
3919,5379,10.74,1,78079928.0,10.67651,0,417124,"[10.75,10.76,10.77,10.78,10.79]","[10.74,10.73,10.72,10.71,10.7]","[40600,29200,23100,34500,53000]",0.0,20180309152810000,7313246,0.0,"[23100,26900,21700,41900,81000]",E111,20180309152810710,10.56,0.0,10.8,10.74,0.0,10.58,10.45
3920,5379,10.74,1,78079928.0,10.67651,0,417417,"[10.75,10.76,10.77,10.78,10.79]","[10.74,10.73,10.72,10.71,10.7]","[40600,29200,23100,34500,53000]",0.0,20180309152840000,7313246,0.0,"[23100,26900,21700,41900,81000]",E111,20180309152840800,10.56,0.0,10.8,10.74,0.0,10.58,10.45
3921,5379,10.74,1,78079928.0,10.67651,0,417710,"[10.75,10.76,10.77,10.78,10.79]","[10.74,10.73,10.72,10.71,10.7]","[40600,29200,23100,34500,53000]",0.0,20180309152910000,7313246,0.0,"[23100,26900,21700,41900,81000]",E111,20180309152910890,10.56,0.0,10.8,10.74,0.0,10.58,10.45
